In [None]:
import os

import h5py
import numpy as np

In [None]:
# Clip length (seconds) and classifier variant; together they select which
# metrics file is read and how the ROC figure is named.
SECONDS_PER_CLIP = 5
DATA_TYPE = 'unbalanced_data'
# Human-readable class names; position in the list is the integer class index
# used by the label tallies and the ROC legend further down.
CLASS_LABELS = ['Safe', 'Violent', 'Sexual', 'Both']
# Resolution passed to savefig for the ROC figure.
DPI = 150

# Root of the data directory. The default is the original location; set the
# KIDSTUBE_DATA_HOME environment variable to run the notebook on another
# machine without editing this cell. Joining with '' guarantees the trailing
# separator that the `HOME_PATH + METRICS_PATH` concatenation relies on.
HOME_PATH = os.path.join(
    os.environ.get('KIDSTUBE_DATA_HOME', '/home/shubhams/Hercules/kidstube-data/'),
    '',
)
METRICS_PATH = 'metrics/aggregate_{0}_sec/{1}_classifier/evaluation_metric_split_0.hdf5'.format(SECONDS_PER_CLIP, DATA_TYPE)
ROC_PLOT_PATH = 'aggregate_{0}_sec_{1}_classifier_roc.pdf'.format(SECONDS_PER_CLIP, DATA_TYPE)

In [None]:
def load_data(name, path=HOME_PATH+METRICS_PATH):
    """Read one dataset from an HDF5 file fully into memory.

    Args:
        name: Key of the dataset inside the HDF5 file (e.g. ``'y_true'``).
        path: Location of the HDF5 file. Defaults to the metrics file
            assembled from the configuration constants.

    Returns:
        The dataset contents as an in-memory numpy array (or a scalar for a
        scalar dataset), so the result stays usable after the file is closed.

    Note:
        ``name`` is expected to refer to a dataset; ``[()]`` is not defined
        for HDF5 groups.
    """
    # The context manager closes the file on every call. Previously the file
    # was left open and a live dataset handle was returned, leaking one file
    # handle per call.
    with h5py.File(path, 'r') as f:
        return f[name][()]

In [None]:
from sklearn.metrics import confusion_matrix

def get_confusion_matrix(y_true, y_pred, normalise=False):
    """Build the confusion matrix for a set of predictions.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        normalise: When true, divide every row by its own sum so that each
            row holds fractions instead of counts.

    Returns:
        The matrix produced by sklearn's ``confusion_matrix``; a float matrix
        of row fractions when ``normalise`` is set.

    Note:
        A row whose sum is zero yields NaN entries when normalising.
    """
    matrix = confusion_matrix(y_true, y_pred)
    if not normalise:
        return matrix
    # keepdims keeps the totals as a column vector so the division
    # broadcasts across each row.
    row_totals = matrix.sum(axis=1, keepdims=True)
    return matrix.astype('float') / row_totals

In [None]:
def get_error_rates(confusion_matrix):
    """Derive per-class TP / FP / FN / TN counts from a confusion matrix.

    The column sums are used for false positives and the row sums for false
    negatives, i.e. the matrix is read as rows = true class, columns =
    predicted class.

    Args:
        confusion_matrix: Square 2-D array-like of counts.

    Returns:
        Tuple ``(tp, fp, fn, tn)`` of 1-D arrays with one entry per class.
    """
    # Work on the argument itself. The previous version read the notebook
    # global `conf_mat` when computing true negatives, so it failed on a
    # fresh kernel and silently used the wrong matrix otherwise.
    matrix = np.asarray(confusion_matrix)
    tp = np.diag(matrix)
    fp = matrix.sum(axis=0) - tp
    fn = matrix.sum(axis=1) - tp
    # Every cell is exactly one of TP/FP/FN/TN with respect to a given class,
    # so TN is whatever remains of the grand total. This equals summing the
    # matrix with that class's row and column removed.
    tn = matrix.sum() - (tp + fp + fn)
    return tp, fp, fn, tn

In [None]:
def get_precision_recall(tp, fp, fn):
    """Compute per-class precision and recall from error counts.

    Args:
        tp: True-positive counts (scalar or array-like).
        fp: False-positive counts, same shape as ``tp``.
        fn: False-negative counts, same shape as ``tp``.

    Returns:
        Tuple ``(precision, recall)`` of float arrays where
        ``precision = tp / (tp + fp)`` and ``recall = tp / (tp + fn)``.
        Where a denominator is zero (a class that was never predicted, or
        never present) the ratio is undefined and is reported as 0.0 instead
        of NaN.
    """
    # Coerce to float arrays so plain Python lists are added element-wise
    # rather than concatenated by `+`.
    tp = np.asarray(tp, dtype=float)
    predicted_positive = tp + np.asarray(fp, dtype=float)
    actual_positive = tp + np.asarray(fn, dtype=float)

    # `where` skips the zero-denominator entries, which keep the 0.0 from
    # `out`; this avoids both the NaN result and the RuntimeWarning.
    precision = np.divide(tp, predicted_positive,
                          out=np.zeros_like(predicted_positive),
                          where=predicted_positive != 0)
    recall = np.divide(tp, actual_positive,
                       out=np.zeros_like(actual_positive),
                       where=actual_positive != 0)
    return precision, recall

In [None]:
def get_binarized_labels(y_true, num_classes):
    """One-hot encode a sequence of integer class labels.

    Args:
        y_true: Iterable of integer class indices.
        num_classes: Length of each one-hot row.

    Returns:
        A numpy array with one row per label; each row is all zeros except
        for a 1 at the label's index.
    """
    def one_hot(label):
        # Fresh row per label so rows never share storage.
        row = [0] * num_classes
        row[label] = 1
        return row

    return np.array([one_hot(label) for label in y_true])

In [None]:
# Fetch the ground-truth labels, the predicted labels and the prediction
# scores from the metrics file, in that order.
y_true, y_pred, y_pred_score = (
    load_data(key) for key in ('y_true', 'y_pred', 'y_pred_score')
)

In [None]:
# Tally how many samples carry each ground-truth class index.
label_true = [0] * len(CLASS_LABELS)
for class_index in y_true:
    label_true[class_index] += 1
label_true

In [None]:
# Tally how many samples were assigned to each class index by the classifier.
label_pred = [0] * len(CLASS_LABELS)
for class_index in y_pred:
    label_pred[class_index] += 1
label_pred

In [None]:
# Raw (count) confusion matrix, then its per-row totals as the cell output.
conf_mat = get_confusion_matrix(y_true, y_pred)
print(conf_mat)
conf_mat.sum(axis=1)

In [None]:
# Row-normalised confusion matrix: every row divided by its own total.
row_totals = conf_mat.sum(axis=1, keepdims=True)
conf_mat.astype('float') / row_totals

In [None]:
# Per-class error counts derived from the confusion matrix.
tp, fp, fn, tn = get_error_rates(conf_mat)
for caption, counts in (('TP: ', tp), ('FP: ', fp), ('FN: ', fn), ('TN: ', tn)):
    print(caption, counts)

# Per-class precision and recall from those counts.
precision, recall = get_precision_recall(tp, fp, fn)
print('\nPrecision: ', precision)
print('Recall: ', recall)

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

# One-vs-rest setup: column k of `y` marks membership of class k, and
# column k of `y_hat` is used as the score for class k.
y = get_binarized_labels(y_true, len(CLASS_LABELS))
y_hat = np.array(y_pred_score)
n_classes = y.shape[1]

# ROC curve and area under it for every class, keyed by class index.
fpr, tpr, roc_auc = {}, {}, {}
for class_index in range(n_classes):
    # The third return value (the thresholds) is not needed.
    fpr[class_index], tpr[class_index], _ = roc_curve(y[:, class_index], y_hat[:, class_index])
    roc_auc[class_index] = auc(fpr[class_index], tpr[class_index])

In [None]:
# Display the per-class area under the ROC curve, keyed by class index.
roc_auc

In [None]:
%matplotlib inline

from itertools import cycle
import matplotlib.pyplot as plt

# Draw one ROC curve per class on a single set of axes, then save and show it.
fig, ax = plt.subplots()
lw = 2
colors = cycle(['red', 'darkorange', 'cornflowerblue', 'aqua'])
for i, class_label, color in zip(range(n_classes), CLASS_LABELS, colors):
    curve_label = 'ROC curve of {0} (area = {1:0.2f})'.format(class_label, roc_auc[i])
    ax.plot(fpr[i], tpr[i], color=color, lw=lw, label=curve_label)

# Dashed diagonal from (0, 0) to (1, 1) for reference.
ax.plot([0, 1], [0, 1], 'k--', lw=lw)
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate', fontsize=16)
ax.set_ylabel('True Positive Rate', fontsize=16)
ax.tick_params(labelsize=14)
ax.legend(loc="lower right", fontsize=12)
ax.grid(True)
fig.savefig(ROC_PLOT_PATH, dpi=DPI, bbox_inches='tight')
plt.show()