In [None]:
import itertools
import json
import random
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

from esper.widget import *
import esper.face_embeddings as face_embeddings

def plot_confusion_matrix(y_truth, y_pred, normalize=True):
    """Plot the binary black vs. non-black confusion matrix as a heat map.

    Rows are the hand labels and columns are the predicted labels; index 0 is
    drawn as 'Non-black' and index 1 as 'Black'.

    Args:
        y_truth: Sequence of hand labels (0 or 1).
        y_pred: Sequence of predicted labels (0 or 1), aligned with y_truth.
        normalize: If True, each row is divided by its total so the cells
            show per-hand-label fractions; if False, raw counts are shown.

    Returns:
        None. The figure is rendered with plt.show().
    """
    # Pin the label order so the matrix is always 2x2 and lines up with the
    # two tick labels below, even when one class is absent from the inputs.
    cm = confusion_matrix(y_truth, y_pred, labels=[0, 1])

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no hand-labelled examples produces an all-zero row;
        # keep it at 0.0 instead of dividing by zero and plotting NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    classes = ['Non-black', 'Black']
    plt.figure(figsize=(5, 5))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds)
    plt.title('Black vs. non-black confusion matrix')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Fractions get two decimals, raw counts are printed as integers.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for legibility.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('Hand label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.show()

def print_pred_dist(pred):
    """Print the share of predictions that received each distinct score.

    Args:
        pred: Sized collection of indexable items; the element at index 1 of
            each item is treated as the score and formatted to two decimals.

    One line is printed per distinct score, in ascending score order, holding
    the formatted score and the fraction of items that carry it.
    """
    total = len(pred)
    tally = Counter(entry[1] for entry in pred)
    for score in sorted(tally):
        share = tally[score] / total
        print('', '{:0.2f}'.format(score), share)

In [None]:
# Training split: the JSON object's keys and values are both cast to int,
# then unpacked into two parallel lists (ids and their hand labels).
with open('/app/data/black.1k.random.json') as f:
    train_label_by_id = {
        int(raw_id): int(raw_label)
        for raw_id, raw_label in json.load(f).items()
    }
train_ids = list(train_label_by_id)
train_y = list(train_label_by_id.values())
del train_label_by_id

# Test split: same layout, loaded from the smaller file.
with open('/app/data/black.250.random.json') as f:
    test_label_by_id = {
        int(raw_id): int(raw_label)
        for raw_id, raw_label in json.load(f).items()
    }
test_ids = list(test_label_by_id)
test_y = list(test_label_by_id.values())
del test_label_by_id

In [None]:
# Scores below this value become label 0; everything else becomes label 1.
PRED_THRESHOLD = 0.3
# Forwarded to knn_predict as `k` (presumably the neighbour count — confirm
# against face_embeddings.knn_predict).
K = 15

# --- Test split: raw scores, then thresholded labels ordered like test_ids ---
test_pred_r = face_embeddings.knn_predict(train_ids, train_y, k=K, ids=test_ids)
print('Test dist:')
print_pred_dist(test_pred_r)
test_pred_d = {
    face_id: (0 if score < PRED_THRESHOLD else 1)
    for face_id, score in test_pred_r
}
test_pred = [test_pred_d[face_id] for face_id in test_ids]

# --- Train split: same procedure, querying the training ids themselves ---
train_pred_r = face_embeddings.knn_predict(train_ids, train_y, k=K, ids=train_ids)
print('train dist:')
print_pred_dist(train_pred_r)
train_pred_d = {
    face_id: (0 if score < PRED_THRESHOLD else 1)
    for face_id, score in train_pred_r
}
train_pred = [train_pred_d[face_id] for face_id in train_ids]

# --- Train metrics and plots ---
print('Train accuracy:', accuracy_score(train_y, train_pred))
print('Train f1-score:', f1_score(train_y, train_pred))
print('Non-normalized train confusion matrix:')
plot_confusion_matrix(train_y, train_pred, normalize=False)
print('Normalized train confusion matrix:')
plot_confusion_matrix(train_y, train_pred)

# --- Test metrics and plots ---
print('Test accuracy:', accuracy_score(test_y, test_pred))
print('Test f1-score:', f1_score(test_y, test_pred))
print('Non-normalized test confusion matrix:')
plot_confusion_matrix(test_y, test_pred, normalize=False)
print('Normalized test confusion matrix:')
plot_confusion_matrix(test_y, test_pred)

# False Positives

In [None]:
# Browse test faces predicted as 1 whose hand label is 0.
esper_widget(
    qs_to_result(
        Face.objects.filter(id__in=[
            face_id
            for face_id, predicted, actual in zip(test_ids, test_pred, test_y)
            if (predicted, actual) == (1, 0)
        ]),
        limit=1000),
    results_per_page=50)

# False Negatives

In [None]:
# Browse test faces predicted as 0 whose hand label is 1.
esper_widget(
    qs_to_result(
        Face.objects.filter(id__in=[
            face_id
            for face_id, predicted, actual in zip(test_ids, test_pred, test_y)
            if (predicted, actual) == (0, 1)
        ]),
        limit=1000),
    results_per_page=50)