In [169]:
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score
from sklearn.metrics import f1_score as f1

In [183]:
def roc_auc(actual, preds):
    # pred should be a list of predicted probabilities between 0 and 1 of a category
    # actual should be a list of 1s and 0s for the actual target category
    # Returns the area under the curve at each threshold
    actual = np.array(actual)
    tpr = []
    fpr = []
    for thresh in [x / 100.0 for x in range(0, 101)]:
        preds_t = np.array(preds) >= thresh
        tp = sum(preds_t & actual)
        fp = sum(preds_t & ~actual)
        tn = sum(~preds_t & ~actual)
        fn = sum(~preds_t & actual)
        tpr.append(tp/(tp+fn))
        fpr.append(tn/(tn+fp))
        
    auc = 0
    for i in range(0, len(tpr)-1):
        auc += ((tpr[i] + tpr[i+1])/2) * (fpr[i+1]-fpr[i])
    return auc

In [171]:
def accuracy(actual, preds, thresh):
    # pred should be a list of predicted probabilities between 0 and 1 of a category
    # actual should be a list of 1s and 0s for the actual target category
    # thresh should be a float between 0 and 1
    # Returns the accuracy at the given threshold
    preds = np.array(preds)
    actual = np.array(actual)
    preds = (preds >= thresh)
    actual = (actual == 1)
    acc = np.count_nonzero(preds == actual)/len(actual)
    return acc

In [172]:
def f1_score(actual, preds, thresh):
    preds = (np.array(preds) >= thresh)
    actual = (np.array(actual) == 1)
    tp = np.count_nonzero(preds & actual)
    fp = np.count_nonzero(preds & ~actual)
    fn = np.count_nonzero(~preds & actual)
    rec = tp/(tp+fn)
    prec = tp/(tp+fp)
    f1 = (2 * rec * prec)/(rec + prec)
    return(f1)

In [173]:
def precision(actual, preds, thresh):
    preds = (np.array(preds) >= thresh)
    actual = (np.array(actual) == 1)
    tp = np.count_nonzero(preds & actual)
    fp = np.count_nonzero(preds & ~actual)
    prec = tp/(tp+fp)
    return prec

In [174]:
random.seed(42)
rand_preds = []
rand_actual = []
for i in range(1000):
    rand_preds.append(random.randint(0, 100)/100)
    rand_actual.append(random.randint(0, 1))
rand_a_actual = np.array(rand_actual)
rand_a_pred = (np.array(rand_preds) >= 0.5)

In [184]:
roc_auc(rand_actual, rand_preds)

0.5242958662796517

In [185]:
roc_auc_score(rand_a_actual, rand_preds)

0.524327895105595

In [177]:
accuracy(rand_actual, rand_preds, 0.5)

0.505

In [178]:
accuracy_score(rand_a_actual, rand_a_pred)

0.505

In [179]:
f1_score(rand_actual, rand_preds, 0.5)

0.5208131655372702

In [180]:
f1(rand_a_actual, rand_a_pred)

0.5208131655372702

In [181]:
precision(rand_actual, rand_preds, 0.5)

0.5193050193050193

In [182]:
precision_score(rand_a_actual, rand_a_pred)

0.5193050193050193