In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [1]:
''' Performance Functions '''

def accuracy(y, y_hat):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y : array-like
        True labels.
    y_hat : array-like
        Predicted labels, compared element-wise against ``y``.

    Returns
    -------
    float
        Mean of the element-wise equality ``y == y_hat``.
    """
    # Coerce to arrays first: on plain lists/tuples ``==`` compares the whole
    # containers and yields a single bool, which would collapse the result to
    # 0.0 or 1.0 instead of the per-element match rate.
    return np.mean(np.asarray(y) == np.asarray(y_hat))

def confusion_matrix(y, y_hat):
    """Build a 2x2 confusion matrix for binary labels coded as 0 and 1.

    Parameters
    ----------
    y : array-like
        True labels; entries equal to 1 count as positive and entries equal
        to 0 as negative. Any other value is ignored.
    y_hat : array-like
        Predicted labels, positionally matched with ``y``.

    Returns
    -------
    pandas.DataFrame
        Rows are ``'Pred Pos'`` / ``'Pred Neg'`` and columns are
        ``'Actual Pos'`` / ``'Actual Neg'``, i.e. the cell layout is
        ``[[TP, FP], [FN, TN]]``.
    """
    # Coerce so that plain lists/tuples support boolean-mask indexing.
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)

    actual_pos = y == 1
    actual_neg = y == 0

    true_pos = np.sum(y_hat[actual_pos] == 1)
    true_neg = np.sum(y_hat[actual_neg] == 0)
    # Misses are derived by subtraction, so any prediction other than the
    # correct label (not only the opposite label) counts as an error.
    false_neg = np.count_nonzero(actual_pos) - true_pos
    false_pos = np.count_nonzero(actual_neg) - true_neg

    return pd.DataFrame(
        {'Actual Pos': [true_pos, false_neg], 'Actual Neg': [false_pos, true_neg]},
        index=['Pred Pos', 'Pred Neg'],
    )

def tpr_fpr(X, y, model, step_thresh = 50, show_curve = False):
    """Record true- and false-positive rates over a grid of thresholds.

    Parameters
    ----------
    X : object
        Inputs forwarded unchanged to ``model.predict``.
    y : array-like
        True binary labels, passed to ``confusion_matrix``.
    model : object
        Must expose ``predict(X, thresh=...)``; its output is used as the
        predicted labels. (Presumably ``thresh`` is a probability cutoff --
        confirm against the model class.)
    step_thresh : int
        Number of evenly spaced thresholds taken from 0 to 1 inclusive.
    show_curve : bool
        When true, plot TPR against FPR on the current matplotlib axes.

    Returns
    -------
    dict
        ``'TPR'``, ``'FPR'`` and ``'Threshold'`` arrays, each of length
        ``step_thresh``.
    """
    threshs = np.linspace(0., 1., step_thresh)
    tpr_vals = np.zeros(step_thresh)
    fpr_vals = np.zeros(step_thresh)

    for idx, cutoff in enumerate(threshs):
        predicted = model.predict(X, thresh = cutoff)
        cells = confusion_matrix(y, predicted).to_numpy()
        # Cell layout from confusion_matrix: [[TP, FP], [FN, TN]].
        tp, fp = cells[0, 0], cells[0, 1]
        fn, tn = cells[1, 0], cells[1, 1]
        tpr_vals[idx] = tp / (tp + fn)
        fpr_vals[idx] = fp / (tn + fp)

    if show_curve:
        plt.plot(fpr_vals, tpr_vals)
        plt.xlabel('FPR')
        plt.ylabel('TPR')
        plt.title('FPR vs TPR', fontsize = 15)

    return {'TPR': tpr_vals, 'FPR': fpr_vals, 'Threshold': threshs}

def min_cost_threshold(X, y, model, step_thresh = 50, fn_cost = 500, fp_cost = 10):
    """Find the threshold that minimises total misclassification cost.

    Parameters
    ----------
    X : object
        Inputs forwarded unchanged to ``model.predict``.
    y : array-like
        True binary labels, passed to ``confusion_matrix``.
    model : object
        Must expose ``predict(X, thresh=...)``; its output is used as the
        predicted labels.
    step_thresh : int
        Number of evenly spaced thresholds evaluated from 0 to 1 inclusive.
    fn_cost : float
        Cost charged per false negative (default 500).
    fp_cost : float
        Cost charged per false positive (default 10).

    Returns
    -------
    tuple
        ``(lowest_cost, threshold)``. When several thresholds tie for the
        lowest cost, the smallest such threshold is returned.
    """
    threshs = np.linspace(0., 1., step_thresh)
    cost = np.zeros(step_thresh)
    for i, thresh in enumerate(threshs):
        y_hat = model.predict(X, thresh = thresh)
        M = confusion_matrix(y, y_hat).to_numpy()
        # Cell layout from confusion_matrix: [[TP, FP], [FN, TN]].
        false_neg, false_pos = M[1, 0], M[0, 1]
        cost[i] = fn_cost * false_neg + fp_cost * false_pos
    best = np.argmin(cost)
    return cost[best], threshs[best]