In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt 
%matplotlib inline

# Model Performance

Input: `results.csv`, an n x 3 comma-separated numeric matrix in which the 1st column is the predicted label, the 2nd column is the actual label, and the 3rd column is the predicted probability (used as the score for the ROC curve).

Output: Model performance metrics (graphs of ROC curves, confusion matrices, etc.)

In [4]:
def model_performance(performance_metric, results_path='results.csv'):
    """Return (or plot) a performance metric computed from a results file.

    The results file is read as a comma-separated numeric matrix with one
    row per sample: column 0 holds the predicted label, column 1 the actual
    label, and column 2 the probability/score handed to ``roc_curve``
    (column 2 is only read when 'roc' is requested).

    Parameters
    ----------
    performance_metric : str
        One of 'confusion_matrix', 'recall', 'precision', 'accuracy', 'f1',
        'roc' or 'all'. Any value that is not one of the named metrics is
        treated like 'all'.
    results_path : str, optional
        Path of the results CSV file. Defaults to 'results.csv'.

    Returns
    -------
    - 'confusion_matrix': the matrix C for labels [0, 1], where C[0, 0] is
      the count of true negatives, C[1, 0] false negatives, C[1, 1] true
      positives and C[0, 1] false positives.
    - 'recall', 'precision', 'accuracy', 'f1': the corresponding score.
    - 'roc': None; the ROC curve is drawn and shown instead.
    - anything else (e.g. 'all'): the list [accuracy, recall, precision, f1].

    Raises
    ------
    ValueError
        If the file has fewer columns than the requested metric needs.
    """
    # ndmin=2 keeps a single-row file two-dimensional, so the column
    # slicing below works for one sample as well as for many.
    data = np.loadtxt(results_path, delimiter=",", ndmin=2)

    required_columns = 3 if performance_metric == 'roc' else 2
    if data.shape[1] < required_columns:
        raise ValueError(
            "%s must have at least %d columns for metric %r, found %d"
            % (results_path, required_columns, performance_metric, data.shape[1])
        )

    predicted = data[:, 0]
    true = data[:, 1]

    if performance_metric == 'confusion_matrix':
        return confusion_matrix(true, predicted, labels=[0, 1])

    if performance_metric == 'roc':
        probabilities = data[:, 2]
        fpr, tpr, _ = roc_curve(true, probabilities)
        # Draw on a fresh figure rather than reusing global figure number 1,
        # so an existing figure is never overplotted.
        fig, ax = plt.subplots()
        ax.plot([0, 1], [0, 1])  # diagonal reference line
        ax.plot(fpr, tpr)
        ax.set_xlabel('False positive rate')
        ax.set_ylabel('True positive rate')
        ax.set_title('ROC curve')
        plt.show()
        return None

    # The scalar scores are only computed when one of them is requested, so
    # the confusion matrix and ROC branches do not depend on them.
    prec, rec, f1, _ = precision_recall_fscore_support(
        true, predicted, average='binary'
    )
    acc = accuracy_score(true, predicted)

    scores = {
        'recall': rec,
        'accuracy': acc,
        'f1': f1,
        'precision': prec,
    }
    if performance_metric in scores:
        return scores[performance_metric]

    # 'all' (and, as before, any unrecognised name) yields the summary list.
    return [acc, rec, prec, f1]