In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve, roc_auc_score, classification_report, confusion_matrix, accuracy_score, f1_score, precision_score
import matplotlib.pyplot as plt

def plot_roc(y, preds, model):
    """Plot a ROC curve and print the threshold that maximizes ``tpr - fpr``.

    The curve is drawn together with the diagonal chance line, the legend
    shows ``model`` followed by the AUC (two decimals), and every fifth ROC
    point is annotated with its threshold rounded to five decimals.

    Args:
        y: True binary labels, passed unchanged to ``roc_curve`` and
            ``roc_auc_score``.
        preds: Predicted scores/probabilities for the positive class.
        model: Name of the model, used as the legend prefix.

    Returns:
        None. The function prints the selected threshold and shows the figure.
    """
    fpr, tpr, thresholds = roc_curve(y, preds)

    # Youden's J statistic: the threshold where the curve is farthest above
    # the chance diagonal.
    best_idx = np.argmax(tpr - fpr)
    print("max(tpr - fpr) w/ th = ", thresholds[best_idx])

    auc = roc_auc_score(y, preds)
    curve_label = f"{model} AUC: {auc:.2f}"

    # Chance line first so the ROC curve is drawn on top of it.
    plt.plot([0, 1], [0, 1], '--')
    roc_line, = plt.plot(fpr, tpr, label=curve_label)
    # Only the ROC curve gets a legend entry; the chance line is left out.
    plt.legend([roc_line], [curve_label])

    # Annotate a subsample of points; distinct names keep the ``y`` argument
    # from being shadowed. The label sits slightly below each point.
    for fp_rate, tp_rate, threshold in zip(fpr[::5], tpr[::5], thresholds[::5]):
        plt.annotate(np.round(threshold, 5), (fp_rate, tp_rate - 0.04))

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.show()
    
def matrix_info(th, y, probs):
    """Print classification metrics for probabilities cut at a threshold.

    Probabilities greater than or equal to ``th`` are mapped to class 1, the
    rest to class 0. The function then prints the micro-averaged F1 score,
    the micro-averaged precision, the confusion matrix and the full
    classification report.

    Args:
        th: Decision threshold applied to ``probs``.
        y: True labels.
        probs: Predicted probabilities/scores for the positive class; any
            array-like (list, ndarray, pandas Series) is accepted.

    Returns:
        None. All results are printed.
    """
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the exact
    # type it aliased. ``np.asarray`` makes the comparison work for plain
    # Python lists as well as arrays/Series.
    preds = (np.asarray(probs) >= th).astype(int)

    # NOTE(review): with average='micro' on a single-label problem, F1 and
    # precision both coincide with accuracy — confirm this is the intended
    # averaging before relying on these two numbers.
    print('f1_score:')
    print(f1_score(y, preds, average='micro'))
    print('precision_score:')
    print(precision_score(y, preds, average='micro'))
    print('Confusion Matrix:')
    print(confusion_matrix(y, preds))
    print(classification_report(y, preds))