In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

## Helper Functions

#### Function that plots ROC

In [1]:
def plot_roc(y, probs):
    """Draw the ROC curve for a set of predicted scores.

    Parameters
    ----------
    y : array-like
        True labels, passed as-is to ``metrics.roc_curve``.
    probs : array-like
        Predicted scores/probabilities, passed as-is to ``metrics.roc_curve``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # roc_curve also returns the thresholds; they are not needed for the plot.
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(y, probs)
    area = metrics.auc(false_pos_rate, true_pos_rate)
    line_width = 2

    _, axes = plt.subplots(figsize=(5, 4))
    axes.plot(
        false_pos_rate,
        true_pos_rate,
        color='darkorange',
        lw=line_width,
        label='ROC curve (area = %0.3f)' % area,
    )
    # Dashed diagonal from (0, 0) to (1, 1) drawn as a visual reference.
    axes.plot([0, 1], [0, 1], color='navy', lw=line_width, linestyle='--')
    axes.set(
        xlim=[0.0, 1.0],
        ylim=[0.0, 1.05],  # small head-room above TPR = 1
        xlabel='False Positive Rate',
        ylabel='True Positive Rate',
        title='ROC Curve',
    )
    axes.legend(loc="lower right")
    plt.show()

#### Function that searches for the probability threshold with the highest F1 score, draws an ROC-like F1 Score vs. Probability Threshold plot, and returns the best threshold

In [663]:
def search_probability_threshold(y, probs, n_thresholds=2500):
    """Find the probability cut-off with the highest F1 score and plot the search.

    ``n_thresholds`` evenly spaced cut-offs in [0, 1] are evaluated. For each
    cut-off, samples whose score is strictly greater than it are labelled 1
    (all others 0) and ``metrics.f1_score`` is computed against ``y``. The
    F1-vs-threshold curve is drawn with a dashed red line at the best cut-off,
    and the best F1 score is printed.

    Parameters
    ----------
    y : array-like
        True labels, passed as-is to ``metrics.f1_score``.
    probs : array-like
        Predicted scores/probabilities for the positive class. Lists, NumPy
        arrays and pandas Series are all accepted (converted with
        ``np.asarray``).
    n_thresholds : int, default 2500
        Number of cut-offs on the search grid (must be at least 1).

    Returns
    -------
    numpy.float64
        The cut-off with the highest F1 score. On ties the lowest such
        cut-off is returned, because ``np.argmax`` picks the first maximum.
    """
    # Convert once so that plain Python lists support the vectorised comparison.
    probs = np.asarray(probs)
    thresholds = np.linspace(0, 1, n_thresholds)
    f1_scores = [metrics.f1_score(y, (probs > cutoff).astype(int))
                 for cutoff in thresholds]

    best_idx = int(np.argmax(f1_scores))
    best_threshold = thresholds[best_idx]

    # Zoom in on the low-threshold region (2 standard deviations of the
    # scores), but always keep the best threshold and its marker inside the
    # visible range, and never produce a zero-width axis.
    x_max = max(2 * np.std(probs), 1.1 * best_threshold)
    x_max = min(x_max, 1.0) if x_max > 0 else 1.0

    _fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(thresholds, f1_scores, label='F1_score')
    ax.axvline(x=best_threshold, linewidth=1, color='r', linestyle='--')
    ax.set_xlim(0, x_max)
    ax.set_xlabel('Threshold')
    ax.set_title('Best Threshold value with highest F1 score is: '
                 + str(round(best_threshold, 4)))
    ax.legend()
    print("Best F1 Score: " + str(np.max(f1_scores)))

    return best_threshold

#### Function that creates a DataFrame with Accuracy, Recall, Precision, F1 and AUC scores

In [3]:
def all_metrics_together(y, probs, preds):
    """Collect the main classification scores into a one-row DataFrame.

    Parameters
    ----------
    y : array-like
        True labels.
    probs : array-like
        Predicted scores/probabilities; used only for the AUC column.
    preds : array-like
        Predicted labels; used for accuracy, recall, precision and F1.

    Returns
    -------
    pandas.DataFrame
        A single row indexed ``'Score'`` with the columns ``Accuracy``,
        ``Recall``, ``Precision``, ``F1`` and ``AUC``.
    """
    # Scores computed from the hard label predictions, in column order.
    label_scorers = (
        ('Accuracy', metrics.accuracy_score),
        ('Recall', metrics.recall_score),
        ('Precision', metrics.precision_score),
        ('F1', metrics.f1_score),
    )
    scores = {column: scorer(y, preds) for column, scorer in label_scorers}
    # AUC is the only score computed from the probabilities.
    scores['AUC'] = metrics.roc_auc_score(y, probs)
    return pd.DataFrame(scores, index=['Score'])