## Classification Metrics

### Table of Contents

* [Cross validation evaluation](#cv_evaluation)
* [Confusion matrix](#confusion_matrix)
* [Precision-Recall](#precision_recall)
* [ROC curve](#roc)

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# IPython magic: render matplotlib figures inline, directly below the cell.
%matplotlib inline
# Default size (width, height) for every figure that does not pass its own
# `figsize`.
matplotlib.rcParams['figure.figsize'] = (20, 10)
# Seed NumPy's global random number generator so that runs are reproducible.
np.random.seed(42)

In [None]:
from sklearn.datasets import fetch_openml

# Fetch version 1 of the 'mnist_784' dataset from OpenML as plain arrays
# (as_frame=False) instead of pandas objects.
mnist = fetch_openml(name='mnist_784', version=1, as_frame=False)
# Store the targets as unsigned 8-bit integers so they can be compared with
# integer digits (e.g. `y_train == 5`).
mnist.target = mnist.target.astype(np.uint8)


In [None]:
# Alternative data source (disabled):
#   import sklearn.datasets as datasets
#   mnist = datasets.load_digits()

# Feature matrix and label vector.
X = mnist.data
y = mnist.target
# The first 60000 samples form the training set, the remainder the test set.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]
# Boolean targets for a binary "5 vs. not-5" task: True where the digit is 5.
y_train_5 = y_train == 5
y_test_5 = y_test == 5

In [None]:
# Rows of X_train are samples, columns are features.
num_samples, num_features = X_train.shape
print(f'\nNumber of samples {num_samples}\nNumber of features {num_features}\n')

In [None]:
from sklearn.linear_model import SGDClassifier

# Linear classifier trained with stochastic gradient descent; the fixed
# random_state keeps repeated runs reproducible.
sgd_clf = SGDClassifier(
    max_iter=1000,
    tol=1e-3,
    random_state=42,
)
# Train the binary "is this digit a 5?" detector on the whole training set.
sgd_clf.fit(X_train, y_train_5)

### Cross validation evaluation <a class="anchor" id="cv_evaluation"></a>

In [None]:
from sklearn.model_selection import cross_val_score

# Accuracy of the classifier on each of the 3 cross-validation folds.
cv_accuracy = cross_val_score(
    estimator=sgd_clf, X=X_train, y=y_train_5, scoring="accuracy", cv=3
)
print(f'cross validations accuracies: {cv_accuracy}')

### Confusion matrix <a class="anchor" id="confusion_matrix"></a>

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict

# cross_val_predict splits the data according to `cv`. Every sample belongs to
# exactly one test fold, and its prediction comes from an estimator fitted on
# the corresponding training folds.
# NOTE: it is used here for the sake of example only; prefer not to rely on
# these pooled predictions as a measure of generalization performance.
y_predictions = cross_val_predict(
    estimator=sgd_clf, X=X_train, y=y_train_5, cv=3, method="predict"
)

# Rows correspond to the true classes, columns to the predicted classes.
confusion_matrix(y_true=y_train_5, y_pred=y_predictions)

### Multiclass confusion matrix 

In [None]:
from sklearn.metrics import confusion_matrix

y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
# `labels` fixes the row/column order of the matrix. "tal" occurs in neither
# list, so its row and column contain only zeros.
confusion_matrix(
    y_true=y_true,
    y_pred=y_pred,
    labels=["ant", "bird", "cat", "tal"],
)


### Multilabel confusion matrix 

In [None]:
from sklearn.metrics import multilabel_confusion_matrix

# Each row is one sample. Column 0 holds the indicator for class 1 and
# column 1 the indicator for class 2, in both the labels and the predictions.
y_true = np.array([[0, 0], [0, 1], [1, 1], [0, 1], [0, 1], [1, 1]])
y_pred = np.array([[1, 1], [0, 1], [0, 1], [1, 0], [0, 1], [1, 1]])
# One confusion matrix per label: m[0] for class 1, m[1] for class 2.
m = multilabel_confusion_matrix(y_true, y_pred)
print(
    "\nconfusion matrix for class 1: \n"
    f"{m[0]}\n"
    "confusion matrix for class 2: \n"
    f"{m[1]}\n"
)

## Precision-Recall <a class="anchor" id="precision_recall"></a>

In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

y_true = [0, 1, 2, 0, 1, 2, 2]
y_pred = [1, 2, 1, 0, 1, 1, 2]

# Averaging modes for multiclass scores:
#   'micro' - compute the metric globally by counting the total true
#             positives, false negatives and false positives.
#   'macro' - compute the metric for each label and take the unweighted mean;
#             label imbalance is not taken into account.
precision_score(y_true=y_true, y_pred=y_pred, average='macro')

In [None]:
# Macro averaging by hand, starting from the confusion matrix
# (rows: true classes, columns: predicted classes).
cm = confusion_matrix(y_true, y_pred)
# Per-class recall: correct predictions divided by the row sums.
recall = cm.diagonal() / cm.sum(axis=1)
# Per-class precision: correct predictions divided by the column sums.
precision = cm.diagonal() / cm.sum(axis=0)
# Per-class F1: harmonic mean of recall and precision.
f1 = (2 * recall * precision) / (recall + precision)
# The macro score is the unweighted mean of the per-class values.
print(
    f"\nrecall: {recall.mean()}\n"
    f"precision: {precision.mean()}\n"
    f"f1: {f1.mean()}\n"
)

In [None]:
from sklearn.metrics import f1_score

# Binary example; the multiclass score below reuses y_true / y_pred defined in
# an earlier cell.
y_true_b = [0, 1, 1, 0, 1, 0, 0]
y_pred_b = [0, 0, 1, 0, 0, 1, 0]
print(
    f"\nbinary: {f1_score(y_true_b, y_pred_b, average='binary')}\n"
    f"multiclass: {f1_score(y_true, y_pred, average='macro')}\n"
)

### Precision-recall tradeoff

In [None]:
from sklearn.metrics import precision_recall_curve

# Out-of-fold decision scores for every training sample (3-fold CV), obtained
# from the classifier's decision_function instead of hard predictions.
y_scores = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3, method="decision_function")
# `thresholds` holds the decision thresholds used to obtain each
# precision/recall pair; `precisions` and `recalls` have one extra trailing
# element, which is why they are sliced with [:-1] when plotted against it.
# The arguments are passed positionally because the keyword for the scores was
# renamed from `probas_pred` to `y_score` in scikit-learn 1.5 and the old name
# was removed in 1.7; the positional form works on every version.
precisions, recalls, thresholds = precision_recall_curve(y_train_5, y_scores)

def plot_precision_recall_vs_threshold(precisions, recalls, thresholds):
    """Plot precision and recall as functions of the decision threshold.

    The last element of ``precisions`` and ``recalls`` is dropped so that both
    curves are plotted against ``thresholds``. The curves are drawn with
    ``plt`` on the current figure; ``plt.show()`` is left to the caller.
    """
    curves = (
        (precisions, "b--", "Precision"),
        (recalls, "g-", "Recall"),
    )
    for values, line_style, curve_name in curves:
        plt.plot(thresholds, values[:-1], line_style, label=curve_name, linewidth=2)
    plt.legend(loc="center right", fontsize=16)
    plt.xlabel("Threshold", fontsize=16)
    plt.grid(True)
    # x-axis spans the full threshold range; y-axis is fixed to [0, 1].
    plt.axis([min(thresholds), max(thresholds), 0, 1])



# Index of the first point on the curve whose precision is at least 0.90.
idx_90_precision = np.argmax(precisions >= 0.90)
recall_90_precision = recalls[idx_90_precision]
print(f'recall for 0.9 precision: {recall_90_precision}')
threshold_90_precision = thresholds[idx_90_precision]

plt.figure(figsize=(10, 6))
plot_precision_recall_vs_threshold(precisions, recalls, thresholds)
# Red dotted guide lines and markers for the 0.9-precision operating point:
# a vertical line at its threshold, plus horizontal lines at precision 0.9
# and at the matching recall.
threshold_min = min(thresholds)
plt.plot([threshold_90_precision] * 2, [0., 0.9], "r:")
plt.plot([threshold_min, threshold_90_precision], [0.9] * 2, "r:")
plt.plot([threshold_min, threshold_90_precision], [recall_90_precision] * 2, "r:")
plt.plot([threshold_90_precision], [0.9], "ro")
plt.plot([threshold_90_precision], [recall_90_precision], "ro")
plt.show()

In [None]:
def plot_precision_vs_recall(precisions, recalls):
    """Plot precision (y-axis) against recall (x-axis).

    Both axes are fixed to [0, 1]. The curve is drawn with ``plt`` on the
    current figure; ``plt.show()`` is left to the caller.
    """
    plt.plot(recalls, precisions, "b-", linewidth=2)
    for set_axis_label, text in ((plt.xlabel, "Recall"), (plt.ylabel, "Precision")):
        set_axis_label(text, fontsize=16)
    plt.axis([0, 1, 0, 1])
    plt.grid(True)

plt.figure(figsize=(10, 6))
plot_precision_vs_recall(precisions, recalls)
# Mark the point known as recall@precision90: the recall reached at 0.9
# precision, with red dotted guide lines down to both axes.
plt.plot([recall_90_precision] * 2, [0., 0.9], "r:")
plt.plot([0.0, recall_90_precision], [0.9] * 2, "r:")
plt.plot([recall_90_precision], [0.9], "ro")
plt.show()

## ROC Curve <a class="anchor" id="roc"></a>

In [None]:
from sklearn.metrics import roc_curve

# False positive rate and true positive rate at each decision threshold,
# computed from the same cross-validated scores as the precision-recall curve.
# NOTE: this rebinds `thresholds`, replacing the precision-recall thresholds
# computed in an earlier cell.
fpr, tpr, thresholds = roc_curve(y_true=y_train_5, y_score=y_scores)

In [None]:

def plot_roc_curve(fpr, tpr, label=None):
    """Plot a ROC curve: true positive rate against false positive rate.

    A dashed black diagonal from (0, 0) to (1, 1) is drawn as well, and both
    axes are fixed to [0, 1]. ``label`` is forwarded to ``plt.plot`` for the
    curve. ``plt.show()`` is left to the caller.
    """
    axis_font = {"fontsize": 16}
    plt.plot(fpr, tpr, label=label, linewidth=2)
    # Dashed diagonal.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate (Fall-Out)', **axis_font)
    plt.ylabel('True Positive Rate (Recall)', **axis_font)
    plt.grid(True)

plt.figure(figsize=(8, 6))
plot_roc_curve(fpr, tpr)
# False positive rate at the first ROC point whose true positive rate reaches
# the recall obtained at 0.9 precision.
fpr_90 = fpr[np.argmax(tpr >= recall_90_precision)]
# Red dotted guide lines from that point down to both axes, plus a marker.
plt.plot([fpr_90] * 2, [0., recall_90_precision], "r:")
plt.plot([0.0, fpr_90], [recall_90_precision] * 2, "r:")
plt.plot([fpr_90], [recall_90_precision], "ro")
plt.show()