# Performance Metrics

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
np.set_printoptions(precision=3, suppress=True)
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.datasets import load_digits

# Load scikit-learn's digits dataset and keep the feature matrix and the
# multi-class target under the short names used by the rest of the notebook.
dataset = load_digits()
X = dataset.data
y = dataset.target

# Report how many samples each target class contains.
samples_per_class = np.bincount(y)
for digit_name, n_samples in zip(dataset.target_names, samples_per_class):
    print(digit_name, n_samples)

In [None]:
# Derive an imbalanced binary target from the multi-class digit labels:
#   negative class (0) -> every digit other than 1
#   positive class (1) -> the digit 1
# The boolean mask is cast back to the dtype of `y`, and `y` itself is left untouched.
y_binary_imbalanced = (y == 1).astype(y.dtype)

# Show the same slice of both label vectors side by side for comparison.
print('Original labels:\t', y[1:30])
print('New binary labels:\t', y_binary_imbalanced[1:30])

In [None]:
# Number of samples per binary label: index 0 counts 'not 1', index 1 counts 'digit 1'.
np.bincount(y_binary_imbalanced)    # Negative class (0) is the most frequent class

In [None]:
from sklearn.svm import SVC

# Split the imbalanced binary problem into train and test sets with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

# Accuracy of a support vector machine classifier (RBF kernel) on the test set.
svm = SVC(kernel='rbf', C=1)
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

### Dummy Classifiers

DummyClassifier is a classifier that makes predictions using simple rules, which can be useful as a baseline for comparison against actual classifiers, especially with imbalanced classes.

In [None]:
from sklearn.dummy import DummyClassifier

# With strategy='most_frequent' the baseline predicts the majority label seen
# during fit for every sample; here that is the negative class (0).
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)

# Baseline predictions for the test set (expected to be all zeros).
y_dummy_predictions = dummy_majority.predict(X_test)
y_dummy_predictions

In [None]:
# Test-set accuracy of the majority-class baseline, for comparison with the real classifiers.
dummy_majority.score(X_test, y_test)

In [None]:
# Same comparison with a linear-kernel SVM: fit on the training split,
# then report accuracy on the test split.
svm = SVC(kernel='linear', C=1)
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

### Confusion matrices

#### Binary (two-class) confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Majority-class baseline: the negative class (0) is the most frequent one.
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)
y_majority_predicted = dummy_majority.predict(X_test)

# Confusion matrix of true labels against the baseline's predictions.
confusion = confusion_matrix(y_true=y_test, y_pred=y_majority_predicted)
print('Most frequent class (dummy classifier)\n', confusion)

print(dummy_majority.score(X_test, y_test))

# Render the same matrix as a labelled heat map.
disp = ConfusionMatrixDisplay(confusion, display_labels=dummy_majority.classes_)
disp.plot()
plt.show()

In [None]:
# Linear-kernel SVM: fit on the training split and predict the test labels.
svm = SVC(kernel='linear', C=1)
svm.fit(X_train, y_train)
svm_predicted = svm.predict(X_test)

# Confusion matrix of true labels against the SVM's predictions.
confusion = confusion_matrix(y_true=y_test, y_pred=svm_predicted)
print('Support vector machine classifier (linear kernel, C=1)\n', confusion)

print(svm.score(X_test, y_test))

# Render the same matrix as a labelled heat map.
disp = ConfusionMatrixDisplay(confusion, display_labels=svm.classes_)
disp.plot()
plt.show()

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with a generous iteration budget for the solver.
lr = LogisticRegression(max_iter=10000)
lr.fit(X_train, y_train)
lr_predicted = lr.predict(X_test)

# Confusion matrix of true labels against the model's predictions.
confusion = confusion_matrix(y_true=y_test, y_pred=lr_predicted)
print('Logistic regression classifier (default settings)\n', confusion)

print(lr.score(X_test, y_test))

# Render the same matrix as a labelled heat map.
disp = ConfusionMatrixDisplay(confusion, display_labels=lr.classes_)
disp.plot()
plt.show()

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow decision tree evaluated on the same split as the other classifiers.
# random_state is pinned because scikit-learn permutes the candidate features
# at every split; without a seed, ties between equally good splits may be
# broken differently from run to run, changing the matrix and scores below.
dt = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X_train, y_train)
tree_predicted = dt.predict(X_test)

# Confusion matrix of true labels against the tree's predictions.
confusion = confusion_matrix(y_test, tree_predicted)

print('Decision tree classifier (max_depth = 2)\n', confusion)

print(dt.score(X_test, y_test))

# Render the same matrix as a labelled heat map.
disp = ConfusionMatrixDisplay(confusion_matrix=confusion,
                              display_labels=dt.classes_)
disp.plot()
plt.show()

### Evaluation metrics for binary classification

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Metrics for the decision tree's test-set predictions, with 1 as the positive class.
# Accuracy = (TP + TN) / (TP + TN + FP + FN)
# Precision = TP / (TP + FP)
# Recall = TP / (TP + FN)  Also known as sensitivity, or True Positive Rate
# F1 = 2 * Precision * Recall / (Precision + Recall)
print('Accuracy: {:.2f}'.format(accuracy_score(y_test, tree_predicted)))
print('Precision: {:.2f}'.format(precision_score(y_test, tree_predicted)))
print('Recall: {:.2f}'.format(recall_score(y_test, tree_predicted)))
print('F1: {:.2f}'.format(f1_score(y_test, tree_predicted)))

In [None]:
from sklearn.metrics import classification_report

# One text report combining the metrics above for the decision tree;
# the class labels 0 and 1 are shown as 'not 1' and '1'.
tree_report = classification_report(y_test, tree_predicted, target_names=['not 1', '1'])
print('Decision tree\n', tree_report)

In [None]:
# The same combined report for the logistic regression and SVM predictions.
binary_class_names = ['not 1', '1']
lr_report = classification_report(y_test, lr_predicted, target_names=binary_class_names)
svm_report = classification_report(y_test, svm_predicted, target_names=binary_class_names)

print('Logistic regression\n', lr_report)
print('SVM\n', svm_report)

### Decision functions

In [None]:
# Recreate the binary train/test split and refit logistic regression on it.
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)
lr.fit(X_train, y_train)

# Decision-function score for every test sample.
y_scores_lr = lr.decision_function(X_test)

# Pair the true label with the decision_function score for the first 20 instances.
y_score_list = [(true_label, score) for true_label, score in zip(y_test[:20], y_scores_lr[:20])]
y_score_list

In [None]:
# Refit logistic regression and get the class-probability estimates for the test set.
lr.fit(X_train, y_train)
y_proba_lr = lr.predict_proba(X_test)

# Pair the true label with the positive-class probability (column 1) for the first 20 instances.
y_proba_list = [(true_label, proba) for true_label, proba in zip(y_test[:20], y_proba_lr[:20, 1])]
y_proba_list

### Precision-recall curves

In [None]:
from sklearn.metrics import precision_recall_curve
!pip install scikit_plot

In [None]:
# Import scikit-plot
import scikitplot as skplt
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support,PrecisionRecallDisplay

# Precision-recall curve of the fitted logistic regression on the test set.
pr_lr = PrecisionRecallDisplay.from_estimator(lr, X_test, y_test, name='LR')

# Precision and recall of the model's hard predictions, i.e. its default operating point.
lr_hard_predictions = lr.predict(X_test)
p_lr, r_lr, _, _ = precision_recall_fscore_support(y_test, lr_hard_predictions, average='binary')

# Mark that operating point on the axes the curve was drawn on.
pr_lr.ax_.plot(r_lr, p_lr, 'o', markersize=10, label="threshold zero", fillstyle="none", c='k', mew=2)
pr_lr.ax_.legend()

In [None]:
# RBF-kernel SVM with probability estimates enabled, to be compared with
# logistic regression on one set of precision-recall axes.
# probability=True calibrates the scores with an internal cross-validation
# that shuffles the data, so random_state is pinned to keep the probability
# estimates (and everything computed from them) reproducible across runs.
svm = SVC(kernel='rbf', C=1, probability=True, random_state=0)
svm.fit(X_train, y_train)

# Re-draw the logistic-regression curve on a fresh figure and overlay the SVM curve.
pr_lr.plot(name='LR')
ax = plt.gca()
pr_svm = PrecisionRecallDisplay.from_estimator(svm, X_test, y_test, name='SVM', ax=ax)

# Precision and recall of the SVM's hard predictions (its default operating point).
p_svm, r_svm, _, _ = precision_recall_fscore_support(y_test, svm.predict(X_test), average='binary')

# Mark both default operating points. Different marker shapes are used so the
# two legend entries can be told apart (both were identical black circles).
ax.plot(r_lr, p_lr, 'o', markersize=10, label="threshold 0 LR", fillstyle="none", c='k', mew=2)
ax.plot(r_svm, p_svm, '^', markersize=10, label="threshold 0 SVM", fillstyle="none", c='k', mew=2)

ax.legend()

### F1 vs. average precision

In [None]:
from sklearn.metrics import f1_score

# F1 summarises a single operating point: the hard predictions of each model.
f1_lr = f1_score(y_test, lr.predict(X_test))
f1_svm = f1_score(y_test, svm.predict(X_test))

print("f1_score of LR: {:.3f}".format(f1_lr))
print("f1_score of SVM: {:.3f}".format(f1_svm))

In [None]:
from sklearn.metrics import average_precision_score

# Average precision is computed from continuous scores rather than hard labels:
# positive-class probabilities for LR, decision-function values for the SVM.
lr_positive_proba = lr.predict_proba(X_test)[:, 1]
svm_decision_values = svm.decision_function(X_test)

ap_lr = average_precision_score(y_test, lr_positive_proba)
ap_svc = average_precision_score(y_test, svm_decision_values)

print("Average precision of LR: {:.3f}".format(ap_lr))
print("Average precision of SVM: {:.3f}".format(ap_svc))

### ROC curve

In [None]:
# ROC curves for logistic regression and the SVM on the test set
import sklearn.metrics as metrics
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt

# False/true positive rates over all thresholds; the threshold arrays are discarded.
lr_fpr, lr_tpr, _ = roc_curve(y_test, lr.predict_proba(X_test)[:, 1])
svm_fpr, svm_tpr, _ = roc_curve(y_test, svm.decision_function(X_test))
# (sklearn.metrics.RocCurveDisplay could draw these as well; they are plotted manually here.)

fig, ax = plt.subplots()
ax.plot(lr_fpr, lr_tpr, label='Logistic Regression')
ax.plot(svm_fpr, svm_tpr, label='svm')
# Diagonal reference line
ax.plot([0, 1], [0, 1], color='grey', linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend(loc="lower right")
plt.show()

In [None]:
from sklearn.metrics import roc_auc_score

# Area under the ROC curve for the SVM, scored with its positive-class probabilities (column 1).
y_score_svm = svm.predict_proba(X_test)
roc_auc_score(y_true=y_test, y_score=y_score_svm[:, 1])

In [None]:
# Area under the ROC curve for logistic regression, scored with its
# positive-class probabilities (column 1). The AUC is computed from the
# probabilities obtained in this cell rather than from `y_proba_lr`, a variable
# left over from an earlier cell, so the result does not depend on stale state.
y_score_lr = lr.predict_proba(X_test)
roc_auc_score(y_test, y_score_lr[:, 1])

### Multi-class classification

In [None]:
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score

# Back to the full multi-class problem: the target is the digit itself.
digits = load_digits()

# data is between 0 and 16, so dividing by 16 rescales the features
digits_scaled = digits.data / 16.
X_train, X_test, y_train, y_test = train_test_split(
    digits_scaled, digits.target, random_state=0)

# Multi-class logistic regression with a large iteration budget for the solver.
lr = LogisticRegression(max_iter=100000)
lr.fit(X_train, y_train)
pred = lr.predict(X_test)

multiclass_accuracy = accuracy_score(y_test, pred)
multiclass_confusion = confusion_matrix(y_test, pred)
print("Accuracy: {:.3f}".format(multiclass_accuracy))
print("Confusion matrix:\n{}".format(multiclass_confusion))

In [None]:
# Combined per-class metrics report for the multi-class logistic regression predictions.
print(classification_report(y_test, pred))

In [None]:
# Micro averaging pools every prediction; macro averaging is the mean of the per-class scores.
micro_precision = precision_score(y_test, pred, average='micro')
macro_precision = precision_score(y_test, pred, average='macro')

print('Micro-averaged precision = {:.2f} (treat instances equally)'.format(micro_precision))
print('Macro-averaged precision = {:.2f} (treat classes equally)'.format(macro_precision))

In [None]:
# Recall of the multi-class predictions under micro and macro averaging.
micro_recall = recall_score(y_test, pred, average='micro')
macro_recall = recall_score(y_test, pred, average='macro')

print('Micro-averaged recall = {:.2f} (treat instances equally)'.format(micro_recall))
print('Macro-averaged recall = {:.2f} (treat classes equally)'.format(macro_recall))

In [None]:
# F1 of the multi-class predictions under micro and macro averaging.
micro_f1 = f1_score(y_test, pred, average='micro')
macro_f1 = f1_score(y_test, pred, average='macro')

print('Micro-averaged f1 = {:.2f} (treat instances equally)'.format(micro_f1))
print('Macro-averaged f1 = {:.2f} (treat classes equally)'.format(macro_f1))

### Model selection using evaluation metrics

#### Cross-validation example

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

# Again a binary problem: 'digit 1' is the positive class (True),
# every other digit is the negative class (False).
dataset = load_digits()
X = dataset.data
y = dataset.target == 1
clf = SVC(kernel='linear', C=1)

# Each entry is (printed label, scoring argument). scoring=None falls back to
# the estimator's default scorer, which is accuracy for a classifier.
cv_reports = [
    ('Cross-validation (accuracy)', None),
    ('Cross-validation (AUC)', 'roc_auc'),
    ('Cross-validation (recall)', 'recall'),
]
for report_label, scoring_name in cv_reports:
    print(report_label, cross_val_score(clf, X, y, cv=5, scoring=scoring_name))

#### Grid search example

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score

# Binary task: 'digit 1' (True) versus every other digit (False).
dataset = load_digits()
X = dataset.data
y = dataset.target == 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# RBF-kernel SVM whose gamma is tuned over the candidates below.
clf = SVC(kernel='rbf')
grid_values = {'gamma': [0.001, 0.01, 0.05, 0.1, 1, 10, 100]}

# Search 1 - default metric to optimize over grid parameters: accuracy
grid_clf_acc = GridSearchCV(clf, param_grid=grid_values)
grid_clf_acc.fit(X_train, y_train)
y_decision_fn_scores_acc = grid_clf_acc.decision_function(X_test)
test_auc_acc = roc_auc_score(y_test, y_decision_fn_scores_acc)

print('Grid best parameter (max. accuracy): ', grid_clf_acc.best_params_)
print('Grid best score (accuracy): ', grid_clf_acc.best_score_)
print('Test set AUC (accuracy): ', test_auc_acc)

# Search 2 - alternative metric to optimize over grid parameters: AUC
grid_clf_auc = GridSearchCV(clf, param_grid=grid_values, scoring='roc_auc')
grid_clf_auc.fit(X_train, y_train)
y_decision_fn_scores_auc = grid_clf_auc.decision_function(X_test)
test_auc_auc = roc_auc_score(y_test, y_decision_fn_scores_auc)

print('Grid best parameter (max. AUC): ', grid_clf_auc.best_params_)
print('Grid best score (AUC): ', grid_clf_auc.best_score_)
print('Test set AUC: ', test_auc_auc)
