# Playground for Performance Measure Slides

- Stephen W. Thomas
- Used for MMA 869, MMAI 869, and GMMA 869

In [1]:
import datetime

# Record when this notebook was last executed.
run_timestamp = datetime.datetime.now()
print(run_timestamp)

2020-11-04 16:09:10.998456


In [9]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics import silhouette_score, silhouette_samples

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
# Cells below that list several bare expressions rely on this setting to show
# all of their results.
InteractiveShell.ast_node_interactivity = "all"

In [42]:
# Synthetic ground truth for 100 samples: 20 positives followed by 80 negatives.
y_test = np.concatenate([
    np.ones(20, dtype=int),
    np.zeros(80, dtype=int),
])

# Synthetic predictions, laid out segment by segment against y_test.
y_pred = np.concatenate([
    np.ones(18, dtype=int),   # positives predicted as 1 (true positives)
    np.zeros(2, dtype=int),   # positives predicted as 0 (false negatives)
    np.zeros(75, dtype=int),  # negatives predicted as 0 (true negatives)
    np.ones(5, dtype=int),    # negatives predicted as 1 (false positives)
])

In [44]:
# Class distribution of the actual and the predicted labels. Entry i of each
# result is the number of samples labelled i (index 0 = class 0, index 1 = class 1).
# Both lines are bare expressions; both values are shown because
# ast_node_interactivity is set to "all" earlier in the notebook.
np.bincount(y_test)
np.bincount(y_pred)

array([80, 20], dtype=int64)

array([77, 23], dtype=int64)

## Model Performance

In [57]:
from sklearn.metrics import zero_one_loss, classification_report, accuracy_score, cohen_kappa_score, f1_score, log_loss, confusion_matrix, precision_score, recall_score, balanced_accuracy_score, brier_score_loss, precision_recall_fscore_support

# Layout of the matrix returned below: entry C[i, j] counts the samples whose
# true label is i and whose predicted label is j (rows = actual, columns = predicted).
# Treating 1 as the positive class:
# C00 = TN  (actual 0, predicted 0)
# C10 = FN  (actual 1, predicted 0)
# C11 = TP  (actual 1, predicted 1)
# C01 = FP  (actual 0, predicted 1)
confusion_matrix(y_test, y_pred)

array([[75,  5],
       [ 2, 18]], dtype=int64)

In [63]:
# Same confusion matrix as a DataFrame, with rows labelled by the true class
# and columns labelled by the predicted class.
unique_label = np.unique([y_test, y_pred])
row_labels = ['  true:{:}'.format(label) for label in unique_label]
col_labels = ['pred:{:}'.format(label) for label in unique_label]
cm_counts = confusion_matrix(y_test, y_pred, labels=unique_label)
cmtx = pd.DataFrame(cm_counts, index=row_labels, columns=col_labels)
print(cmtx)

          pred:0  pred:1
  true:0      75       5
  true:1       2      18


In [65]:
# Scalar summary metrics, printed in a fixed order. Each entry pairs the output
# template with the scikit-learn function that produces the value.
# Note: brier_score_loss receives hard 0/1 predictions here rather than
# probabilities, so its value coincides with the zero-one loss.
summary_metrics = [
    ("Accuracy          = {:.3f}", accuracy_score),
    ("Zero-one Loss     = {:.3f}", zero_one_loss),
    ("Balanced Accuracy = {:.3f}", balanced_accuracy_score),
    ("Brier Score       = {:.3f}", brier_score_loss),
    ("Precision         = {:.3f}", precision_score),
    ("Recall            = {:.3f}", recall_score),
    ("F1 Score          = {:.3f}", f1_score),
]
for template, metric in summary_metrics:
    print(template.format(metric(y_test, y_pred)))

Accuracy          = 0.930
Zero-one Loss     = 0.070
Balanced Accuracy = 0.919
Brier Score       = 0.070
Precision         = 0.783
Recall            = 0.900
F1 Score          = 0.837


In [60]:
# Per-class metrics as a 4-tuple of arrays: (precision, recall, F-score, support).
# Within each array the entries are ordered by class label (class 0, then class 1).
per_class_metrics = precision_recall_fscore_support(y_test, y_pred)
print(per_class_metrics)

(array([0.97402597, 0.7826087 ]), array([0.9375, 0.9   ]), array([0.95541401, 0.8372093 ]), array([80, 20], dtype=int64))


In [59]:
# Text table of per-class precision / recall / F1 / support, followed by the
# overall accuracy and the macro and weighted averages.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.97      0.94      0.96        80
           1       0.78      0.90      0.84        20

    accuracy                           0.93       100
   macro avg       0.88      0.92      0.90       100
weighted avg       0.94      0.93      0.93       100



In [56]:
# Show how the `average` argument changes precision, recall and F1:
#   binary   - score of the positive class (label 1) only
#   micro    - computed from the pooled counts over both classes
#   macro    - unweighted mean of the per-class scores
#   weighted - mean of the per-class scores weighted by class support
averages = ['binary', 'micro', 'macro', 'weighted']
scorers = [
    ("  Precision = {:.3f}", precision_score),
    ("  Recall    = {:.3f}", recall_score),
    ("  F1        = {:.3f}", f1_score),
]

for average in averages:
    print("\nAverage = {}".format(average))
    for template, scorer in scorers:
        print(template.format(scorer(y_test, y_pred, average=average)))



Average = binary
  Precision = 0.783
  Recall    = 0.900
  F1        = 0.837

Average = micro
  Precision = 0.930
  Recall    = 0.930
  F1        = 0.930

Average = macro
  Precision = 0.878
  Recall    = 0.919
  F1        = 0.896

Average = weighted
  Precision = 0.936
  Recall    = 0.930
  F1        = 0.932
