# < Evaluation Metrics >
Reference: [Confusion matrix (Wikipedia)](https://en.wikipedia.org/wiki/Confusion_matrix)

## 1. Binary Classification

### - Prepare labels

In [144]:
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn import metrics

# Seed NumPy's legacy global RNG so the same labels are drawn on every run.
np.random.seed(99)

# 20 labels / 2 classes: ground truth first, then predictions, each drawn
# independently from {0, 1}.
y_true = np.random.randint(low=0, high=2, size=(20,))
y_pred = np.random.randint(low=0, high=2, size=(20,))

print('Labels:      ', y_true)
print('Predictions: ', y_pred)

Labels:       [1 1 1 0 1 0 0 0 1 0 1 1 0 1 1 1 1 0 1 1]
Predictions:  [0 1 0 1 0 0 0 0 0 0 1 1 1 1 0 1 1 0 1 0]


### - Confusion Matrix

In [145]:
# Layout of the matrix printed below. Passing labels=[1, 0] lists the
# positive class first, so rows are the actual condition and columns are
# the predicted condition:
#
#                     | Predicted Positive  | Predicted Negative  |
#   ------------------+---------------------+---------------------+
#   Actual Positive   | True Positive  (TP) | False Negative (FN) |
#   Actual Negative   | False Positive (FP) | True Negative  (TN) |

print(confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[1, 0]))

[[7 6]
 [2 5]]


### - Accuracy ($\frac{TP + TN}{TP + FN + FP + TN}$)

In [146]:
# Show accuracy both as a fraction and as a percentage. The percentage comes
# from the '%' format spec instead of multiplying by 100 and calling str(),
# which can expose floating-point noise (0.55 * 100 == 55.00000000000001).
# The score is also computed only once.
print('{0}  or  {0:.1%}'.format(accuracy_score(y_true, y_pred)))



0.6  or  60.0%


### - Precision ($\frac{TP}{TP + FP}$)

In [147]:
# Precision of the positive class. pos_label=1 is scikit-learn's default; it
# is spelled out here so the "Class 1" caption is unambiguous.
print('Class 1: ', precision_score(y_true, y_pred, pos_label=1))
# To score class 0 instead, treat it as the positive label:
# print('Class 0: ', precision_score(y_true, y_pred, pos_label=0))


Class 1:  0.7777777777777778


### - Recall ($\frac{TP}{TP + FN}$)

In [148]:
# Recall of the positive class. pos_label=1 is scikit-learn's default; it is
# spelled out here so the "Class 1" caption is unambiguous.
print('Class 1: ', recall_score(y_true, y_pred, pos_label=1))
# To score class 0 instead, treat it as the positive label:
# print('Class 0: ', recall_score(y_true, y_pred, pos_label=0))

Class 1:  0.5384615384615384


### - F1-Score ($2 \times \frac{Precision \times Recall}{Precision + Recall} = \frac{TP + TP}{TP + FP + TP + FN} = \frac{2TP}{2TP + FP + FN}$)

In [149]:
# F1 of the positive class. pos_label=1 is scikit-learn's default; it is
# spelled out here so the "Class 1" caption is unambiguous.
print('Class 1: ', f1_score(y_true, y_pred, pos_label=1))
# To score class 0 instead, treat it as the positive label:
# print('Class 0: ', f1_score(y_true, y_pred, pos_label=0))

Class 1:  0.6363636363636364


In [150]:
# All of the metrics above in one table: one row per class plus the
# accuracy and averaged rows, rounded to three decimals.
print(metrics.classification_report(y_true=y_true, y_pred=y_pred, digits=3))

              precision    recall  f1-score   support

           0      0.455     0.714     0.556         7
           1      0.778     0.538     0.636        13

    accuracy                          0.600        20
   macro avg      0.616     0.626     0.596        20
weighted avg      0.665     0.600     0.608        20



### - Model Comparison

In [151]:
# A second "model": another independent random draw of 20 binary predictions.
# The RNG is not re-seeded here, so the values continue the global random
# stream and therefore depend on which cells ran before this one.
y_pred2 = np.random.randint(low=0, high=2, size=(20,))

# Ground truth next to both models' predictions for a visual comparison.
print('Labels:  ', y_true)
print('Model 1: ', y_pred)
print('Model 2: ', y_pred2)


Labels:   [1 1 1 0 1 0 0 0 1 0 1 1 0 1 1 1 1 0 1 1]
Model 1:  [0 1 0 1 0 0 0 0 0 0 1 1 1 1 0 1 1 0 1 0]
Model 2:  [0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 1 1 0 0 0]


In [152]:
# One row per model; the columns are accuracy, precision, recall and F1
# (the last three for the positive class), evaluated in that order.
print('          Acc.  Prec. Rec.  F1')
print('Model 1:  {:.2f}  {:.2f}  {:.2f}  {:.2f}'.format(
    *(scorer(y_true, y_pred)
      for scorer in (accuracy_score, precision_score, recall_score, f1_score))
))
print('Model 2:  {:.2f}  {:.2f}  {:.2f}  {:.2f}'.format(
    *(scorer(y_true, y_pred2)
      for scorer in (accuracy_score, precision_score, recall_score, f1_score))
))


          Acc.  Prec. Rec.  F1
Model 1:  0.60  0.78  0.54  0.64
Model 2:  0.55  0.70  0.54  0.61


### - Classification Report

In [153]:
# Full report for model 1: per-class precision / recall / F1 and support,
# followed by accuracy and the macro / weighted averages (three decimals).
print(metrics.classification_report(y_true=y_true, y_pred=y_pred, digits=3))


              precision    recall  f1-score   support

           0      0.455     0.714     0.556         7
           1      0.778     0.538     0.636        13

    accuracy                          0.600        20
   macro avg      0.616     0.626     0.596        20
weighted avg      0.665     0.600     0.608        20



## 2. Multi-class Classification

### - Prepare labels

In [154]:
# Re-seed so this section is reproducible independently of the binary one.
np.random.seed(99)

# 20 labels / 3 classes: ground truth first, then predictions, each drawn
# independently from {0, 1, 2}.
y_true = np.random.randint(low=0, high=3, size=(20,))
y_pred = np.random.randint(low=0, high=3, size=(20,))

print('Labels:      ', y_true)
print('Predictions: ', y_pred)


Labels:       [1 1 0 1 0 2 0 1 0 1 2 1 1 0 1 2 0 2 0 0]
Predictions:  [0 0 0 1 2 2 1 2 0 0 1 2 1 1 0 1 1 2 2 2]


### - Confusion Matrix

In [155]:
# 3x3 matrix in label order 0, 1, 2: rows are the actual class and columns
# the predicted class, so the diagonal holds the correct predictions.
print(confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2]))


[[2 3 3]
 [4 2 2]
 [0 2 2]]


### - Precision, Recall, F1-Score

In [156]:
def one_vs_rest_scores(labels, predictions, positive_class):
    """Score one class of a multi-class problem as a binary (one-vs-rest) task.

    Both arrays are binarized without modifying the inputs: entries equal to
    ``positive_class`` become 1 and every other entry becomes 0. Using the
    same encoding for every class replaces three hand-written variants that
    each mapped the classes differently.

    Args:
        labels: NumPy array of true class ids.
        predictions: NumPy array of predicted class ids, aligned with ``labels``.
        positive_class: Class id to treat as the positive class.

    Returns:
        Tuple ``(precision, recall, f1)`` for ``positive_class``.
    """
    true_binary = (labels == positive_class).astype(int)
    pred_binary = (predictions == positive_class).astype(int)
    return tuple(
        scorer(true_binary, pred_binary, pos_label=1)
        for scorer in (precision_score, recall_score, f1_score)
    )


# Columns: precision, recall, F1 of each class against all other classes.
print('Class 0: {:.4f}  {:.4f}  {:.4f}'.format(*one_vs_rest_scores(y_true, y_pred, 0)))
print('Class 1: {:.4f}  {:.4f}  {:.4f}'.format(*one_vs_rest_scores(y_true, y_pred, 1)))
print('Class 2: {:.4f}  {:.4f}  {:.4f}'.format(*one_vs_rest_scores(y_true, y_pred, 2)))

Class 0: 0.3333  0.2500  0.2857
Class 1: 0.2857  0.2500  0.2667
Class 2: 0.2857  0.5000  0.3636


### - Accuracy

In [157]:
# Overall fraction of samples whose predicted class equals the true class.
# (Fixes the misspelled "Accurayc" caption.)
print('Accuracy: {:.2f}'.format(accuracy_score(y_true, y_pred)))

Accurayc: 0.30


### - Macro/Micro-Precision, Recall, F1-Score

In [158]:
# Precision, recall and F1 (in that order) under the two averaging schemes
# requested via the `average` argument.
print('       Prec. Rec.  F1')
print('Micro: {:.4f}  {:.4f}  {:.4f}'.format(
    *(scorer(y_true, y_pred, average='micro')
      for scorer in (precision_score, recall_score, f1_score))
))
print('Macro: {:.4f}  {:.4f}  {:.4f}'.format(
    *(scorer(y_true, y_pred, average='macro')
      for scorer in (precision_score, recall_score, f1_score))
))

       Prec. Rec.  F1
Micro: 0.3000  0.3000  0.3000
Macro: 0.3016  0.3333  0.3053


### - Classification Report

In [159]:
# Per-class rows for classes 0, 1 and 2, then accuracy and the macro /
# weighted averages, rounded to three decimals.
print(metrics.classification_report(y_true=y_true, y_pred=y_pred, digits=3))


              precision    recall  f1-score   support

           0      0.333     0.250     0.286         8
           1      0.286     0.250     0.267         8
           2      0.286     0.500     0.364         4

    accuracy                          0.300        20
   macro avg      0.302     0.333     0.305        20
weighted avg      0.305     0.300     0.294        20



## 3. Multi-label Classification

### - Prepare labels

In [160]:
# Stray ">>> " REPL prompt removed: it is a SyntaxError when this cell is
# run as plain Python (e.g. after exporting the notebook to a script).
from sklearn.metrics import multilabel_confusion_matrix

# Re-seed so this section is reproducible independently of the earlier ones.
np.random.seed(99)

# 10 samples / 3 classes: each row holds one sample's three independent
# binary labels, drawn directly in (10, 3) shape.
y_true = np.random.randint(2, size=(10, 3))
y_pred = np.random.randint(2, size=(10, 3))

# Print each sample's true label vector next to its predicted one.
print('Labels\t\tPredictions')
for true, pred in zip(y_true, y_pred):
    print(true, '\t', pred)


Labels		Predictions
[1 1 1] 	 [1 1 1]
[0 1 0] 	 [1 0 1]
[0 0 1] 	 [1 0 1]
[0 1 1] 	 [0 0 0]
[0 1 1] 	 [1 0 1]
[1 1 0] 	 [1 0 1]
[1 1 0] 	 [1 1 1]
[1 0 1] 	 [1 0 0]
[0 0 0] 	 [0 1 1]
[0 0 0] 	 [0 0 0]


### - Confusion Matrix

In [161]:
# One 2x2 matrix per label (0, 1, 2), each laid out as
#     [[TN, FP],
#      [FN, TP]]
# Note that this puts the negative class first, unlike the binary example
# above, which passed labels=[1, 0] to list the positive class first.
multilabel_confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2])

array([[[3, 3],
        [0, 4]],

       [[3, 1],
        [4, 2]],

       [[1, 4],
        [2, 3]]], dtype=int64)

### - Accuracy (subset accuracy: a sample counts as correct only if all of its labels match)

In [162]:
# For multi-label input accuracy_score is the subset accuracy (exact-match
# ratio): a sample is correct only when its whole label row matches.
print("Accuracy: ", accuracy_score(y_true=y_true, y_pred=y_pred))

Accuracy:  0.2


### - Precision, Recall, F1-Score

In [163]:
print('         Prec.   Rec.    F1')
print('Class 0: {:.4f}  {:.4f}  {:.4f}'.format(precision_score(y_true[:,0], y_pred[:,0]), recall_score(y_true[:,0], y_pred[:,0]), f1_score(y_true[:,0], y_pred[:,0])))
print('Class 1: {:.4f}  {:.4f}  {:.4f}'.format(precision_score(y_true[:,1], y_pred[:,1]), recall_score(y_true[:,1], y_pred[:,1]), f1_score(y_true[:,1], y_pred[:,1])))
print('Class 2: {:.4f}  {:.4f}  {:.4f}'.format(precision_score(y_true[:,2], y_pred[:,2]), recall_score(y_true[:,2], y_pred[:,2]), f1_score(y_true[:,2], y_pred[:,2])))



         Prec.   Rec.    F1
Class 0: 0.5714  1.0000  0.7273
Class 1: 0.6667  0.3333  0.4444
Class 2: 0.4286  0.6000  0.5000


### - Classification Report

In [164]:
# Per-label rows plus micro / macro / weighted / samples averages, to four
# decimals. zero_division=0 makes any metric with a zero denominator report
# 0 (presumably relevant to the samples whose predicted row is all zeros).
print(metrics.classification_report(y_true=y_true, y_pred=y_pred, digits=4, zero_division=0))


              precision    recall  f1-score   support

           0     0.5714    1.0000    0.7273         4
           1     0.6667    0.3333    0.4444         6
           2     0.4286    0.6000    0.5000         5

   micro avg     0.5294    0.6000    0.5625        15
   macro avg     0.5556    0.6444    0.5572        15
weighted avg     0.5619    0.6000    0.5384        15
 samples avg     0.4167    0.4500    0.4133        15

