# Exercise 2 - System evaluation

## Imports

In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Load data

Define the path of the data file

In [2]:
# Relative path of the CSV file that is loaded into `dataset_a` below.
path = "ex2-system-a.csv"

Read the CSV file using `read_csv`

In [3]:
# Column names are supplied explicitly: ten class-score columns "0".."9"
# followed by the ground-truth column "y_true".
dataset_a = pd.read_csv(
    path,
    sep=";",
    index_col=False,
    names=[str(digit) for digit in range(10)] + ["y_true"],
)

Display first rows

In [4]:
# Preview the first rows: class-score columns "0".."9" plus the "y_true" label.
dataset_a.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y_true
0,5.34845e-08,7.49348e-10,8.08347e-07,2.08229e-05,5.22236e-10,2.33026e-08,5.24127e-12,0.999965,4.80859e-07,1.3e-05,7
1,0.00133427,3.20296e-05,0.850428,0.00166909,1.54646e-07,0.000241294,0.144828,1.12281e-11,0.00145633,1.1e-05,2
2,3.64305e-06,0.996276,0.00204591,0.000421053,2.19402e-05,1.64413e-05,0.000283816,0.000372296,0.000515012,4.4e-05,1
3,0.99982,2.55039e-10,1.11201e-05,1.6532e-05,5.37573e-10,8.99975e-05,9.38092e-06,4.46447e-05,2.41844e-06,6e-06,0
4,2.09246e-08,7.46422e-08,3.56082e-05,5.4962e-07,0.998896,3.07092e-08,0.000234615,9.74801e-07,1.07161e-06,0.000831,4


Store some useful statistics (class names + number of classes)

In [5]:
# One name per digit class, kept as strings: "0", "1", ..., "9".
class_names = [str(digit) for digit in range(10)]
nb_classes = len(class_names)

## Exercise's steps

a) Write a function to take classification decisions on such outputs according to Bayes’ rule.

In [6]:
def bayes_classification(df):
    """
    Assign every sample to its highest-scoring class (Bayes decision rule).

    Parameters
    ----------
    df : Pandas DataFrame of shape (n_samples, n_classes + 1)
         One score column per class, followed by the ground truth as the
         last column. The scores are assumed to be per-class posterior
         probabilities (to be confirmed against the data source).

    Returns
    -------
    preds : Numpy array of shape (n_samples,)
            Position of the largest score in each row, used as the
            predicted class label.
    """
    # Every column except the last one (the ground truth) is a class score.
    class_scores = df.iloc[:, :-1].values
    return class_scores.argmax(axis=1)
    

b) What is the overall error rate of the system?

In [7]:
# Error rate of system A: share of samples whose decision differs from the ground truth.
y_true = dataset_a["y_true"].values
preds = bayes_classification(dataset_a)

# The mean of the boolean mismatch mask is the fraction of misclassified samples.
error_rate = np.mean(preds != y_true)
print("Error rate: {:.2f}%".format(100 * error_rate))

Error rate: 10.73%


c) Compute and report the confusion matrix of the system.

In [27]:
def confusion_matrix(y_true, y_pred, n_classes):
    """
    Compute the confusion matrix.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
             Ground truth labels.
    y_pred : array-like of shape (n_samples,)
             Predicted labels.
    n_classes : Integer
                Number of classes; labels are matched against
                0, 1, ..., n_classes - 1.

    Returns
    -------
    cm : Numpy array of shape (n_classes, n_classes)
         cm[i, j] is the number of samples whose true label is i and
         whose predicted label is j (stored as floats). Samples whose
         label is not in 0..n_classes-1 are not counted.

    Raises
    ------
    ValueError
        If y_true and y_pred do not have the same shape.
    """
    # Coerce to arrays so that plain Python lists are compared element-wise
    # (a list compared to an int is a single False, which would yield an
    # all-zero matrix).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                y_true.shape, y_pred.shape
            )
        )

    labels = np.arange(n_classes)
    cm = np.zeros((n_classes, n_classes))
    for i in labels:
        # The true-class mask does not depend on the predicted class, so it
        # is computed once per row instead of once per matrix entry.
        preds_for_class = y_pred[y_true == i]
        # Compare the selected predictions against all labels at once and
        # count the matches per predicted label.
        cm[i] = np.sum(preds_for_class[:, None] == labels, axis=0)
    return cm

In [28]:
# Confusion matrix of system A (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true, preds, nb_classes)
print("Confusion matrix:")
# The counts are stored as floats; cast for display only so they are printed
# as plain integers instead of scientific notation. `cm` itself is unchanged.
print(cm.astype(int))

Confusion matrix:
[[9.440e+02 0.000e+00 1.100e+01 0.000e+00 0.000e+00 2.000e+00 1.000e+01
  7.000e+00 5.000e+00 1.000e+00]
 [0.000e+00 1.112e+03 2.000e+00 3.000e+00 1.000e+00 4.000e+00 3.000e+00
  1.000e+00 9.000e+00 0.000e+00]
 [1.000e+01 6.000e+00 9.210e+02 1.200e+01 1.500e+01 3.000e+00 1.900e+01
  1.500e+01 2.600e+01 5.000e+00]
 [1.000e+00 1.000e+00 3.100e+01 8.620e+02 2.000e+00 7.200e+01 5.000e+00
  1.400e+01 1.200e+01 1.000e+01]
 [2.000e+00 3.000e+00 6.000e+00 2.000e+00 9.100e+02 1.000e+00 1.200e+01
  6.000e+00 4.000e+00 3.600e+01]
 [1.200e+01 3.000e+00 6.000e+00 2.900e+01 1.900e+01 7.680e+02 1.900e+01
  9.000e+00 2.100e+01 6.000e+00]
 [1.400e+01 3.000e+00 2.100e+01 2.000e+00 2.200e+01 2.800e+01 8.650e+02
  0.000e+00 3.000e+00 0.000e+00]
 [0.000e+00 1.400e+01 3.000e+01 9.000e+00 7.000e+00 2.000e+00 1.000e+00
  9.290e+02 3.000e+00 3.300e+01]
 [1.200e+01 1.600e+01 1.800e+01 2.600e+01 2.400e+01 4.600e+01 2.200e+01
  1.900e+01 7.720e+02 1.900e+01]
 [1.000e+01 4.000e+00 6.000e+00 2.200

d) What are the worst and best classes in terms of precision and recall?

In [29]:
def precision_per_class(cm):
    """
    Compute the precision per class.

    Precision of class j is cm[j, j] divided by the sum of column j
    (all samples predicted as j).

    Parameters
    ----------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix (rows: true class, columns: predicted class).

    Returns
    -------
    precisions : Numpy array of shape (n_classes,)
                 Precision per class. A class that is never predicted
                 (empty column) gets a precision of 0.0 instead of NaN.
    """
    cm = np.asarray(cm, dtype=float)
    true_positives = np.diag(cm)
    predicted_counts = cm.sum(axis=0)

    # Guard the 0/0 case: without it an empty column yields NaN plus a
    # RuntimeWarning, which then corrupts argmin/argmax and averages.
    precisions = np.zeros_like(true_positives)
    np.divide(true_positives, predicted_counts, out=precisions,
              where=predicted_counts != 0)
    return precisions

In [30]:
def recall_per_class(cm):
    """
    Compute the recall per class.

    Recall of class i is cm[i, i] divided by the sum of row i
    (all samples whose true class is i).

    Parameters
    ----------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix (rows: true class, columns: predicted class).

    Returns
    -------
    recalls : Numpy array of shape (n_classes,)
              Recall per class. A class with no sample in the ground
              truth (empty row) gets a recall of 0.0 instead of NaN.
    """
    cm = np.asarray(cm, dtype=float)
    true_positives = np.diag(cm)
    actual_counts = cm.sum(axis=1)

    # Guard the 0/0 case: without it an empty row yields NaN plus a
    # RuntimeWarning, which then corrupts argmin/argmax and averages.
    recalls = np.zeros_like(true_positives)
    np.divide(true_positives, actual_counts, out=recalls,
              where=actual_counts != 0)
    return recalls

In [31]:
# Per-class precision of system A, followed by the classes at both extremes.
precisions = precision_per_class(cm)
print("Precision per class:", precisions)

print("Worst class in terms of precision: ", precisions.argmin())
print("Best class in terms of precision: ", precisions.argmax())

Precision per class: [0.93930348 0.95697074 0.87547529 0.89141675 0.86419753 0.81355932
 0.90481172 0.88645038 0.89871944 0.88469602]
Worst class in terms of precision:  5
Best class in terms of precision:  1


In [32]:
# Per-class recall of system A, followed by the classes at both extremes.
recalls = recall_per_class(cm)
print("Recall per class:", recalls)

print("Worst class in terms of recall: ", recalls.argmin())
print("Best class in terms of recall: ", recalls.argmax())

Recall per class: [0.96326531 0.97973568 0.89244186 0.85346535 0.92668024 0.86098655
 0.90292276 0.9036965  0.7926078  0.83647175]
Worst class in terms of recall:  8
Best class in terms of recall:  1


e) In file `ex2-system-b.csv` you find the output of a second system B. Which of the two systems, (a) or (b), is better in terms of error rate and F1?

In [33]:
# Load the outputs of system B with the same layout as system A:
# ten class-score columns "0".."9" followed by the "y_true" column.
dataset_b = pd.read_csv(
    "ex2-system-b.csv",
    sep=";",
    index_col=False,
    names=[str(digit) for digit in range(10)] + ["y_true"],
)

In [34]:
def system_accuracy(cm):
    """
    Compute the overall accuracy from a confusion matrix.

    Parameters
    ----------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix.

    Returns
    -------
    accuracy : Float
               Sum of the diagonal entries divided by the sum of all
               entries.
    """
    # Diagonal entries are the samples whose predicted class equals the true class.
    n_correct = np.diag(cm).sum()
    n_total = np.sum(cm)
    return n_correct / n_total

In [35]:
def system_f1_score(cm):
    """
    Compute the F1 score of every class.

    Despite its name, this function does not return a single system-level
    number: it returns one F1 score per class, and callers average the
    result (e.g. with np.mean) to obtain the macro-averaged F1 score.

    Parameters
    ----------
    cm : Numpy array of shape (n_classes, n_classes)
         Confusion matrix.

    Returns
    -------
    f1_scores : Numpy array of shape (n_classes,)
                Harmonic mean of precision and recall for each class.
                A class whose F1 score is undefined (precision + recall
                equal to zero, or not a number) gets 0.0.
    """
    precisions = precision_per_class(cm)
    recalls = recall_per_class(cm)

    denominators = precisions + recalls
    # `> 0` is False for both 0 and NaN, so undefined entries keep the 0.0
    # default instead of propagating NaN into the caller's average.
    defined = denominators > 0
    f1_scores = np.zeros_like(denominators, dtype=float)
    np.divide(2 * precisions * recalls, denominators, out=f1_scores, where=defined)
    return f1_scores

In [39]:
# Overall accuracy of system A and the unweighted mean of its per-class F1 scores.
print("System A accuracy: {:.2f}%".format(100 * system_accuracy(cm)))
print("System A F1 score average : ", system_f1_score(cm).mean())

System A accuracy: 89.27%
System A F1 score average :  0.8907308492877297


In [40]:
# Same pipeline as for system A: decisions, confusion matrix, then the two metrics.
y_true_b = dataset_b["y_true"].values
preds_b = bayes_classification(dataset_b)
cm_b = confusion_matrix(y_true_b, preds_b, nb_classes)

print("System B accuracy: {:.2f}%".format(100 * system_accuracy(cm_b)))
print("System B F1 score average: ", system_f1_score(cm_b).mean())

System B accuracy: 96.13%
System B F1 score average:  0.9608568150389065


System B performs better than system A: it has a lower error rate (3.87% vs. 10.73%) and a higher average F1 score (0.961 vs. 0.891).