# Métricas de Evaluación

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
%matplotlib inline

### Datos de prueba

Vamos a suponer que entrenamos un algoritmo predictivo y compararemos el outcome real de la variable "y" versus el outcome predicho por el algoritmo

In [3]:
y_true = pd.Series([1,0,1,1,0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0])
y_pred = pd.Series([0,0,1,1,0,1,1,0,1,0,1,1,1,1,0,0,0,0,1,1,0,0,1,1,1,1])

### Matriz de Confusión

Primero vamos a crear una matriz de confusión con la función crosstab (crea una tabla pivoteada con la frecuencia de los elementos)

In [4]:
pdf = pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
# imprima pdf

In [5]:
pdf

Predicted,0,1,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6,7,13
1,5,8,13
All,11,15,26


### Reporte de Clasificación

Ahora vamos a armar un reporte de clasificación. Para eso, primero calculamos los falsos/verdaderos positivos/negativo

In [13]:
# Complete los valores de los arreglos

# True Negative
TN = pdf[0][0]

# True Positive
TP = pdf[1][1]

# False Negative
FN = pdf[1][0]

# False Positive
FP = pdf[0][1]

# Total
TOT = TN + TP + FN + FP

In [14]:
print("TP:{}, TN:{}, FP:{}, FN:{}, TOT:{}".format(TP,TN,FP,FN,TOT))

TP:8, TN:6, FP:5, FN:7, TOT:26


Ahora armamos las fórmulas de las métricas de precisión, exactitud, sensibilidad, especificidad, etc..

In [17]:
# Accuracy (Precisión)
acc =(TP+TN)/TOT
print("Accurracy is {}", round(acc,2))

Accurracy is {} 0.54


In [19]:
# Precision (Exactitud)
prec =TP/(FP+TP)
print("Precision is {}", round(prec,2))

Precision is {} 0.62


In [20]:
# Recall (Sensibilidad)
rec =TP/(TP+FN)
print("Recall is {}", round(rec,2))

Recall is {} 0.53


In [21]:
# Specifity (Especificidad)
spe =TN/(TN+FP)
print("Specifity is {}", round(spe,2))

Specifity is {} 0.55


In [22]:
# F1-Score = 2 * (precision x recall / (precision + recall))
# F1 Score takes into account precision and the recall. It is created by finding the the harmonic mean of precision and recall.
f1 =2*(prec*rec)/(prec+rec)
print("F1 Score is {}", round(f1,2))

F1 Score is {} 0.57


Empleando el package metrics de sklearn

In [29]:
# Accuracy Score
from sklearn.metrics import accuracy_score
print("Accuracy score: {}", round(accuracy_score(y_true,y_pred),2))

Accuracy score: {} 0.54


In [13]:
# Precision – Accuracy of positive predictions.
# Precision = TP/(TP + FP)
from sklearn.metrics import precision_score
print("Precision score: {}", round(precision_score(y_true,y_pred),2))

Precision score: {} 0.53


In [32]:
# Recall (aka sensitivity or true positive rate): Fraction of positives That were correctly identified.
# Recall = TP/(TP+FN)
from sklearn.metrics import recall_score
print("Recall score: {}", round(recall_score(y_true,y_pred),2))

Recall score: {} 0.62


In [31]:
# F1 Score (aka F-Score or F-Measure) – A helpful metric for comparing two classifiers. 
from sklearn.metrics import f1_score
print("F1 Score: {}", round(f1_score(y_true,y_pred),2))

F1 Score: {} 0.57


### Reporte de Clasificación

Ahora comparamos con el reporte de clasificación

In [23]:
from sklearn.metrics import classification_report

In [22]:
# Report which includes Precision, Recall and F1-Score.
print(classification_report(y_true,y_pred))

              precision    recall  f1-score   support

           0       0.55      0.46      0.50        13
           1       0.53      0.62      0.57        13

    accuracy                           0.54        26
   macro avg       0.54      0.54      0.54        26
weighted avg       0.54      0.54      0.54        26

