# 4.1 - Evaluación

### Regresión

In [None]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation/future warnings
# raised by the libraries used below; consider narrowing it.
warnings.simplefilter('ignore')

import pandas as pd

from sklearn.datasets import load_diabetes

from sklearn.ensemble import RandomForestRegressor as RFR

from sklearn.model_selection import train_test_split as tts

In [None]:
# Regression dataset; later cells read its `.data` and `.target` attributes.
data = load_diabetes()

In [None]:
# Hold out a test split. A fixed random_state keeps the split, and therefore
# every metric computed below, reproducible across Restart & Run All.
X_train, X_test, y_train, y_test = tts(data.data, data.target, random_state=42)

In [None]:
# Seed the forest so its bootstrap/feature sampling (and the metrics that
# follow) are the same on every run.
rf = RFR(n_estimators=500, random_state=42)
rf.fit(X_train, y_train)

# Predictions on the held-out split, reused by all regression metrics below.
y_pred = rf.predict(X_test)

In [None]:
# Show only the first ten predictions instead of dumping the whole array.
y_pred[:10]

###### MSE


$$MSE = \frac{1}{n}\sum_{i=1}^{n}(y_i-\hat{y}_i)^{2}$$


pertenece al intervalo [0, +$\infty$)

In [None]:
from sklearn.metrics import mean_squared_error as mse

# Mean squared error of the predictions against the held-out targets.
mse(y_true=y_test, y_pred=y_pred)

###### RMSE


$$RMSE = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_i-\hat{y}_i)^{2}}$$


pertenece al intervalo [0, +$\infty$)

In [None]:
# RMSE as the square root of the MSE. The `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6, so it is avoided here.
mse(y_test, y_pred) ** 0.5

###### RMSLE


$$RMSLE = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(\log(1+y_i)-\log(1+\hat{y}_i))^{2}}$$


pertenece al intervalo [0, +$\infty$)

In [None]:
from sklearn.metrics import mean_squared_log_error as msle

# RMSLE: square root of the mean squared logarithmic error.
pow(msle(y_true=y_test, y_pred=y_pred), 0.5)

###### MAE


$$MAE = \frac{1}{n}\sum_{i=1}^{n}|y_i-\hat{y}_i|$$


pertenece al intervalo [0, +$\infty$)

In [None]:
from sklearn.metrics import mean_absolute_error as mae

# Mean absolute error of the predictions against the held-out targets.
mae(y_true=y_test, y_pred=y_pred)

###### R2


$$R2 = 1 - \frac{\sum_{i=1}^{n}(y_i-\hat{y}_i)^{2}}{\sum_{i=1}^{n}(y_i-\bar{y})^{2}}$$

###### Adjusted R2

$$AdjustedR2 = 1-(1-R^{2})\frac{n-1}{n-p-1}$$


donde:
+ n = tamaño de la muestra
+ p = nº de variables del modelo


pertenecen al intervalo (-$\infty$, 1]

In [None]:
from sklearn.metrics import r2_score as r2

# Coefficient of determination on the held-out split.
r2(y_true=y_test, y_pred=y_pred)

In [None]:
# Estimator-level shortcut: the model predicts on X_test itself and scores
# the result against y_test.
rf.score(X=X_test, y=y_test)

### Clasificación

In [None]:
from sklearn.datasets import load_wine
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier as RFC

# Switch to a classification problem: the wine dataset.
data = load_wine()

# Seeded split so the classification metrics below are reproducible.
X_train, X_test, y_train, y_test = tts(data.data, data.target, random_state=42)

In [None]:
# Previous model (SVC baseline), kept for reference only. It is written as
# comments rather than a bare string literal so the cell no longer echoes
# the text as its output.
#
# svc = SVC().fit(X_train, y_train)
# y_pred = svc.predict(X_test)

In [None]:
# Seeded random-forest classifier so the predictions (and every metric
# derived from them) are the same on each run.
rfc = RFC(random_state=42).fit(X_train, y_train)

# Hard class labels for the held-out split.
y_pred = rfc.predict(X_test)

In [None]:
# First ten true labels of the held-out split.
y_test[:10]

+ TP := True Positive (aciertos clase 1)
+ TN := True Negative (aciertos clase 0)
+ FP := False Positive (Error tipo I, decir 1 cuando es 0)
+ FN := False Negative (Error tipo II, decir 0 cuando es 1)

+ Accuracy  := (TP+TN)/(TP+TN+FP+FN) (acierto)  ($\frac{1}{n}\sum_{i=1}^{n} 1(\hat{y}_i=y_i)$)
+ Precision := TP/(TP+FP)
+ Recall    := TP/(TP+FN)  (Sensibilidad, TPR)
+ F1_Score  := 2·Recall·Precision/(Recall+Precision)

(F1 funciona mejor que el accuracy cuando los datos no están balanceados y cuando FP y FN son muy diferentes)

![f1](images/f1.png)

##### Accuracy

In [None]:
from sklearn.metrics import accuracy_score as acc

# Fraction of held-out samples whose predicted label matches the true one.
acc(y_true=y_test, y_pred=y_pred)

In [None]:
# Estimator-level shortcut for the accuracy above. `score` takes the features
# and the true labels (not labels and predictions) and predicts internally.
rfc.score(X_test, y_test)

##### Precision

In [None]:
from sklearn.metrics import precision_score as prec

# Macro average, matching the recall and F1 cells that follow. With
# single-label multiclass data, micro-averaged precision is just the accuracy
# again, so it would not show anything new here.
prec(y_test, y_pred, average='macro')

##### Recall

In [None]:
from sklearn.metrics import recall_score as rec

# Macro-averaged recall over the classes.
rec(y_true=y_test, y_pred=y_pred, average='macro')

##### F1_Score

In [None]:
from sklearn.metrics import f1_score as f1

# Macro-averaged F1 over the classes.
f1(y_true=y_test, y_pred=y_pred, average='macro')

In [None]:
def suma(x: int) -> int:  # annotated signature (function typing demo)
    """Return ``x`` unchanged.

    The annotations are documentation only: nothing in this function checks
    them, so whatever object is passed in is handed straight back.
    """
    resultado = x
    return resultado

In [None]:
# A str is accepted despite the `int` annotation: the hint is not enforced.
suma(x='hola')

##### Matriz de Confusión

![conf_matrix](images/conf_matrix.jpeg)

In [None]:
from sklearn.metrics import confusion_matrix as cm

# Raw counts: rows are true classes, columns are predicted classes.
cm(y_true=y_test, y_pred=y_pred)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Sum of every cell of the confusion matrix (one count per test sample).
cm(y_test, y_pred).sum()

In [None]:
# Confusion matrix as fractions of all test samples. `normalize='all'`
# replaces computing the matrix twice and dividing by a nested builtin sum.
conf_frac = cm(y_test, y_pred, normalize='all')

# Explicit figure/axes so labels are attached to this plot, not to pyplot's
# implicit "current" axes.
fig, ax = plt.subplots()
sns.heatmap(conf_frac, annot=True, ax=ax)

ax.set_title('Matriz confusion')
ax.set_ylabel('Verdad')
ax.set_xlabel('Prediccion')
plt.show()

In [None]:
# Sanity check: the fractions of the normalised confusion matrix add up to 1.
# `normalize='all'` avoids building the matrix twice and the nested builtin sum.
cm(y_test, y_pred, normalize='all').sum()

##### ROC-AUC  (Característica operativa del receptor y área debajo de la curva)

+ TPR := TP/(TP+FN)
+ FPR := FP/(TN+FP)


![roc](images/roc.png)

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC

# Binary problem for the ROC/AUC demo.
data = load_breast_cancer()

# Seeded split so the curve and the AUC are reproducible.
X_train, X_test, y_train, y_test = tts(data.data, data.target, random_state=42)

# probability=True enables predict_proba; the seed fixes its internal
# randomised calibration.
svc = SVC(probability=True, random_state=42).fit(X_train, y_train)

y_pred = svc.predict(X_test)  # hard labels

y_prob = svc.predict_proba(X_test)[:, 1]  # second column: score for class 1

In [None]:
from sklearn.metrics import roc_curve as roc
from sklearn.metrics import roc_auc_score as auc

In [None]:
# Counter-example: ROC drawn from hard labels (y_pred) instead of scores.
# Kept on purpose to contrast with the plot built from probabilities;
# do not copy this pattern.
# (Optionally wrap the cell in `with plt.xkcd():` for the sketch style.)

fpr, tpr, umbrales = roc(y_test, y_pred)  # deliberately y_pred, not probabilities
a = auc(y_test, y_pred)

fig, ax = plt.subplots()
ax.plot(fpr, tpr)
ax.plot(fpr, fpr, 'r--')  # diagonal reference line

ax.set_xlabel('FPR')
ax.set_ylabel('TPR')
ax.set_title('Binary ROC Curve --- AUC={:.3f}'.format(a))  # AUC with three decimals

plt.show()

In [None]:
# Correct version: the ROC curve and AUC are computed from the class-1
# scores (y_prob), not from hard labels.

with plt.xkcd():

    fpr, tpr, umbrales = roc(y_test, y_prob)  # must be y_prob here
    a = auc(y_test, y_prob)

    fig, ax = plt.subplots()
    ax.plot(fpr, tpr)
    ax.plot(fpr, fpr, 'r--')  # diagonal reference line

    ax.set_xlabel('FPR')
    ax.set_ylabel('TPR')
    ax.set_title('Binary ROC Curve --- AUC={:.3f}'.format(a))  # AUC with three decimals

    plt.show()

In [None]:
# Accuracy of the SVC's hard labels on the held-out split.
acc(y_true=y_test, y_pred=y_pred)

In [None]:
# Thresholds returned by the most recent roc(...) call.
umbrales

In [None]:
!pip install scikit-plot

In [None]:
import scikitplot as skplt

In [None]:
# Back to the multiclass wine data for per-class ROC curves.
data = load_wine()

# Seeded split so the curves and the accuracy below are reproducible.
X_train, X_test, y_train, y_test = tts(data.data, data.target, random_state=42)

# probability=True enables predict_proba; the seed fixes its internal
# randomised calibration.
svc = SVC(probability=True, random_state=42).fit(X_train, y_train)

y_pred = svc.predict(X_test)  # hard labels

y_prob = svc.predict_proba(X_test)  # probabilities for every class (all columns)

In [None]:
# ROC curves from the full probability matrix, drawn in xkcd sketch style.
with plt.xkcd():
    skplt.metrics.plot_roc(y_true=y_test, y_probas=y_prob)

In [None]:
# Accuracy of the SVC's hard labels on the wine test split.
acc(y_true=y_test, y_pred=y_pred)