# 4.3.12 Receiver operating characteristic

In [None]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from itertools import cycle
import matplotlib.pyplot as plt
import numpy as np
from scipy import interp
from sklearn import datasets, svm
from sklearn.metrics import auc, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import label_binarize

import warnings
# Suppress every warning raised from this point on.
# NOTE(review): this blanket filter also hides deprecation warnings coming
# from the libraries imported above; consider narrowing it to a category.
warnings.filterwarnings("ignore")

![curva_roc.png](attachment:curva_roc.png)

Es una gráfica que compara el desempeño de un clasificador binario cuando se varía la frontera de decisión. La curva se construye al graficar la tasa de verdaderos positivos (TPR, sensibilidad o recall), en el eje Y, vs la tasa de falsos positivos (FPR), en el eje X, al variar la frontera de decisión del clasificador. <br>
Es decir, se expresa la TPR como una función de la FPR.

A partir de la siguiente matriz de confusión:

![matrix.PNG](attachment:matrix.PNG)

los valores a graficar se obtienen como:

$$ TPR = \frac{TP}{TP+FN} $$

$$ FPR = \frac{FP}{FP+TN} $$

- La FPR representa la tasa de falsa alarma, esto es: la cantidad de Falsos Positivos sobre la totalidad de negativos.
- La TPR representa la tasa de detección, esto es: la cantidad de Verdaderos Positivos sobre la totalidad de positivos (los casos que verdaderamente presentan la condición).

De la gráfica, usualmente se computa el área bajo la curva. A mayor área, es mejor el clasificador.

Link: https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#sphx-glr-auto-examples-model-selection-plot-roc-py

In [None]:
#
# Load the iris dataset and unpack it into the feature matrix (X) and the
# class labels (y). The trailing expression displays the labels.
#
iris = datasets.load_iris()
X, y = iris.data, iris.target
y

In [None]:
#
# Binarize the labels into an indicator matrix with one column per class
# (three columns here). The ROC implementation is restricted to the binary
# case, so each column is later treated as its own binary problem.
#
# The matrix is built from iris.target rather than from y itself, so this
# cell is idempotent: re-running it never tries to binarize a y that has
# already been binarized.
#
y = label_binarize(iris.target, classes=[0, 1, 2])
n_classes = y.shape[1]
y[:5, :]

In [None]:
#
# Append random features: 200 * n_features extra columns drawn with randn
# from a RandomState seeded with 0.
#
# The shape and the base columns are taken from iris.data instead of X, so
# re-running this cell rebuilds X from scratch rather than stacking another
# batch of noise columns onto the already-augmented matrix (which would also
# inflate n_features on every run).
#
random_state = np.random.RandomState(0)
n_samples, n_features = iris.data.shape
X = np.c_[iris.data, random_state.randn(n_samples, 200 * n_features)]

In [None]:
#
# Training and validation sets: half of the samples go to each side, and the
# fixed random_state keeps the partition the same on every run.
#
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0
)

# Preview the first ten rows of the training labels.
y_train[:10]

In [None]:
#
# Build the classifier with the one-vs-rest strategy around a linear-kernel
# SVC.
#
classifier = OneVsRestClassifier(
    svm.SVC(kernel="linear", probability=True, random_state=random_state)
)

#
# Fit on the training split, then score the test split.
# decision_function returns the distance of each sample from the decision
# boundary for each class.
#
classifier.fit(X_train, y_train)
y_score = classifier.decision_function(X_test)

#
# ROC curve computation: one entry per class index in each dictionary.
#
fpr, tpr, roc_auc = {}, {}, {}

for i in range(n_classes):
    # roc_curve takes the true binary labels ({-1, 1} or {0, 1}) and the
    # target scores (probability estimates of the positive class, confidence
    # values, or non-thresholded decision values such as those returned by
    # decision_function). It returns:
    #   fpr: increasing false positive rates; element k is the rate for
    #        predictions with score >= thresholds[k].
    #   tpr: increasing true positive rates; element k is the rate for
    #        predictions with score >= thresholds[k].
    #   thresholds: decreasing thresholds on the decision function
    #        (discarded here).
    fpr[i], tpr[i], _ = roc_curve(y_true=y_test[:, i], y_score=y_score[:, i])

    # Area Under the Curve (AUC), computed with the trapezoidal rule.
    roc_auc[i] = auc(x=fpr[i], y=tpr[i])

In [None]:
#
# False positive rates: a dict keyed by class index, holding the rates
# returned by roc_curve for each class
#
fpr

In [None]:
#
# True positive rates: a dict keyed by class index, holding the rates
# returned by roc_curve for each class
#
tpr

In [None]:
#
# Area under the ROC curve for each class, keyed by class index
#
roc_auc

In [None]:
#
# Plot the ROC curve of a single class together with the diagonal reference
# line.
#
# The class index is named once here instead of being repeated in fpr[...],
# tpr[...] and roc_auc[...], so the curve and the area shown in the legend
# cannot drift out of sync when a different class is plotted.
#
plotted_class = 2

# Explicit Figure/Axes interface instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(6, 6))

ax.plot(
    fpr[plotted_class],
    tpr[plotted_class],
    color="darkorange",
    lw=2,
    label="ROC curve (area = %0.2f)" % roc_auc[plotted_class],
)
# Dashed diagonal from (0, 0) to (1, 1), i.e. the points where TPR == FPR.
ax.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")

ax.set_xlim([0.0, 1.0])
# The y axis extends slightly above 1.0.
ax.set_ylim([0.0, 1.05])

ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")

ax.set_title("Receiver operating characteristic example")

ax.legend(loc="lower right")
plt.show()

In [None]:
# Prints 'ok_' from the last cell; presumably a marker that the notebook ran
# to completion (confirm with whatever consumes this output).
print('ok_')