# Área sob a curva (AUC)

### Importar dataset MNIST

In [None]:
from sklearn.datasets import fetch_openml
import numpy as np

# Fetch the 'mnist_784' dataset (version 1) from OpenML. as_frame=False requests
# NumPy arrays rather than pandas objects; cache=True reuses a local copy if present.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)
# Cast the targets to int8 so they can be compared against integer digits later.
mnist.target = mnist.target.astype(np.int8)

X, y = mnist["data"], mnist["target"]

# Persist both arrays to disk (np.save appends the '.npy' extension).
np.save('mnistX', X)
np.save('mnisty', y)

# The first 60000 samples form the training set, the remainder the test set.
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
# Shuffle the training set with a dedicated, seeded generator so the
# cross-validation folds (and therefore every reported AUC) are reproducible.
# The permutation length follows the actual slice instead of repeating 60000.
shuffle_index = np.random.RandomState(42).permutation(len(X_train))
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

### Definindo uma classe como verdadeira e demais como falsa

In [None]:
# Binary targets for a "5 vs. not-5" detector: True where the label equals 5.
# Both label arrays get the same element-wise comparison.
y_train_5, y_test_5 = (labels == 5 for labels in (y_train, y_test))

### Ajustando um classificador e os parâmetros para AUC

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import roc_curve
from sklearn.model_selection import cross_val_score, cross_val_predict

# `loss` must name a loss function. 'sag' is a LogisticRegression solver, not an
# SGDClassifier loss, and raises a ValueError. 'log_loss' trains a logistic
# regression by SGD (on scikit-learn < 1.1 the same loss is spelled 'log').
# tol=None disables the stopping tolerance so training runs for max_iter epochs.
# It replaces tol=-np.infty: `np.infty` was removed in NumPy 2.0 and recent
# scikit-learn versions reject a negative tolerance.
sgd_clf = SGDClassifier(loss='log_loss', max_iter=5, tol=None, random_state=42)
sgd_clf.fit(X_train, y_train_5)
# Decision scores from 3-fold cross-validation on the training set.
y_scores = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3, method="decision_function")
# False/true positive rates at every score threshold, used to draw the ROC curve.
fpr, tpr, thresholds = roc_curve(y_train_5, y_scores)

### Definindo função para plotar curva ROC

In [None]:
def plot_roc_curve(fpr, tpr, label=None):
    """Plot a ROC curve together with the diagonal reference line.

    Draws through the notebook-level ``plt`` (matplotlib.pyplot), so that
    module must be imported before this function is called.

    Args:
        fpr: False positive rates, plotted on the x axis.
        tpr: True positive rates, plotted on the y axis.
        label: Optional legend label attached to the curve.
    """
    unit_interval = [0, 1]

    plt.plot(fpr, tpr, label=label, linewidth=2)
    # Dashed black diagonal from (0, 0) to (1, 1) as a visual reference.
    plt.plot(unit_interval, unit_interval, 'k--')
    # Clamp both axes to [0, 1].
    plt.axis(unit_interval + unit_interval)

    axis_titles = (
        (plt.xlabel, 'False Positive Rate'),
        (plt.ylabel, 'True Positive Rate'),
    )
    for set_title, text in axis_titles:
        set_title(text, fontsize=16)

### Plotar a curva ROC e o score AUC

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score

# Draw the SGD classifier's ROC curve on its own 8x6 figure.
plt.figure(figsize=(8, 6))
plot_roc_curve(fpr=fpr, tpr=tpr)
plt.show()

# Area under that curve, computed from the same scores used to draw it.
print(
    "ROC AUC Score - SGD-logloss: ",
    roc_auc_score(y_true=y_train_5, y_score=y_scores),
)

### Vamos comparar modelos

In [None]:
# The class name was misspelled ("RandomForestClassifie"), which raised an
# ImportError and left RandomForestClassifier undefined for the line below.
from sklearn.ensemble import RandomForestClassifier

forest_clf = RandomForestClassifier(n_estimators=10, random_state=42)
# The forest is scored through predict_proba: 3-fold cross-validated class
# probabilities, one column per class.
y_probas_forest = cross_val_predict(forest_clf, X_train, y_train_5, cv=3,
                                    method="predict_proba")
y_scores_forest = y_probas_forest[:, 1]  # score = probability of the positive class
fpr_forest, tpr_forest, thresholds_forest = roc_curve(y_train_5, y_scores_forest)

### Saídas

In [None]:
# Overlay both ROC curves on one figure for a direct comparison.
plt.figure(figsize=(8, 6))
# The SGD curve is drawn as a dotted blue line. Its label now matches the
# "SGD-logloss" name used when this same score was first printed; "sag" is not
# a loss the SGD classifier can use, so the old label was misleading.
plt.plot(fpr, tpr, "b:", linewidth=2, label="SGD-logloss")
plot_roc_curve(fpr_forest, tpr_forest, "Random Forest")
plt.legend(loc="lower right", fontsize=12)
plt.show()

# Area under each curve, from the cross-validated scores computed earlier.
print("ROC AUC Score - SGD-logloss: ", roc_auc_score(y_train_5, y_scores))
print("ROC AUC Score - RandomForest: ", roc_auc_score(y_train_5, y_scores_forest))