# Matriz de Confusão

### Importar dataset Mnist

In [None]:
from sklearn.datasets import fetch_openml
import numpy as np

# Fetch the OpenML 'mnist_784' dataset (version 1) as plain arrays, using the
# local cache when available.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)
# Store the labels as int8 so the `== 5` comparisons below compare integers.
mnist.target = mnist.target.astype(np.int8)

X, y = mnist.data, mnist.target

# Persist both arrays to disk with np.save.
np.save('mnistX', X)
np.save('mnisty', y)

# The first 60000 samples form the training set, the remainder the test set.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

# Shuffle the training samples, applying the same permutation to the labels.
shuffle_index = np.random.permutation(60000)
X_train = X_train[shuffle_index]
y_train = y_train[shuffle_index]

# Binary targets for the "is this digit a 5?" task.
y_train_5 = y_train == 5
y_test_5 = y_test == 5

### Ajustando um classificador

In [None]:
from sklearn.linear_model import SGDClassifier

# Linear classifier with hinge loss, trained by SGD for a fixed 5 epochs.
# tol=None disables the tolerance-based stopping criterion, which is what the
# former tol=-np.infty was expressing: the np.infty alias was removed in
# NumPy 2.0 and recent scikit-learn releases reject a negative tol.
sgd_clf = SGDClassifier(loss='hinge', max_iter=5, tol=None, random_state=42)
sgd_clf.fit(X_train, y_train_5)

### Predição no conjunto de teste, sem Validação Cruzada

In [None]:
# Predict the binary "is a 5" label for every test sample with the already fitted classifier.
y_pred1 = sgd_clf.predict(X_test)

### Predição no conjunto de treinamento, com Validação Cruzada

In [None]:
from sklearn.model_selection import cross_val_score, cross_val_predict

# Accuracy of the classifier on each of the 3 cross-validation folds of the
# training set.
print("Acurácia k-fold SGD: ")
print(
    cross_val_score(
        estimator=sgd_clf,
        X=X_train,
        y=y_train_5,
        cv=3,
        scoring="accuracy",
    )
)

# Cross-validated predictions for the training samples, using the same 3-fold setup.
y_train_pred = cross_val_predict(estimator=sgd_clf, X=X_train, y=y_train_5, cv=3)

### Matriz de Confusão

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Confusion matrix of the cross-validated predictions on the training set.
print("Matriz de Confusão - treinamento com CrossValidation")
print(confusion_matrix(y_train_5, y_train_pred))

# Draw the confusion matrix of the fitted classifier on the test set.
# The returned display object is intentionally not printed: its repr is only
# an object address and adds noise to the cell output.
ConfusionMatrixDisplay.from_estimator(sgd_clf, X_test, y_test_5)
plt.show()

### Uma matriz de confusão perfeita

In [None]:
# Comparing the labels against themselves leaves only the diagonal populated.
print("Matriz de confusão perfeita")
print(
    confusion_matrix(
        y_true=y_train_5,
        y_pred=y_train_5,
    )
)

### Métricas

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Report precision, recall and F1 of the cross-validated training predictions,
# one line per metric, in that order.
for metric_label, metric_fn in (
    ("Precisão: ", precision_score),
    ("Recall: ", recall_score),
    ("F1-Score: ", f1_score),
):
    print(metric_label, metric_fn(y_train_5, y_train_pred))

## Verificação do Limiar para Recall x Precision

In [None]:
classe = 5  # Choose a class
enum = y_test == classe
# Indices of the test samples whose label equals the chosen class.
dig = np.where(enum)
digito = dig[0][1]  # Change to inspect other outputs
# Score that single sample; a one-row slice keeps the input two-dimensional.
y_scores = sgd_clf.decision_function(X_test[digito:digito + 1])
print(y_scores)

In [None]:
# With a threshold of 0 the sample is flagged as a 5 whenever its score is positive.
threshold = 0
y_some_digit_pred = y_scores > threshold
print("É um 5?: ", y_some_digit_pred)

In [None]:
# Same decision with a much higher threshold: only scores above 200000 count as a 5.
threshold = 200000
y_some_digit_pred = y_scores > threshold
print("É um 5?: ", y_some_digit_pred)

In [None]:
from sklearn.metrics import precision_recall_curve

# Cross-validated decision scores for the training samples (3 folds).
# Note that this rebinds y_scores, which previously held a single-sample score.
y_scores = cross_val_predict(
    estimator=sgd_clf,
    X=X_train,
    y=y_train_5,
    cv=3,
    method="decision_function",
)

# Precision/recall pairs and the thresholds computed from those scores.
precisions, recalls, thresholds = precision_recall_curve(y_train_5, y_scores)

### Definição de função para Precision e Recall x Limiar

In [None]:
def plot_precision_recall_vs_threshold(precisions, recalls, thresholds):
    """Plot precision and recall as functions of the decision threshold.

    Both curves are drawn on the current matplotlib axes with ``thresholds``
    on the x-axis. The last element of ``precisions`` and ``recalls`` is
    dropped before plotting (presumably because those arrays hold one more
    entry than ``thresholds``, as returned by ``precision_recall_curve``).

    Args:
        precisions: Sequence of precision values.
        recalls: Sequence of recall values.
        thresholds: Sequence of thresholds used as x-coordinates.
    """
    curves = (
        (precisions, "b--", "Precision"),
        (recalls, "g-", "Recall"),
    )
    for values, line_format, curve_label in curves:
        plt.plot(thresholds, values[:-1], line_format, label=curve_label, linewidth=2)

    plt.xlabel("Threshold", fontsize=16)
    plt.legend(loc="upper left", fontsize=16)
    # Clamp the y-axis to the [0, 1] range.
    plt.ylim([0, 1])

In [None]:
# Draw both curves on a new 8x4 figure, restricting the x-axis to
# thresholds between -700000 and 700000.
plt.figure(figsize=(8, 4))
plot_precision_recall_vs_threshold(
    precisions=precisions,
    recalls=recalls,
    thresholds=thresholds,
)
plt.xlim(-700000, 700000)
plt.show()