In [None]:
from sklearn.datasets import fetch_openml

In [None]:
# Download (or load from the local cache) the MNIST dataset as NumPy arrays
# rather than a pandas DataFrame, then list the keys of the returned bundle.
mnist = fetch_openml(name="mnist_784", version=1, as_frame=False)
mnist.keys()

In [None]:
# Pull the feature matrix and the label vector out of the dataset bundle.
X = mnist["data"]
y = mnist["target"]
X.shape

In [None]:
# Shape of the label array, for comparison with X.shape above.
y.shape

## Visualization

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Take the first sample and view its flat pixel vector as a 28x28 image.
some_digit = X[0]
some_digit_image = some_digit.reshape((28, 28))

# Render the digit with the binary colormap and hide the axes.
plt.imshow(
    some_digit_image,
    interpolation="nearest",
    cmap=mpl.cm.binary,
)
plt.axis("off")
plt.show()

In [None]:
# Split data: the first 60000 samples are used for training,
# everything after that is held out for testing.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [None]:
# Training a binary classifier: a "five-detector".
# Build boolean targets that are True where the label equals the string "5"
# and False for every other label.
y_train_5 = y_train == "5"
y_test_5 = y_test == "5"

In [None]:
# Import model 
from sklearn.linear_model import SGDClassifier

In [None]:
# Instantiate the SGD classifier with a fixed seed and train it on the
# five / not-five targets.
sdg_clf = SGDClassifier(random_state=42)
sdg_clf.fit(X=X_train, y=y_train_5)

In [None]:
# Quick sanity check: ask the trained five-detector to classify the single
# digit stored in some_digit (the first sample, X[0]).
sdg_clf.predict([some_digit])

In [None]:
# Performance measures using cross-validation
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

In [None]:
# Three shuffled, stratified folds; the seed keeps the split reproducible.
skfolds = StratifiedKFold(
    n_splits=3,
    shuffle=True,
    random_state=42,
)

In [None]:
# Number of held-out test labels (y_test_5 is derived from y[60000:]).
len(y_test_5)

In [None]:
# Hand-rolled stratified cross-validation of the five-detector.
# Each iteration clones the classifier, trains the clone on the training
# folds, and prints the clone's accuracy on the held-out fold.
# The split is stratified on y_train_5, the labels that belong to X_train.
for train_index, test_index in skfolds.split(X_train, y_train_5):
    clone_clf = clone(sdg_clf)
    X_train_folds = X_train[train_index]
    y_train_folds = y_train_5[train_index]
    X_test_folds = X_train[test_index]
    y_test_folds = y_train_5[test_index]

    # Fit on the training folds only; the held-out fold is reserved for scoring.
    clone_clf.fit(X_train_folds, y_train_folds)
    y_pred = clone_clf.predict(X_test_folds)
    # Accuracy = correct predictions / number of predictions on this fold.
    n_correct = (y_pred == y_test_folds).sum()
    print(n_correct / len(y_pred))
    

In [None]:
from sklearn.model_selection import cross_val_score
# Accuracy of the SGD five-detector on each of 3 cross-validation folds.
cross_val_score(sdg_clf, X_train, y_train_5, cv=3, scoring="accuracy")

In [None]:
from sklearn.base import BaseEstimator
import numpy as np

class Never5Classfier(BaseEstimator):
    """Baseline classifier that predicts "not 5" (False) for every sample.

    It learns nothing from the data, so its score shows what accuracy is
    achievable by always answering False.
    """

    def fit(self, X, y=None):
        """Do nothing; there are no parameters to learn.

        Returns:
            self, following the scikit-learn estimator convention.
        """
        return self

    def predict(self, X):
        """Return an all-False boolean array of shape (len(X), 1)."""
        return np.zeros((len(X), 1), dtype=bool)

In [None]:
# Score the always-False baseline with 3-fold cross-validated accuracy.
never_5_clf = Never5Classfier()
cross_val_score(
    never_5_clf,
    X_train,
    y_train_5,
    scoring="accuracy",
    cv=3,
)

### Confusion matrix

In [None]:
from sklearn.model_selection import cross_val_predict

In [None]:
# Cross-validated predictions (3 folds) for the training set, kept for the
# confusion matrix computed below.
y_train_pred = cross_val_predict(sdg_clf, X_train, y_train_5, cv=3)

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Compare the true five / not-five labels with the cross-validated predictions.
confusion_matrix(y_train_5, y_train_pred)

### Precision and recall