## Loading the MNIST image dataset for classification

In [None]:
from sklearn.datasets import fetch_openml
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

In [None]:

# Download (or load from the local scikit-learn cache) version 1 of the
# 'mnist_784' dataset from OpenML.
mnist = fetch_openml(name='mnist_784', version=1)

In [None]:
mnist.keys()  # not the last expression of the cell, so nothing is displayed

# Feature matrix and labels; the labels are cast to unsigned 8-bit integers
# so they can be compared against numbers such as 5 later on.
X = mnist["data"]
y = mnist["target"].astype(np.uint8)
print(X.shape, y.shape)

In [None]:
# First sample as a flat vector, then folded into a 28x28 grid for display.
some_digit = X.to_numpy()[0]
some_digit_image = some_digit.reshape(28, 28)

fig, ax = plt.subplots()
ax.imshow(some_digit_image, cmap='binary')
ax.axis('off')
print(y[0])  # label stored for the image being shown
plt.show()

## test train split

In [None]:
# The images are already shuffled and split into training and test sections:
# the first 60,000 rows are the training set, the remaining rows the test set.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

## creating a 5 detector binary classifier

In [None]:
from sklearn.linear_model import SGDClassifier

# Boolean target vectors (True where the label is 5, False otherwise) used to
# train and evaluate the binary "5 / not 5" classifier.
y_train_5 = y_train == 5
y_test_5 = y_test == 5

# Fixed random_state so repeated runs give the same model.
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train_5)


In [None]:
# Predict on a single sample (shaped as one row); this should result in True
# because the digit is a five.
sgd_clf.predict(some_digit.reshape(1, -1))

## Measuring Model Accuracy

In [None]:
# Homemade cross-validation: does by hand roughly what the built-in
# cross_val_score(..., scoring='accuracy') does, printing one accuracy per fold.
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

# No random_state is passed: it only has an effect together with shuffle=True.
skfolds = StratifiedKFold(n_splits=3)

for train_index, test_index in skfolds.split(X_train, y_train_5):
    # Fresh, unfitted copy of the classifier so folds do not share learned state.
    clone_clf = clone(sgd_clf)

    # split() yields positional row indices. X_train and y_train_5 are pandas
    # objects, so rows must be selected with .iloc; X_train[train_index] would
    # be interpreted as a column lookup and raise a KeyError.
    X_train_folds = X_train.iloc[train_index]
    y_train_folds = y_train_5.iloc[train_index]
    X_test_fold = X_train.iloc[test_index]
    y_test_fold = y_train_5.iloc[test_index]

    clone_clf.fit(X_train_folds, y_train_folds)
    y_pred = clone_clf.predict(X_test_fold)

    # Accuracy on the held-out fold: share of predictions matching the labels.
    # Compare as plain arrays and count with NumPy instead of a Python-level sum.
    n_correct = np.sum(y_pred == y_test_fold.to_numpy())
    print(n_correct / len(y_pred))

In [34]:
# Built-in cross-validation score.
# The accuracy from this looks really good, but the majority of datapoints
# already aren't 5, so even if you guess False every time you're still about
# 90% accurate -- we need a better evaluation metric.
from sklearn.model_selection import cross_val_score

cross_val_score(estimator=sgd_clf, X=X_train, y=y_train_5, scoring='accuracy', cv=3)

array([0.95035, 0.96035, 0.9604 ])

In [35]:
# Confusion matrix - a better form of evaluation.
from sklearn.model_selection import cross_val_predict

# cross_val_predict is used to get "clean" predictions: each sample is
# predicted by a model that did not see it during training.
y_train_pred = cross_val_predict(estimator=sgd_clf, X=X_train, y=y_train_5, cv=3)
y_train_pred

array([ True, False, False, ...,  True, False, False])

In [36]:
from sklearn.metrics import confusion_matrix

# True labels first, cross-validated predictions second.
confusion_matrix(y_true=y_train_5, y_pred=y_train_pred)

array([[53892,   687],
       [ 1891,  3530]], dtype=int64)

![](PrecisionRecallDiagram.png)

Precision = TP / (TP + FP)
    the percentage of the time the classifier is correct when it says something is a 5 / positive class

Recall = TP / (TP + FN)
    the percentage of the actual positive class (the real 5s) that the classifier detected

In [40]:
from sklearn.metrics import precision_score, recall_score

# These lower scores show that the classifier isn't as good as the accuracy
# numbers previously suggested.
ps = precision_score(y_true=y_train_5, y_pred=y_train_pred)
rs = recall_score(y_true=y_train_5, y_pred=y_train_pred)
print(ps, rs)

0.8370879772350012 0.6511713705958311


### F1 Score

used to combine precision and recall into one metric.

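It is the harmonic mean of precision and recall, $F_1 = \frac{2 \cdot \text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}}$, which gives more weight to low values than a regular mean does.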

so the only way you get a high F1 score is if both precision and recall are high.

However, the F1 score favors classifiers whose precision and recall are similar, which isn't always what you want: some tasks care mostly about precision and others mostly about recall.

In [41]:
from sklearn.metrics import f1_score

# Single-number summary of precision and recall on the cross-validated predictions.
f1_score(y_true=y_train_5, y_pred=y_train_pred)

0.7325171197343846