# Chapter 3 of HOML 2nd edition

In [9]:
import numpy as np
import pandas as pd

In [1]:
# Download the 'mnist_784' dataset (version 1) from OpenML.
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version = 1)
# Display the keys of the returned object to see which fields are available.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [10]:
# Separate the features (X) from the digit labels (y).
X, y = mnist["data"], mnist["target"]
y = y.astype(np.uint8) # cast the labels to unsigned 8-bit integers (they are loaded as strings)
# The first 60,000 rows form the training set; all remaining rows form the test set.
X_train, X_test, y_train, y_test = X[:60000], X[60000: ], y[:60000], y[60000:]

In [11]:
# Binary targets for a "5 vs. not-5" detector: True where the label is 5, False otherwise.
y_train_5 = (y_train == 5)
y_test_5 = (y_test == 5)

In [12]:
from sklearn.linear_model import SGDClassifier

# Train a linear classifier with stochastic gradient descent on the binary
# "is it a 5?" targets. random_state is fixed so repeated runs give the same model.
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train_5)

SGDClassifier(random_state=42)

In [13]:
from sklearn.model_selection import cross_val_score
# Accuracy of the classifier on each of 3 cross-validation folds of the training set.
cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring = "accuracy")

array([0.95035, 0.96035, 0.9604 ])

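However, accuracy is not a good performance measure here because the classes are imbalanced: only about 10% of the images are 5s, so a classifier that always predicts "not 5" would already be about 90% accurate. A confusion matrix is more informative. Building one takes two steps: (1) get cross-validated predictions for the training set with `cross_val_predict`, and (2) pass the actual and predicted labels to `confusion_matrix`.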

In [15]:
from sklearn.model_selection import cross_val_predict
# Step 1: get a prediction for every training instance using 3-fold cross-validation.
y_train_pred = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3)

from sklearn.metrics import confusion_matrix
# Step 2: tabulate actual labels against predicted labels
# (rows are actual classes, columns are predicted classes).
confusion_matrix(y_train_5, y_train_pred)

array([[53892,   687],
       [ 1891,  3530]])

In [16]:
# Keep the confusion matrix around so later cells can label it and derive metrics from it.
sk_matrix = confusion_matrix(y_train_5, y_train_pred)
# Show the false-positive count: row 0 = actual non-5s, column 1 = predicted 5s.
sk_matrix[0, 1]

687

In [25]:
# Label the raw matrix so it is readable: rows are actual classes, columns are predicted classes.
pd.DataFrame(sk_matrix, columns = ["Negative", "Positive"], index = ["Negative", "Positive"])

Unnamed: 0,Negative,Positive
Negative,53892,687
Positive,1891,3530


In [27]:
def format_confusion_matrix(sk_matrix, labels=("Negative", "Positive")):
    """Wrap a confusion matrix in a DataFrame with labeled rows and columns.

    Parameters
    ----------
    sk_matrix : array-like of shape (n_classes, n_classes)
        Confusion matrix in scikit-learn layout: rows are actual classes,
        columns are predicted classes.
    labels : sequence of str, default ("Negative", "Positive")
        Class names, in matrix order. The same names are used for both the
        row index and the columns. The default suits a binary classifier.

    Returns
    -------
    pandas.DataFrame
        The matrix values with ``labels`` as both index and columns.

    Raises
    ------
    ValueError
        Raised by pandas when the number of labels does not match the
        matrix dimensions.
    """
    class_labels = list(labels)
    return pd.DataFrame(sk_matrix, columns=class_labels, index=class_labels)

def confusion_metrics(sk_matrix):
    """Compute precision, recall and F score from a binary confusion matrix.

    Parameters
    ----------
    sk_matrix : array-like of shape (2, 2)
        Confusion matrix in scikit-learn layout, ``[[tn, fp], [fn, tp]]``:
        rows are actual classes, columns are predicted classes. A NumPy
        array or a plain nested list are both accepted.

    Returns
    -------
    pandas.DataFrame
        A single ``metric`` column indexed by "Precision", "Recall" and
        "F Score". A metric whose denominator is zero (for example
        precision when nothing was predicted positive) is reported as 0.0
        rather than NaN.

    Raises
    ------
    ValueError
        If ``sk_matrix`` is not 2x2.

    Notes
    -----
    Do not use the F score uncritically. The actual situation determines
    whether recall or precision matters more, and folding them into one
    number hides that trade-off.
    """
    matrix = np.asarray(sk_matrix)
    if matrix.shape != (2, 2):
        raise ValueError(
            f"expected a 2x2 binary confusion matrix, got shape {matrix.shape}"
        )

    # True negatives (matrix[0, 0]) do not enter any of the three metrics.
    fp = float(matrix[0, 1])
    fn = float(matrix[1, 0])
    tp = float(matrix[1, 1])

    def _ratio(numerator, denominator):
        # An undefined ratio (zero denominator) is reported as 0.0 instead of NaN.
        return numerator / denominator if denominator else 0.0

    # Precision: out of those you think are positive, how often are you correct?
    precision = _ratio(tp, tp + fp)
    # Recall: out of all positives in the population, how many are you catching?
    # Consider a quality-assurance case where faulty products must be recalled:
    # how many of those that should be recalled is the model actually catching?
    recall = _ratio(tp, tp + fn)
    # Harmonic mean of precision and recall, which weighs low scores more heavily
    # than an arithmetic mean. 2*tp / (2*tp + fp + fn) is algebraically equal to
    # 2 / (1/precision + 1/recall) but stays finite when either one is zero.
    f_score = _ratio(2 * tp, 2 * tp + fp + fn)

    metric_df = pd.DataFrame(
        {'metric': [precision, recall, f_score]},
        index=["Precision", "Recall", "F Score"],
    )

    return metric_df


In [29]:
# Precision, recall and F score for the cross-validated 5-detector predictions.
confusion_metrics(sk_matrix)

Unnamed: 0,metric
Precision,0.837088
Recall,0.651171
F Score,0.732517
