# MNIST Classification - Parts A-C

## Part A

In [None]:
from sklearn.datasets import fetch_openml

# Fetch version 1 of the "mnist_784" dataset from OpenML.
mnist = fetch_openml(name="mnist_784", version=1)

# Last expression of the cell: show which fields the returned object exposes.
mnist.keys()

In [None]:
# Unwrap the "data" and "target" entries into their underlying arrays
# (`.values`), then report both shapes as a quick sanity check.
X = mnist["data"].values
y = mnist["target"].values
print(X.shape, y.shape)

In [None]:
import matplotlib.pyplot as plt

# Take the second sample (index 1) and fold its flat pixel vector into a
# 28x28 grid so it can be rendered as an image.
some_digit = X[1]
some_digit_image = some_digit.reshape((28, 28))

plt.imshow(some_digit_image, cmap="binary")
plt.show()

## Part B

In [None]:
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from sklearn.model_selection import cross_val_predict

# Cast the labels to uint8 so the integer comparisons below behave
# numerically (presumably the labels are loaded as strings - confirm).
y = y.astype(np.uint8)

# First 60000 samples form the training split, the remainder the test split.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

# Binary targets for a "5 vs. not-5" detector.
y_train_5 = y_train == 5
y_test_5 = y_test == 5

# Seeded linear classifier trained with stochastic gradient descent.
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train_5)

# Out-of-fold predictions from 3-fold cross-validation on the training split.
y_train_pred = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3)

# Report the confusion matrix, then precision, then recall.
for metric in (confusion_matrix, precision_score, recall_score):
    print(metric(y_train_5, y_train_pred))

In [None]:
from sklearn.metrics import precision_recall_curve

# Out-of-fold decision scores (3-fold CV) for every training sample; these
# raw scores are what the precision/recall trade-off is computed from.
y_scores = cross_val_predict(
    sgd_clf, X_train, y_train_5, cv=3, method="decision_function"
)

precisions, recalls, thresholds = precision_recall_curve(y_train_5, y_scores)

# Create the figure BEFORE plotting so that figsize applies to the curve.
# Calling plt.figure() after the plot commands opens a second, empty figure
# and leaves the curve on a default-size one.
plt.figure(figsize=(16, 8))
plt.plot(recalls, precisions, linewidth=2, c="red")
plt.xlabel("Recall", fontsize=20)
plt.ylabel("Precision", fontsize=20)
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.show()

In [None]:
# Classify as positive only when the decision score exceeds a hand-picked
# cut-off, then report precision followed by recall at that cut-off.
threshold = 4000
y_train_pred_thres = y_scores > threshold

for score_fn in (precision_score, recall_score):
    print(score_fn(y_train_5, y_train_pred_thres))

## Part C

In [None]:
from sklearn.linear_model import SGDClassifier

# Refit the existing estimator on the original digit labels (y_train)
# instead of the boolean "is it a 5" target used earlier.
sgd_clf.fit(X_train, y_train)

# Wrap the single sample in a list so it is passed as a one-row batch.
single_sample = [some_digit]
print(sgd_clf.predict(single_sample))
print(sgd_clf.decision_function(single_sample))

In [None]:
# Predict on the held-out test split and visualise the confusion matrix
# (rows: true labels, columns: predicted labels) as a blue heat map.
y_test_pred = sgd_clf.predict(X_test)
conf_mx = confusion_matrix(y_test, y_test_pred)
plt.matshow(conf_mx, cmap=plt.cm.Blues)
# plt.show must be *called*; the bare name only references the function and
# never renders the figure when run outside an inline notebook backend.
plt.show()