## Fetching Dataset

In [None]:
from sklearn.datasets import fetch_openml

In [None]:
# Download the 'mnist_784' dataset from OpenML.
# version=1 pins the dataset revision so that re-runs fetch the same data.
# as_frame=False makes the loader return NumPy arrays rather than pandas
# objects; the cells below rely on positional indexing (x[3601],
# x_train[shuffle_index]) and on ndarray.reshape, which fail on a DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

In [None]:
# Pull the feature data and the labels out of the fetched dataset.
x = mnist['data']
y = mnist['target']

In [None]:
# Show the dimensions of the feature data.
x.shape

In [None]:
# Show the dimensions of the label data.
y.shape

In [None]:
%matplotlib inline

In [None]:
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Take the sample at index 3601 and view its values as a 28x28 grid
# so that it can be drawn as an image.
some_digit = x[3601]
some_digit_image = some_digit.reshape((28, 28))

In [None]:
# Draw the digit with the "binary" colormap, no pixel smoothing, and hidden axes.
plt.imshow(
    some_digit_image,
    interpolation="nearest",
    cmap=matplotlib.cm.binary,
)
plt.axis("off")

In [None]:
# Look up the label stored at the same index (3601) as the digit drawn above.
y[3601]

In [None]:
# The first 6000 samples are used for training, the next 1000 for testing.
# NOTE(review): only the first 7000 samples are used - confirm this subset is intentional.
x_train = x[:6000]
x_test = x[6000:7000]

In [None]:
# Split the labels at the same positions as the features so rows stay aligned.
y_train = y[:6000]
y_test = y[6000:7000]

In [None]:
import numpy as np

# Shuffle the training features and labels with one shared random permutation
# so that every sample keeps its own label.
# The permutation length comes from the data itself instead of a hard-coded
# 6000, so it stays correct if the size of the training slice changes.
shuffle_index = np.random.permutation(len(x_train))
x_train, y_train = x_train[shuffle_index], y_train[shuffle_index]

## Creating a 2 detector

In [None]:
# Convert the labels to 8-bit integers so they can be compared with the number 2.
y_train, y_test = y_train.astype(np.int8), y_test.astype(np.int8)

# Boolean targets for the binary task: True wherever the label equals 2.
y_train_2 = y_train == 2
y_test_2 = y_test == 2

In [None]:
# Display the training labels after the integer conversion.
y_train

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression model using the L-BFGS solver with a stopping tolerance of 0.1.
clf = LogisticRegression(solver='lbfgs', tol=0.1)

In [None]:
# Fit on the boolean targets, so the model learns "2" versus "not 2".
clf.fit(x_train, y_train_2)

In [None]:
# predict takes a collection of samples, so the single digit is wrapped in a list.
clf.predict([some_digit])

In [None]:
from sklearn.model_selection import cross_val_score
# Accuracy of the classifier on each of 3 cross-validation folds of the training data.
a = cross_val_score(
    clf,
    x_train,
    y_train_2,
    scoring="accuracy",
    cv=3,
)

In [None]:
# Mean of the per-fold accuracies returned by cross_val_score.
a.mean()

## Quiz

In [None]:
# Create a classifier that always classifies a digit as "not 2".
# Roughly 90% of the digits are not 2, so it would still reach about 90% accuracy.

In [None]:
from sklearn.model_selection import cross_val_predict
# Predictions for every training sample, each made by a model that did not
# see that sample during fitting (3-fold cross-validation).
y_train_pred = cross_val_predict(
    clf,
    x_train,
    y_train_2,
    cv=3,
)

In [None]:
# Display the cross-validated predictions.
y_train_pred


## Calculating confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Compare the true "is 2" labels with the cross-validated predictions.
confusion_matrix(y_train_2, y_train_pred)

In [None]:
# Comparing the labels with themselves shows the confusion matrix of
# perfect predictions.
confusion_matrix(y_train_2, y_train_2)

## Precision and Recall

In [None]:
from sklearn.metrics import precision_score, recall_score

In [None]:
# Precision: of the samples predicted to be a 2, the fraction that really are a 2.
precision_score(y_train_2, y_train_pred)

In [None]:
# Recall: of the samples that really are a 2, the fraction predicted to be a 2.
recall_score(y_train_2, y_train_pred)

## F1 Score

In [None]:
from sklearn.metrics import f1_score

In [None]:
# F1 score: the harmonic mean of precision and recall for the cross-validated predictions.
f1_score(y_train_2, y_train_pred)

## Precision Recall Curve

In [None]:
from sklearn.metrics import precision_recall_curve

In [None]:
# Same 3-fold cross-validation as before, but collecting the classifier's
# decision_function output rather than its predicted classes.
y_scores = cross_val_predict(
    clf,
    x_train,
    y_train_2,
    method="decision_function",
    cv=3,
)

In [None]:
# Display the cross-validated decision scores.
y_scores

In [None]:
# Compute precision and recall at each candidate decision threshold over the scores.
precisions, recalls, thresholds = precision_recall_curve(y_train_2, y_scores)

In [None]:
# Display the precision values returned by precision_recall_curve.
precisions

In [None]:
# Display the recall values returned by precision_recall_curve.
recalls

In [None]:
# Display the decision thresholds returned by precision_recall_curve.
thresholds

## Plotting the Precision Recall Curve

In [None]:
# Plot precision and recall against the decision threshold.
# precision_recall_curve returns one more precision/recall value than it
# returns thresholds, so the last element of each is dropped to align lengths.
for values, line_style, series_name in (
    (precisions, "b--", "Precision"),
    (recalls, "g-", "Recall"),
):
    plt.plot(thresholds, values[:-1], line_style, label=series_name)

plt.xlabel("Thresholds")
plt.legend(loc="upper left")
plt.ylim([0, 1])  # both metrics are fractions between 0 and 1
plt.show()