In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Read the training CSV and separate the 'label' column (target) from the
# remaining columns (features), then report both shapes.
df = pd.read_csv("train.csv")
y = df['label']
X = df.drop(columns=['label'])
for part in (X, y):
    print(part.shape)

In [None]:
# Take the row at position 7, reshape its values to 28x28 and draw it with the
# "binary" colormap; the axes are hidden.
some_digit = X.iloc[7]
some_digi_img = np.array(some_digit).reshape((28, 28))
plt.imshow(some_digi_img, cmap="binary", interpolation="nearest")
plt.axis("off")
plt.show()

In [None]:
# Positional lookup, matching the X.iloc[7] used to pick the sample image
# (y[7] is label-based and only agrees while the index is the default range).
print(y.iloc[7])

In [None]:
# Store the labels as unsigned 8-bit integers.
y = y.astype("uint8")

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The shuffle seed is fixed (same value
# the estimators in this notebook use) so the split, and every metric computed
# from it, is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Convert each of the four split pieces to a NumPy array.
X_train, X_test, y_train, y_test = (
    np.array(part) for part in (X_train, X_test, y_train, y_test)
)

In [None]:
# Boolean targets for the binary task: True where the label equals 3.
y_train_3, y_test_3 = (labels == 3 for labels in (y_train, y_test))

In [None]:
from sklearn.linear_model import SGDClassifier

# Train an SGD classifier (fixed seed) on the "label == 3" boolean targets.
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X=X_train, y=y_train_3)

In [None]:
# Predict for the single sample row; it is wrapped in a list to form a batch of one.
sgd_clf.predict(X=[some_digit])

In [None]:
from sklearn.model_selection import cross_val_score

# 3-fold cross-validated accuracy of the binary classifier on the training set.
cross_val_score(
    estimator=sgd_clf,
    X=X_train,
    y=y_train_3,
    cv=3,
    scoring="accuracy",
)

In [None]:
from sklearn.model_selection import cross_val_predict

# Out-of-fold predictions (3 folds) for every training sample.
y_train_pred = cross_val_predict(estimator=sgd_clf, X=X_train, y=y_train_3, cv=3)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the true boolean targets against the out-of-fold predictions.
confusion_matrix(y_true=y_train_3, y_pred=y_train_pred)

In [None]:
# Reference case: use the targets themselves as the "predictions" to see the
# confusion matrix a classifier with no errors would produce.
y_train_perfect_predictions = y_train_3
confusion_matrix(y_true=y_train_3, y_pred=y_train_perfect_predictions)

In [None]:
from sklearn.metrics import precision_score, recall_score

# Precision of the out-of-fold predictions for the binary task.
precision_score(y_true=y_train_3, y_pred=y_train_pred)

In [None]:
# Recall of the out-of-fold predictions for the binary task.
recall_score(y_true=y_train_3, y_pred=y_train_pred)

In [None]:
from sklearn.metrics import f1_score

# F1 score of the out-of-fold predictions for the binary task.
f1_score(y_true=y_train_3, y_pred=y_train_pred)

In [None]:
# Out-of-fold decision-function scores (instead of class predictions), 3 folds.
y_scores = cross_val_predict(
    estimator=sgd_clf,
    X=X_train,
    y=y_train_3,
    cv=3,
    method="decision_function",
)

In [None]:
from sklearn.metrics import roc_curve

# ROC curve points for the SGD decision scores.
fpr, tpr, thresholds = roc_curve(y_true=y_train_3, y_score=y_scores)

In [None]:
def plot_roc_curve(fpr, tpr, label=None):
    """Draw a ROC curve and a dashed diagonal reference line on the current axes.

    Args:
        fpr: x-values of the curve (false positive rates).
        tpr: y-values of the curve (true positive rates).
        label: Optional legend entry for the curve. The legend itself is not
            drawn here; the caller must call ``plt.legend()`` to show it.
    """
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')  # dashed black diagonal from (0, 0) to (1, 1)
    # Label the axes so the figure can be read on its own.
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")

# Render the SGD classifier's ROC curve.
plot_roc_curve(fpr=fpr, tpr=tpr)
plt.show()

In [None]:
from sklearn.metrics import roc_auc_score

# Area under the ROC curve for the SGD decision scores.
roc_auc_score(y_true=y_train_3, y_score=y_scores)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest (fixed seed) on the same binary task; collect out-of-fold class
# probabilities over 3 folds.
forest_clf = RandomForestClassifier(random_state=42)
y_probs_forest = cross_val_predict(
    estimator=forest_clf,
    X=X_train,
    y=y_train_3,
    cv=3,
    method="predict_proba",
)


In [None]:
# Use the second probability column (index 1) as the forest's score, then
# compute its ROC curve points.
y_scores_forest = y_probs_forest[:, 1]
fpr_forest, tpr_forest, thresholds_forest = roc_curve(
    y_true=y_train_3,
    y_score=y_scores_forest,
)

In [None]:
# Overlay both ROC curves. The SGD curve is a dotted blue line; the forest curve
# goes through plot_roc_curve so it also gets the dashed diagonal reference.
plt.plot(fpr, tpr, "b:", label="SGD")
plot_roc_curve(fpr_forest, tpr_forest, "Random Forest")
# Label the axes so the figure can be read on its own.
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="lower right")
plt.show()

In [None]:
# Area under the ROC curve for the random forest scores.
roc_auc_score(y_true=y_train_3, y_score=y_scores_forest)

In [None]:
# Out-of-fold class predictions from the forest, then its precision and recall
# (printed in that order).
y_train_pred_forest = cross_val_predict(estimator=forest_clf, X=X_train, y=y_train_3, cv=3)
for metric in (precision_score, recall_score):
    print(metric(y_train_3, y_train_pred_forest))

In [None]:
# Refit the same SGD estimator on the original digit labels (y_train rather
# than the boolean y_train_3) and predict the sample row.
sgd_clf.fit(X=X_train, y=y_train)
sgd_clf.predict(X=[some_digit])

In [None]:
# Decision-function scores for the sample row; displayed as the cell output.
some_digit_scores = sgd_clf.decision_function(X=[some_digit])
some_digit_scores

In [None]:
# Index of the highest decision score.
some_digit_scores.argmax()

In [None]:
# Show the class labels stored on the fitted SGD classifier.
sgd_clf.classes_

In [None]:
# Look up the label of the highest-scoring class. The index is derived from the
# scores rather than hard-coded, so it stays correct if the data or split changes.
sgd_clf.classes_[np.argmax(some_digit_scores)]

In [None]:
from sklearn.multiclass import OneVsOneClassifier

# Wrap a fresh SGD classifier (fixed seed) in a one-vs-one meta-classifier,
# train it on the digit labels and predict the sample row.
ovo_clf = OneVsOneClassifier(estimator=SGDClassifier(random_state=42))
ovo_clf.fit(X=X_train, y=y_train)
ovo_clf.predict(X=[some_digit])

In [None]:
# Count the underlying estimators held by the fitted one-vs-one wrapper.
len(ovo_clf.estimators_)

In [None]:
# Train the random forest on the digit labels and predict the sample row.
forest_clf.fit(X=X_train, y=y_train)
forest_clf.predict(X=[some_digit])

In [None]:
# Per-class probabilities the forest assigns to the sample row.
forest_clf.predict_proba(X=[some_digit])

In [None]:
# 3-fold cross-validated accuracy of the SGD classifier on the digit labels
# (unscaled inputs).
cross_val_score(
    estimator=sgd_clf,
    X=X_train,
    y=y_train,
    cv=3,
    scoring="accuracy",
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Cast the training features to float64 and standardise them with a scaler
# fitted on the whole training set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X=X_train.astype("float64"))
# cross_val_score(sgd_clf, X_train_scaled, y_train, cv=3, scoring="accuracy")


In [None]:
from sklearn.pipeline import make_pipeline

# Scale inside the cross-validation loop: the pipeline refits the scaler on each
# training fold only, so statistics from the held-out fold never leak into the
# model being evaluated (a scaler pre-fitted on all of X_train would leak them).
scaled_sgd_clf = make_pipeline(StandardScaler(), sgd_clf)
y_train_pred = cross_val_predict(
    scaled_sgd_clf,
    X_train.astype(np.float64),
    y_train,
    cv=3,
)
# Confusion matrix of true digit labels against out-of-fold predictions;
# displayed as the cell output.
conf_mx = confusion_matrix(y_train, y_train_pred)
conf_mx

In [None]:
# Render the confusion matrix as an image using the "gray" colormap.
plt.matshow(conf_mx, cmap="gray")
plt.show()