# Import all required libraries


In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline


# Load train and test data

## Load the train and test data and split features from labels

In [None]:
# Read the train and test sets from the CSV files under ./input.
train = pd.read_csv("./input/train.csv")
test = pd.read_csv("./input/test.csv")

# Column 0 is used as the target; every remaining column is a feature.
# NOTE(review): assumes test.csv also carries the label in column 0 -- confirm.
y_train, X_train = train.iloc[:, 0], train.iloc[:, 1:]
y_test, X_test = test.iloc[:, 0], test.iloc[:, 1:]

## Rapid EDA

In [None]:
# Take the training row at position 500 as a sample and view it as a
# 28x28 grid so it can be drawn as an image.
some_digit = X_train.iloc[500].to_numpy()
some_digit_image = np.reshape(some_digit, (28, 28))

# Draw the sample in grayscale with the axes hidden.
plt.imshow(some_digit_image, cmap=plt.get_cmap('gray'))
plt.axis("off")
plt.show()

# Using SGD Classifier

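Scikit-Learn detects when you try to use a binary classification algorithm for a multiclass classification task, and it automatically runs a one-versus-all (OvA, also called one-versus-rest) strategy, training one binary classifier per class.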

In [None]:
from sklearn.linear_model import SGDClassifier

# Linear classifier trained with stochastic gradient descent; the seed is
# fixed so repeated runs give the same model.
sgd_clf = SGDClassifier(max_iter=1000, tol=1e-3, random_state=42)
sgd_clf.fit(X_train, y_train)

In [None]:
# The estimator expects a 2-D batch, so the sample is passed as a single row.
print(sgd_clf.predict(some_digit.reshape(1, -1)))

# Decision scores for the same sample; shown as the cell output.
some_digit_scores = sgd_clf.decision_function(some_digit.reshape(1, -1))
some_digit_scores

# OneVsOneClassifier


In [None]:
from sklearn.multiclass import OneVsOneClassifier

# Force the one-vs-one strategy by wrapping an SGD base estimator that uses
# the same settings as the standalone SGD classifier.
ovo_clf = OneVsOneClassifier(
    SGDClassifier(random_state=42, max_iter=1000, tol=1e-3)
)
ovo_clf.fit(X_train, y_train)

In [None]:
# Number of fitted binary estimators held by the one-vs-one wrapper.
print(len(ovo_clf.estimators_))
# Prediction for the sample digit, passed as a single-row batch.
ovo_clf.predict(some_digit.reshape(1, -1))


# Random Forest Classifier


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 100 trees; the seed is fixed so runs are repeatable.
forest_clf = RandomForestClassifier(random_state=42, n_estimators=100)
forest_clf.fit(X_train, y_train)

In [None]:
# Predicted class for the sample digit, passed as a single-row batch.
forest_clf.predict(some_digit.reshape(1, -1))

In [None]:
# Class probability estimates for the sample digit (one row, one column per class).
forest_clf.predict_proba(some_digit.reshape(1, -1))

# Evaluate Classifier Performance

In [None]:
from sklearn.model_selection import cross_val_score

# 3-fold cross-validated accuracy for each classifier, printed in the order
# SGD, one-vs-one SGD, random forest.
for estimator in (sgd_clf, ovo_clf, forest_clf):
    print(cross_val_score(estimator, X_train, y_train, cv=3, scoring="accuracy"))

# Scaling the input

Objective: increase accuracy by standardizing the input features.

## Scaling the input

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature column; the data is cast to float64 before the
# scaler is fitted and applied in a single step.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(
    X_train.astype(np.float64)
)


## Evaluate Classifier performance after scaling

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Scoring on data that was scaled once over the full training set lets the
# scaler see every validation fold (its mean/variance include held-out rows),
# which leaks information into cross-validation. Putting the scaler in a
# pipeline makes cross_val_score refit it on the training part of each fold.
X_train_float = X_train.astype(np.float64)

# 3-fold accuracy with per-fold scaling, printed in the order
# SGD, one-vs-one SGD, random forest.
for estimator in (sgd_clf, ovo_clf, forest_clf):
    scaled_model = make_pipeline(StandardScaler(), estimator)
    print(cross_val_score(scaled_model, X_train_float, y_train, cv=3, scoring="accuracy"))