# MNIST-style Digits Classification (scikit-learn `load_digits`, 8x8 images)

## Grid Search for Best Hyperparameters

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn import svm
import multiprocessing

In [2]:
# Number of CPUs reported by the OS; printed so the reader can see how many
# workers this machine offers. `cpus` is reused later as the parallel job count.
cpus = multiprocessing.cpu_count()
print(cpus)

8


In [3]:
# Search space for the SVM grid search: a single grid that crosses
# 4 kernels with 6 values of the regularisation parameter C (24 candidates).
params = [
    dict(
        kernel=['linear', 'rbf', 'poly', 'sigmoid'],
        C=[0.1, 0.2, 0.5, 1.0, 2.0, 5.0],
    ),
]

#### Dataset Preparation

In [4]:
# Load the bundled digits dataset, then pull out the feature matrix (X)
# and the label vector (Y) in one step. `data` keeps the full bunch object.
data = load_digits()
X, Y = data.data, data.target
print(X.shape, Y.shape)

(1797, 64) (1797,)


#### Logistic Regression

In [5]:
# Baseline model: logistic regression using the lbfgs solver with a raised
# iteration cap (max_iter=5000).
lr = LogisticRegression(
    solver='lbfgs',
    multi_class='auto',
    max_iter=5000,
)

# Fit on the full dataset so `lr` is left in a trained state.
lr.fit(X, Y)

# 5-fold cross-validated accuracy, averaged over the folds.
# NOTE(review): per the scikit-learn docs cross_val_score fits clones of the
# estimator, so this score should not depend on the full-data fit above.
print(
    "Cross Validation Accuracy : ",
    np.mean(cross_val_score(estimator=lr, X=X, y=Y, scoring='accuracy', cv=5)),
)

Cross Validation Accuracy :  0.9137604456824512


#### SVM

In [6]:
# Baseline SVM: default SVC settings apart from gamma='auto'.
svc = svm.SVC(gamma='auto')

# Fit on the full dataset so `svc` is left in a trained state.
svc.fit(X, Y)

# 5-fold cross-validated accuracy, averaged over the folds; this is the
# untuned reference number the grid search is compared against.
print(
    "Cross Validation Accuracy : ",
    np.mean(cross_val_score(estimator=svc, X=X, y=Y, scoring='accuracy', cv=5)),
)

Cross Validation Accuracy :  0.448545341999381


## Finding the Right Set of Parameters - Grid Search

In [7]:
# Exhaustive search over `params` for an SVC (gamma='auto'), scored by
# 5-fold cross-validated accuracy and parallelised across `cpus` workers.
gs = GridSearchCV(
    estimator=svm.SVC(gamma='auto'),
    param_grid=params,
    scoring="accuracy",
    cv=5,
    n_jobs=cpus,
)

In [8]:
# Run the grid search on the full dataset. The call is the cell's last
# expression, so the fitted GridSearchCV's repr is shown as the cell output.
gs.fit(X, Y)

GridSearchCV(cv=5, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=8,
             param_grid=[{'C': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0],
                          'kernel': ['linear', 'rbf', 'poly', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [9]:
# Show the SVC configuration (kernel, C, ...) that the grid search selected.
print(gs.best_estimator_)

SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)


In [10]:
# Cross-validated accuracy achieved by the selected parameter combination
# (the search was configured with scoring="accuracy").
print(gs.best_score_)

0.96884246363355
