## Grid Search on MNIST Classification 

In [1]:
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV,cross_val_score
from sklearn import svm
from sklearn.linear_model import LogisticRegression

In [2]:
#Dataset Preparation
data=load_digits()
X=data.data
Y=data.target
print(X.shape,Y.shape)

(1797, 64) (1797,)


### Logistic Regression

In [5]:
lr=LogisticRegression(solver="liblinear",multi_class="auto")
lr.fit(X,Y)
cross_val_score(lr,X,Y,scoring="accuracy",cv=5).mean()

0.9210188113360701

### Support Vector Machines

In [7]:
svc=svm.SVC(gamma="scale")
svc.fit(X,Y)
cross_val_score(svc,X,Y,scoring="accuracy",cv=5).mean()

0.9638434678923486

In [8]:
svc=svm.SVC(gamma="auto")
svc.fit(X,Y)
cross_val_score(svc,X,Y,scoring="accuracy",cv=5).mean()

0.44878680061604637

## Grid Search is used to tune the hyperparameters of an estimator
#### It consists of -
- estimator
- parameters
- cross validation function
- score function

In [9]:
import multiprocessing
cpus=multiprocessing.cpu_count()
print(cpus)

8


In [13]:
params={
    'kernel':['linear','rbf','poly','sigmoid'],
    'C':[0.1,0.2,0.5,1.0,2.0,5.0]
}

In [14]:
gs=GridSearchCV(estimator=svm.SVC(),param_grid=params,scoring="accuracy",cv=5,n_jobs=cpus)
gs

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=8,
             param_grid={'C': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0],
                         'kernel': ['linear', 'rbf', 'poly', 'sigmoid']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [15]:
gs.fit(X,Y)



GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=8,
             param_grid={'C': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0],
                         'kernel': ['linear', 'rbf', 'poly', 'sigmoid']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [16]:
gs.best_estimator_

SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='poly', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [17]:
gs.best_score_

0.9699499165275459