## Grid Search Cross Validation - SKLearn
In this notebook we will use scikit-learn's built-in grid search (`GridSearchCV`) to determine the best possible values for `n_neighbors` in KNN and for `C` and `gamma` in
SVM. This is a very important part of training any SVM-based model, since well-chosen values of `C` and `gamma` can significantly improve its score/accuracy.

In [1]:
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Load the bundled Iris dataset and separate the feature matrix from the labels
iris = datasets.load_iris()
X, Y = iris.data, iris.target

# Sanity-check the dimensions of features and labels
(X.shape, Y.shape)

((150, 4), (150,))

In [3]:
# Hold out part of the data for testing; the fixed random_state keeps the
# split identical from run to run.
x_tr, x_ts, y_tr, y_ts = train_test_split(
    X,
    Y,
    random_state=0,
)

In [4]:
# Sizes of the training and test feature matrices
(x_tr.shape, x_ts.shape)

((112, 4), (38, 4))

## Grid Search using KNN

In [5]:
clf = KNeighborsClassifier()

# GridSearchCV takes the base classifier plus a dictionary that maps each
# hyperparameter name to the list of candidate values to evaluate.
grid = {"n_neighbors": [3, 5, 7, 9, 11]}

# Pin the number of folds explicitly. The recorded run relied on the library
# default (shown as cv='warn' in the repr), which is a deprecated default, so
# leaving it implicit makes the scores depend on the installed scikit-learn
# version. cv=3 matches the three splits reported in cv_results_.
abc = GridSearchCV(clf, grid, cv=3)

# Cross-validate every candidate value of n_neighbors on the training split
abc.fit(x_tr, y_tr)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': [3, 5, 7, 9, 11]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [6]:
# Estimator configured with the best cross-validated value of K (n_neighbors)
# found by the grid search above
abc.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')

In [7]:
# Full cross-validation report for every candidate value of K: fit/score
# timings, the parameter settings tried, and the test score on each split
abc.cv_results_



{'mean_fit_time': array([0.00066694, 0.00100557, 0.        , 0.00066868, 0.00033466]),
 'std_fit_time': array([4.71605422e-04, 2.02616831e-06, 0.00000000e+00, 4.72831805e-04,
        4.73281011e-04]),
 'mean_score_time': array([0.00134826, 0.00033434, 0.00100096, 0.00101288, 0.00135008]),
 'std_score_time': array([4.87723418e-04, 4.72831444e-04, 9.98958356e-07, 1.49493498e-05,
        4.69427124e-04]),
 'param_n_neighbors': masked_array(data=[3, 5, 7, 9, 11],
              mask=[False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'n_neighbors': 3},
  {'n_neighbors': 5},
  {'n_neighbors': 7},
  {'n_neighbors': 9},
  {'n_neighbors': 11}],
 'split0_test_score': array([0.92307692, 0.94871795, 0.94871795, 0.97435897, 0.97435897]),
 'split1_test_score': array([1.        , 0.97297297, 1.        , 0.97297297, 0.97297297]),
 'split2_test_score': array([0.94444444, 0.94444444, 0.91666667, 0.91666667, 0.91666667]),
 'mean_test_score': array([0.9553

## Grid Search using SVM

In [9]:
clf2 = SVC()

# Two hyperparameters are searched jointly here, C and gamma; every
# combination (6 values of C x 4 values of gamma) is evaluated.
grid = {
    'C': [1e2, 1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [1e-3, 5e-4, 1e-4, 5e-3],
}

# Pin the number of folds explicitly. The recorded run relied on the library
# default (shown as cv='warn' in the repr), which is a deprecated default, so
# leaving it implicit makes the selected C/gamma depend on the installed
# scikit-learn version. cv=3 is assumed to be the value that default resolved
# to in the recorded run - TODO confirm against the installed version.
abc2 = GridSearchCV(clf2, grid, cv=3)
abc2.fit(x_tr, y_tr)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [100.0, 1000.0, 5000.0, 10000.0, 50000.0, 100000.0], 'gamma': [0.001, 0.0005, 0.0001, 0.005]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [10]:
# Estimator configured with the best cross-validated combination of C and
# gamma found by the grid search above
abc2.best_estimator_

SVC(C=1000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)