Sklearn grid_search to evaluate optimal C and gamma parameters

C is the cost of classification 
Gamma is the parameter of a Gaussian Kernel (to handle non-linear classification).


- A large C gives you low bias and high variance. Low bias because you penalize the cost of missclasification a lot.
- A small C gives you higher bias and lower variance.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# open dataset
from sklearn.datasets import load_iris

iris = load_iris()
X, y = iris.data, iris.target

# create model
from sklearn.svm import SVC

Cs = [0.001, 0.01, 0.1, 1, 10]
gammas = [0.001, 0.01, 0.1, 1]

# cross-validation model for each combination
from sklearn.cross_validation import cross_val_score
for C in Cs:
    for gamma in gammas:
        svc_model = SVC(C=C, gamma=gamma)
        scores = cross_val_score(svc_model, X, y)
        print('C: {0:.3f}, gamma: {1:.3f}, average score: {2:.3f}'.format(C, gamma, np.mean(scores)))

C: 0.001, gamma: 0.001, average score: 0.914
C: 0.001, gamma: 0.010, average score: 0.914
C: 0.001, gamma: 0.100, average score: 0.920
C: 0.001, gamma: 1.000, average score: 0.954
C: 0.010, gamma: 0.001, average score: 0.914
C: 0.010, gamma: 0.010, average score: 0.914
C: 0.010, gamma: 0.100, average score: 0.920
C: 0.010, gamma: 1.000, average score: 0.954
C: 0.100, gamma: 0.001, average score: 0.914
C: 0.100, gamma: 0.010, average score: 0.914
C: 0.100, gamma: 0.100, average score: 0.927
C: 0.100, gamma: 1.000, average score: 0.967
C: 1.000, gamma: 0.001, average score: 0.914
C: 1.000, gamma: 0.010, average score: 0.947
C: 1.000, gamma: 0.100, average score: 0.973
C: 1.000, gamma: 1.000, average score: 0.974
C: 10.000, gamma: 0.001, average score: 0.947
C: 10.000, gamma: 0.010, average score: 0.973
C: 10.000, gamma: 0.100, average score: 0.974
C: 10.000, gamma: 1.000, average score: 0.974


In [37]:
from sklearn.grid_search import GridSearchCV

param_grid = {'C': Cs, 'gamma': gammas}
print('param_grid = {}'.format(param_grid))

param_grid = {'gamma': [0.001, 0.01, 0.1, 1], 'C': [0.001, 0.01, 0.1, 1, 10]}


In [39]:
grid_search = GridSearchCV(SVC(), param_grid, verbose=3, cv=5)
grid_search.fit(X, y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] gamma=0.001, C=0.001 ............................................
[CV] ................... gamma=0.001, C=0.001, score=0.866667 -   0.0s
[CV] gamma=0.001, C=0.001 ............................................
[CV] ................... gamma=0.001, C=0.001, score=0.966667 -   0.0s
[CV] gamma=0.001, C=0.001 ............................................
[CV] ................... gamma=0.001, C=0.001, score=0.833333 -   0.0s
[CV] gamma=0.001, C=0.001 ............................................
[CV] ................... gamma=0.001, C=0.001, score=0.966667 -   0.0s
[CV] gamma=0.001, C=0.001 ............................................
[CV] ................... gamma=0.001, C=0.001, score=0.933333 -   0.0s
[CV] gamma=0.01, C=0.001 .............................................
[CV] .................... gamma=0.01, C=0.001, score=0.866667 -   0.0s
[CV] gamma=0.01, C=0.001 .............................................
[CV] ..........

[Parallel(n_jobs=1)]: Done  31 tasks       | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.2s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'gamma': [0.001, 0.01, 0.1, 1], 'C': [0.001, 0.01, 0.1, 1, 10]},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=3)

In [40]:
print(grid_search.best_params_)
print(grid_search.best_score_)

{'gamma': 0.1, 'C': 1}
0.98
