In [2]:
from sklearn import datasets, svm
# Load the digits dataset and pull out the feature matrix and the labels.
digits = datasets.load_digits()
x_digits, y_digits = digits.data, digits.target

# Fit a linear-kernel SVC on everything except the last 100 samples,
# then report its score on those 100 held-out samples.
svc = svm.SVC(C=1, kernel='linear')
svc.fit(x_digits[:-100], y_digits[:-100])
svc.score(x_digits[-100:], y_digits[-100:])

0.97999999999999998

## K-Fold Cross-Validation

In [23]:
from sklearn.model_selection import KFold, cross_val_score
# Split the data into k consecutive folds; each fold is used once as the test set.
k = 10
k_fold = KFold(n_splits=k)

# Method 1
# Loop over the folds by hand: refit on the training indices, score on the test indices.
manual_scores = [
    svc.fit(x_digits[train], y_digits[train]).score(x_digits[test], y_digits[test])
    for train, test in k_fold.split(x_digits)
]

# Method 2
# n_jobs=-1 means that the computation will be dispatched on all the CPUs of the computer.
# Note: the feature matrix is named `x_digits` (lower-case x); `X_digits` is not defined.
cross_val_score(svc, x_digits, y_digits, cv=k_fold, n_jobs=-1)


array([ 0.93888889,  0.99444444,  0.93333333,  0.96666667,  0.96111111,
        0.98888889,  0.96666667,  0.98882682,  0.93296089,  0.96648045])

## Exercise
Find the cross-validation score of an SVC estimator with a linear kernel as a function of the parameter C (use a logarithmic grid of points, from 1 to 10).

In [4]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn import datasets, svm

# Load the digits data again so this cell does not depend on earlier cells.
digits = datasets.load_digits()
X = digits.data
y = digits.target

svc = svm.SVC(kernel='linear')
# Ten candidate values of C, evenly spaced on a log scale from 1e-10 to 1.
C_s = np.logspace(-10, 0, 10)

# Mean cross-validation score for each candidate C. No `cv` is passed, so
# cross_val_score uses its default splitting strategy. set_params updates
# the same estimator in place before each evaluation.
scores = [np.mean(cross_val_score(svc.set_params(C=c), X, y, n_jobs=1)) for c in C_s]

# Bare last expression so the notebook displays the list of mean scores.
scores



[0.15552937214547155,
 0.15552937214547155,
 0.15552937214547155,
 0.15552937214547155,
 0.15552937214547155,
 0.90260270247335728,
 0.9482070815179453,
 0.94490714734074643,
 0.94379972762867548,
 0.94379972762867548]

## Grid-search

In [14]:
from sklearn.model_selection import GridSearchCV, cross_val_score
# Candidate values of C: ten points log-spaced between 1e-6 and 1e-1.
Cs = np.logspace(-6, -1, 10)
param_grid = {'C': Cs}
clf = GridSearchCV(estimator=svc, param_grid=param_grid, n_jobs=-1)

# Run the search on the first 1000 samples only; the remainder is kept
# aside to score the tuned model afterwards.
train_end = 1000
clf.fit(x_digits[:train_end], y_digits[:train_end])

print(clf.best_score_)
print(clf.best_estimator_.C)

# Score of the search object on the samples not used for the search.
print(clf.score(x_digits[train_end:], y_digits[train_end:]))

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'C': array([  1.00000e-06,   3.59381e-06,   1.29155e-05,   4.64159e-05,
         1.66810e-04,   5.99484e-04,   2.15443e-03,   7.74264e-03,
         2.78256e-02,   1.00000e-01])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)
0.925
0.00774263682681
0.943538268507


## Cross-validated estimators

In [18]:
from sklearn import linear_model, datasets
# Load the diabetes dataset and split it into features and target.
diabetes = datasets.load_diabetes()
x, y = diabetes.data, diabetes.target

# Fit a LassoCV estimator on the full dataset.
lasso = linear_model.LassoCV()
lasso.fit(x, y)

# Display the alpha_ attribute set on the estimator by the fit.
lasso.alpha_

0.012291895087486161