In [1]:
import pandas as pd
import time

In [2]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [3]:
digits = load_digits()

In [4]:
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.33, random_state=42)

In [5]:
svm = SVC()
params = {"C":[0.1, 1, 10], "gamma":[0.001, 0.01, 0.1]}
clf = GridSearchCV(svm, params, cv=5, iid=True, return_train_score=False)

In [6]:
t1 = time.time()
clf.fit(X_train, y_train)
t2 = time.time()

In [7]:
print('take {0:.4f}sec'.format(t2 -t1))

take 8.8846sec


In [8]:
result_df = pd.DataFrame(clf.cv_results_)

In [9]:
result_df.sort_values(by="rank_test_score", inplace=True)

In [10]:
print(result_df[["rank_test_score", "params", "mean_test_score"]])

   rank_test_score                      params  mean_test_score
6                1   {'C': 10, 'gamma': 0.001}         0.989194
3                2    {'C': 1, 'gamma': 0.001}         0.987531
0                3  {'C': 0.1, 'gamma': 0.001}         0.955112
7                4    {'C': 10, 'gamma': 0.01}         0.720698
4                5     {'C': 1, 'gamma': 0.01}         0.694098
1                6   {'C': 0.1, 'gamma': 0.01}         0.127182
8                7     {'C': 10, 'gamma': 0.1}         0.109726
5                8      {'C': 1, 'gamma': 0.1}         0.108894
2                9    {'C': 0.1, 'gamma': 0.1}         0.105569


In [11]:
best_model = clf.best_estimator_
pred = pd.DataFrame(best_model.predict(X_test))

In [12]:
pred_pd = pd.DataFrame(pred)

In [13]:
pred_pd.head()

Unnamed: 0,0
0,6
1,9
2,3
3,7
4,2


In [14]:
target_test_pd = pd.DataFrame(y_test)

In [15]:
target_test_pd.head()

Unnamed: 0,0
0,6
1,9
2,3
3,7
4,2


In [16]:
from scipy import stats
from sklearn.model_selection import RandomizedSearchCV

In [17]:
params = {"C":stats.expon(scale=1), "gamma":stats.expon(scale=0.01)}

In [18]:
clf = RandomizedSearchCV(svm, params, cv=5, iid=True, return_train_score=False, n_iter=30)

In [19]:
t1 = time.time()
clf.fit(X_train, y_train)
t2 = time.time()

In [20]:
print('take {0:.4f}sec'.format(t2 -t1))

take 26.0572sec


In [21]:
result_df = pd.DataFrame(clf.cv_results_)

In [22]:
result_best_parameter = clf.best_params_

In [23]:
result_best_parameter

{'C': 0.6366986474795883, 'gamma': 0.0012562456581469237}

In [24]:
result_best_score = clf.best_score_

In [25]:
result_best_score

0.9833748960931006

In [26]:
result_df.sort_values(by="rank_test_score", inplace=True)

In [27]:
print(result_df[["rank_test_score", "param_C", "param_gamma", "mean_test_score"]])

    rank_test_score     param_C  param_gamma  mean_test_score
28                1    0.636699   0.00125625         0.983375
1                 2    0.652845   0.00202755         0.981712
29                3     1.44986   0.00323747         0.980050
13                4    0.582641  0.000657733         0.979219
5                 5    0.439779   0.00138273         0.977556
0                 6    0.220923    0.0011255         0.970906
12                7    0.160766  0.000861852         0.965087
2                 8     1.01791   0.00446284         0.954281
10                9     0.32845   0.00311996         0.944306
18               10     3.17578   0.00715047         0.847880
25               11     3.91969   0.00912531         0.755611
23               12    0.824058   0.00935155         0.585204
19               13     1.64895    0.0142624         0.503741
20               14     1.52493    0.0154243         0.443890
14               15     1.08131    0.0190864         0.330840
3       

In [28]:
best_model = clf.best_estimator_
pred = pd.DataFrame(best_model.predict(X_test))

In [29]:
pred_pd = pd.DataFrame(pred)

In [30]:
pred_pd.head()

Unnamed: 0,0
0,6
1,9
2,3
3,7
4,2


In [31]:
target_test_pd.head()

Unnamed: 0,0
0,6
1,9
2,3
3,7
4,2


In [32]:
params = {"C":stats.expon(scale=3), "gamma":stats.expon(scale=0.001)}

In [33]:
clf = RandomizedSearchCV(svm, params, cv=5, iid=True, return_train_score=False, n_iter=30)

In [34]:
t1 = time.time()
clf.fit(X_train, y_train)
t2 = time.time()

In [35]:
print('take {0:.4f}sec'.format(t2 -t1))

take 15.6384sec


In [36]:
result_df = pd.DataFrame(clf.cv_results_)

In [37]:
result_best_parameter = clf.best_params_

In [38]:
result_best_parameter

{'C': 1.508473945884405, 'gamma': 0.0008411761719782974}

In [39]:
result_best_score = clf.best_score_

In [40]:
result_best_score

0.9891936824605154