In [1]:
import pandas as pd
import time

In [2]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [3]:
digits = load_digits()

In [4]:
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.33, random_state=42)

In [5]:
svm = SVC()
params = {"C":[0.1, 1, 10], "gamma":[0.001, 0.01, 0.1]}
clf = GridSearchCV(svm, params, cv=5, iid=True, return_train_score=False)

In [6]:
t1 = time.time()
clf.fit(X_train, y_train)
t2 = time.time()

In [7]:
print('take {0:.4f}sec'.format(t2 -t1))

take 9.0575sec


In [8]:
result_df = pd.DataFrame(clf.cv_results_)

In [9]:
result_df.sort_values(by="rank_test_score", inplace=True)

In [10]:
print(result_df[["rank_test_score", "params", "mean_test_score"]])

   rank_test_score                      params  mean_test_score
6                1   {'C': 10, 'gamma': 0.001}         0.989194
3                2    {'C': 1, 'gamma': 0.001}         0.987531
0                3  {'C': 0.1, 'gamma': 0.001}         0.955112
7                4    {'C': 10, 'gamma': 0.01}         0.720698
4                5     {'C': 1, 'gamma': 0.01}         0.694098
1                6   {'C': 0.1, 'gamma': 0.01}         0.127182
8                7     {'C': 10, 'gamma': 0.1}         0.109726
5                8      {'C': 1, 'gamma': 0.1}         0.108894
2                9    {'C': 0.1, 'gamma': 0.1}         0.105569


In [11]:
best_model = clf.best_estimator_
pred = pd.DataFrame(best_model.predict(X_test))

In [12]:
pred_pd = pd.DataFrame(pred)

In [13]:
pred_pd.head()

Unnamed: 0,0
0,6
1,9
2,3
3,7
4,2


In [14]:
target_test_pd = pd.DataFrame(y_test)

In [15]:
target_test_pd.head()

Unnamed: 0,0
0,6
1,9
2,3
3,7
4,2


In [16]:
from scipy import stats
from sklearn.model_selection import RandomizedSearchCV

In [17]:
params = {"C":stats.expon(scale=1), "gamma":stats.expon(scale=0.01)}

In [18]:
clf = RandomizedSearchCV(svm, params, cv=5, iid=True, return_train_score=False, n_iter=30)

In [19]:
t1 = time.time()
clf.fit(X_train, y_train)
t2 = time.time()

In [20]:
print('take {0:.4f}sec'.format(t2 -t1))

take 29.5459sec


In [21]:
result_df = pd.DataFrame(clf.cv_results_)

In [22]:
result_best_parameter = clf.best_params_

In [23]:
result_best_parameter

{'C': 1.7467454519717596, 'gamma': 0.0039707972111646984}

In [None]:
result_best_score = clf.best_score_

In [None]:
result_best_score

In [24]:
result_df.sort_values(by="rank_test_score", inplace=True)

In [25]:
print(result_df[["rank_test_score", "param_C", "param_gamma", "mean_test_score"]])

    rank_test_score    param_C  param_gamma  mean_test_score
5                 1    1.74675    0.0039708         0.970906
29                2   0.411792   0.00342519         0.941812
21                3  0.0682659  0.000561807         0.935993
18                4   0.503418   0.00429286         0.896924
7                 5    1.46571   0.00617502         0.893599
25                6   0.236131  5.29322e-05         0.877805
28                7   0.635349    0.0052451         0.862012
4                 8    3.30522   0.00752599         0.831255
2                 9     2.4979    0.0076699         0.826268
22               10  0.0361465   0.00118011         0.710723
0                11   0.682891   0.00672223         0.708229
16               12     1.0011   0.00978993         0.702411
17               13    1.07205    0.0105981         0.696592
6                14   0.551505   0.00704784         0.581879
11               15   0.181435   0.00440841         0.545303
15               16    2

In [26]:
best_model = clf.best_estimator_
pred = pd.DataFrame(best_model.predict(X_test))

In [27]:
pred_pd = pd.DataFrame(pred)

In [28]:
pred_pd.head()

Unnamed: 0,0
0,6
1,9
2,3
3,7
4,2


In [29]:
target_test_pd.head()

Unnamed: 0,0
0,6
1,9
2,3
3,7
4,2


In [30]:
params = {"C":stats.expon(scale=3), "gamma":stats.expon(scale=0.001)}

In [31]:
clf = RandomizedSearchCV(svm, params, cv=5, iid=True, return_train_score=False, n_iter=30)

In [32]:
t1 = time.time()
clf.fit(X_train, y_train)
t2 = time.time()

In [33]:
print('take {0:.4f}sec'.format(t2 -t1))

take 12.4881sec


In [34]:
result_df = pd.DataFrame(clf.cv_results_)

In [35]:
result_best_parameter = clf.best_params_

In [36]:
result_best_parameter

{'C': 1.418754516160091, 'gamma': 0.0006452045905607823}

In [None]:
result_best_score = clf.best_score_

In [None]:
result_best_score