## 方法1：通过GradSearch搜索进行网格搜索

In [6]:
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
import pandas as pd
# pd.set_option('display.height',1000)

# 导入数据
iris = datasets.load_iris()
# 定义超参搜索空间
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
# 初始化模型
svc = svm.SVC()
# 网格搜索
clf = GridSearchCV(estimator = svc,
                   param_grid = parameters,
                   scoring = 'accuracy',
                   n_jobs = -1,
                   cv = 5)
clf.fit(iris.data, iris.target)

# 打印结果


GridSearchCV(cv=5, estimator=SVC(), n_jobs=-1,
             param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')},
             scoring='accuracy')

In [7]:
pd.DataFrame.from_dict(clf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001275,0.000194,0.000557,0.000121,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.001562,0.000172,0.000666,7.7e-05,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,0.966667,0.966667,0.933333,1.0,0.966667,0.021082,4
2,0.00123,0.000192,0.000483,8.3e-05,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,3
3,0.00096,8.7e-05,0.000458,1.6e-05,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1


In [8]:
print('最佳分类器:\n', clf.best_estimator_)
print('最佳分数:\n', clf.best_score_)
print('最佳参数:\n', clf.best_params_)

最佳分类器:
 SVC(C=1, kernel='linear')
最佳分数:
 0.9800000000000001
最佳参数:
 {'C': 1, 'kernel': 'linear'}


## 方法2：随机搜索 

In [12]:
from sklearn import datasets, svm
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd
from scipy.stats import uniform
iris = datasets.load_iris()
distributions = {'kernel':['linear','rbf'],'C':uniform(loc=1,scale=9)}
svc = svm.SVC()
clf = RandomizedSearchCV(
    estimator=svc,
    param_distributions=distributions,
    n_iter=4,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    random_state=2021
)
clf.fit(iris.data,iris.target)

RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=4, n_jobs=-1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f852c642070>,
                                        'kernel': ['linear', 'rbf']},
                   random_state=2021, scoring='accuracy')

In [13]:
pd.DataFrame.from_dict(clf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001468,6.8e-05,0.000696,5.4e-05,6.4538,rbf,"{'C': 6.453804509266643, 'kernel': 'rbf'}",0.966667,1.0,1.0,0.966667,1.0,0.986667,0.01633,1
1,0.001757,0.000655,0.001777,0.002277,4.99782,linear,"{'C': 4.9978242311665735, 'kernel': 'linear'}",1.0,1.0,0.933333,0.966667,1.0,0.98,0.026667,3
2,0.001479,3e-05,0.000662,1.4e-05,3.81406,rbf,"{'C': 3.8140577546921826, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,3
3,0.003356,0.000989,0.001282,0.000382,5.36286,rbf,"{'C': 5.362861410926739, 'kernel': 'rbf'}",0.966667,1.0,1.0,0.966667,1.0,0.986667,0.01633,1


In [14]:
print(clf.best_estimator_)
print(clf.best_params_)
print(clf.best_score_)

SVC(C=6.453804509266643)
{'C': 6.453804509266643, 'kernel': 'rbf'}
0.9866666666666667


## 方法3：使用贝叶斯优化

In [1]:
from sklearn import svm, datasets
from sklearn.model_selection import cross_val_score
from hyperopt import hp, fmin, tpe, space_eval
import pandas as pd

# 导入数据
iris = datasets.load_iris()

# step1: 定义目标函数
def objective(params):
      # 初始化模型并交叉验证
      svc = svm.SVC(**params)
      cv_scores = cross_val_score(svc, iris.data, iris.target, cv=5)
      # 返回loss = 1 - accuracy (loss必须被最小化)
      loss = 1 - cv_scores.mean()
      return loss

# step2: 定义超参搜索空间
space = {'kernel':hp.choice('kernel', ['linear', 'rbf']),
         'C':hp.uniform('C', 1, 100)}

# step3: 在给定超参搜索空间下，最小化目标函数
best = fmin(objective, space, algo=tpe.suggest, max_evals=100)

# step4: 打印结果
print(best)

100%|██████████| 100/100 [00:00<00:00, 124.02trial/s, best loss: 0.013333333333333308]
{'C': 6.195719675022276, 'kernel': 1}
