GridSearchCV

In [2]:
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV

# Load the iris data set; its `data` and `target` attributes feed the fit below.
iris = datasets.load_iris()

# Grid searched exhaustively: 2 kernels x 3 values of C = 6 combinations.
parameters = dict(kernel=('rbf', 'linear'), C=[1, 5, 10])

# NOTE: despite the variable name, this is a classifier (SVC), not a regressor.
svr = svm.SVC()

# `fit` returns the fitted search object, so construction and fitting chain.
clf = GridSearchCV(estimator=svr, param_grid=parameters).fit(iris.data, iris.target)

# Show the estimator built from the best-scoring parameter combination.
print(clf.best_estimator_)


SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


RandomizedSearchCV

In [1]:
import numpy as np
from time import time
# randint: discrete uniform distribution used to draw random integers
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

# Helper function that reports the best results of a hyperparameter search

def report(results, n_top=3):
    """Print the best-ranked candidates of a hyperparameter search.

    For each rank from 1 to ``n_top`` (inclusive), every candidate whose
    ``rank_test_score`` equals that rank is printed with its mean and standard
    deviation of the test score and its parameter setting. Candidates tied on
    a rank are all printed; a rank with no candidate prints nothing.

    Args:
        results: Mapping with the keys 'rank_test_score', 'mean_test_score',
            'std_test_score' and 'params', each indexable by candidate
            position (the layout of a fitted search's ``cv_results_``).
            The values may be NumPy arrays or plain sequences.
        n_top: Number of rank levels to report.

    Returns:
        None. The report is written to standard output.
    """
    # Coerce to an array so that `ranks == i` is an elementwise comparison.
    # With a plain list the comparison collapses to a single False and the
    # function would silently print nothing.
    ranks = np.asarray(results['rank_test_score'])
    for i in range(1, n_top + 1):
        for candidate in np.flatnonzero(ranks == i):
            print('Model with rank:{0}'.format(i))
            print('Mean validation score:{0:.3f} (std: {1:.3f})'.format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]
            ))
            print('Parameters:{0}'.format(results['params'][candidate]))
            print('')

# Load the digits data set and unpack it into features and labels.
digits = load_digits()
X, y = digits.data, digits.target

# Fixed seed shared by the forest and the parameter sampler so that a
# "Restart & Run All" draws the same candidates and reports the same scores.
# Without it every run produces a different ranking.
SEED = 42

clf = RandomForestClassifier(n_estimators=20, random_state=SEED)

# Search space: lists are sampled from directly, while the scipy `randint`
# distributions draw integers with an exclusive upper bound
# (e.g. sp_randint(1, 11) yields values from 1 to 10).
param_dist = {
    'max_depth': [3, None],
    'max_features': sp_randint(1, 11),
    'min_samples_split': sp_randint(2, 11),
    'min_samples_leaf': sp_randint(1, 11),
    'bootstrap': [True, False],
    'criterion': ['gini', 'entropy'],
}

# Number of parameter settings sampled from `param_dist`.
n_iter_search = 20
random_search = RandomizedSearchCV(
    clf,
    param_distributions=param_dist,
    n_iter=n_iter_search,
    random_state=SEED,
)

# Time only the fit, i.e. the cross-validated evaluation of all candidates.
start = time()
random_search.fit(X, y)
print('RandomizedSearchCV took %.2f seconds for %d candidates parameter settings.' % ((time() - start), n_iter_search))

report(random_search.cv_results_)


RandomizedSearchCV took 1.95 seconds for 20 candidates parameter settings.
Model with rank:1
Mean validation score:0.919 (std: 0.020)
Parameters:{'bootstrap': True, 'criterion': 'entropy', 'max_depth': None, 'max_features': 8, 'min_samples_leaf': 3, 'min_samples_split': 6}

Model with rank:2
Mean validation score:0.914 (std: 0.007)
Parameters:{'bootstrap': False, 'criterion': 'entropy', 'max_depth': None, 'max_features': 2, 'min_samples_leaf': 2, 'min_samples_split': 6}

Model with rank:3
Mean validation score:0.900 (std: 0.006)
Parameters:{'bootstrap': False, 'criterion': 'gini', 'max_depth': None, 'max_features': 2, 'min_samples_leaf': 8, 'min_samples_split': 5}

