![Model Tuning](images/model-tuning.png)

**import libraries**

In [1]:
import numpy as np
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler  # 对数据进行预处理（标准化）
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
from sklearn import datasets

**load data**

In [2]:
# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the pinned scikit-learn version before re-running this notebook.
boston = datasets.load_boston()
X, y = boston.data, boston.target  # feature matrix and regression target
features = boston.feature_names

**base model**

In [3]:
# Baseline: standardize the features, then fit an SVR with its default hyperparameters.
# Pipeline vs. make_pipeline:
#    compared with Pipeline, the only difference is that make_pipeline generates
#    the step names automatically.
# StandardScaler centers and scales using the mean and variance of each feature
# dimension (column), not of each sample (row).
# cross_val_score evaluates the estimator with cross-validation (10 folds here).
pipe_SVR = make_pipeline(StandardScaler(), SVR())
score = cross_val_score(pipe_SVR, X, y, scoring='r2', cv=10)

score_mean, score_std = score.mean(), score.std()
# NOTE: the label reads "accuracy", but the value reported is R^2 (scoring='r2').
print('CV accuracy: %.3f +/- %.3f' % (score_mean, score_std))
print(score)

CV accuracy: 0.187 +/- 0.649
[ 0.74943112  0.72244189  0.18237941  0.04934372  0.56317173  0.05674098
  0.59932148  0.20889779 -1.61288394  0.35310766]


**Tuning**

In [8]:
# Grid search with GridSearchCV.
# SVR hyperparameters being tuned: C, kernel and gamma.
pipeline_steps = [
    ("StanScaler", StandardScaler()),
    ('svr', SVR()),
]
pipe_svr = Pipeline(pipeline_steps)

# Candidate values, shared by C and gamma.
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]

# Parameters of a pipeline step are addressed with the <estimator>__<parameter> syntax.
linear_grid = {"svr__C": param_range, 'svr__kernel': ['linear']}
rbf_grid = {'svr__C': param_range, 'svr__gamma': param_range, 'svr__kernel': ['rbf']}
param_grid = [linear_grid, rbf_grid]

gs = GridSearchCV(
    pipe_svr,
    param_grid,
    scoring='r2',  # metric used to rank the candidates
    cv=10,         # number of cross-validation folds
    n_jobs=-1,     # use all CPU cores
).fit(X, y)        # fit() returns the fitted search object itself
print(gs.best_score_, gs.best_params_)

0.6081303070817723 {'svr__C': 1000.0, 'svr__gamma': 0.001, 'svr__kernel': 'rbf'}


In [9]:
# Randomized search with RandomizedSearchCV over an RBF-kernel SVR.
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform  # uniform distribution used to sample parameter values

pipe_svr = Pipeline([("StanScaler", StandardScaler()),
                    ('svr', SVR())
                    ])
# Each entry is either a list of choices or an object that provides an rvs()
# sampling method. uniform(loc, scale) samples from [loc, loc + scale].
distributions = {
    'svr__C': uniform(loc=1.0, scale=4),
    'svr__gamma': uniform(loc=0, scale=4),
    'svr__kernel': ['rbf']
}

rs = RandomizedSearchCV(
    estimator=pipe_svr,
    param_distributions=distributions,
    scoring='r2',
    n_jobs=-1,
    cv=10,
    # Seed the candidate sampling: without it every run draws different
    # (C, gamma) pairs, so best_score_ / best_params_ are not reproducible.
    random_state=42
)

rs = rs.fit(X, y)

print(rs.best_score_, rs.best_params_)

0.5117224743197005 {'svr__C': 3.2463227052033377, 'svr__gamma': 0.02452854772065871, 'svr__kernel': 'rbf'}


In [12]:
# Draw one sample from the frozen uniform distribution with loc=1.0, scale=4,
# i.e. a value in [1.0, 5.0]; the last expression is displayed as the cell output.
c_distribution = uniform(loc=1.0, scale=4)
c_distribution.rvs()

3.63572665341015