## 15. パラメータチューニング
機械学習には以下の2種類のパラメータがある。  
- 訓練データから学習するパラメータ
- 個別に最適化する学習アルゴリズムのパラメータ  

後者はモデルのチューニングパラメータでり、**ハイパーパラメータ**と呼ばれる。  
 

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)

#### グリッドサーチ
網羅的探索手法であり、さまざまなハイパーパラメータの値からなるリストを指定すると、それらの組み合わせごとにモデルの性能を評価する。

In [10]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

pipe_svc = make_pipeline(StandardScaler(), SVC())
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
param_grid = [{'svc__C':param_range, 'svc__kernel':['linear']},
              {'svc__C':param_range, 'svc__gamma':param_range,
               'svc__kernel':['rbf']}]

# refit=Trueで訓練データ全体を使って最適な設定で自動的に再適合する
gridsearch = GridSearchCV(pipe_svc, param_grid, cv=5, verbose=0, refit=True)

best_model = gridsearch.fit(X_train, y_train)

In [11]:
print(best_model.best_score_)
print(best_model.best_params_)

0.980952380952381
{'svc__C': 1.0, 'svc__kernel': 'linear'}


#### ランダムサーチ

In [12]:
from sklearn.model_selection import RandomizedSearchCV

# refit=Trueで訓練データ全体を使って最適な設定で自動的に再適合する
randomsearch = RandomizedSearchCV(pipe_svc, param_grid, cv=5, verbose=0, refit=True)

best_model = randomsearch.fit(X_train, y_train)

In [13]:
print(best_model.best_score_)
print(best_model.best_params_)

0.9714285714285713
{'svc__kernel': 'rbf', 'svc__gamma': 0.1, 'svc__C': 10.0}


#### 複数モデルからの選択

In [17]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

pipe = Pipeline([("scaler", StandardScaler()), ("classifier",RandomForestClassifier())])

search_space = [{"classifier": [SVC()],
                 'classifier__C':param_range,
                 'classifier__kernel':['linear']},
                {"classifier": [RandomForestClassifier()],
                 "classifier__n_estimators": [10, 100, 1000],
                 "classifier__max_features": [1, 2, 3]}]

gridsearch = GridSearchCV(pipe, search_space, cv=5, verbose=0)

best_model = gridsearch.fit(X_train, y_train)

In [18]:
print(best_model.best_score_)
print(best_model.best_params_)

0.980952380952381
{'classifier': SVC(kernel='linear'), 'classifier__C': 1.0, 'classifier__kernel': 'linear'}


#### hyperopt
https://qiita.com/nazoking@github/items/f67f92dc60001a43b7dc

In [57]:
iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(features, target, stratify=target)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train)

In [72]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import log_loss
import xgboost as xgb
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

def score(params):
    print("Training with params: ")
    print(params)
    num_round = int(params['n_estimators'])
    # del params['n_estimators']
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dvalid = xgb.DMatrix(X_val, label=y_val)
    watchlist = [(dvalid, 'eval'), (dtrain, 'train')]
    gbm_model = xgb.train(params, dtrain,
                          evals=watchlist,
                          num_boost_round=num_round,
                          early_stopping_rounds=10,
                          verbose_eval=True)
    predictions = gbm_model.predict(dvalid,
                                    ntree_limit=gbm_model.best_iteration + 1)
    score = accuracy_score(y_val, predictions)
    # TODO: Add the importance for the selected features
    print("\tScore {0}\n\n".format(score))
    # The score function should return the loss (1-score)
    # since the optimize function looks for the minimum
    loss = 1 - score
    return {'loss': loss, 'status': STATUS_OK}

In [73]:
param_space = {
        'n_estimators': hp.quniform('n_estimators', 100, 1000, 1),
        'eta': hp.quniform('eta', 0.025, 0.5, 0.025),
        # A problem with max_depth casted to float instead of int with
        # the hp.quniform method.
        'max_depth':  hp.choice('max_depth', np.arange(1, 14, dtype=int)),
        'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1),
        'subsample': hp.quniform('subsample', 0.5, 1, 0.05),
        'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.05),
        'eval_metric': 'mlogloss',
        'objective': 'multi:softmax',
        #'objective': 'multi:softprob',
        # Increase this number if you have more cores. Otherwise, remove it and it will default 
        # to the maxium number. 
        'nthread': 4,
        'booster': 'gbtree',
        'tree_method': 'exact',
        'silent': 1,
        'num_class': 3,
        'seed': 1
    }

max_evals= 10
trials = Trials()
history = []
best = fmin(score, param_space, algo=tpe.suggest,
            trials=trials,
            max_evals=max_evals)

Training with params:                                                                                                  
{'booster': 'gbtree', 'colsample_bytree': 0.8, 'eta': 0.225, 'eval_metric': 'mlogloss', 'gamma': 0.8, 'max_depth': 6, 'min_child_weight': 4.0, 'n_estimators': 436.0, 'nthread': 4, 'num_class': 3, 'objective': 'multi:softmax', 'seed': 1, 'silent': 1, 'subsample': 0.65, 'tree_method': 'exact'}
Parameters: { n_estimators, silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[0]	eval-mlogloss:0.86087	train-mlogloss:0.83430                                                                       

[1]	eval-mlogloss:0.68518	train-mlogloss:0.65754                                                                       

[2]	eval-mlogloss:0.57350	train-mlogloss:0.52292 

Parameters: { n_estimators, silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[0]	eval-mlogloss:0.96094	train-mlogloss:0.94558                                                                       

[1]	eval-mlogloss:0.84651	train-mlogloss:0.82288                                                                       

[2]	eval-mlogloss:0.74959	train-mlogloss:0.72111                                                                       

[3]	eval-mlogloss:0.66960	train-mlogloss:0.63604                                                                       

[4]	eval-mlogloss:0.61621	train-mlogloss:0.57054                                                                       

[5]	eval-mlogloss:0.55310	train-mlogloss:0.50670                                                   

[0]	eval-mlogloss:0.86143	train-mlogloss:0.83373                                                                       

[1]	eval-mlogloss:0.75765	train-mlogloss:0.72118                                                                       

[2]	eval-mlogloss:0.62226	train-mlogloss:0.58398                                                                       

[3]	eval-mlogloss:0.51843	train-mlogloss:0.47423                                                                       

[4]	eval-mlogloss:0.45794	train-mlogloss:0.40330                                                                       

[5]	eval-mlogloss:0.38721	train-mlogloss:0.33347                                                                       

[6]	eval-mlogloss:0.34370	train-mlogloss:0.27757                                                                       

[7]	eval-mlogloss:0.31281	train-mlogloss:0.24091                                                                       

[8]	eval-mlogloss:0.29061	train-

[7]	eval-mlogloss:0.30250	train-mlogloss:0.20673                                                                       

[8]	eval-mlogloss:0.28296	train-mlogloss:0.17966                                                                       

[9]	eval-mlogloss:0.26905	train-mlogloss:0.15435                                                                       

[10]	eval-mlogloss:0.25194	train-mlogloss:0.13348                                                                      

[11]	eval-mlogloss:0.24354	train-mlogloss:0.12036                                                                      

[12]	eval-mlogloss:0.23131	train-mlogloss:0.11147                                                                      

[13]	eval-mlogloss:0.23566	train-mlogloss:0.10098                                                                      

[14]	eval-mlogloss:0.22910	train-mlogloss:0.09148                                                                      

[15]	eval-mlogloss:0.21883	train

[27]	eval-mlogloss:0.22791	train-mlogloss:0.10756                                                                      

[28]	eval-mlogloss:0.22793	train-mlogloss:0.10757                                                                      

	Score 0.9333333333333333                                                                                              


Training with params:                                                                                                  
{'booster': 'gbtree', 'colsample_bytree': 0.6000000000000001, 'eta': 0.375, 'eval_metric': 'mlogloss', 'gamma': 0.65, 'max_depth': 2, 'min_child_weight': 5.0, 'n_estimators': 589.0, 'nthread': 4, 'num_class': 3, 'objective': 'multi:softmax', 'seed': 1, 'silent': 1, 'subsample': 0.8, 'tree_method': 'exact'}
Parameters: { n_estimators, silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not use

In [76]:
from hyperopt import space_eval

space_eval(param_space, best)

{'booster': 'gbtree',
 'colsample_bytree': 0.8,
 'eta': 0.225,
 'eval_metric': 'mlogloss',
 'gamma': 0.8,
 'max_depth': 6,
 'min_child_weight': 4.0,
 'n_estimators': 436.0,
 'nthread': 4,
 'num_class': 3,
 'objective': 'multi:softmax',
 'seed': 1,
 'silent': 1,
 'subsample': 0.65,
 'tree_method': 'exact'}

In [79]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate

from sklearn.svm import SVC

def objective(X, y, args):
    """最小化したい目的関数"""
    classifiers = {
        'svm': SVC,
        'rf': RandomForestClassifier,
        'logit': LogisticRegression,
    }
    classifier = classifiers.get(args['model_type'])
    del args['model_type']
    model = classifier(**args)
    # Stratified 5 Fold Cross Validation
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_validate(model, X=X, y=y, cv=kf)
    # 最小化なので符号を反転する
    return -1 * scores['test_score'].mean()

In [81]:
from functools import partial
from hyperopt.pyll.base import scope

f = partial(objective, X_train, y_train)
# 変数の値域を定義する
space = hp.choice('algorithms', [
    {
        'model_type': 'rf',
        'n_estimators': scope.int(hp.uniform('n_estimators', 1e+1, 1e+3)),
        'max_depth': scope.int(hp.uniform('max_depth', 1e+1, 1e+3)),
    },
    {
        'model_type': 'svm',
        'C': hp.uniform('C', 1e+0, 1e+2),
        'gamma': hp.lognormal('gamma', 1e-2, 1e+1),
    },
    {
        'model_type': 'logit',
        'solver': 'lbfgs',
        'multi_class': 'auto',
        'max_iter': 1000,
    }
])
# 探索過程を記録するオブジェクト
trials = Trials()
# 目的関数を最小化するパラメータを探索する
best = fmin(fn=f, space=space, algo=tpe.suggest, max_evals=100, trials=trials)
# 結果を出力する
print(space_eval(space, best))

100%|█████████████████████████████████████████████| 100/100 [02:50<00:00,  1.70s/trial, best loss: -0.9904761904761905]
{'C': 96.38239880971929, 'gamma': 0.007667030695280854, 'model_type': 'svm'}


#### optuna


In [89]:
import optuna
from functools import partial

def objective(X, y, trial):
    """最小化する目的関数"""
    params = {
        'kernel': trial.suggest_categorical('kernel', ['rbf', 'sigmoid', 'linear']),
        'C': trial.suggest_loguniform('C', 1e+0, 1e+2),
        'gamma': trial.suggest_loguniform('gamma', 1e-2, 1e+1),
    }

    # モデルを作る
    model = SVC(**params)

    # 5-Fold CV / Accuracy でモデルを評価する
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_validate(model, X=X, y=y, cv=kf)
    # 最小化なので 1.0 からスコアを引く
    return 1.0 - scores['test_score'].mean()


# 目的関数にデータを適用する
f = partial(objective, X_train, y_train)
# 最適化のセッションを作る
study = optuna.create_study()
# 100 回試行する
study.optimize(f, n_trials=100)
# 最適化したパラメータを出力する
print('params:', study.best_params)

params: {'kernel': 'rbf', 'C': 1.3098008537935169, 'gamma': 0.39345898625869047}


In [90]:
d=study.best_params
model = SVC(**d)
model.fit(X_train, y_train)
model.score(X_test, y_test)

0.9555555555555556

In [91]:
optuna.logging.disable_default_handler()

def objective(X, y, trial):
    """最小化する目的関数"""

    # 使う分類器は SVM or RF
    classifier = trial.suggest_categorical('classifier', ['SVC', 'RandomForestClassifier'])

    # 選ばれた分類器で分岐する
    if classifier == 'SVC':
        # SVC のとき
        params = {
            'kernel': trial.suggest_categorical('kernel', ['rbf', 'sigmoid', 'linear']),
            'C': trial.suggest_loguniform('C', 1e+0, 1e+2),
            'gamma': trial.suggest_loguniform('gamma', 1e-2, 1e+1),
        }
        model = SVC(**params)
    else:
        # RF のとき
        params = {
            'n_estimators': int(trial.suggest_int('n_estimators', 1e+2, 1e+3, log=True)),
            'max_depth': int(trial.suggest_int('max_depth', 2, 32, log=True)),
        }
        model = RandomForestClassifier(**params)

    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_validate(model, X=X, y=y, cv=kf, n_jobs=-1)
    return 1.0 - scores['test_score'].mean()

In [92]:
f = partial(objective, X_train, y_train)
study = optuna.create_study()
study.optimize(f, n_trials=100)
print('params:', study.best_params)

params: {'classifier': 'SVC', 'kernel': 'linear', 'C': 1.0583638053977193, 'gamma': 0.012862494473148573}


In [93]:
d=study.best_params
del d['classifier']
d

{'kernel': 'linear', 'C': 1.0583638053977193, 'gamma': 0.012862494473148573}

In [94]:
model = SVC(**d)
model.fit(X_train, y_train)
model.score(X_test, y_test)

0.9111111111111111