# Optunaの動作確認サンプル

ハイパーパラメータの自動チューニングができるライブラリ[Optuna](https://www.preferred.jp/ja/projects/optuna/)の動作確認を行う．  
機械学習モデルの開発において，精度向上の最後の一押しを効率化できる．

[GBDTの試行](https://qiita.com/DS27/items/aa3f6d0f03a8053e5810#6-gbdt%E5%8B%BE%E9%85%8D%E3%83%96%E3%83%BC%E3%82%B9%E3%83%86%E3%82%A3%E3%83%B3%E3%82%B0%E6%9C%A8%E3%81%AB%E3%81%A4%E3%81%84%E3%81%A6)で高い精度が得られており，このモデルをハイパーパラメータチューニングのベースラインとする．

## データセットの読み込み(ボストン住宅価格)

In [1]:
from sklearn.datasets import load_boston
import pandas as pd

# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs scikit-learn < 1.2 to run.
boston = load_boston()

# Features and target as separate frames, plus one combined frame for inspection
df_x_boston = pd.DataFrame(boston.data, columns=boston.feature_names)
df_y_boston = pd.DataFrame({'MEDV': boston.target})
df_boston = pd.concat([df_x_boston, df_y_boston], axis=1)

### 欠損値確認

In [2]:
# Count missing values per column (isna is the canonical name; isnull is its alias)
df_boston.isna().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
MEDV       0
dtype: int64

### 学習データと評価データの分割

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    df_x_boston,
    df_y_boston,
    test_size=0.2,
    random_state=1,
)

### データの標準化

In [4]:
from sklearn.preprocessing import StandardScaler
# Learn mean/variance from the training split only, then apply the same
# scaling to both splits so no test-set statistics leak into the scaler
sc = StandardScaler().fit(x_train)
x_train_std, x_test_std = sc.transform(x_train), sc.transform(x_test)

## GBDTの学習と評価

In [5]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error

# Baseline: GBDT with scikit-learn's default hyperparameters
gbr = GradientBoostingRegressor()
# y_train is a single-column DataFrame; flatten it to 1-D so fit() does not
# emit a DataConversionWarning about a column-vector y
gbr.fit(x_train_std, y_train.values.ravel())

# Score the baseline on the held-out split
pred_gbr = gbr.predict(x_test_std)
r2_gbr = r2_score(y_test, pred_gbr)
mae_gbr = mean_absolute_error(y_test, pred_gbr)

print("R2 : %.3f" % r2_gbr)
print("MAE : %.3f" % mae_gbr)

R2 : 0.922
MAE : 2.186


  return f(*args, **kwargs)


### Optuna

GradientBoostingRegressorのデフォルト値（探索範囲を決める際の参考）

```
class sklearn.ensemble.GradientBoostingRegressor(*, loss='squared_error', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, min_impurity_decrease=0.0, init=None, random_state=None, max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, validation_fraction=0.1, n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)
```


In [6]:
import optuna

In [7]:
def objective(trial):
    """Optuna objective: fit a GBDT with sampled hyperparameters and score it.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        ``1 - R2`` on the held-out split, so minimizing this value maximizes R2.
    """
    # NOTE(review): the score is computed on x_test_std / y_test, the same
    # split used for the final evaluation, so the tuned R2 reported afterwards
    # is optimistically biased. Consider scoring on a validation split (or
    # cross-validation) of the training data instead.
    params = {
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
    }

    gbr = GradientBoostingRegressor(**params)
    # y_train is a single-column DataFrame; pass a 1-D array so fit() does not
    # emit a DataConversionWarning on every trial
    gbr.fit(x_train_std, y_train.values.ravel())

    pred_gbr = gbr.predict(x_test_std)
    r2_gbr = r2_score(y_test, pred_gbr)

    return 1 - r2_gbr

# The objective returns 1 - R2, so the study minimizes it
# (minimize is also create_study's default direction)
n_trials = 100
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=n_trials)

[32m[I 2021-12-05 11:43:18,045][0m A new study created in memory with name: no-name-e80700c8-817f-461d-adba-840f67709913[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:18,273][0m Trial 0 finished with value: 0.09385112682768326 and parameters: {'learning_rate': 0.0249345854659195, 'n_estimators': 191, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_depth': 6}. Best is trial 0 with value: 0.09385112682768326.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:18,353][0m Trial 1 finished with value: 0.6758689137472803 and parameters: {'learning_rate': 0.002572748234032556, 'n_estimators': 89, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_depth': 4}. Best is trial 0 with value: 0.09385112682768326.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:18,586][0m Trial 2 finished with value: 0.16710390152768895 and parameters: {'learning_rate': 0.005695711947584472, 'n_estimators': 275, 'min_samples_split': 2, 'min_samples_leaf': 7, 'max_depth': 4}. Best i

[32m[I 2021-12-05 11:43:26,835][0m Trial 15 finished with value: 0.08076777676549962 and parameters: {'learning_rate': 0.01268756042172586, 'n_estimators': 691, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_depth': 8}. Best is trial 8 with value: 0.08010294297988496.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:27,658][0m Trial 16 finished with value: 0.09585153000252344 and parameters: {'learning_rate': 0.013802154172676813, 'n_estimators': 633, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_depth': 7}. Best is trial 8 with value: 0.08010294297988496.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:28,037][0m Trial 17 finished with value: 0.07416644134837624 and parameters: {'learning_rate': 0.04222682064029239, 'n_estimators': 410, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_depth': 4}. Best is trial 17 with value: 0.07416644134837624.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:28,063][0m Trial 18 finished with value: 0.50168

  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:31,423][0m Trial 31 finished with value: 0.07808171219092142 and parameters: {'learning_rate': 0.035888473419065425, 'n_estimators': 187, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_depth': 3}. Best is trial 22 with value: 0.07281073046064679.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:31,521][0m Trial 32 finished with value: 0.07590159743920066 and parameters: {'learning_rate': 0.05584137440148288, 'n_estimators': 91, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_depth': 4}. Best is trial 22 with value: 0.07281073046064679.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:31,656][0m Trial 33 finished with value: 0.07112519630987879 and parameters: {'learning_rate': 0.05658218417679322, 'n_estimators': 130, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_depth': 4}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:31,764][0m Trial 34

[32m[I 2021-12-05 11:43:33,695][0m Trial 46 finished with value: 0.09783476821973502 and parameters: {'learning_rate': 0.0815813547742361, 'n_estimators': 195, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_depth': 2}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:33,827][0m Trial 47 finished with value: 0.24236966064204757 and parameters: {'learning_rate': 0.008071742323439539, 'n_estimators': 129, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_depth': 4}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:34,146][0m Trial 48 finished with value: 0.22284467330178992 and parameters: {'learning_rate': 0.004875959190805487, 'n_estimators': 294, 'min_samples_split': 5, 'min_samples_leaf': 10, 'max_depth': 6}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:34,212][0m Trial 49 finished with value: 0.14

  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:39,602][0m Trial 62 finished with value: 0.07272034136214922 and parameters: {'learning_rate': 0.03843775809537316, 'n_estimators': 544, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_depth': 3}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:39,996][0m Trial 63 finished with value: 0.07665849479543674 and parameters: {'learning_rate': 0.03967823822153461, 'n_estimators': 558, 'min_samples_split': 3, 'min_samples_leaf': 4, 'max_depth': 3}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:40,351][0m Trial 64 finished with value: 0.07409971152743355 and parameters: {'learning_rate': 0.030331257392101726, 'n_estimators': 490, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_depth': 3}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:40,579][0m Trial 6

[32m[I 2021-12-05 11:43:45,007][0m Trial 77 finished with value: 0.07820197732672096 and parameters: {'learning_rate': 0.07304570622387144, 'n_estimators': 285, 'min_samples_split': 3, 'min_samples_leaf': 4, 'max_depth': 3}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:45,455][0m Trial 78 finished with value: 0.08424997351084484 and parameters: {'learning_rate': 0.03899472473300241, 'n_estimators': 518, 'min_samples_split': 5, 'min_samples_leaf': 5, 'max_depth': 4}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:45,494][0m Trial 79 finished with value: 0.1599151126660311 and parameters: {'learning_rate': 0.057576306601588245, 'n_estimators': 29, 'min_samples_split': 5, 'min_samples_leaf': 6, 'max_depth': 3}. Best is trial 33 with value: 0.07112519630987879.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:45,594][0m Trial 80 finished with value: 0.07727

  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:48,671][0m Trial 93 finished with value: 0.0764329568707025 and parameters: {'learning_rate': 0.05994059465715607, 'n_estimators': 79, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_depth': 4}. Best is trial 85 with value: 0.07049276503444557.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:48,848][0m Trial 94 finished with value: 0.07937023108631958 and parameters: {'learning_rate': 0.03761712788318248, 'n_estimators': 153, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_depth': 5}. Best is trial 85 with value: 0.07049276503444557.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:48,941][0m Trial 95 finished with value: 0.07865737839422349 and parameters: {'learning_rate': 0.0520146809920584, 'n_estimators': 107, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_depth': 3}. Best is trial 85 with value: 0.07049276503444557.[0m
  return f(*args, **kwargs)
[32m[I 2021-12-05 11:43:49,184][0m Trial 96 fi

In [8]:
# Display the full record of the study's best trial
study.best_trial

FrozenTrial(number=85, values=[0.07049276503444557], datetime_start=datetime.datetime(2021, 12, 5, 11, 43, 46, 910635), datetime_complete=datetime.datetime(2021, 12, 5, 11, 43, 47, 128356), params={'learning_rate': 0.042780517081778706, 'n_estimators': 239, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_depth': 4}, distributions={'learning_rate': LogUniformDistribution(high=0.1, low=0.001), 'n_estimators': IntUniformDistribution(high=1000, low=10, step=1), 'min_samples_split': IntUniformDistribution(high=10, low=2, step=1), 'min_samples_leaf': IntUniformDistribution(high=10, low=1, step=1), 'max_depth': IntUniformDistribution(high=10, low=1, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=85, state=TrialState.COMPLETE, value=None)

In [9]:
# Hyperparameters of the best trial (shorthand for study.best_trial.params)
study.best_params

{'learning_rate': 0.042780517081778706,
 'n_estimators': 239,
 'min_samples_split': 6,
 'min_samples_leaf': 4,
 'max_depth': 4}

In [10]:
# Rebuild the model from the best hyperparameters found by the study
params = study.best_trial.params
lr = params['learning_rate']
n_estimators = params['n_estimators']
min_samples_split = params['min_samples_split']
min_samples_leaf = params['min_samples_leaf']
max_depth = params['max_depth']

gbr = GradientBoostingRegressor(
    learning_rate=lr,
    n_estimators=n_estimators,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf,
    max_depth=max_depth)
# y_train is a single-column DataFrame; flatten it to 1-D so fit() does not
# emit a DataConversionWarning about a column-vector y
gbr.fit(x_train_std, y_train.values.ravel())

# Evaluate with the same metrics as the baseline for a direct comparison
pred_gbr = gbr.predict(x_test_std)
r2_gbr = r2_score(y_test, pred_gbr)
mae_gbr = mean_absolute_error(y_test, pred_gbr)

print("R2 : %.3f" % r2_gbr)
print("MAE : %.3f" % mae_gbr)

  return f(*args, **kwargs)


R2 : 0.929
MAE : 2.028
