## Optuna : hyperparmeter_tuning framework

In [14]:
import pandas as pd
df_raw=pd.read_csv('../../../raw_data/raw_titanic.csv',index_col=0)

In [15]:
df=df_raw.select_dtypes(['int','float'])

target=df['Survived']
data = df.drop(['Survived'],axis=1)
data['Age'].fillna(data['Age'].mean(),inplace=True)

In [16]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

In [17]:
XGBClassifier().get_params

<bound method XGBModel.get_params of XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, ...)>

In [18]:
ssc = StandardScaler()
data_ssc=ssc.fit_transform(data)
x_train,x_test,y_train,y_test = train_test_split(data_ssc,target,test_size=0.2)

In [20]:
# base_line optuna
import optuna
def my_objective(trial):
    """
    1. make parameter dictionary
    2. define model for the parameter
    3. fit model
    4. get score based on purpose
    5. return
    """
    #1
    prms={
    'booster': trial.suggest_categorical('booster',['gbtree','gblinear','dart']),
    'learning_rate': trial.suggest_float('learning_rate',0,1) ,
    'max_delta_step': trial.suggest_float('max_delta_step',0,100),
    'max_depth': trial.suggest_int('max_depth',0,100),
    'min_child_weight': trial.suggest_float('min_child_weight',0,10),
    'n_estimators': trial.suggest_int('n_estimators',100,300)
    }
    # 2.
    xgbc = XGBClassifier(**prms)
    # 3.
    xgbc.fit(x_train,y_train)
    y_pred=xgbc.predict(x_test)
    acc = accuracy_score(y_test,y_pred)
    return acc

my_study = optuna.create_study(direction='maximize')
my_study.optimize(my_objective,n_trials=20)

[32m[I 2022-11-15 13:12:32,551][0m A new study created in memory with name: no-name-d9a05eb8-18ee-4ec9-a612-36421cf94774[0m
[32m[I 2022-11-15 13:12:32,688][0m Trial 0 finished with value: 0.6983240223463687 and parameters: {'booster': 'gblinear', 'learning_rate': 0.25954219237068643, 'max_delta_step': 8.623449271200966, 'max_depth': 32, 'min_child_weight': 5.719603440018747, 'n_estimators': 268}. Best is trial 0 with value: 0.6983240223463687.[0m


Parameters: { "max_delta_step", "max_depth", "min_child_weight" } are not used.



[32m[I 2022-11-15 13:12:37,330][0m Trial 1 finished with value: 0.6983240223463687 and parameters: {'booster': 'dart', 'learning_rate': 0.37671631044496967, 'max_delta_step': 17.09352190352248, 'max_depth': 13, 'min_child_weight': 5.077687666907665, 'n_estimators': 166}. Best is trial 0 with value: 0.6983240223463687.[0m
[32m[I 2022-11-15 13:12:44,802][0m Trial 2 finished with value: 0.7206703910614525 and parameters: {'booster': 'dart', 'learning_rate': 0.2040287005700141, 'max_delta_step': 69.7682353767769, 'max_depth': 66, 'min_child_weight': 4.945361727195343, 'n_estimators': 216}. Best is trial 2 with value: 0.7206703910614525.[0m
[32m[I 2022-11-15 13:12:45,645][0m Trial 3 finished with value: 0.6983240223463687 and parameters: {'booster': 'gbtree', 'learning_rate': 0.3621063946553894, 'max_delta_step': 82.33088252458442, 'max_depth': 93, 'min_child_weight': 8.139292855014657, 'n_estimators': 243}. Best is trial 2 with value: 0.7206703910614525.[0m
[32m[I 2022-11-15 13:1

Parameters: { "max_delta_step", "max_depth", "min_child_weight" } are not used.



[32m[I 2022-11-15 13:13:08,024][0m Trial 11 finished with value: 0.7318435754189944 and parameters: {'booster': 'gbtree', 'learning_rate': 0.6919952379640222, 'max_delta_step': 36.945398445105255, 'max_depth': 72, 'min_child_weight': 9.727166707833195, 'n_estimators': 204}. Best is trial 11 with value: 0.7318435754189944.[0m
[32m[I 2022-11-15 13:13:14,855][0m Trial 12 finished with value: 0.7262569832402235 and parameters: {'booster': 'dart', 'learning_rate': 0.6326298838971623, 'max_delta_step': 38.996826656918884, 'max_depth': 66, 'min_child_weight': 9.70488346217287, 'n_estimators': 213}. Best is trial 11 with value: 0.7318435754189944.[0m
[32m[I 2022-11-15 13:13:15,427][0m Trial 13 finished with value: 0.7262569832402235 and parameters: {'booster': 'gbtree', 'learning_rate': 0.6555009439487921, 'max_delta_step': 30.101647683413226, 'max_depth': 37, 'min_child_weight': 9.76729960311442, 'n_estimators': 204}. Best is trial 11 with value: 0.7318435754189944.[0m
[32m[I 2022-1

Parameters: { "max_delta_step", "max_depth", "min_child_weight" } are not used.



[32m[I 2022-11-15 13:13:15,941][0m Trial 15 finished with value: 0.7430167597765364 and parameters: {'booster': 'gbtree', 'learning_rate': 0.7938477667971604, 'max_delta_step': 30.917630952425025, 'max_depth': 46, 'min_child_weight': 9.973224170719536, 'n_estimators': 147}. Best is trial 15 with value: 0.7430167597765364.[0m
[32m[I 2022-11-15 13:13:16,338][0m Trial 16 finished with value: 0.7150837988826816 and parameters: {'booster': 'gbtree', 'learning_rate': 0.7983150332299199, 'max_delta_step': 20.496539620675183, 'max_depth': 39, 'min_child_weight': 8.378922603250153, 'n_estimators': 140}. Best is trial 15 with value: 0.7430167597765364.[0m
[32m[I 2022-11-15 13:13:16,794][0m Trial 17 finished with value: 0.7039106145251397 and parameters: {'booster': 'gbtree', 'learning_rate': 0.8151672906341154, 'max_delta_step': 55.73548123761386, 'max_depth': 28, 'min_child_weight': 6.609109297386268, 'n_estimators': 146}. Best is trial 15 with value: 0.7430167597765364.[0m
[32m[I 202

In [21]:
print(my_study.best_params)
print(my_study.best_value)

{'booster': 'gbtree', 'learning_rate': 0.7938477667971604, 'max_delta_step': 30.917630952425025, 'max_depth': 46, 'min_child_weight': 9.973224170719536, 'n_estimators': 147}
0.7430167597765364


In [22]:
optuna.visualization.plot_optimization_history(my_study)

In [23]:
optuna.visualization.plot_parallel_coordinate(my_study)

In [24]:
optuna.visualization.plot_param_importances(my_study)

In [27]:
best_prms=my_study.best_params
xgbc=XGBClassifier(**best_prms)
xgbc.fit(x_train,y_train)

In [31]:
pd.DataFrame(xgbc.feature_importances_,index=data.columns).transpose()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare
0,0.495644,0.093267,0.133511,0.176349,0.101228
