In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import optuna

In [None]:
# Model inputs: every column of df other than the id, target and kfold columns.
useful_features = [
    name for name in df.columns
    if name not in ("id", "target", "kfold")
]
# Columns whose name starts with 'cat'; these are ordinal-encoded before training.
object_cols = [name for name in useful_features if name.startswith('cat')]
# Restrict the test frame to the same feature columns, in the same order.
df_test = df_test[useful_features]

In [None]:
# Objective function: build xtrain / xvalid for one fold and fit XGBoost on them
def run(trial):
    """Optuna objective: fit an XGBoost regressor on one fold and return its validation RMSE.

    Hyperparameters are sampled from ``trial``. Rows of the module-level ``df``
    whose ``kfold`` differs from the held-out fold are used for training; rows
    whose ``kfold`` equals it are used for validation and early stopping.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Root mean squared error of the predictions on the validation fold.
    """
    # Only fold 0 is held out; the search scores every trial on this one split.
    fold = 0

    # Sample hyperparameters. The parameter names are the keys reported by
    # study.best_params, so they are kept unchanged.
    learning_rate = trial.suggest_float('learning_rate', 1e-2, 0.25, log=True)
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
    reg_lambda = trial.suggest_float('reg_lambda', 1e-8, 100, log=True)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-8, 100, log=True)
    subsample = trial.suggest_float('subsample', 0.1, 1.0)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.1, 1.0)
    max_depth = trial.suggest_int('max_depth', 1, 7)

    xtrain = df[df.kfold != fold].reset_index(drop=True)
    xvalid = df[df.kfold == fold].reset_index(drop=True)

    ytrain = xtrain.target
    yvalid = xvalid.target

    # Take explicit copies so the encoded columns are written into independent
    # frames rather than into slices of the fold frames (avoids
    # SettingWithCopyWarning).
    xtrain = xtrain[useful_features].copy()
    xvalid = xvalid[useful_features].copy()

    # Fit the encoder on the training rows only, then apply it to validation.
    ordinal_encoder = preprocessing.OrdinalEncoder()
    xtrain[object_cols] = ordinal_encoder.fit_transform(xtrain[object_cols])
    xvalid[object_cols] = ordinal_encoder.transform(xvalid[object_cols])

    model = XGBRegressor(random_state=42,
                         tree_method='gpu_hist',
                         gpu_id=1,
                         predictor='gpu_predictor',
                         # Was misspelled 'n_estimator', which XGBoost does not
                         # treat as the tree count, so the default was used.
                         n_estimators=7000,
                         learning_rate=learning_rate,
                         reg_lambda=reg_lambda,
                         reg_alpha=reg_alpha,
                         subsample=subsample,
                         colsample_bytree=colsample_bytree,
                         max_depth=max_depth,
                         )
    model.fit(xtrain, ytrain, early_stopping_rounds=300, eval_set=[(xvalid, yvalid)], verbose=1000)
    preds_valid = model.predict(xvalid)

    # RMSE as sqrt(MSE): same value as squared=False, without relying on that
    # deprecated keyword.
    rmse = float(np.sqrt(mean_squared_error(yvalid, preds_valid)))
    return rmse

In [None]:
# Search for the best hyperparameters.
# The sampler is seeded so the sequence of suggested hyperparameters is the
# same on every Restart & Run All (TPE is already Optuna's default sampler).
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(run, n_trials=5)

In [None]:
# Display the search result: the study's best parameters
study.best_params