In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import optuna
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_df = pd.read_csv('./output/中間データ/train_preprocessed.csv')
train_df.head()

Unnamed: 0,datetime,y,week,soldout,name,kcal,remarks,payday,weather,precipitation,...,y_rolling_min_14,new_y,day,kcal_missing_flag,new_new_y,curry,popular,unpopular,event_キャリアアップ支援セミナー,event_ママの会
0,2014-01-09,129.0,-0.932992,1,鶏チリソース,1.048743,0,0.0,-1.143733,-0.181365,...,1.481382,0.321153,-0.808939,0,0.634808,0,1,0,False,False
1,2014-01-10,87.0,-0.908046,0,手作りロースカツ,1.217961,0,1.0,-0.012464,-0.181365,...,1.481382,-1.848836,-0.695212,0,-1.58719,0,1,1,False,False
2,2014-01-14,129.0,-0.073296,1,鶏の照り焼きマスタード,-0.948028,0,0.0,1.223188,-0.181365,...,1.481382,0.410879,-0.240304,0,0.443303,0,0,0,False,False
3,2014-01-15,134.0,0.220722,0,さんま辛味焼,1.556397,0,0.0,-1.143733,-0.181365,...,1.481382,0.718736,-0.126577,0,0.660572,0,1,0,False,False
4,2014-01-16,107.0,-0.932992,0,カレイ唐揚げ野菜あんかけ,0.371872,0,0.0,-0.012464,-0.181365,...,1.481382,-0.661561,-0.012851,0,-0.367739,0,0,1,False,False


In [3]:
X_train = train_df.drop(columns=['y', 'name', 'datetime', 'new_y', 'new_new_y'])
y_train = train_df['new_y']

In [4]:
tscv = TimeSeriesSplit(n_splits=5)

# Optunaによるハイパーパラメータ探索
def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 5.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 5.0),
        'random_state': 42,
        'verbosity': -1  # 出力を静かにする
    }
    
    val_scores = []
    
    for train_idx, valid_idx in tscv.split(X_train):
        X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[valid_idx]
        y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[valid_idx]
        
        train_data = lgb.Dataset(X_tr, label=y_tr)
        valid_data = lgb.Dataset(X_val, label=y_val)
        
        model = lgb.train(
            params,
            train_data,
            valid_sets=[valid_data],
            callbacks=[
                lgb.early_stopping(stopping_rounds=30, verbose=False),  # ここでもログ出さない
                lgb.log_evaluation(period=0)  # ログを完全に無効化
            ]
        )
        
        y_pred = model.predict(X_val)
        score = np.sqrt(mean_squared_error(y_val, y_pred))  # RMSE
        val_scores.append(score)
    
    return np.mean(val_scores)

In [5]:
# Optuna実行（静かに最適化）
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, show_progress_bar=False)

# ベストパラメータで最終学習
best_params = study.best_trial.params
best_params.update({
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'random_state': 1234,
    'verbosity': -1  # 最終モデルも静かに
})

# モデル全体で再学習
final_model = lgb.LGBMRegressor(**best_params)
final_model.fit(X_train, y_train)

# 最終モデルのトレーニングRMSEを計算
y_train_pred = final_model.predict(X_train)
final_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))

print(f"LightGBMのトレーニングRMSE: {final_rmse:.4f} ")

[I 2025-05-06 18:23:35,852] A new study created in memory with name: no-name-4f278baf-a3a2-4441-9c4d-52d9c80f67f6
[I 2025-05-06 18:23:35,923] Trial 0 finished with value: 1.0061720127662317 and parameters: {'learning_rate': 0.22359746639005648, 'num_leaves': 149, 'max_depth': 6, 'min_child_samples': 94, 'subsample': 0.8346092047063782, 'colsample_bytree': 0.6609520373139701, 'reg_alpha': 1.5158589560400004, 'reg_lambda': 0.1525490938459434}. Best is trial 0 with value: 1.0061720127662317.
[I 2025-05-06 18:23:35,945] Trial 1 finished with value: 1.0061720127662317 and parameters: {'learning_rate': 0.22723160606337575, 'num_leaves': 121, 'max_depth': 15, 'min_child_samples': 79, 'subsample': 0.7824592323667274, 'colsample_bytree': 0.914313196511719, 'reg_alpha': 1.298634499981438, 'reg_lambda': 4.531926319816365}. Best is trial 0 with value: 1.0061720127662317.
[I 2025-05-06 18:23:35,992] Trial 2 finished with value: 0.9493527367302645 and parameters: {'learning_rate': 0.2020986973876805

LightGBMのトレーニングRMSE: 0.3203 


In [6]:
# モデルを保存
with open('./output/モデル/lgb_model.pkl', 'wb') as f:
    pickle.dump(final_model, f)