In [2]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np
from joblib import dump



In [3]:
# Read the preprocessed train/test tables and the test CSV from ../data.
train_data = pd.read_csv('../data_processed/train_data_processed.csv')
test_data = pd.read_csv('../data_processed/test_data_processed.csv')
submit_data = pd.read_csv('../data/test.csv')

# Separate the target column ('attendance') from the feature columns.
y = train_data['attendance']
X = train_data.drop(columns=['attendance'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Build a LightGBM Dataset from the training split.
# NOTE(review): the grid search below fits on X_train / y_train directly and never
# reads lgb_train; it is kept only in case another cell relies on it.
lgb_train = lgb.Dataset(X_train, label=y_train)

# Candidate values for each hyperparameter. GridSearchCV tries every combination:
# 4 * 4 * 4 * 4 * 3 * 3 * 3 * 3 * 3 = 62,208 settings, i.e. 311,040 fits with cv=5
# (plus the final refit), so expect this cell to run for a long time.
param_grid = {
    'num_leaves': [31, 50, 100, 150],
    'max_depth': [-1, 5, 10, 15],
    'learning_rate': [0.1, 0.01, 0.001, 0.0001],
    'n_estimators': [100, 200, 500, 1000],
    'min_child_samples': [20, 30, 40],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'reg_alpha': [0.0, 0.1, 0.2],
    'reg_lambda': [0.0, 0.1, 0.2]
}

# Columns passed to LightGBM as categorical features at fit time.
categorical_cols = ['venue', 'weather', 'day_of_week', 'is_holiday', 'season']

# Exhaustive search scored by (negated) RMSE with 5-fold cross-validation.
# subsample_freq=1 enables bagging at every iteration. With LightGBM's default
# subsample_freq=0 bagging is disabled, so the `subsample` values in the grid would
# be ignored and every one of them would train an identical model.
grid_search = GridSearchCV(
    estimator=lgb.LGBMRegressor(random_state=42, subsample_freq=1),
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=5,
)

# Run the search; extra keyword arguments are forwarded to LGBMRegressor.fit.
grid_search.fit(X_train, y_train, categorical_feature=categorical_cols)

# Best hyperparameter combination found by the search.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Estimator refit on the whole of X_train with the best parameters.
best_model = grid_search.best_estimator_



In [None]:
# Save the tuned estimator's underlying Booster via Booster.save_model.
# (Alternative left by the author: serialize the whole sklearn wrapper with joblib,
#  i.e. dump(best_model, '../models/best_model(LightGBM).joblib').)
model_path = '../models/best_model(LightGBM)'
best_model.booster_.save_model(model_path)

<lightgbm.basic.Booster at 0x7f2ef0451370>