In [13]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error
import optuna

# Load the raw table; the path is relative to the notebook's working directory.
df = pd.read_csv('../assets/dataset.csv')

# Column the model is trained to predict.
label = 'finger_num'

# Columns fed to the model as inputs.
features = ['surface_type', 'liquid_type', 'diameter', 'height', 'fall_point_type', 'time']

# Design matrix and target vector read by `objective` below.
X, y = df[features], df[label]

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated RMSE of a LightGBM regressor.

    Reads the module-level ``X`` (feature frame) and ``y`` (target series).

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters for this evaluation.

    Returns
    -------
    float
        RMSE averaged over the five validation folds (lower is better).
    """
    params = {
        # Settings held fixed for every trial.
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'verbose': -1,
        # Search space sampled per trial.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'lambda_l1': trial.suggest_float('lambda_l1', 0.0, 10.0),
        'lambda_l2': trial.suggest_float('lambda_l2', 0.0, 10.0),
        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0.0, 1.0)
    }

    # Fixed random_state: the same five splits are generated on every call,
    # so trials are compared on identical folds.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    rmse_list = []
    for train_idx, valid_idx in kf.split(X):
        X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

        train_data = lgb.Dataset(X_train, label=y_train)
        valid_data = lgb.Dataset(X_valid, label=y_valid, reference=train_data)

        model = lgb.train(
            params,
            train_data,
            valid_sets=[train_data, valid_data],
            num_boost_round=100,
        )
        # NOTE(review): no early-stopping callback is passed to lgb.train, so
        # best_iteration is not chosen by early stopping here; presumably all
        # 100 rounds are used for prediction -- confirm against LightGBM docs.
        y_pred = model.predict(X_valid, num_iteration=model.best_iteration)
        # RMSE computed as sqrt(MSE). The `squared=False` keyword of
        # mean_squared_error was deprecated in scikit-learn 1.4 and removed in
        # 1.6, so relying on it fails on current releases.
        rmse = np.sqrt(mean_squared_error(y_valid, y_pred))
        rmse_list.append(rmse)

    return np.mean(rmse_list)

# Seed the sampler so that Restart & Run All reproduces the same sequence of
# trials. TPESampler is the sampler Optuna uses by default for single-objective
# studies, so only the seeding changes; 42 matches the seed used for the CV
# splitter inside `objective`.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

# Report the best hyperparameters found and their mean cross-validated RMSE.
print("Best Parameters:", study.best_params)
print("Best Results:", study.best_value)

[I 2024-11-28 12:42:31,071] A new study created in memory with name: no-name-6346be75-088b-4c34-a7b7-4a163deb5af6
[I 2024-11-28 12:42:31,391] Trial 0 finished with value: 3.928631823802968 and parameters: {'learning_rate': 0.025568497231647815, 'num_leaves': 73, 'max_depth': 6, 'feature_fraction': 0.92144697161515, 'bagging_fraction': 0.6868113865875275, 'bagging_freq': 2, 'lambda_l1': 7.159766612260011, 'lambda_l2': 9.604083733824549, 'min_gain_to_split': 0.5717586682940928}. Best is trial 0 with value: 3.928631823802968.
[I 2024-11-28 12:42:31,703] Trial 1 finished with value: 3.4961364063117366 and parameters: {'learning_rate': 0.16283906000047915, 'num_leaves': 73, 'max_depth': 11, 'feature_fraction': 0.775750152765339, 'bagging_fraction': 0.7978575056020438, 'bagging_freq': 6, 'lambda_l1': 6.5573681432304, 'lambda_l2': 4.045442243252916, 'min_gain_to_split': 0.4834433317085045}. Best is trial 1 with value: 3.4961364063117366.
[I 2024-11-28 12:42:32,008] Trial 2 finished with value

Best Parameters: {'learning_rate': 0.16824320886893038, 'num_leaves': 77, 'max_depth': 8, 'feature_fraction': 0.795229243985457, 'bagging_fraction': 0.9546818348770746, 'bagging_freq': 4, 'lambda_l1': 2.0550493855964294, 'lambda_l2': 2.4194325080039794, 'min_gain_to_split': 0.023887473667017295}
Best Results: 3.3898080684641045


In [None]:
# NOTE(review): as written, this literal is a *set* containing one empty
# string, not a dict, despite the name. Presumably an unfinished placeholder
# for a parameter mapping -- TODO fill in the intended key/value pairs.
params_dict = {
    ""
}