# LightGBM + Optuna Hyperparameter Tuning

# Loading the preprocessed data

In [None]:
import pandas as pd
# Load the feature table from a pickle file (presumably written by an
# earlier preprocessing step -- confirm against the data source).
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
df = pd.read_pickle('../input/preprocessingdata/df.pkl')

In [None]:
# Print a summary of the loaded frame (columns, dtypes, memory usage).
df.info()

In [None]:
# Split on the month index: rows before block 33 train the model, block 33
# is the validation month and block 34 is the month to predict.
target_col = 'item_cnt_month'
month = df.date_block_num

train_rows = df[month < 33]
X_train = train_rows.drop(columns=[target_col])
Y_train = train_rows[target_col]

valid_rows = df[month == 33]
X_valid = valid_rows.drop(columns=[target_col])
Y_valid = valid_rows[target_col]

X_test = df[month == 34].drop(columns=[target_col])

# Release the full frame and the temporaries now that the splits exist.
del df, month, train_rows, valid_rows

**Using Optuna to find the best model parameters**
You can add any other parameters you want to tune to the `params` dictionary inside `objective`.

In [None]:
import lightgbm as lgb
import sklearn
# Every column of the training frame is passed to the model as a feature.
feature_name = X_train.columns.tolist()

# Columns handed to LightGBM as categorical features.
feature_name_indexes = [
    'country_part',
    'item_category_common',
    'item_category_code',
    'city_code',
]
def objective(trial):
    """Optuna objective: fit one LightGBM model and score it on the validation split.

    Samples ``num_leaves``, ``feature_fraction`` and ``learning_rate`` from
    ``trial``, trains a booster on the module-level ``X_train``/``Y_train``
    with early stopping evaluated on ``X_valid``/``Y_valid``, and returns
    the validation error for Optuna to minimise.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error of the booster's predictions on ``X_valid``.
        The training metric is RMSE; minimising MSE ranks trials the same way.
    """
    # Imported locally so the ``metrics`` submodule is definitely loaded: a
    # bare ``import sklearn`` does not guarantee ``sklearn.metrics`` is
    # available as an attribute on every scikit-learn version.
    from sklearn.metrics import mean_squared_error

    lgb_train = lgb.Dataset(X_train[feature_name], Y_train)
    lgb_eval = lgb.Dataset(X_valid[feature_name], Y_valid, reference=lgb_train)

    params = {
        'objective': 'rmse',
        'metric': 'rmse',
        'num_leaves': trial.suggest_int('num_leaves', 1000, 1500),
        'min_data_in_leaf': 10,
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.6, 0.8),
        # Each hyperparameter must be registered under its own name; reusing
        # 'feature_fraction' here would not sample an independent learning
        # rate from this range.
        'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.015),
        # NOTE(review): `num_rounds` is an alias of the boosting-round count,
        # and LightGBM lets a value given in params take precedence over the
        # `num_boost_round` argument below -- confirm which limit is intended.
        'num_rounds': 1000,
        'early_stopping_rounds': 30,
        'seed': 1
    }

    evals_result = {}
    gbm = lgb.train(
        params,
        lgb_train,
        num_boost_round=3000,
        valid_sets=(lgb_train, lgb_eval),
        feature_name=feature_name,
        categorical_feature=feature_name_indexes,
        verbose_eval=50,
        evals_result=evals_result,
    )

    # Score on the same column selection the Datasets were built from.
    preds = gbm.predict(X_valid[feature_name])
    return mean_squared_error(Y_valid, preds)




In [None]:
import optuna

# Run a 50-trial search that minimises the value returned by `objective`.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Report how many trials ran and the parameters of the best one.
finished_trials = len(study.trials)
print('Number of finished trials:', finished_trials)
best_params = study.best_trial.params
print('Best trial:', best_params)

# After getting the best parameters, train the LightGBM model again.

In [None]:
# Categorical columns, listed again so this cell does not depend on the
# tuning cell having been run.
feature_name_indexes = [
    'country_part',
    'item_category_common',
    'item_category_code',
    'city_code',
]

# Training and validation sets; the validation set references the training
# set, as in the tuning cell.
lgb_train = lgb.Dataset(X_train[feature_name], label=Y_train)
lgb_eval = lgb.Dataset(X_valid[feature_name], label=Y_valid, reference=lgb_train)

# Fixed hyperparameters (presumably copied by hand from the best Optuna
# trial -- confirm against the study output).
# NOTE(review): `num_rounds` is an alias of the boosting-round count, and
# LightGBM lets a value given in params take precedence over the
# `num_boost_round` argument below -- confirm which limit is intended.
params = dict(
    objective='rmse',
    metric='rmse',
    num_leaves=1012,
    min_data_in_leaf=10,
    feature_fraction=0.622351664881,
    learning_rate=0.01,
    num_rounds=1000,
    early_stopping_rounds=30,
    seed=1,
)

# Filled by LightGBM with the per-iteration metric history.
evals_result = {}

gbm = lgb.train(
    params=params,
    train_set=lgb_train,
    num_boost_round=3000,
    valid_sets=(lgb_train, lgb_eval),
    feature_name=feature_name,
    categorical_feature=feature_name_indexes,
    evals_result=evals_result,
    verbose_eval=50,
)

In [None]:
# The competition test file is read only to supply the submission row IDs.
test = pd.read_csv('../input/competitive-data-science-predict-future-sales/test.csv')

# Predict the test month, then clamp the predictions to the range [0, 20].
raw_predictions = gbm.predict(X_test[feature_name])
Y_test = raw_predictions.clip(0, 20)

# NOTE(review): IDs are taken from test.index, which assumes the row position
# equals the competition ID and that X_test rows are in test.csv order --
# confirm against the preprocessing step.
submission = pd.DataFrame({"ID": test.index, "item_cnt_month": Y_test})
submission.to_csv('gbm_submission.csv', index=False)