In [None]:
import numpy as np
import pandas as pd

from lightgbm import Dataset
import optuna
import pickle

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Training table; the CSV's 'id' column becomes the DataFrame index.
data = pd.read_csv('../input/tabular-playground-series-sep-2021/train.csv', index_col='id')

# Restore the previously pickled object that is later handed to the tuner as
# `study=` (presumably an Optuna study from an earlier run -- verify).
# NOTE(security): pickle.load can execute arbitrary code; only load pickles
# that come from a trusted source.
# The `with` block guarantees the handle is closed even if unpickling fails.
with open('../input/lightgbm-tuning/lgbm_optimizing_study.pickle', 'rb') as study_file:
    study = pickle.load(study_file)

In [None]:
# Extra feature: number of missing cells in each row. The count spans every
# column currently in `data` (the 'claim' column included).
data['n_missing'] = data.isnull().sum(axis='columns')

In [None]:
# Split the table into the feature matrix (everything except 'claim')
# and the target column ('claim').
X = data.drop(columns='claim')
y = data['claim']

In [None]:
# Wrap the features and target in a LightGBM Dataset for the tuner below.
train_set = Dataset(data=X, label=y)

In [None]:
# Fixed LightGBM settings passed to the tuner as its starting parameters.
params = dict(
    objective='binary',
    learning_rate=0.01,
    random_state=42,
    metric='auc',
    verbose=-1,
)

# Tuning time limit: 6 h expressed in seconds (Optuna documents
# `time_budget` as being in seconds).
time_budget = 6 * 60 * 60

# Cross-validated tuning that continues from the previously loaded `study`.
tuner = optuna.integration.lightgbm.LightGBMTunerCV(
    params=params,
    train_set=train_set,
    num_boost_round=20000,
    nfold=5,
    early_stopping_rounds=160,
    verbose_eval=False,
    time_budget=time_budget,
    study=study,
    verbosity=-1,
)
tuner.run()

In [None]:
# Report the best score and the matching parameters found by the tuner,
# one per line.
print(
    f'Best score: {tuner.best_score}',
    f'Best parameters: {tuner.best_params}',
    sep='\n',
)

In [None]:
# Persist the study so a later run can pick up where this one stopped.
# Using `with` ensures the file is flushed and closed; passing a bare
# open(...) to pickle.dump leaves the handle open until garbage collection.
with open('lgbm_optimizing_study.pickle', 'wb') as study_out:
    pickle.dump(study, study_out)