In [None]:
import warnings
warnings.simplefilter('ignore')

import gc

import numpy as np
import pandas as pd
# Show every column when a DataFrame is displayed. The fully qualified option
# key is required: the bare 'max_columns' pattern also matches
# 'styler.render.max_columns' on pandas versions that define it, which makes
# set_option raise OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', None)
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error

import joblib
import lightgbm as lgb

In [None]:
%%time

train = pd.read_csv('../input/ubiquant-market-prediction/train.csv')
print(train.shape)
train.head()

In [None]:
ycol = 'target'
# Model inputs: every column except the label and the row/time identifiers.
excluded_cols = {ycol, 'row_id', 'time_id'}
feature_names = [col for col in train.columns if col not in excluded_cols]
# Positional column indexes let each fold slice rows and columns in a single
# .iloc call instead of copying all columns first and selecting features after.
feature_positions = train.columns.get_indexer(feature_names)

model = lgb.LGBMRegressor(boosting_type='gbdt',
                          objective='regression',
                          metric='rmse',
                          tree_learner='serial',
                          n_estimators=1000,
                          num_leaves=64,
                          max_depth=8,
                          learning_rate=0.1,
                          subsample=0.8,
                          # Row subsampling is only applied when the bagging
                          # frequency is non-zero; with the default of 0 the
                          # subsample=0.8 setting above is silently ignored.
                          subsample_freq=1,
                          feature_fraction=0.6,
                          reg_alpha=0.1,
                          reg_lambda=0.1,
                          random_state=2022)

# One importance table per fold, collected for later aggregation.
df_importance_list = []

# NOTE(review): shuffled KFold can place rows that share a time_id in both the
# training and validation parts of a fold — confirm this matches how the model
# is evaluated, or consider grouping the split by time_id.
kfold = KFold(n_splits=5, shuffle=True, random_state=2022)
# KFold only needs the number of rows, so pass the frame itself rather than
# materialising a copy of the feature matrix just to split it.
for fold_id, (trn_idx, val_idx) in enumerate(kfold.split(train)):
    X_train = train.iloc[trn_idx, feature_positions]
    Y_train = train[ycol].iloc[trn_idx]
    X_val = train.iloc[val_idx, feature_positions]
    Y_val = train[ycol].iloc[val_idx]
    # Each fit() call retrains the estimator from scratch. Early stopping and
    # periodic logging are passed as callbacks because the `verbose` and
    # `early_stopping_rounds` fit keywords are deprecated in LightGBM 3.3 and
    # removed in 4.0.
    lgb_model = model.fit(X_train,
                          Y_train,
                          eval_names=['train', 'valid'],
                          eval_set=[(X_train, Y_train), (X_val, Y_val)],
                          eval_metric='rmse',
                          callbacks=[lgb.early_stopping(stopping_rounds=50),
                                     lgb.log_evaluation(period=100)])
    # Persist the fitted model for this fold before the next fit overwrites it.
    joblib.dump(lgb_model, f'lgb_{fold_id}.pkl')
    df_importance = pd.DataFrame({
        'column': feature_names,
        'importance': lgb_model.feature_importances_,
    })
    df_importance_list.append(df_importance)
    # Release the per-fold copies before the next fold allocates its own.
    del lgb_model, X_train, Y_train, X_val, Y_val
    gc.collect()