In [1]:
import pandas as pd
import polars as pl
import numpy as np
import os
import gc
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import KFold, StratifiedKFold
import xgboost as xgb
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor, log_evaluation
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
#from sklearn.impute import IterativeImputer
import pickle
import optuna
import shap

# Automatic garbage collection (already the CPython default; kept explicit).
gc.enable()

# pandas display limits. The earlier `pd.options.display.max_columns = None`
# was removed: it was immediately overridden by the 500-column limit below,
# so the effective configuration is unchanged.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', None)  # None = do not truncate cell text

# polars display limits: a negative value disables row/column truncation.
pl.Config.set_tbl_rows(-1)
pl.Config.set_tbl_cols(-1)
pl.Config.set_fmt_str_lengths(500)

polars.config.Config

In [2]:
# Root folder of the competition data. Defaults to the original local
# location but can be overridden with the JS_DATA_DIR environment variable so
# the notebook runs on other machines. os.path.join(..., '') guarantees a
# trailing separator because later cells build file paths with `path + name`.
path = os.path.join(
    os.environ.get('JS_DATA_DIR', 'I:/Kaggle/jane-street-real-time-market-data-forecasting/'),
    '',
)

In [3]:
# Show the entries of the data directory `path`.
os.listdir(path)

['features.csv',
 'kaggle_evaluation',
 'lags.parquet',
 'my_folder',
 'responders.csv',
 'sample_submission.csv',
 'test.parquet',
 'train.parquet']

In [4]:
# Load the training parquet, drop the `partition_id` column and downcast every
# remaining column to the smallest dtype that holds its values.
# The downcast is done once: shrink_dtype() is idempotent, so the second
# whole-frame pass of the original cell only cost time and memory.
train_df = (
    pl.read_parquet(path + 'train.parquet/')
    .drop(['partition_id'])
    .select(pl.all().shrink_dtype())
)

# Target column kept as a standalone Series (train_df still contains it).
y_sr = train_df['responder_6']

print(train_df.shape)
train_df.head()

(47127338, 92)


date_id,time_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_09,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8
i16,i16,i8,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,i8,i8,i16,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
0,0,1,3.889038,,,,,,0.851033,0.242971,0.2634,-0.891687,11,7,76,-0.883028,0.003067,-0.744703,,-0.169586,,-1.335938,-1.707803,0.91013,,1.636431,1.522133,-1.551398,-0.229627,,,1.378301,-0.283712,0.123196,,,,0.28118,0.269163,0.349028,-0.012596,-0.225932,,-1.073602,,,-0.181716,,,,0.564021,2.088506,0.832022,,0.204797,,,-0.808103,,-2.037683,0.727661,,-0.989118,-0.345213,-1.36224,,,,,,-1.251104,-0.110252,-0.491157,-1.02269,0.152241,-0.659864,,,-0.261412,-0.211486,-0.335556,-0.281498,0.738489,-0.069556,1.380875,2.005353,0.186018,1.218368,0.775981,0.346999,0.095504
0,0,7,1.370613,,,,,,0.676961,0.151984,0.192465,-0.521729,11,7,76,-0.865307,-0.225629,-0.582163,,0.317467,,-1.250016,-1.682929,1.412757,,0.520378,0.744132,-0.788658,0.641776,,,0.2272,0.580907,1.128879,,,,-1.512286,-1.414357,-1.823322,-0.082763,-0.184119,,,,,,,,,-10.835207,-0.002704,-0.621836,,1.172836,,,-1.625862,,-1.410017,1.063013,,0.888355,0.467994,-1.36224,,,,,,-1.065759,0.013322,-0.592855,-1.052685,-0.393726,-0.741603,,,-0.281207,-0.182894,-0.245565,-0.302441,2.965889,1.190077,-0.523998,3.849921,2.626981,5.0,0.703665,0.216683,0.778639
0,0,9,2.285698,,,,,,1.056285,0.187227,0.249901,-0.77305,11,7,76,-0.675719,-0.199404,-0.586798,,-0.814909,,-1.296782,-2.040234,0.639589,,1.597359,0.657514,-1.350148,0.364215,,,-0.017751,-0.317361,-0.122379,,,,-0.320921,-0.95809,-2.436589,0.070999,-0.245239,,,,,,,,,-1.420632,-3.515137,-4.67776,,0.535897,,,-0.72542,,-2.29417,1.764551,,-0.120789,-0.063458,-1.36224,,,,,,-0.882604,-0.072482,-0.617934,-0.86323,-0.241892,-0.709919,,,0.377131,0.300724,-0.106842,-0.096792,-0.864488,-0.280303,-0.326697,0.375781,1.271291,0.099793,2.109352,0.670881,0.772828
0,0,10,0.690606,,,,,,1.139366,0.273328,0.306549,-1.262223,42,5,150,-0.694008,3.004091,0.114809,,-0.251882,,-1.902009,-0.979447,0.241165,,-0.392359,-0.224699,-2.129397,-0.855287,,,0.404142,-0.578156,0.105702,,,,0.544138,-0.087091,-1.500147,-0.201288,-0.038042,,,,,,,,,0.382074,2.669135,0.611711,,2.413415,,,1.313203,,-0.810125,2.939022,,3.988801,1.834661,-1.36224,,,,,,-0.697595,1.074309,-0.206929,-0.530602,4.765215,0.571554,,,-0.226891,-0.251412,-0.215522,-0.296244,0.408499,0.223992,2.294888,1.097444,1.225872,1.225376,1.114137,0.775199,-1.379516
0,0,14,0.44057,,,,,,0.9552,0.262404,0.344457,-0.613813,44,3,16,-0.947351,-0.030018,-0.502379,,0.646086,,-1.844685,-1.58656,-0.182024,,-0.969949,-0.673813,-1.282132,-1.399894,,,0.043815,-0.320225,-0.031713,,,,-0.08842,-0.995003,-2.635336,-0.196461,-0.618719,,,,,,,,,-2.0146,-2.321076,-3.711265,,1.253902,,,0.476195,,-0.771732,2.843421,,1.379815,0.411827,-1.36224,,,,,,-0.948601,-0.136814,-0.447704,-1.141761,0.099631,-0.661928,,,3.678076,2.793581,2.61825,3.418133,-0.373387,-0.502764,-0.348021,-3.928148,-1.591366,-5.0,-3.57282,-1.089123,-5.0


In [5]:
# Estimated in-memory size of train_df; estimated_size() returns bytes by
# default, so dividing by 1e9 expresses it in (decimal) gigabytes.
train_df.estimated_size() / 1e9

16.790916418

In [18]:
# https://www.kaggle.com/code/motono0223/js24-preprocessing-create-lags/notebook

class CONFIG:
    """Notebook-wide constants for the lag-feature construction."""

    # Name of the responder used as prediction target.
    target_col = "responder_6"

    # Columns pulled from the training frame to build the lag table:
    # the two join keys followed by responder_0 .. responder_8.
    lag_cols_original = ["date_id", "symbol_id", *(f"responder_{n}" for n in range(9))]

    # Rename map responder_N -> responder_N_lag_1 for the nine responders.
    lag_cols_rename = dict((f"responder_{n}", f"responder_{n}_lag_1") for n in range(9))

    # NOTE(review): not referenced in the visible cells; presumably the
    # validation fraction and the first date_id to train on -- confirm.
    valid_ratio = 0.05
    start_dt = 1100

In [7]:
# Build the one-day-lagged responder table in a single chain:
#   1. keep only the join keys and the nine responders,
#   2. rename responder_N -> responder_N_lag_1,
#   3. shift date_id forward by one so a row lines up with the *next* day,
#   4. keep the last record of each (date_id, symbol_id) in frame order,
#      i.e. the final row of the previous day for that symbol.
lags = (
    train_df
    .select(CONFIG.lag_cols_original)
    .rename(CONFIG.lag_cols_rename)
    .with_columns((pl.col('date_id') + 1).alias('date_id'))
    .group_by(["date_id", "symbol_id"], maintain_order=True)
    .last()
)

In [12]:
# Shape and first rows of the lag table (one row per shifted
# (date_id, symbol_id) key produced by the group_by above).
print(lags.shape)
lags.head()

(50382, 11)


date_id,symbol_id,responder_0_lag_1,responder_1_lag_1,responder_2_lag_1,responder_3_lag_1,responder_4_lag_1,responder_5_lag_1,responder_6_lag_1,responder_7_lag_1,responder_8_lag_1
i16,i8,f32,f32,f32,f32,f32,f32,f32,f32,f32
1,1,1.382971,0.416069,3.134381,1.032238,0.580303,1.853194,0.01209,0.040493,0.013765
1,7,0.635954,0.173196,3.124839,1.068124,0.436262,1.311089,0.116055,0.073651,0.164904
1,9,0.248801,-0.104118,-0.110137,0.168204,0.112685,0.459633,0.041742,0.04499,0.063626
1,10,-1.626361,-0.753249,-0.433064,-1.753868,-0.806781,-2.706254,-0.455618,-0.197604,-0.864137
1,14,5.0,3.496727,5.0,4.731386,2.209459,5.0,0.36648,0.315538,0.699698


In [10]:
# Rows with date_id == 0, kept aside to cross-check the lag table by eye.
first_day_df = train_df.filter(pl.col('date_id') == 0)
print(first_day_df.shape)
first_day_df.head()

(6792, 92)


date_id,time_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_09,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8
i16,i16,i8,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,i8,i8,i16,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
0,0,1,3.889038,,,,,,0.851033,0.242971,0.2634,-0.891687,11,7,76,-0.883028,0.003067,-0.744703,,-0.169586,,-1.335938,-1.707803,0.91013,,1.636431,1.522133,-1.551398,-0.229627,,,1.378301,-0.283712,0.123196,,,,0.28118,0.269163,0.349028,-0.012596,-0.225932,,-1.073602,,,-0.181716,,,,0.564021,2.088506,0.832022,,0.204797,,,-0.808103,,-2.037683,0.727661,,-0.989118,-0.345213,-1.36224,,,,,,-1.251104,-0.110252,-0.491157,-1.02269,0.152241,-0.659864,,,-0.261412,-0.211486,-0.335556,-0.281498,0.738489,-0.069556,1.380875,2.005353,0.186018,1.218368,0.775981,0.346999,0.095504
0,0,7,1.370613,,,,,,0.676961,0.151984,0.192465,-0.521729,11,7,76,-0.865307,-0.225629,-0.582163,,0.317467,,-1.250016,-1.682929,1.412757,,0.520378,0.744132,-0.788658,0.641776,,,0.2272,0.580907,1.128879,,,,-1.512286,-1.414357,-1.823322,-0.082763,-0.184119,,,,,,,,,-10.835207,-0.002704,-0.621836,,1.172836,,,-1.625862,,-1.410017,1.063013,,0.888355,0.467994,-1.36224,,,,,,-1.065759,0.013322,-0.592855,-1.052685,-0.393726,-0.741603,,,-0.281207,-0.182894,-0.245565,-0.302441,2.965889,1.190077,-0.523998,3.849921,2.626981,5.0,0.703665,0.216683,0.778639
0,0,9,2.285698,,,,,,1.056285,0.187227,0.249901,-0.77305,11,7,76,-0.675719,-0.199404,-0.586798,,-0.814909,,-1.296782,-2.040234,0.639589,,1.597359,0.657514,-1.350148,0.364215,,,-0.017751,-0.317361,-0.122379,,,,-0.320921,-0.95809,-2.436589,0.070999,-0.245239,,,,,,,,,-1.420632,-3.515137,-4.67776,,0.535897,,,-0.72542,,-2.29417,1.764551,,-0.120789,-0.063458,-1.36224,,,,,,-0.882604,-0.072482,-0.617934,-0.86323,-0.241892,-0.709919,,,0.377131,0.300724,-0.106842,-0.096792,-0.864488,-0.280303,-0.326697,0.375781,1.271291,0.099793,2.109352,0.670881,0.772828
0,0,10,0.690606,,,,,,1.139366,0.273328,0.306549,-1.262223,42,5,150,-0.694008,3.004091,0.114809,,-0.251882,,-1.902009,-0.979447,0.241165,,-0.392359,-0.224699,-2.129397,-0.855287,,,0.404142,-0.578156,0.105702,,,,0.544138,-0.087091,-1.500147,-0.201288,-0.038042,,,,,,,,,0.382074,2.669135,0.611711,,2.413415,,,1.313203,,-0.810125,2.939022,,3.988801,1.834661,-1.36224,,,,,,-0.697595,1.074309,-0.206929,-0.530602,4.765215,0.571554,,,-0.226891,-0.251412,-0.215522,-0.296244,0.408499,0.223992,2.294888,1.097444,1.225872,1.225376,1.114137,0.775199,-1.379516
0,0,14,0.44057,,,,,,0.9552,0.262404,0.344457,-0.613813,44,3,16,-0.947351,-0.030018,-0.502379,,0.646086,,-1.844685,-1.58656,-0.182024,,-0.969949,-0.673813,-1.282132,-1.399894,,,0.043815,-0.320225,-0.031713,,,,-0.08842,-0.995003,-2.635336,-0.196461,-0.618719,,,,,,,,,-2.0146,-2.321076,-3.711265,,1.253902,,,0.476195,,-0.771732,2.843421,,1.379815,0.411827,-1.36224,,,,,,-0.948601,-0.136814,-0.447704,-1.141761,0.099631,-0.661928,,,3.678076,2.793581,2.61825,3.418133,-0.373387,-0.502764,-0.348021,-3.928148,-1.591366,-5.0,-3.57282,-1.089123,-5.0


In [11]:
# Final rows of date_id 0: their responder values should reappear as the
# *_lag_1 values stored in `lags` under date_id 1 for the same symbols.
first_day_df.tail()

date_id,time_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_09,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8
i16,i16,i8,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,i8,i8,i16,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
0,848,10,0.690606,,,,,,0.198604,-0.029741,-0.140966,-0.962467,42,5,150,-0.609078,-0.317297,-0.555104,0.218694,0.930868,0.084928,0.977743,-1.411582,0.241165,,-0.392359,-0.224699,-2.129397,-0.855287,,,0.404142,-0.578156,0.105702,,0.0401,-1.477262,0.477078,-0.099005,-2.003799,0.123686,0.196912,-2.41272,-1.971498,-2.319298,-0.233091,-1.696398,-1.668827,-0.369762,-0.506779,-1.27199,-0.138415,-1.065655,-1.40252,0.667866,-0.600702,-0.322166,1.481436,-0.638902,0.062355,-0.109078,-0.563586,-0.095232,-1.013011,-1.36224,-0.032731,0.081868,0.004475,0.411844,-0.461753,-0.522853,-0.37743,-0.297253,-0.354697,-0.400324,-0.43562,-0.269232,-0.209199,-0.204087,-0.198284,-0.219605,-0.217057,-1.626361,-0.753249,-0.433064,-1.753868,-0.806781,-2.706254,-0.455618,-0.197604,-0.864137
0,848,14,0.44057,,,,,,0.15202,-0.102155,-0.176178,-0.88675,44,3,16,0.86018,-0.130832,0.361187,-0.419721,0.000898,-0.307039,-1.797052,3.354049,-0.182024,,-0.969949,-0.673813,-1.282132,-1.399894,,,0.043815,-0.320225,-0.031713,,0.265402,-0.102072,-0.168627,-0.084208,-0.036219,-2.125518,-1.87508,3.055206,0.497257,0.282488,1.716991,1.377666,1.375853,-1.299332,2.265358,0.445946,-0.010143,-0.006032,-0.285916,0.707829,0.375782,0.55298,0.507947,1.18715,1.21586,0.805848,0.57349,0.017643,0.051777,-1.36224,2.538301,-0.452482,0.673293,2.763822,2.529073,1.05483,-0.11562,0.419742,0.57189,-0.074639,0.179734,-0.310574,-0.131544,-0.319625,-0.282895,-0.495259,-0.324233,5.0,3.496727,5.0,4.731386,2.209459,5.0,0.36648,0.315538,0.699698
0,848,16,1.118269,,,,,,0.248446,-0.103795,-0.162988,-1.043239,11,7,76,-0.680361,-0.104304,-0.633052,1.013882,0.577262,0.644032,-0.480924,1.651714,0.748643,,0.36953,0.748203,-1.476237,-0.098337,,,0.276082,0.108393,0.762983,,0.266868,-0.91659,0.172893,0.687238,1.314304,-0.322478,-0.728568,-1.956467,-2.477132,-1.90616,-0.950059,-1.646639,-1.778881,-0.567111,0.316163,-0.070921,0.104588,0.219041,0.617248,0.736621,0.248948,0.66224,-1.328723,0.344979,0.426625,0.578139,0.205389,0.001701,-0.045987,-1.36224,0.117757,-0.378636,-0.335423,-0.691378,2.632958,-0.874593,-0.378252,-0.631277,-0.463631,0.161928,-0.414095,-0.40165,-0.234403,-0.20618,-0.159838,-0.365168,-0.254341,-1.389964,-0.35261,-0.175036,-0.811313,-0.525263,-0.691823,0.014288,0.037717,0.021741
0,848,19,2.456331,,,,,,0.214786,-0.070809,-0.083887,-0.656999,4,3,11,-0.372137,-0.045578,0.182541,0.300347,-0.272577,0.348121,-0.931276,0.431828,0.105461,,0.946351,1.344925,-0.573883,0.964037,,,-0.616086,0.142924,0.266898,,0.563234,-1.02677,1.083842,0.451631,0.239471,-0.015606,-0.619604,0.491704,0.793377,-0.494236,1.673495,1.233939,1.693789,-1.341861,0.427657,0.040478,-0.008797,0.016295,0.128528,0.388076,-0.501757,0.186988,-0.922461,-0.05951,-0.297873,0.874518,0.310264,0.097209,0.065888,-1.36224,-0.497462,-0.562408,-0.319766,2.27459,-0.438213,-0.152419,0.220645,0.711995,-0.348788,-0.345888,-0.302881,-0.265228,-0.245246,-0.32055,-0.246791,-0.308591,-0.337729,1.588425,0.913681,4.615185,1.8067,0.995739,3.996884,0.01929,0.042577,0.036325
0,848,33,1.663408,,,,,,0.24232,-0.071163,-0.185832,-0.990036,11,7,76,-0.715745,-0.416333,-0.763716,1.16266,1.838131,1.779362,-1.189198,1.31688,0.679352,,0.767303,0.740793,-1.215067,-1.124142,,,-0.46222,0.263958,1.038144,,-0.075871,0.745373,-1.087094,-0.898329,-1.63703,-0.342946,-0.592909,-1.916139,-1.657291,-1.59462,-0.494865,-1.914274,-1.765884,-1.763513,-0.578134,0.082022,-0.004346,-0.813945,-0.720179,0.884682,0.084543,0.567816,-1.461053,-0.813586,0.055976,0.323272,0.136678,-0.013815,-0.011322,-1.36224,-0.080933,-0.368057,-0.524512,1.182688,-0.093663,-0.80539,-0.282463,-0.579693,-0.890176,-0.449834,-0.703111,-0.395729,-0.304303,-0.243684,-0.209915,-0.291822,-0.277637,2.676013,1.650422,3.072583,3.455832,1.315261,3.428687,0.030653,0.045052,0.042633


In [14]:
# Attach the lagged responders to every training row of the matching
# (date_id, symbol_id). `lags` holds one row per key (group_by(...).last()),
# so the left join keeps the row count; keys without a match get nulls.
join_keys = ["date_id", "symbol_id"]

# Keep the cell idempotent: if it was already executed, the lag columns are
# present in train_df and a second join would append duplicate `*_right`
# columns. Drop the stale copies before joining again.
stale_lag_cols = [c for c in lags.columns if c not in join_keys and c in train_df.columns]
if stale_lag_cols:
    train_df = train_df.drop(stale_lag_cols)

train_df = train_df.join(lags, on=join_keys, how="left")
print(train_df.shape)
train_df.head()

(47127338, 101)


date_id,time_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_09,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8,responder_0_lag_1,responder_1_lag_1,responder_2_lag_1,responder_3_lag_1,responder_4_lag_1,responder_5_lag_1,responder_6_lag_1,responder_7_lag_1,responder_8_lag_1
i16,i16,i8,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,i8,i8,i16,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
0,0,1,3.889038,,,,,,0.851033,0.242971,0.2634,-0.891687,11,7,76,-0.883028,0.003067,-0.744703,,-0.169586,,-1.335938,-1.707803,0.91013,,1.636431,1.522133,-1.551398,-0.229627,,,1.378301,-0.283712,0.123196,,,,0.28118,0.269163,0.349028,-0.012596,-0.225932,,-1.073602,,,-0.181716,,,,0.564021,2.088506,0.832022,,0.204797,,,-0.808103,,-2.037683,0.727661,,-0.989118,-0.345213,-1.36224,,,,,,-1.251104,-0.110252,-0.491157,-1.02269,0.152241,-0.659864,,,-0.261412,-0.211486,-0.335556,-0.281498,0.738489,-0.069556,1.380875,2.005353,0.186018,1.218368,0.775981,0.346999,0.095504,,,,,,,,,
0,0,7,1.370613,,,,,,0.676961,0.151984,0.192465,-0.521729,11,7,76,-0.865307,-0.225629,-0.582163,,0.317467,,-1.250016,-1.682929,1.412757,,0.520378,0.744132,-0.788658,0.641776,,,0.2272,0.580907,1.128879,,,,-1.512286,-1.414357,-1.823322,-0.082763,-0.184119,,,,,,,,,-10.835207,-0.002704,-0.621836,,1.172836,,,-1.625862,,-1.410017,1.063013,,0.888355,0.467994,-1.36224,,,,,,-1.065759,0.013322,-0.592855,-1.052685,-0.393726,-0.741603,,,-0.281207,-0.182894,-0.245565,-0.302441,2.965889,1.190077,-0.523998,3.849921,2.626981,5.0,0.703665,0.216683,0.778639,,,,,,,,,
0,0,9,2.285698,,,,,,1.056285,0.187227,0.249901,-0.77305,11,7,76,-0.675719,-0.199404,-0.586798,,-0.814909,,-1.296782,-2.040234,0.639589,,1.597359,0.657514,-1.350148,0.364215,,,-0.017751,-0.317361,-0.122379,,,,-0.320921,-0.95809,-2.436589,0.070999,-0.245239,,,,,,,,,-1.420632,-3.515137,-4.67776,,0.535897,,,-0.72542,,-2.29417,1.764551,,-0.120789,-0.063458,-1.36224,,,,,,-0.882604,-0.072482,-0.617934,-0.86323,-0.241892,-0.709919,,,0.377131,0.300724,-0.106842,-0.096792,-0.864488,-0.280303,-0.326697,0.375781,1.271291,0.099793,2.109352,0.670881,0.772828,,,,,,,,,
0,0,10,0.690606,,,,,,1.139366,0.273328,0.306549,-1.262223,42,5,150,-0.694008,3.004091,0.114809,,-0.251882,,-1.902009,-0.979447,0.241165,,-0.392359,-0.224699,-2.129397,-0.855287,,,0.404142,-0.578156,0.105702,,,,0.544138,-0.087091,-1.500147,-0.201288,-0.038042,,,,,,,,,0.382074,2.669135,0.611711,,2.413415,,,1.313203,,-0.810125,2.939022,,3.988801,1.834661,-1.36224,,,,,,-0.697595,1.074309,-0.206929,-0.530602,4.765215,0.571554,,,-0.226891,-0.251412,-0.215522,-0.296244,0.408499,0.223992,2.294888,1.097444,1.225872,1.225376,1.114137,0.775199,-1.379516,,,,,,,,,
0,0,14,0.44057,,,,,,0.9552,0.262404,0.344457,-0.613813,44,3,16,-0.947351,-0.030018,-0.502379,,0.646086,,-1.844685,-1.58656,-0.182024,,-0.969949,-0.673813,-1.282132,-1.399894,,,0.043815,-0.320225,-0.031713,,,,-0.08842,-0.995003,-2.635336,-0.196461,-0.618719,,,,,,,,,-2.0146,-2.321076,-3.711265,,1.253902,,,0.476195,,-0.771732,2.843421,,1.379815,0.411827,-1.36224,,,,,,-0.948601,-0.136814,-0.447704,-1.141761,0.099631,-0.661928,,,3.678076,2.793581,2.61825,3.418133,-0.373387,-0.502764,-0.348021,-3.928148,-1.591366,-5.0,-3.57282,-1.089123,-5.0,,,,,,,,,


In [15]:
# Rows with date_id == 1 after the join, to inspect the new *_lag_1 columns.
second_day_df = train_df.filter(pl.col('date_id') == 1)
print(second_day_df.shape)
second_day_df.head()

(10188, 101)


date_id,time_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_09,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8,responder_0_lag_1,responder_1_lag_1,responder_2_lag_1,responder_3_lag_1,responder_4_lag_1,responder_5_lag_1,responder_6_lag_1,responder_7_lag_1,responder_8_lag_1
i16,i16,i8,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,i8,i8,i16,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
1,0,0,1.749479,,,,,,0.053447,2.192887,1.160708,-0.00983,11,7,76,-0.819115,1.403962,-0.098782,,0.340309,,-1.577595,-1.237946,1.039095,,0.756488,0.432157,-1.626238,-0.137137,,,0.952274,-0.186008,0.127051,,,,1.48572,0.760141,2.145851,-0.021399,-0.016591,,,,,,,,,3.88576,7.287179,4.877959,,2.20351,,,-0.850504,,-1.543995,1.741718,,2.035935,0.892778,-1.081097,,,,,,-0.734145,0.276101,-0.576441,-0.799783,4.812901,0.393107,,,-0.226359,-0.327119,-0.31504,-0.385573,-0.022355,0.337951,0.156607,1.406054,3.6808,0.598633,2.337418,2.577889,0.725604,,,,,,,,,
1,0,1,3.911768,,,,,,0.053053,2.126396,1.101112,-0.013003,11,7,76,-0.839616,2.043382,-0.109693,,0.063372,,-1.372385,-0.35307,0.751294,,2.282396,1.263641,-1.622081,-0.294155,,,1.758774,-0.304175,0.141651,,,,0.770913,0.97156,2.926169,-0.043944,-0.046172,,2.65897,,,1.678913,,,,1.167185,0.746974,2.753489,,0.673048,,,-0.765633,,-1.702322,1.68906,,0.753961,0.197442,-1.081097,-0.022838,-0.400573,0.173137,,,-0.822781,1.984625,-0.017244,-1.149028,1.284678,-0.176955,,,-0.280429,-0.242559,-0.31284,-0.281565,0.16938,0.043324,-0.030532,0.985784,1.485551,-0.379609,1.677068,1.752096,-0.523871,1.382971,0.416069,3.134381,1.032238,0.580303,1.853194,0.01209,0.040493,0.013765
1,0,2,1.062098,,,,,,0.049771,1.897569,0.936499,-0.012585,81,2,59,-0.688746,1.482645,-0.340683,,0.370178,,-1.30026,-1.844302,-0.830805,,-0.238425,0.028479,-1.182548,0.392532,,,0.341612,-0.521844,-0.178283,,,,-1.444112,-1.482314,-0.9553,-0.286239,-0.298491,,2.912592,,,3.202891,,,,1.332428,-0.003779,0.282652,,3.159928,,,-0.592981,,-0.646195,1.875373,,4.203779,1.5127,-1.081097,-0.362663,-0.576967,-0.583051,,,-0.807695,0.851287,-0.271779,-1.111671,1.145801,-0.261378,,,0.366157,0.405923,-0.148441,-0.146584,-0.484949,2.462649,-0.544341,-0.199138,1.638287,-0.311347,0.217322,-1.571629,0.598442,,,,,,,,,
1,0,7,1.083765,,,,,,0.034801,1.365109,0.866335,-0.009643,11,7,76,-1.025133,0.925701,-0.523978,,0.21753,,-2.165407,-1.268907,2.111038,,0.484647,1.033527,-0.771244,0.504023,,,0.240051,0.634331,1.433624,,,,1.111958,1.432608,1.147511,-0.17079,-0.065815,,-1.086018,,,-1.538898,,,,0.635281,1.416676,0.927752,,2.450088,,,-0.909762,,-0.857625,1.501932,,0.078696,-0.019289,-1.081097,-0.571457,-0.229733,-0.426658,,,-0.865701,-0.507224,-0.487616,-0.91007,1.927954,-0.131662,,,-0.315205,-0.304216,-0.353955,-0.318037,-1.926957,-1.510374,-0.155575,-0.327899,-0.127712,-0.237061,1.023567,1.464319,-0.335524,0.635954,0.173196,3.124839,1.068124,0.436262,1.311089,0.116055,0.073651,0.164904
1,0,9,2.015567,,,,,,0.052724,1.93197,0.964913,-0.015632,11,7,76,-1.534853,-0.174624,-0.603317,,-0.212626,,-1.962477,-0.819575,0.850963,,1.199708,0.503995,-0.922459,0.257535,,,0.014172,-0.445241,-0.089754,,,,1.071689,1.26891,2.002604,-0.088542,-0.029438,,-0.250594,,,0.396369,,,,-0.308828,16.23712,7.316531,,0.93571,,,-1.27724,,-1.218835,1.369809,,0.264098,0.186981,-1.081097,-0.500489,-0.450951,-0.537666,,,-1.314306,-0.030444,-0.700091,-1.139296,-0.130347,-0.743521,,,-0.247459,-0.21729,-0.28127,-0.276687,0.416857,-0.67107,-0.813518,0.921864,1.101624,-1.50329,0.962854,1.608399,-1.01472,0.248801,-0.104118,-0.110137,0.168204,0.112685,0.459633,0.041742,0.04499,0.063626


In [17]:
# Wider look at date_id 1. Because the join is a left join, symbols with no
# matching row in `lags` keep null *_lag_1 values.
second_day_df.head(30)

date_id,time_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_09,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8,responder_0_lag_1,responder_1_lag_1,responder_2_lag_1,responder_3_lag_1,responder_4_lag_1,responder_5_lag_1,responder_6_lag_1,responder_7_lag_1,responder_8_lag_1
i16,i16,i8,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,i8,i8,i16,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
1,0,0,1.749479,,,,,,0.053447,2.192887,1.160708,-0.00983,11,7,76,-0.819115,1.403962,-0.098782,,0.340309,,-1.577595,-1.237946,1.039095,,0.756488,0.432157,-1.626238,-0.137137,,,0.952274,-0.186008,0.127051,,,,1.48572,0.760141,2.145851,-0.021399,-0.016591,,,,,,,,,3.88576,7.287179,4.877959,,2.20351,,,-0.850504,,-1.543995,1.741718,,2.035935,0.892778,-1.081097,,,,,,-0.734145,0.276101,-0.576441,-0.799783,4.812901,0.393107,,,-0.226359,-0.327119,-0.31504,-0.385573,-0.022355,0.337951,0.156607,1.406054,3.6808,0.598633,2.337418,2.577889,0.725604,,,,,,,,,
1,0,1,3.911768,,,,,,0.053053,2.126396,1.101112,-0.013003,11,7,76,-0.839616,2.043382,-0.109693,,0.063372,,-1.372385,-0.35307,0.751294,,2.282396,1.263641,-1.622081,-0.294155,,,1.758774,-0.304175,0.141651,,,,0.770913,0.97156,2.926169,-0.043944,-0.046172,,2.65897,,,1.678913,,,,1.167185,0.746974,2.753489,,0.673048,,,-0.765633,,-1.702322,1.68906,,0.753961,0.197442,-1.081097,-0.022838,-0.400573,0.173137,,,-0.822781,1.984625,-0.017244,-1.149028,1.284678,-0.176955,,,-0.280429,-0.242559,-0.31284,-0.281565,0.16938,0.043324,-0.030532,0.985784,1.485551,-0.379609,1.677068,1.752096,-0.523871,1.382971,0.416069,3.134381,1.032238,0.580303,1.853194,0.01209,0.040493,0.013765
1,0,2,1.062098,,,,,,0.049771,1.897569,0.936499,-0.012585,81,2,59,-0.688746,1.482645,-0.340683,,0.370178,,-1.30026,-1.844302,-0.830805,,-0.238425,0.028479,-1.182548,0.392532,,,0.341612,-0.521844,-0.178283,,,,-1.444112,-1.482314,-0.9553,-0.286239,-0.298491,,2.912592,,,3.202891,,,,1.332428,-0.003779,0.282652,,3.159928,,,-0.592981,,-0.646195,1.875373,,4.203779,1.5127,-1.081097,-0.362663,-0.576967,-0.583051,,,-0.807695,0.851287,-0.271779,-1.111671,1.145801,-0.261378,,,0.366157,0.405923,-0.148441,-0.146584,-0.484949,2.462649,-0.544341,-0.199138,1.638287,-0.311347,0.217322,-1.571629,0.598442,,,,,,,,,
1,0,7,1.083765,,,,,,0.034801,1.365109,0.866335,-0.009643,11,7,76,-1.025133,0.925701,-0.523978,,0.21753,,-2.165407,-1.268907,2.111038,,0.484647,1.033527,-0.771244,0.504023,,,0.240051,0.634331,1.433624,,,,1.111958,1.432608,1.147511,-0.17079,-0.065815,,-1.086018,,,-1.538898,,,,0.635281,1.416676,0.927752,,2.450088,,,-0.909762,,-0.857625,1.501932,,0.078696,-0.019289,-1.081097,-0.571457,-0.229733,-0.426658,,,-0.865701,-0.507224,-0.487616,-0.91007,1.927954,-0.131662,,,-0.315205,-0.304216,-0.353955,-0.318037,-1.926957,-1.510374,-0.155575,-0.327899,-0.127712,-0.237061,1.023567,1.464319,-0.335524,0.635954,0.173196,3.124839,1.068124,0.436262,1.311089,0.116055,0.073651,0.164904
1,0,9,2.015567,,,,,,0.052724,1.93197,0.964913,-0.015632,11,7,76,-1.534853,-0.174624,-0.603317,,-0.212626,,-1.962477,-0.819575,0.850963,,1.199708,0.503995,-0.922459,0.257535,,,0.014172,-0.445241,-0.089754,,,,1.071689,1.26891,2.002604,-0.088542,-0.029438,,-0.250594,,,0.396369,,,,-0.308828,16.23712,7.316531,,0.93571,,,-1.27724,,-1.218835,1.369809,,0.264098,0.186981,-1.081097,-0.500489,-0.450951,-0.537666,,,-1.314306,-0.030444,-0.700091,-1.139296,-0.130347,-0.743521,,,-0.247459,-0.21729,-0.28127,-0.276687,0.416857,-0.67107,-0.813518,0.921864,1.101624,-1.50329,0.962854,1.608399,-1.01472,0.248801,-0.104118,-0.110137,0.168204,0.112685,0.459633,0.041742,0.04499,0.063626
1,0,10,0.77363,,,,,,0.067796,3.182509,0.779314,-0.016156,42,5,150,-1.060049,0.960094,-0.372507,,0.163267,,-1.410131,-1.132914,0.204911,,-0.282548,-0.318057,-1.459293,-0.941575,,,0.562559,-0.374894,0.08647,,,,0.896635,0.933808,2.027686,0.098274,0.185163,,-2.391001,,,-2.38522,,,,-0.013896,7.63488,3.956042,,0.751485,,,-0.386404,,-1.661961,1.23,,0.253676,-0.049409,-1.081097,-0.448618,-0.331322,-0.374927,,,-1.15283,0.702894,-0.363198,-0.972505,0.326733,-0.64355,,,-0.202315,-0.316041,-0.159497,-0.320991,-0.013016,-0.396741,-0.277027,-1.123182,-0.717669,-1.388763,-2.123101,-0.457139,-1.967546,-1.626361,-0.753249,-0.433064,-1.753868,-0.806781,-2.706254,-0.455618,-0.197604,-0.864137
1,0,13,0.848667,,,,,,0.028705,1.149839,0.756712,-0.012358,50,1,522,0.202192,8.836797,4.562393,,5.043127,,-1.280806,-1.494693,-1.329525,,-0.320104,2.082726,-0.672189,1.076837,,,-0.588403,4.61185,4.293418,,,,3.202066,2.906221,3.076441,0.381584,0.209283,,,,,,,,,-0.011848,-0.004332,-0.011676,,2.448127,,,-2.735469,,-0.472805,0.239302,,-6.354319,-2.792035,-1.081097,,,,,,1.031243,24.793123,8.173369,-0.796975,-0.567876,-0.607295,,,-0.327489,-0.221774,-0.372921,-0.3267,0.283354,-5.0,-0.012748,-5.0,-5.0,-5.0,-0.312899,-0.841574,-0.183077,,,,,,,,,
1,0,15,0.615878,,,,,,0.038281,1.379171,0.388855,-0.012173,9,3,25,-0.89682,-0.458682,-0.817882,,0.05688,,-1.455759,-1.412851,0.171671,,-0.973531,-1.042202,-0.694158,0.994613,,,0.024019,-0.363502,-0.343292,,,,-1.600792,-1.373851,0.895236,0.17924,0.126553,,,,,,,,,-0.010449,-0.002266,-0.013021,,0.293015,,,-1.719882,,-1.569736,0.42513,,-0.372967,-0.340758,-1.081097,,,,,,-1.122836,-0.312645,-0.855207,-1.088937,-0.107421,-0.549493,,,1.836962,1.995013,4.822402,2.302238,-0.24305,1.759308,0.848355,-0.332405,0.650443,-0.740239,-0.338148,-0.658097,-1.342462,,,,,,,,,
1,0,16,1.376948,,,,,,0.039825,2.87378,0.480149,-0.01275,11,7,76,-0.865781,0.899471,-0.068091,,1.137149,,-1.750905,-1.089725,0.556182,,0.308664,0.622673,-1.228022,0.083183,,,0.525753,0.114926,0.63587,,,,-0.16529,-0.415782,-2.221012,-0.033943,0.032193,,-2.075244,,,-1.574746,,,,-1.4411,-4.794304,-2.352341,,1.973768,,,-1.431793,,-0.63167,0.898689,,1.458067,0.773695,-1.081097,,,,,,-1.149468,0.219897,-0.495558,-0.837325,2.732417,0.430316,,,-0.308821,-0.239087,-0.204367,-0.289617,-1.443583,-1.164387,-1.506644,-0.271647,-0.387036,-1.798204,2.017544,0.528743,-1.084174,-1.389964,-0.35261,-0.175036,-0.811313,-0.525263,-0.691823,0.014288,0.037717,0.021741
1,0,19,2.211876,,,,,,0.034414,1.416918,0.833757,-0.005772,4,3,11,-0.837793,1.125798,-0.286371,,-0.758309,,-1.457058,-1.962869,0.189085,,0.965976,1.108526,-0.75213,0.613145,,,-0.480373,0.142785,0.293518,,,,-2.434284,-1.227646,3.495409,0.105799,0.101932,,-0.147172,,,0.380489,,,,1.288574,-56.744083,-6.394832,,0.792911,,,-0.166957,,-1.716459,2.369105,,-1.292833,-0.810182,-1.081097,,,,,,-0.763306,0.408507,-0.356284,-1.431947,1.009419,-0.304862,,,-0.403787,-0.287361,-0.340536,-0.29125,-0.221735,-0.135975,-0.471902,-0.896692,-0.124874,-1.306144,-1.350804,-0.006192,-2.700724,1.588425,0.913681,4.615185,1.8067,0.995739,3.996884,0.01929,0.042577,0.036325


In [None]:
# Read lags.parquet from the data directory, downcasting every column the same
# way as train_df -- presumably to compare it with the locally built `lags`.
# NOTE(review): the bare `lags_df` renders every row because tbl_rows is set
# to -1 in the configuration cell; switch to .head() if the file is large.
lags_df = pl.read_parquet(path + 'lags.parquet/').select(pl.all().shrink_dtype())
print(lags_df.shape)
lags_df

In [None]:
def lgb_train(train_data, y, train_frac=0.85, shap_sample_frac=0.001, random_state=42, models_path=None):
    """Train a LightGBM regressor on a date-based split and return SHAP importances.

    Rows whose ``date_id`` is at most ``int(max(date_id) * train_frac)`` form
    the training set; the remaining rows form the validation set. The fitted
    model is pickled to ``<models_path>/lgb_model.pkl``, the metric curves are
    plotted, plain and weighted validation R2 are printed, and mean absolute
    SHAP values are computed on a random sample of the validation rows.

    NOTE(review): every column except ``date_id``, ``time_id``, ``symbol_id``
    and ``weight`` is used as a model input. If ``train_data`` still contains
    ``responder_*`` columns (including the one passed as ``y``), they are fed
    to the model as features -- confirm this is intended.

    Parameters
    ----------
    train_data : pl.DataFrame
        Must contain ``date_id``, ``time_id``, ``symbol_id`` and ``weight``.
    y : pl.Series
        Target values, row-aligned with ``train_data``.
    train_frac : float, default 0.85
        Fraction of the maximum ``date_id`` used as the train/validation cut.
    shap_sample_frac : float, default 0.001
        Fraction of validation rows sampled for the SHAP computation.
    random_state : int or None, default 42
        Seed for the SHAP validation sample only (the model is not re-seeded).
    models_path : str or None, default None
        Directory for the pickled model. ``None`` uses the notebook-level
        ``path`` + ``'my_folder/models/20241227_01/'``.

    Returns
    -------
    np.ndarray
        Mean absolute SHAP value per feature, in the column order of the
        feature matrix (``train_data`` minus the four dropped columns).

    Raises
    ------
    ValueError
        If ``y`` and ``train_data`` differ in length, or if the date cut
        leaves the training or validation set empty.
    """
    n_rows = train_data.shape[0]
    if len(y) != n_rows:
        raise ValueError(f'y has {len(y)} rows but train_data has {n_rows}')

    non_feature_cols = ['date_id', 'time_id', 'symbol_id', 'weight']

    date_ids = train_data['date_id']
    train_date_id_cut = int(date_ids.max() * train_frac)

    # One boolean mask drives the split of X, y and weights, so they stay
    # aligned even when train_data is not sorted by date_id.
    is_train = date_ids <= train_date_id_cut
    is_val = ~is_train

    n_train = int(is_train.sum())
    if n_train == 0 or n_train == n_rows:
        raise ValueError(
            f'date_id cut {train_date_id_cut} leaves an empty train or validation set'
        )

    X_train = train_data.filter(pl.col('date_id') <= train_date_id_cut).drop(non_feature_cols).select(pl.all().shrink_dtype()).to_pandas()
    X_val = train_data.filter(pl.col('date_id') > train_date_id_cut).drop(non_feature_cols).select(pl.all().shrink_dtype()).to_pandas()

    print(X_train.shape[0] / train_data.shape[0])

    y_train = y.filter(is_train).to_pandas()
    y_val = y.filter(is_val).to_pandas()

    weights = train_data['weight']
    weights_train = weights.filter(is_train).to_pandas()
    weights_val = weights.filter(is_val).to_pandas()

    print(X_train.shape)
    display(X_train.head())

    base_params = {
        'verbosity': -1,
        'learning_rate': 0.03,
        'feature_fraction': 0.8,
        'device': 'gpu',
        'early_stopping_round': 30,
        'lambda_l2': 100
    }

    # n_estimators is only an upper bound; early stopping ends training.
    model = LGBMRegressor(
        **base_params,
        n_estimators=100000
    )

    model.fit(
        X_train,
        y_train,
        sample_weight=weights_train,
        eval_set=[(X_train, y_train), (X_val, y_val)],
        eval_sample_weight=[weights_train, weights_val],
        callbacks=[log_evaluation(period=50)],
    )

    val_preds = model.predict(X_val)

    # Pass an explicit Axes: plot_metric creates its own figure otherwise,
    # which would leave a preceding plt.figure() empty.
    _, ax = plt.subplots()
    lgb.plot_metric(model, ax=ax)
    ax.set_ylim(0, 1)
    plt.show()

    if models_path is None:
        models_path = path + 'my_folder/models/20241227_01/'

    os.makedirs(models_path, exist_ok=True)

    with open(os.path.join(models_path, "lgb_model.pkl"), 'wb') as file:
        pickle.dump(model, file)

    print('Val R2 score is:', r2_score(y_val, val_preds))
    print('Val Weighted R2 score is:', r2_score(y_val, val_preds, sample_weight=weights_val))

    # X_val and y_val both carry a fresh RangeIndex after to_pandas(), so the
    # sampled index labels select the matching targets.
    sample_val = X_val.sample(frac=shap_sample_frac, random_state=random_state)
    sample_y = y_val.loc[sample_val.index]

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X=sample_val, y=sample_y)
    shap_importance = np.abs(shap_values).mean(axis=0)

    return shap_importance

In [None]:
# First pass over train_df; keeps the per-feature mean |SHAP| array it returns.
first_shap_importance = lgb_train(train_data=train_df, y=y_sr)

In [None]:
# lgb_train drops 'date_id', 'time_id', 'symbol_id' AND 'weight' before fitting,
# so the name list must drop the same four columns; otherwise zip() silently
# shifts every importance onto the wrong feature name.
cols = train_df.drop(['date_id', 'time_id', 'symbol_id', 'weight']).columns
assert len(cols) == len(first_shap_importance), (
    f'{len(cols)} feature names vs {len(first_shap_importance)} SHAP importances'
)

# One row per feature, ordered from most to least important.
imp_df = (
    pd.DataFrame({'Feature': cols, 'Importance': first_shap_importance})
    .sort_values('Importance', ascending=False)
)

In [None]:
# Sanity check: imp_df has the two columns 'Feature' and 'Importance'.
print(imp_df.shape)

In [None]:
# Show the importance table, sorted by 'Importance' in descending order.
imp_df

In [None]:
# Features at or below the 30th percentile of SHAP importance are candidates for removal.
importance_cutoff = imp_df['Importance'].quantile(0.3)
is_unimportant = imp_df['Importance'] <= importance_cutoff
unimportant_df = imp_df.loc[is_unimportant]
unimportant_cols = unimportant_df['Feature'].tolist()

In [None]:
# lgb_train reads or drops these columns by name, so they must survive feature
# selection even if one of them ends up in unimportant_cols.
required_cols = {'date_id', 'time_id', 'symbol_id', 'weight'}
cols_to_drop = [col for col in unimportant_cols if col not in required_cols]

train_selected_df = train_df.drop(cols_to_drop)
print(train_selected_df.shape)
train_selected_df.head()

In [None]:
# Second pass on the reduced column set produced by the feature-selection step.
second_shap_importance = lgb_train(train_data=train_selected_df, y=y_sr)