In [2]:
import pandas as pd
import polars as pl
import numpy as np
import os
from tqdm.auto import tqdm
from matplotlib import pyplot as plt
import pickle

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
import lightgbm as lgb
from xgboost import XGBRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

import warnings
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None

# import kaggle_evaluation.jane_street_inference_server

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
train = pl.scan_parquet("./preprocessed_dataset/training.parquet").collect().to_pandas()
valid = pl.scan_parquet("./preprocessed_dataset/validation.parquet").collect().to_pandas()
train.shape, valid.shape

((27256944, 129), (7397456, 129))

In [4]:
train.head()

Unnamed: 0,date_id,time_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_09,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8,partition_id,responder_0_lag_1_min,responder_0_lag_1_max,responder_0_lag_1_mean,responder_0_lag_1_sum,responder_1_lag_1_min,responder_1_lag_1_max,responder_1_lag_1_mean,responder_1_lag_1_sum,responder_2_lag_1_min,responder_2_lag_1_max,responder_2_lag_1_mean,responder_2_lag_1_sum,responder_3_lag_1_min,responder_3_lag_1_max,responder_3_lag_1_mean,responder_3_lag_1_sum,responder_4_lag_1_min,responder_4_lag_1_max,responder_4_lag_1_mean,responder_4_lag_1_sum,responder_5_lag_1_min,responder_5_lag_1_max,responder_5_lag_1_mean,responder_5_lag_1_sum,responder_6_lag_1_min,responder_6_lag_1_max,responder_6_lag_1_mean,responder_6_lag_1_sum,responder_7_lag_1_min,responder_7_lag_1_max,responder_7_lag_1_mean,responder_7_lag_1_sum,responder_8_lag_1_min,responder_8_lag_1_max,responder_8_lag_1_mean,responder_8_lag_1_sum
0,1000,0,0,3.324375,-0.276179,-0.655325,-0.40481,-0.349785,-2.882722,0.222446,2.04136,0.077636,-0.2129,11,7,76,-0.676669,0.433896,-0.355392,-0.325359,-0.492663,-0.312569,-2.078509,-1.272981,0.411098,-0.144264,0.86681,0.739595,0.312714,-0.260926,1.02241,2.206249,0.694807,-0.205259,-0.363403,-0.219138,-0.757111,-0.329624,-1.524193,-1.683511,-1.642109,-0.073022,-0.417487,0.535187,1.487399,0.721523,0.569957,-0.293067,0.759278,-1.505769,1.269841,1.11414,0.245846,0.316328,1.741328,2.114579,1.746721,0.676324,-0.946189,-0.114278,-0.96018,1.436662,0.913091,2.705352,1.023804,1.415883,-0.28104,-0.141356,-0.280767,-1.607651,-1.580315,-0.779298,0.2267,-0.343499,-1.106544,0.829147,-0.559327,-0.398754,-0.345258,-0.201428,-0.238426,-0.317972,-0.307707,-0.322415,0.021479,0.29822,0.207253,-0.629241,0.283405,0.496563,-0.696224,0.288554,5,-5.0,5.0,-0.004038,-136.791397,-2.510265,5.0,-0.029304,-992.809082,-5.0,5.0,0.000356,12.054842,-5.0,5.0,-0.005787,-196.050095,-4.482391,4.49442,-0.010955,-371.140717,-5.0,5.0,-0.006098,-206.614258,-5.0,5.0,0.000154,5.210876,-4.959575,5.0,0.016449,557.28479,-5.0,5.0,-0.004793,-162.399887
1,1000,0,1,4.711303,-0.418316,-0.762019,-0.43368,-0.616798,-2.57797,0.205137,2.825696,0.057949,-0.334825,11,7,76,-1.086459,0.187593,-0.468343,-0.66183,-0.715271,-0.533931,-2.1003,-1.40754,0.307116,0.023577,2.056575,1.128782,0.405507,-0.028294,0.41147,1.283333,1.51304,-0.577109,-0.717683,0.02388,-0.941301,0.112354,-0.443716,-0.916682,0.187367,-0.104276,-0.280721,1.313757,0.678723,0.894007,-0.14015,-0.552522,-0.254354,-1.539251,1.249265,0.383563,0.544622,0.424014,1.022142,1.396059,-0.79931,0.306575,1.002958,-0.186624,-1.783038,2.750445,1.651704,1.351758,0.904945,1.415883,-0.385043,-0.392026,-0.400931,-2.225126,-1.699629,-0.378554,0.14099,-0.509071,-0.801501,0.154954,-0.657456,-0.252326,-0.273913,-0.286799,-0.230067,-0.31766,-0.292564,-0.485403,-0.368653,-0.31481,0.13614,-0.228491,0.11328,0.529877,-0.063614,0.379624,5,-5.0,5.0,-0.004038,-136.791397,-2.510265,5.0,-0.029304,-992.809082,-5.0,5.0,0.000356,12.054842,-5.0,5.0,-0.005787,-196.050095,-4.482391,4.49442,-0.010955,-371.140717,-5.0,5.0,-0.006098,-206.614258,-5.0,5.0,0.000154,5.210876,-4.959575,5.0,0.016449,557.28479,-5.0,5.0,-0.004793,-162.399887
2,1000,0,2,3.028847,-0.724897,-1.223187,-0.452174,-0.523907,-2.61743,0.277329,3.187561,0.106466,-0.605264,81,2,59,-0.843919,1.43855,-0.277886,-0.560161,-0.230204,-0.607569,-1.796766,-2.026716,-1.291414,-0.15041,0.849451,0.358889,-0.965452,-1.009622,0.523362,1.558008,1.162534,-0.706968,-0.415798,-0.067852,-0.719535,-0.220722,-0.784527,-1.42784,-1.550996,0.21641,0.067241,1.410361,0.922301,0.151298,0.787733,-1.43247,-0.137687,-1.718383,0.873165,-0.608555,-1.130368,-1.85057,1.61765,1.1532,0.091229,1.956982,-1.624783,1.727688,-1.461313,1.325366,1.603095,-1.778872,-0.564926,1.415883,-0.270195,-0.21319,-0.340736,-1.803492,-2.130251,-1.020369,1.126988,-0.209642,-0.863716,0.976765,-0.333443,-0.296884,-0.170356,0.665298,0.42544,-0.060631,-0.055999,0.055522,0.09482,-0.226534,0.607437,-0.308555,-0.184021,0.746983,-0.457417,-5.3e-05,5,-5.0,5.0,-0.004038,-136.791397,-2.510265,5.0,-0.029304,-992.809082,-5.0,5.0,0.000356,12.054842,-5.0,5.0,-0.005787,-196.050095,-4.482391,4.49442,-0.010955,-371.140717,-5.0,5.0,-0.006098,-206.614258,-5.0,5.0,0.000154,5.210876,-4.959575,5.0,0.016449,557.28479,-5.0,5.0,-0.004793,-162.399887
3,1000,0,3,2.099438,-0.717159,-0.259479,-0.522695,-0.066547,-2.712632,0.151858,2.101236,0.050587,-0.202774,4,3,11,-1.008739,0.739211,-0.600964,-0.727756,-0.312651,-0.785713,-1.890998,-2.191556,-0.298989,0.435727,0.454574,-0.125127,2.71948,0.807435,-0.435139,-0.559512,-0.398334,-0.446672,-1.123957,0.402513,-0.795163,-0.521615,-0.628105,-0.185086,-1.043298,-0.127314,-0.161652,-0.467614,1.032857,0.149189,0.193527,0.106351,-0.202571,-0.681127,1.176033,1.032387,2.696958,1.046625,-0.109621,2.372714,-0.277001,1.05145,0.000985,0.751337,-0.854866,1.854103,0.506535,4.236681,1.543532,1.415883,-0.362911,-0.087396,-0.209432,-1.323028,-1.655476,-0.806757,0.649669,-0.410411,-1.075513,0.434237,-0.629315,0.586753,0.629628,3.187536,3.547002,0.552897,0.628914,-1.045448,-0.660787,-1.386082,0.171568,0.124653,2.525337,0.941218,0.571101,3.878372,5,-5.0,5.0,-0.004038,-136.791397,-2.510265,5.0,-0.029304,-992.809082,-5.0,5.0,0.000356,12.054842,-5.0,5.0,-0.005787,-196.050095,-4.482391,4.49442,-0.010955,-371.140717,-5.0,5.0,-0.006098,-206.614258,-5.0,5.0,0.000154,5.210876,-4.959575,5.0,0.016449,557.28479,-5.0,5.0,-0.004793,-162.399887
4,1000,0,4,3.166049,-0.377845,-0.360645,-0.641121,-0.508439,-2.661481,0.072445,1.045034,0.032492,-0.184668,15,1,9,-0.755182,0.091499,-0.642677,-0.950556,-0.327257,-0.899928,-2.950822,-1.13996,-0.713557,0.542368,0.820871,-0.418231,6.986126,2.24682,-1.285617,-0.375309,0.539631,-0.509956,-0.693066,0.716088,0.336757,0.453049,-0.60723,-2.030101,-3.161393,-0.083239,-0.241423,0.369969,0.856179,0.126312,0.027258,-0.579862,-0.482137,-1.087101,1.551772,0.059531,0.512374,-0.079132,1.077313,2.150784,0.131066,1.079772,-0.302273,0.578545,-1.327679,2.083986,0.744855,3.022675,1.44955,1.415883,-0.332068,-0.013393,-0.284892,-1.493068,-1.513046,-1.160538,0.054993,-0.522811,-1.0998,0.071865,-0.62714,0.042729,0.2352,5.050842,3.694357,1.085351,0.583265,-0.2549,-0.189837,0.429034,0.117711,-0.182523,-0.252368,0.204584,-0.15876,-0.68163,5,-5.0,5.0,-0.004038,-136.791397,-2.510265,5.0,-0.029304,-992.809082,-5.0,5.0,0.000356,12.054842,-5.0,5.0,-0.005787,-196.050095,-4.482391,4.49442,-0.010955,-371.140717,-5.0,5.0,-0.006098,-206.614258,-5.0,5.0,0.000154,5.210876,-4.959575,5.0,0.016449,557.28479,-5.0,5.0,-0.004793,-162.399887


In [5]:
valid.head()

Unnamed: 0,date_id,time_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_09,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8,partition_id,responder_0_lag_1_min,responder_0_lag_1_max,responder_0_lag_1_mean,responder_0_lag_1_sum,responder_1_lag_1_min,responder_1_lag_1_max,responder_1_lag_1_mean,responder_1_lag_1_sum,responder_2_lag_1_min,responder_2_lag_1_max,responder_2_lag_1_mean,responder_2_lag_1_sum,responder_3_lag_1_min,responder_3_lag_1_max,responder_3_lag_1_mean,responder_3_lag_1_sum,responder_4_lag_1_min,responder_4_lag_1_max,responder_4_lag_1_mean,responder_4_lag_1_sum,responder_5_lag_1_min,responder_5_lag_1_max,responder_5_lag_1_mean,responder_5_lag_1_sum,responder_6_lag_1_min,responder_6_lag_1_max,responder_6_lag_1_mean,responder_6_lag_1_sum,responder_7_lag_1_min,responder_7_lag_1_max,responder_7_lag_1_mean,responder_7_lag_1_sum,responder_8_lag_1_min,responder_8_lag_1_max,responder_8_lag_1_mean,responder_8_lag_1_sum
0,1500,0,0,4.426604,0.299048,-2.586622,0.30078,0.685828,2.150813,-0.692489,-0.159566,-0.829518,0.505951,11,7,76,-0.973271,2.296871,-0.123932,-0.928972,-0.526217,-0.808886,-1.656028,-1.476867,0.454376,-0.049191,0.897194,0.927829,3.011863,0.280865,0.985659,0.529341,0.195879,-0.90816,-0.999317,-0.057682,-2.967964,-0.906526,-2.636707,-3.170421,-2.922452,-0.019737,0.283299,-1.159388,1.1307,-0.701951,-1.590536,-0.189725,-1.657003,-1.188985,1.142485,-0.677676,2.368523,-0.088314,-2.308198,2.195963,-1.299398,-0.930982,0.449019,-1.569682,-0.548544,2.261573,-1.144345,1.746195,0.641324,-0.644595,-0.527839,-0.293633,-0.495875,-1.291651,-1.418662,-0.738298,0.7786,-0.316974,-0.652625,2.652362,0.134199,0.275032,0.278624,2.72201,2.666984,0.572996,0.603494,-0.433041,0.073844,-0.385438,-1.170643,0.676503,-0.609011,-1.314976,0.819128,-0.747843,8,-5.0,5.0,-0.059639,-2251.505127,-5.0,5.0,-0.159751,-6030.923828,-5.0,5.0,-0.013975,-527.591187,-5.0,5.0,0.004935,186.320587,-5.0,5.0,0.034419,1299.403076,-5.0,5.0,-0.003542,-133.705368,-5.0,5.0,0.026963,1017.908264,-4.250791,5.0,0.089488,3378.341553,-5.0,5.0,0.003796,143.312149
1,1500,0,1,2.180898,0.084839,-2.740633,0.845043,-0.109019,2.475105,-1.215764,-0.19648,-0.740933,0.494199,11,7,76,-0.865345,2.34736,-0.008078,-0.625953,-0.766337,-0.59456,-1.044726,-2.061352,0.13683,0.122308,-0.102373,0.570324,2.77765,-0.858358,-1.851242,-0.743143,0.322359,-0.633344,-1.032915,0.108102,1.324808,-0.626489,0.624657,0.875317,-1.312669,0.109273,0.214136,-0.693444,0.124902,0.122866,-1.23377,-1.08617,-0.70481,-1.753582,1.150372,-0.293006,-0.90724,-0.437529,-1.296799,0.017573,-0.470545,-2.055546,-0.345634,-0.556286,-1.906425,1.674549,-0.967334,-0.706519,-0.469597,-0.644595,-0.339356,-0.346034,-0.331003,-1.533621,-1.740196,-0.552159,1.703114,0.008995,-1.000061,2.383818,-0.0144,0.282659,0.350185,-0.099467,-0.112053,-0.219843,-0.337088,0.434484,0.391614,-0.332026,0.097082,1.164387,-0.708231,-0.086879,1.134315,-1.159028,8,-5.0,5.0,-0.059639,-2251.505127,-5.0,5.0,-0.159751,-6030.923828,-5.0,5.0,-0.013975,-527.591187,-5.0,5.0,0.004935,186.320587,-5.0,5.0,0.034419,1299.403076,-5.0,5.0,-0.003542,-133.705368,-5.0,5.0,0.026963,1017.908264,-4.250791,5.0,0.089488,3378.341553,-5.0,5.0,0.003796,143.312149
2,1500,0,2,2.481837,-0.012348,-1.991983,-0.227444,0.324845,2.569635,-1.38426,-0.175967,-0.802814,0.628703,81,2,59,-0.824781,0.663235,-0.469845,0.252981,-0.14856,-0.576213,-1.488687,-1.308336,0.489053,-0.154577,-0.363205,-0.090004,2.414092,-0.563555,0.722114,0.082055,-0.421517,-0.788858,-0.546105,-0.256529,1.490553,-0.715426,1.212882,1.49263,-1.857415,0.263952,0.447342,-1.472887,0.951669,-0.67118,-1.22089,-0.808094,-1.015753,-1.23792,0.315994,-0.316383,-2.503886,-1.571836,-1.456378,0.474543,-0.996384,-0.82476,-1.311872,-0.720342,-1.798486,1.062707,-0.816079,-3.962492,-1.38563,-0.644595,-0.251004,-0.135457,-0.282759,-1.52327,-1.929727,-0.830298,0.820165,-0.249728,-1.275444,0.453026,-0.56945,1.126951,1.2782,4.202456,3.410814,0.976747,0.474274,0.39952,0.06456,-0.860682,-1.770414,-1.573436,-1.937909,-2.544681,-1.296882,-2.472254,8,-5.0,5.0,-0.059639,-2251.505127,-5.0,5.0,-0.159751,-6030.923828,-5.0,5.0,-0.013975,-527.591187,-5.0,5.0,0.004935,186.320587,-5.0,5.0,0.034419,1299.403076,-5.0,5.0,-0.003542,-133.705368,-5.0,5.0,0.026963,1017.908264,-4.250791,5.0,0.089488,3378.341553,-5.0,5.0,0.003796,143.312149
3,1500,0,3,2.257297,0.038545,-2.354101,0.211194,0.460168,1.853636,-1.191883,-0.27166,-1.081388,0.454297,4,3,11,-0.54563,1.557022,-0.272898,-0.888821,1.21447,-0.501437,-1.575279,-1.76565,-0.17195,-0.039821,-0.155127,-0.856117,2.692951,-0.741403,-0.074333,-0.360155,-0.803108,-0.940098,-0.72795,-0.039504,1.730812,-0.476625,1.543752,0.91884,-2.088719,0.159027,0.221379,-1.308271,1.003886,-0.339578,-2.150762,-0.936004,-1.494127,-1.343386,1.453148,-2.383814,-0.651252,-0.87494,-2.061404,2.58638,-1.557489,-1.590055,-1.303314,-0.913252,-0.245286,1.10667,-1.929316,-0.998047,-0.593398,-0.644595,-0.200456,-0.144005,-0.23722,-1.344301,-1.994867,-0.900081,1.129493,-0.236318,-1.198223,2.055767,-0.128319,1.779203,1.471522,20.604864,17.47576,9.281943,7.214635,-0.700328,-0.29134,-0.437914,-0.74813,-0.896065,-1.244823,-0.711154,-1.123474,-2.053464,8,-5.0,5.0,-0.059639,-2251.505127,-5.0,5.0,-0.159751,-6030.923828,-5.0,5.0,-0.013975,-527.591187,-5.0,5.0,0.004935,186.320587,-5.0,5.0,0.034419,1299.403076,-5.0,5.0,-0.003542,-133.705368,-5.0,5.0,0.026963,1017.908264,-4.250791,5.0,0.089488,3378.341553,-5.0,5.0,0.003796,143.312149
4,1500,0,4,2.768874,0.327654,-2.558024,0.554381,0.622369,2.664366,-0.819317,-0.099036,-0.496227,0.23509,15,1,9,-0.894622,0.72856,-0.523362,-1.091727,-0.482817,-0.808715,-1.911317,-1.727229,-0.832224,0.331219,0.204233,-0.511465,6.609023,2.970973,-1.593679,-1.474862,-0.27928,-0.643428,-0.499375,0.285893,1.364949,-1.03737,1.536376,2.07727,2.636199,0.173928,0.147402,-0.288785,0.222003,0.439775,-0.167832,-2.114571,0.496864,-1.364847,0.590064,-1.552628,-1.387474,-0.567388,-0.232144,0.202221,0.522065,0.63643,-1.091887,1.102161,-2.55602,0.529221,-0.122342,-1.541575,-1.399525,-0.644595,-0.564423,-0.272925,-0.463661,-1.401917,-1.432396,-0.723318,0.592202,-0.315948,-1.127284,0.173116,-0.611687,0.793898,0.898277,3.475598,2.057356,-0.432939,-0.408113,0.242107,0.02259,-0.685559,0.239782,0.739654,0.847956,0.197136,1.138621,1.099223,8,-5.0,5.0,-0.059639,-2251.505127,-5.0,5.0,-0.159751,-6030.923828,-5.0,5.0,-0.013975,-527.591187,-5.0,5.0,0.004935,186.320587,-5.0,5.0,0.034419,1299.403076,-5.0,5.0,-0.003542,-133.705368,-5.0,5.0,0.026963,1017.908264,-4.250791,5.0,0.089488,3378.341553,-5.0,5.0,0.003796,143.312149


In [6]:
class CONFIG:
    seed = 42
    target_col = "responder_6"
    cont_ftrs = ["weight"] + \
                [f"feature_{i:02d}" for i in range(9)] +  \
                [f"feature_{i:02d}" for i in range(12, 79)] + \
                [f"responder_{i}_lag_1_min" for i in range(9)] + \
                [f"responder_{i}_lag_1_max" for i in range(9)] + \
                [f"responder_{i}_lag_1_mean" for i in range(9)] + \
                [f"responder_{i}_lag_1_sum" for i in range(9)]
    onehot_ftrs = ["feature_09", "feature_10", "feature_11"]
    feature_cols = cont_ftrs + onehot_ftrs

In [7]:
X_train = train[ CONFIG.feature_cols ]
y_train = train[ CONFIG.target_col ]
w_train = train[ "weight" ]
X_valid = valid[ CONFIG.feature_cols ]
y_valid = valid[ CONFIG.target_col ]
w_valid = valid[ "weight" ]

X_train.shape, y_train.shape, w_train.shape, X_valid.shape, y_valid.shape, w_valid.shape

((27256944, 116),
 (27256944,),
 (27256944,),
 (7397456, 116),
 (7397456,),
 (7397456,))

In [8]:
# collect all the encoders into one preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), CONFIG.onehot_ftrs),
        ('std', StandardScaler(), CONFIG.cont_ftrs)])

prep = Pipeline(steps=[('preprocessor', preprocessor)])

In [9]:
X_train.head()

Unnamed: 0,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,feature_07,feature_08,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_0_lag_1_min,responder_1_lag_1_min,responder_2_lag_1_min,responder_3_lag_1_min,responder_4_lag_1_min,responder_5_lag_1_min,responder_6_lag_1_min,responder_7_lag_1_min,responder_8_lag_1_min,responder_0_lag_1_max,responder_1_lag_1_max,responder_2_lag_1_max,responder_3_lag_1_max,responder_4_lag_1_max,responder_5_lag_1_max,responder_6_lag_1_max,responder_7_lag_1_max,responder_8_lag_1_max,responder_0_lag_1_mean,responder_1_lag_1_mean,responder_2_lag_1_mean,responder_3_lag_1_mean,responder_4_lag_1_mean,responder_5_lag_1_mean,responder_6_lag_1_mean,responder_7_lag_1_mean,responder_8_lag_1_mean,responder_0_lag_1_sum,responder_1_lag_1_sum,responder_2_lag_1_sum,responder_3_lag_1_sum,responder_4_lag_1_sum,responder_5_lag_1_sum,responder_6_lag_1_sum,responder_7_lag_1_sum,responder_8_lag_1_sum,feature_09,feature_10,feature_11
0,3.324375,-0.276179,-0.655325,-0.40481,-0.349785,-2.882722,0.222446,2.04136,0.077636,-0.2129,-0.676669,0.433896,-0.355392,-0.325359,-0.492663,-0.312569,-2.078509,-1.272981,0.411098,-0.144264,0.86681,0.739595,0.312714,-0.260926,1.02241,2.206249,0.694807,-0.205259,-0.363403,-0.219138,-0.757111,-0.329624,-1.524193,-1.683511,-1.642109,-0.073022,-0.417487,0.535187,1.487399,0.721523,0.569957,-0.293067,0.759278,-1.505769,1.269841,1.11414,0.245846,0.316328,1.741328,2.114579,1.746721,0.676324,-0.946189,-0.114278,-0.96018,1.436662,0.913091,2.705352,1.023804,1.415883,-0.28104,-0.141356,-0.280767,-1.607651,-1.580315,-0.779298,0.2267,-0.343499,-1.106544,0.829147,-0.559327,-0.398754,-0.345258,-0.201428,-0.238426,-0.317972,-0.307707,-5.0,-2.510265,-5.0,-5.0,-4.482391,-5.0,-5.0,-4.959575,-5.0,5.0,5.0,5.0,5.0,4.49442,5.0,5.0,5.0,5.0,-0.004038,-0.029304,0.000356,-0.005787,-0.010955,-0.006098,0.000154,0.016449,-0.004793,-136.791397,-992.809082,12.054842,-196.050095,-371.140717,-206.614258,5.210876,557.28479,-162.399887,11,7,76
1,4.711303,-0.418316,-0.762019,-0.43368,-0.616798,-2.57797,0.205137,2.825696,0.057949,-0.334825,-1.086459,0.187593,-0.468343,-0.66183,-0.715271,-0.533931,-2.1003,-1.40754,0.307116,0.023577,2.056575,1.128782,0.405507,-0.028294,0.41147,1.283333,1.51304,-0.577109,-0.717683,0.02388,-0.941301,0.112354,-0.443716,-0.916682,0.187367,-0.104276,-0.280721,1.313757,0.678723,0.894007,-0.14015,-0.552522,-0.254354,-1.539251,1.249265,0.383563,0.544622,0.424014,1.022142,1.396059,-0.79931,0.306575,1.002958,-0.186624,-1.783038,2.750445,1.651704,1.351758,0.904945,1.415883,-0.385043,-0.392026,-0.400931,-2.225126,-1.699629,-0.378554,0.14099,-0.509071,-0.801501,0.154954,-0.657456,-0.252326,-0.273913,-0.286799,-0.230067,-0.31766,-0.292564,-5.0,-2.510265,-5.0,-5.0,-4.482391,-5.0,-5.0,-4.959575,-5.0,5.0,5.0,5.0,5.0,4.49442,5.0,5.0,5.0,5.0,-0.004038,-0.029304,0.000356,-0.005787,-0.010955,-0.006098,0.000154,0.016449,-0.004793,-136.791397,-992.809082,12.054842,-196.050095,-371.140717,-206.614258,5.210876,557.28479,-162.399887,11,7,76
2,3.028847,-0.724897,-1.223187,-0.452174,-0.523907,-2.61743,0.277329,3.187561,0.106466,-0.605264,-0.843919,1.43855,-0.277886,-0.560161,-0.230204,-0.607569,-1.796766,-2.026716,-1.291414,-0.15041,0.849451,0.358889,-0.965452,-1.009622,0.523362,1.558008,1.162534,-0.706968,-0.415798,-0.067852,-0.719535,-0.220722,-0.784527,-1.42784,-1.550996,0.21641,0.067241,1.410361,0.922301,0.151298,0.787733,-1.43247,-0.137687,-1.718383,0.873165,-0.608555,-1.130368,-1.85057,1.61765,1.1532,0.091229,1.956982,-1.624783,1.727688,-1.461313,1.325366,1.603095,-1.778872,-0.564926,1.415883,-0.270195,-0.21319,-0.340736,-1.803492,-2.130251,-1.020369,1.126988,-0.209642,-0.863716,0.976765,-0.333443,-0.296884,-0.170356,0.665298,0.42544,-0.060631,-0.055999,-5.0,-2.510265,-5.0,-5.0,-4.482391,-5.0,-5.0,-4.959575,-5.0,5.0,5.0,5.0,5.0,4.49442,5.0,5.0,5.0,5.0,-0.004038,-0.029304,0.000356,-0.005787,-0.010955,-0.006098,0.000154,0.016449,-0.004793,-136.791397,-992.809082,12.054842,-196.050095,-371.140717,-206.614258,5.210876,557.28479,-162.399887,81,2,59
3,2.099438,-0.717159,-0.259479,-0.522695,-0.066547,-2.712632,0.151858,2.101236,0.050587,-0.202774,-1.008739,0.739211,-0.600964,-0.727756,-0.312651,-0.785713,-1.890998,-2.191556,-0.298989,0.435727,0.454574,-0.125127,2.71948,0.807435,-0.435139,-0.559512,-0.398334,-0.446672,-1.123957,0.402513,-0.795163,-0.521615,-0.628105,-0.185086,-1.043298,-0.127314,-0.161652,-0.467614,1.032857,0.149189,0.193527,0.106351,-0.202571,-0.681127,1.176033,1.032387,2.696958,1.046625,-0.109621,2.372714,-0.277001,1.05145,0.000985,0.751337,-0.854866,1.854103,0.506535,4.236681,1.543532,1.415883,-0.362911,-0.087396,-0.209432,-1.323028,-1.655476,-0.806757,0.649669,-0.410411,-1.075513,0.434237,-0.629315,0.586753,0.629628,3.187536,3.547002,0.552897,0.628914,-5.0,-2.510265,-5.0,-5.0,-4.482391,-5.0,-5.0,-4.959575,-5.0,5.0,5.0,5.0,5.0,4.49442,5.0,5.0,5.0,5.0,-0.004038,-0.029304,0.000356,-0.005787,-0.010955,-0.006098,0.000154,0.016449,-0.004793,-136.791397,-992.809082,12.054842,-196.050095,-371.140717,-206.614258,5.210876,557.28479,-162.399887,4,3,11
4,3.166049,-0.377845,-0.360645,-0.641121,-0.508439,-2.661481,0.072445,1.045034,0.032492,-0.184668,-0.755182,0.091499,-0.642677,-0.950556,-0.327257,-0.899928,-2.950822,-1.13996,-0.713557,0.542368,0.820871,-0.418231,6.986126,2.24682,-1.285617,-0.375309,0.539631,-0.509956,-0.693066,0.716088,0.336757,0.453049,-0.60723,-2.030101,-3.161393,-0.083239,-0.241423,0.369969,0.856179,0.126312,0.027258,-0.579862,-0.482137,-1.087101,1.551772,0.059531,0.512374,-0.079132,1.077313,2.150784,0.131066,1.079772,-0.302273,0.578545,-1.327679,2.083986,0.744855,3.022675,1.44955,1.415883,-0.332068,-0.013393,-0.284892,-1.493068,-1.513046,-1.160538,0.054993,-0.522811,-1.0998,0.071865,-0.62714,0.042729,0.2352,5.050842,3.694357,1.085351,0.583265,-5.0,-2.510265,-5.0,-5.0,-4.482391,-5.0,-5.0,-4.959575,-5.0,5.0,5.0,5.0,5.0,4.49442,5.0,5.0,5.0,5.0,-0.004038,-0.029304,0.000356,-0.005787,-0.010955,-0.006098,0.000154,0.016449,-0.004793,-136.791397,-992.809082,12.054842,-196.050095,-371.140717,-206.614258,5.210876,557.28479,-162.399887,15,1,9


In [10]:
# RANDOM SAMPLE 1000000 ROWS TO FORM OUR TRAIN, VAL DF
X_train = X_train.sample(100000, random_state=CONFIG.seed)
y_train = y_train[X_train.index]
w_train = w_train[X_train.index]

X_valid = X_valid.sample(20000, random_state=CONFIG.seed)
y_valid = y_valid[X_valid.index]
w_valid = w_valid[X_valid.index]


df_train = prep.fit_transform(X_train)
feature_names = prep.get_feature_names_out()
df_train = pd.DataFrame(data=df_train, columns=feature_names, index=X_train.index)

df_valid = prep.transform(X_valid)
df_valid = pd.DataFrame(data=df_valid, columns=feature_names, index=X_valid.index)



In [11]:
df_train.head()

Unnamed: 0,onehot__feature_09_2,onehot__feature_09_4,onehot__feature_09_9,onehot__feature_09_11,onehot__feature_09_12,onehot__feature_09_14,onehot__feature_09_15,onehot__feature_09_26,onehot__feature_09_30,onehot__feature_09_34,onehot__feature_09_42,onehot__feature_09_44,onehot__feature_09_46,onehot__feature_09_50,onehot__feature_09_57,onehot__feature_09_64,onehot__feature_09_68,onehot__feature_09_70,onehot__feature_09_81,onehot__feature_09_82,onehot__feature_10_1,onehot__feature_10_2,onehot__feature_10_3,onehot__feature_10_4,onehot__feature_10_5,onehot__feature_10_6,onehot__feature_10_7,onehot__feature_10_10,onehot__feature_10_12,onehot__feature_11_9,onehot__feature_11_11,onehot__feature_11_13,onehot__feature_11_16,onehot__feature_11_24,onehot__feature_11_25,onehot__feature_11_34,onehot__feature_11_40,onehot__feature_11_48,onehot__feature_11_50,onehot__feature_11_59,onehot__feature_11_62,onehot__feature_11_63,onehot__feature_11_66,onehot__feature_11_76,onehot__feature_11_150,onehot__feature_11_158,onehot__feature_11_171,onehot__feature_11_214,onehot__feature_11_230,onehot__feature_11_261,onehot__feature_11_336,onehot__feature_11_376,onehot__feature_11_388,onehot__feature_11_410,onehot__feature_11_522,onehot__feature_11_534,onehot__feature_11_539,std__weight,std__feature_00,std__feature_01,std__feature_02,std__feature_03,std__feature_04,std__feature_05,std__feature_06,std__feature_07,std__feature_08,std__feature_12,std__feature_13,std__feature_14,std__feature_15,std__feature_16,std__feature_17,std__feature_18,std__feature_19,std__feature_20,std__feature_21,std__feature_22,std__feature_23,std__feature_24,std__feature_25,std__feature_26,std__feature_27,std__feature_28,std__feature_29,std__feature_30,std__feature_31,std__feature_32,std__feature_33,std__feature_34,std__feature_35,std__feature_36,std__feature_37,std__feature_38,std__feature_39,std__feature_40,std__feature_41,std__feature_42,std__feature_43,std__feature_44,std__feature_45,std__feature_46,std__feature_47,std__feature_48,std__feature_49,std__feature_50,std__feature_51,std__feature_52,std__feature_53,std__feature_54,std__feature_55,std__feature_56,std__feature_57,std__feature_58,std__feature_59,std__feature_60,std__feature_61,std__feature_62,std__feature_63,std__feature_64,std__feature_65,std__feature_66,std__feature_67,std__feature_68,std__feature_69,std__feature_70,std__feature_71,std__feature_72,std__feature_73,std__feature_74,std__feature_75,std__feature_76,std__feature_77,std__feature_78,std__responder_0_lag_1_min,std__responder_1_lag_1_min,std__responder_2_lag_1_min,std__responder_3_lag_1_min,std__responder_4_lag_1_min,std__responder_5_lag_1_min,std__responder_6_lag_1_min,std__responder_7_lag_1_min,std__responder_8_lag_1_min,std__responder_0_lag_1_max,std__responder_1_lag_1_max,std__responder_2_lag_1_max,std__responder_3_lag_1_max,std__responder_4_lag_1_max,std__responder_5_lag_1_max,std__responder_6_lag_1_max,std__responder_7_lag_1_max,std__responder_8_lag_1_max,std__responder_0_lag_1_mean,std__responder_1_lag_1_mean,std__responder_2_lag_1_mean,std__responder_3_lag_1_mean,std__responder_4_lag_1_mean,std__responder_5_lag_1_mean,std__responder_6_lag_1_mean,std__responder_7_lag_1_mean,std__responder_8_lag_1_mean,std__responder_0_lag_1_sum,std__responder_1_lag_1_sum,std__responder_2_lag_1_sum,std__responder_3_lag_1_sum,std__responder_4_lag_1_sum,std__responder_5_lag_1_sum,std__responder_6_lag_1_sum,std__responder_7_lag_1_sum,std__responder_8_lag_1_sum
2145488,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.53565,-0.631741,-0.766635,-0.954914,-0.948088,0.278891,-0.047051,-0.108702,0.033896,0.405188,-0.221769,-0.487554,-0.453303,-0.648023,-0.745711,-0.405036,0.499802,-1.090882,2.229815,0.039898,0.319465,-0.574945,1.49302,1.216924,-1.416103,-0.733892,0.324137,-0.468706,-0.348032,0.000699,-0.612218,0.319372,-1.077794,-0.460511,0.700167,3.285591,0.746022,0.10202,-0.414084,0.446021,1.177718,0.590798,1.689185,0.046352,-1.678194,0.052983,0.505166,0.428965,0.920442,-0.151087,0.156768,0.061356,1.134846,1.748104,-0.180267,-1.618331,0.07288,0.59622,0.491989,0.334382,-0.788907,-0.56902,-0.713831,0.980735,-1.988251,-0.240883,-0.32147,-0.52302,-0.236968,-0.283095,-0.48734,0.295725,0.278339,-0.150217,-0.224263,0.323967,0.129718,-0.490383,-1.059237,-0.354927,-0.518021,-0.07293,-0.428887,-0.236823,-0.605745,-0.055308,0.464919,1.025177,0.34874,0.261383,0.194528,0.209277,0.080325,0.250357,0.0,0.559217,0.468058,0.404876,-0.168008,-0.621101,-0.057431,-1.526612,-1.434947,-1.339073,0.570783,0.477493,0.414108,-0.159098,-0.602562,-0.05307,-1.652526,-1.493626,-1.593503
15553628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.964327,1.8965,0.008785,1.973551,2.402066,-1.781799,-0.215854,0.299782,-0.482597,0.213265,-0.130219,-0.27012,-0.297617,-0.605851,-0.397366,-0.7504,1.012835,0.362228,0.616728,-0.106614,1.199999,-0.050014,1.146201,0.58733,0.722143,0.413563,-0.171605,-0.657555,-0.524868,-0.097534,1.863463,0.534177,1.467227,1.495962,-0.12381,0.47095,0.648425,0.558961,0.465601,0.437037,0.057181,-0.254461,1.038934,0.011441,-1.238769,-0.061832,1.186109,0.097048,-0.146533,0.927486,-0.198875,0.135363,1.712032,1.237185,-0.196582,-0.326417,-0.0489,0.840146,0.121195,-0.907713,-0.566606,-0.325359,-0.411516,0.691675,-0.150426,-0.171114,-0.490003,-0.283093,-0.059219,-0.070329,-0.285011,-0.156076,-0.255395,-0.149135,-0.336697,-0.093866,-0.217292,0.54539,0.076751,2.579108,-0.518021,0.184453,-0.428887,-0.236823,-0.605745,-0.055308,0.464919,0.726379,0.34874,0.261383,-0.973469,0.209277,0.080325,0.250357,0.0,-0.240547,-0.247064,-0.704439,0.08732,0.559972,0.07879,0.519714,1.372435,0.47324,-0.256603,-0.253716,-0.743057,0.086335,0.57063,0.07823,0.581438,1.499583,0.576447
1553481,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133488,-0.264113,-1.459426,-0.363962,-0.39023,-1.239129,1.994956,0.170114,1.343807,-1.02954,-0.497351,-0.087092,-0.413625,-0.336419,-0.211959,-0.293836,-0.08088,1.191986,-0.784087,-0.281552,0.2491,0.193692,-0.173084,-1.263809,1.561039,1.171778,1.003618,-0.498111,-0.130262,-0.378028,1.026988,-0.447949,0.683963,0.553711,-0.081576,-0.343238,-0.383984,1.996366,0.278795,1.378061,1.0101,-0.280524,0.309054,1.38408,1.525397,1.566748,0.984583,1.19847,1.380548,-0.806592,1.354917,0.678384,-0.476675,0.279548,1.406592,1.727007,1.090223,0.81645,0.783795,-1.747027,-0.850993,-0.468117,-0.791297,0.346975,1.447828,-0.505007,-0.243953,-0.536489,-0.592771,0.091142,-0.380395,-0.103777,-0.491924,-0.144936,-0.221728,-0.347803,-0.291524,0.62189,-0.202783,-0.354927,2.906807,2.614028,2.633068,-0.236823,2.339045,-0.055308,0.464919,0.97065,0.34874,0.261383,0.435853,-1.142607,0.080325,0.250357,0.0,0.118361,-0.21524,0.792099,0.000549,-0.266457,0.050485,-0.388465,-0.679986,-0.031906,0.127494,-0.191562,0.762977,0.006629,-0.239614,0.051347,-0.400223,-0.675263,-0.043414
10582661,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.03027,0.242364,0.30771,0.45323,0.407059,0.120781,0.345025,-0.936564,-1.196861,-0.420299,-0.326051,-0.516209,-0.174636,-0.624744,-0.612568,-0.445188,0.286781,0.95196,-2.668459,-0.307748,-1.174752,-1.099802,0.190784,0.013404,0.433957,-1.454882,-1.942017,-0.596864,-0.550993,-0.224729,1.579077,0.28949,1.607359,1.789235,-0.598931,-0.178898,-0.289869,-1.690794,-0.020247,-0.402641,-0.097901,0.758264,-1.222387,0.726831,0.128489,0.282226,-0.102693,-0.482137,-1.594098,-0.902795,-0.894858,-0.692916,1.193425,-0.946607,0.552675,1.122266,0.338854,0.059234,-0.448328,-1.018,-0.582004,-0.549185,-0.729826,0.363365,1.082701,-0.395665,-0.533084,-0.28968,-0.159386,-0.443032,-0.340041,0.360697,0.618893,1.232973,0.676672,0.792474,0.823284,0.842549,-0.06169,-0.354927,-0.518021,0.566087,-0.428887,-0.236823,-0.137276,-0.055308,0.426282,-0.959716,0.10784,0.261383,0.435853,0.209277,0.080325,0.250357,0.0,-0.556181,-0.393322,-0.8996,0.124946,0.307685,0.064175,0.752212,0.824553,0.276903,-0.611886,-0.420194,-0.977101,0.125705,0.318225,0.06335,0.867203,0.925507,0.339062
21133891,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.180561,-1.226951,1.37053,-1.527523,-1.192635,0.022302,0.320859,0.014522,0.843754,0.136674,-0.215831,0.061703,-0.204736,-0.482099,-0.598258,-0.321705,-0.22063,0.071443,-1.711479,-0.319776,0.284075,-0.429017,0.730749,0.237601,1.924245,1.47221,1.409085,-0.178937,-0.340485,-0.402507,-1.645914,-1.344109,-1.084417,-1.989319,-2.045606,-0.36837,-0.696489,1.037744,0.289472,-0.27836,1.38495,0.553482,1.202161,0.086357,1.230531,0.781687,0.716812,0.549259,1.484559,0.490105,1.371003,1.005599,-0.04807,0.512802,0.348074,2.273053,1.527938,0.463023,1.1432,-0.813763,0.140275,0.14863,-0.021925,-0.17924,-0.184693,-0.454058,-0.289758,-0.275429,-0.064386,0.430841,0.041165,-0.381098,-0.287368,0.427683,0.314939,-0.491095,-0.410592,-0.490383,-1.059237,-0.354927,-0.518021,-0.855114,-0.428887,-0.236823,-0.605745,-0.055308,0.464919,1.025177,0.34874,0.261383,0.435853,0.209277,0.080325,0.250357,0.0,0.974492,0.748627,1.231009,0.05554,-0.155807,0.095572,-0.413768,-0.670221,0.217894,0.848873,0.640264,1.050471,0.058715,-0.113052,0.089171,-0.371492,-0.595553,0.212947


In [12]:
df_valid.head()

Unnamed: 0,onehot__feature_09_2,onehot__feature_09_4,onehot__feature_09_9,onehot__feature_09_11,onehot__feature_09_12,onehot__feature_09_14,onehot__feature_09_15,onehot__feature_09_26,onehot__feature_09_30,onehot__feature_09_34,onehot__feature_09_42,onehot__feature_09_44,onehot__feature_09_46,onehot__feature_09_50,onehot__feature_09_57,onehot__feature_09_64,onehot__feature_09_68,onehot__feature_09_70,onehot__feature_09_81,onehot__feature_09_82,onehot__feature_10_1,onehot__feature_10_2,onehot__feature_10_3,onehot__feature_10_4,onehot__feature_10_5,onehot__feature_10_6,onehot__feature_10_7,onehot__feature_10_10,onehot__feature_10_12,onehot__feature_11_9,onehot__feature_11_11,onehot__feature_11_13,onehot__feature_11_16,onehot__feature_11_24,onehot__feature_11_25,onehot__feature_11_34,onehot__feature_11_40,onehot__feature_11_48,onehot__feature_11_50,onehot__feature_11_59,onehot__feature_11_62,onehot__feature_11_63,onehot__feature_11_66,onehot__feature_11_76,onehot__feature_11_150,onehot__feature_11_158,onehot__feature_11_171,onehot__feature_11_214,onehot__feature_11_230,onehot__feature_11_261,onehot__feature_11_336,onehot__feature_11_376,onehot__feature_11_388,onehot__feature_11_410,onehot__feature_11_522,onehot__feature_11_534,onehot__feature_11_539,std__weight,std__feature_00,std__feature_01,std__feature_02,std__feature_03,std__feature_04,std__feature_05,std__feature_06,std__feature_07,std__feature_08,std__feature_12,std__feature_13,std__feature_14,std__feature_15,std__feature_16,std__feature_17,std__feature_18,std__feature_19,std__feature_20,std__feature_21,std__feature_22,std__feature_23,std__feature_24,std__feature_25,std__feature_26,std__feature_27,std__feature_28,std__feature_29,std__feature_30,std__feature_31,std__feature_32,std__feature_33,std__feature_34,std__feature_35,std__feature_36,std__feature_37,std__feature_38,std__feature_39,std__feature_40,std__feature_41,std__feature_42,std__feature_43,std__feature_44,std__feature_45,std__feature_46,std__feature_47,std__feature_48,std__feature_49,std__feature_50,std__feature_51,std__feature_52,std__feature_53,std__feature_54,std__feature_55,std__feature_56,std__feature_57,std__feature_58,std__feature_59,std__feature_60,std__feature_61,std__feature_62,std__feature_63,std__feature_64,std__feature_65,std__feature_66,std__feature_67,std__feature_68,std__feature_69,std__feature_70,std__feature_71,std__feature_72,std__feature_73,std__feature_74,std__feature_75,std__feature_76,std__feature_77,std__feature_78,std__responder_0_lag_1_min,std__responder_1_lag_1_min,std__responder_2_lag_1_min,std__responder_3_lag_1_min,std__responder_4_lag_1_min,std__responder_5_lag_1_min,std__responder_6_lag_1_min,std__responder_7_lag_1_min,std__responder_8_lag_1_min,std__responder_0_lag_1_max,std__responder_1_lag_1_max,std__responder_2_lag_1_max,std__responder_3_lag_1_max,std__responder_4_lag_1_max,std__responder_5_lag_1_max,std__responder_6_lag_1_max,std__responder_7_lag_1_max,std__responder_8_lag_1_max,std__responder_0_lag_1_mean,std__responder_1_lag_1_mean,std__responder_2_lag_1_mean,std__responder_3_lag_1_mean,std__responder_4_lag_1_mean,std__responder_5_lag_1_mean,std__responder_6_lag_1_mean,std__responder_7_lag_1_mean,std__responder_8_lag_1_mean,std__responder_0_lag_1_sum,std__responder_1_lag_1_sum,std__responder_2_lag_1_sum,std__responder_3_lag_1_sum,std__responder_4_lag_1_sum,std__responder_5_lag_1_sum,std__responder_6_lag_1_sum,std__responder_7_lag_1_sum,std__responder_8_lag_1_sum
2999095,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.814319,1.257527,-0.21851,1.27675,1.936195,-1.004007,-0.290064,-0.123782,-0.328785,0.690101,-0.171653,-0.079009,-0.267276,-0.725592,-0.648014,-0.775463,0.184557,-1.368198,-1.32036,-0.244633,-1.009617,-0.687541,0.829493,1.197318,-0.325823,-0.922633,-0.609217,-0.395812,-0.840852,-0.346913,0.015537,0.169486,-0.128768,-0.0199,-0.073869,0.071233,-0.032791,0.539705,-0.063674,-0.19249,1.614341,0.572939,0.805878,0.691769,0.753729,0.533393,0.482649,0.516148,0.728979,-0.297614,-0.453358,1.29484,-0.342809,0.658622,1.011675,1.287264,0.564508,0.122944,0.303869,-0.375104,-0.016718,-0.408504,-0.723531,-0.154147,-1.739422,-0.230497,-0.175694,-0.386017,-0.210985,0.147673,-0.003356,0.222495,0.205015,-0.228176,-0.337218,-0.083547,-0.080912,1.150853,0.577121,0.130384,2.080709,1.516767,-0.428887,3.411673,2.728002,-0.055308,0.464919,0.079407,0.34874,0.261383,0.435853,0.209277,0.080325,0.250357,0.0,0.370198,0.444112,0.201479,0.086036,0.155548,0.074171,0.240322,0.194633,0.292376,0.399744,0.487075,0.220273,0.085278,0.15815,0.073739,0.26957,0.217996,0.358873
2899125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.046733,2.093788,0.88551,2.028576,1.61665,-0.009637,0.409662,-0.249742,0.589793,0.320433,-0.065226,-0.257337,-0.083017,-0.325232,-0.399195,-0.418864,-0.789175,1.073965,0.096511,0.053896,-0.369625,0.037775,1.400349,1.086029,2.15465,-1.220728,-2.378792,-0.179092,-0.103244,0.032181,0.78545,0.881131,0.976084,0.567277,-1.281431,-2.421108,-1.080992,0.382306,-0.060456,-0.395274,0.395619,-0.930337,-0.568908,1.864488,0.029079,0.182576,-0.210293,0.288398,0.050678,-1.055009,-0.30153,1.22707,-0.172943,0.466888,1.929749,0.924749,0.280433,-0.062676,0.393462,-0.072356,-0.552857,-0.653615,-0.246351,-1.470229,-0.32349,-0.139432,-0.188069,-0.25374,-0.056706,-0.278149,0.072525,-0.283512,-0.206609,-0.250968,-0.17033,-0.300051,-0.225498,-0.490383,1.236647,-0.354927,2.034181,1.250391,-0.428887,-0.236823,0.566096,-0.055308,-1.177706,0.105925,-0.633232,0.261383,0.435853,0.209277,0.080325,0.250357,0.0,0.130808,0.304817,-0.122073,0.127684,0.461328,0.060707,0.589164,0.940285,0.215944,0.138324,0.336164,-0.131575,0.12855,0.479884,0.059746,0.676844,1.055493,0.261012
4235921,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.88017,1.690299,-0.908973,1.31071,1.726988,0.95984,1.304719,-1.553489,-0.644121,-1.872035,0.665542,0.023198,0.678768,-0.219463,-0.420575,-0.802726,-0.57209,2.364585,-0.975756,-0.24494,-1.489151,-0.909592,0.545279,0.406189,-0.401441,-0.247996,-0.593567,-0.278507,-0.496271,-0.332613,0.476812,0.195478,0.392442,0.399837,-0.741935,-2.041064,-1.125626,0.743185,-0.041888,-0.563781,-0.017138,1.217197,-1.695606,2.167126,0.331309,1.045772,-0.259035,-0.200562,0.70682,-0.354791,-1.157255,-0.292943,0.736194,-1.233786,1.481679,0.747864,0.54235,-0.8305,-0.303096,0.106486,-0.376218,-0.119104,-0.210133,0.389446,2.473501,0.461926,0.178512,0.696294,0.577056,-0.200814,0.488615,-0.898944,-0.604058,-2.053225,-1.94041,-1.409567,-1.555888,-0.490383,-1.059237,-0.354927,-0.518021,-0.764152,-0.428887,-0.236823,-0.605745,-0.055308,0.464919,-0.880594,0.34874,0.261383,0.435853,0.209277,0.080325,0.250357,0.0,-0.397763,-0.318843,-0.043203,0.029305,-0.209476,0.045425,-0.01394,-0.235309,-0.16943,-0.423885,-0.329486,-0.04243,0.027603,-0.218204,0.044443,-0.02563,-0.259892,-0.225314
5990367,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.315039,0.51904,0.031704,0.477349,0.706368,1.109576,0.636442,-0.788971,0.651374,-0.27096,0.42297,-0.023814,0.456676,-0.272536,-0.307689,-0.545172,0.201064,-1.525426,1.443277,-0.03697,1.206215,0.435536,0.210777,0.650454,-1.42164,0.016458,1.127182,-0.429585,-0.433528,-0.051734,1.42267,-0.988857,1.238213,1.433406,1.736598,1.84976,1.683059,1.559368,0.027932,1.274532,0.312866,-0.164066,-1.084638,1.12085,-0.927933,1.29468,1.360843,2.025176,1.640225,-0.375736,1.741668,0.699772,0.481892,-0.257215,1.313408,-2.216604,1.20578,1.315595,1.37199,-0.816737,-1.28653,-0.523869,-0.81185,-0.048149,-1.217448,0.369864,-0.135188,0.189853,1.161636,0.214542,0.52126,0.482637,0.314833,0.483334,0.197668,0.612128,0.314354,1.140875,1.285711,4.641342,-0.223176,-0.698951,2.910108,-0.236823,-0.605745,-0.055308,-2.173444,-1.321086,-6.262803,0.261383,0.435853,-1.0921,0.080325,0.250357,0.0,-0.35057,-0.304397,-0.506952,0.063072,0.062854,0.035775,0.336105,0.313218,-0.117106,-0.387353,-0.323854,-0.550113,0.061418,0.06062,0.033835,0.381397,0.351187,-0.165418
6835785,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.707002,1.786924,1.268689,2.238861,2.624563,-0.156925,-0.363881,-0.323697,-0.736621,-0.004652,0.205889,-0.260219,-0.067932,-0.573857,-0.403638,-0.582324,0.226971,-0.420547,0.325523,-0.265807,-0.686303,-0.339021,-0.153491,-0.898982,-0.591514,-0.155291,-0.302808,-0.168577,-0.390643,-0.324623,1.099943,-1.139746,1.460933,1.521481,-0.965141,-0.030285,0.182747,0.275251,-0.927933,-0.425555,0.573077,0.6813,-0.853473,-0.569276,0.792184,-0.089131,-0.11859,-0.031401,0.375147,0.534686,-0.964294,0.646367,0.422154,-0.472831,-1.410263,1.00624,0.080932,-0.042141,0.026337,-0.498045,-0.474831,-0.289253,-0.481256,-0.60323,-1.287735,0.16222,-0.139927,-0.182101,0.23512,-0.265963,0.05469,-0.18962,-0.244078,-0.293848,-0.323889,-0.114139,-0.162239,2.313012,1.446234,5.619449,0.730984,0.128875,0.294539,-0.236823,0.287293,-0.055308,-1.302026,-1.578113,-7.105606,-2.52348,0.340291,-1.510299,0.080325,0.250357,0.0,1.242587,1.184894,1.244856,0.075585,0.246757,0.066842,0.00658,0.084332,0.046684,1.352414,1.289632,1.354899,0.07442,0.254118,0.066123,-0.003324,0.094108,0.044295


In [13]:
df_train.shape, df_valid.shape

((100000, 170), (20000, 170))

In [14]:
# Support Vector Machine Model
def SVM_model(seed):
    # SVM parameters
    SVM_Params = {
        'gamma': 1e-7,
        'C': 1e5,
        'kernel': 'rbf',
    }
    
    SVM_Model = SVR(**SVM_Params)
    return SVM_Model

# Random Forest Model
def RF_model(seed):
    # Random Forest parameters
    RF_Params = {
        'n_estimators': 100,
        'max_depth': 20,
        'random_state': seed
    }
    
    RF_Model = RandomForestRegressor(**RF_Params)
    return RF_Model


# XGBoost Model
def XGB_model(seed):
    # XGBoost parameters
    XGB_Params = {
        'learning_rate': 0.05,
        'max_depth': 6,
        'n_estimators': 200,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'reg_alpha': 1,
        'reg_lambda': 5,
        'random_state': seed,
        'tree_method': 'hist',
    }
    
    XGB_Model = XGBRegressor(**XGB_Params)
    return XGB_Model

In [15]:
%%time
model = RF_model(CONFIG.seed)
model.fit(df_train, y_train)

CPU times: user 17min 11s, sys: 3.3 s, total: 17min 14s
Wall time: 17min 22s


In [16]:
y_pred_train = model.predict(df_train)
train_score = r2_score(y_train, y_pred_train, sample_weight=w_train )
train_score

0.11306091629333914

In [17]:
y_pred_valid = model.predict(df_valid)
valid_score = r2_score(y_valid, y_pred_valid, sample_weight=w_valid )
valid_score

0.0005463881061417153

In [18]:
import sklearn

In [19]:
result = {
    "model" : model,
    "preprocessor" : preprocessor,
}

with open("result.pkl", "wb") as fp:
    pickle.dump(result, fp)

In [20]:
# y_means = { symbol_id : -1 for symbol_id in range(39) }
# for symbol_id, gdf in train[["symbol_id", CONFIG.target_col]].groupby("symbol_id"):
#     y_mean = gdf[ CONFIG.target_col ].mean()
#     y_means[symbol_id] = y_mean
#     print(f"symbol_id = {symbol_id}, y_means = {y_mean:.5f}")

In [21]:
# cv_detail = { symbol_id : 0 for symbol_id in range(39) }
# for symbol_id, gdf in valid.groupby("symbol_id"):
#     X_valid = gdf[ CONFIG.feature_cols ]
#     y_valid = gdf[ CONFIG.target_col ]
#     w_valid = gdf[ "weight" ]
#     y_pred_valid = model.predict(X_valid)
#     score = r2_score(y_valid, y_pred_valid, sample_weight=w_valid )
#     cv_detail[symbol_id] = score
    
#     print(f"symbol_id = {symbol_id}, score = {score:.5f}")

In [22]:
# sids = list(cv_detail.keys())
# plt.bar(sids, [cv_detail[sid] for sid in sids])
# plt.grid()
# plt.xlabel("symbol_id")
# plt.ylabel("CV score")
# plt.show()