In [37]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Use fully qualified option names. pandas resolves option keys by regex match,
# so the short forms 'max_rows' / 'max_columns' become ambiguous (and raise
# OptionError) on pandas versions that also register 'styler.render.max_rows'
# and 'styler.render.max_columns'.
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_columns', 300)

import os
import glob

In [38]:
# Root directory of the input data. The trailing slash matters: later cells
# build file paths by plain string concatenation (data_dir + "...").
data_dir = '../input/optiver-realized-volatility-prediction/'

# 前処理に使う関数

In [39]:
# Weighted average price (WAP)
def calc_wap(df):
    """Return the level-1 weighted average price for every row of ``df``.

    Each side's price is weighted by the size quoted on the *opposite* side:
    ``(bid_price1 * ask_size1 + ask_price1 * bid_size1) / (bid_size1 + ask_size1)``.

    ``df`` must provide the columns ``bid_price1``, ``ask_price1``,
    ``bid_size1`` and ``ask_size1``.
    """
    bid_px, ask_px = df['bid_price1'], df['ask_price1']
    bid_sz, ask_sz = df['bid_size1'], df['ask_size1']
    cross_weighted = bid_px * ask_sz + ask_px * bid_sz
    return cross_weighted / (bid_sz + ask_sz)

def calc_wap2(df):
    """Return the level-2 weighted average price for every row of ``df``.

    Same formula as the level-1 WAP but on the second book level:
    ``(bid_price2 * ask_size2 + ask_price2 * bid_size2) / (bid_size2 + ask_size2)``.

    ``df`` must provide the columns ``bid_price2``, ``ask_price2``,
    ``bid_size2`` and ``ask_size2``.
    """
    bid_px, ask_px = df['bid_price2'], df['ask_price2']
    bid_sz, ask_sz = df['bid_size2'], df['ask_size2']
    cross_weighted = bid_px * ask_sz + ask_px * bid_sz
    return cross_weighted / (bid_sz + ask_sz)

In [40]:
def log_return(list_stock_prices):
    """Return the first difference of the natural log of a price series.

    The argument must be an object whose ``np.log`` result exposes ``.diff()``
    (e.g. a pandas Series); the first element of the result is NaN because it
    has no predecessor.
    """
    log_prices = np.log(list_stock_prices)
    return log_prices.diff()

In [41]:
def realized_volatility(series):
    """Return the square root of the sum of squared values of ``series``.

    For a pandas Series, ``np.sum`` dispatches to ``Series.sum`` and therefore
    ignores NaN entries (such as the leading NaN produced by ``diff()``).
    """
    sum_of_squares = np.sum(series ** 2)
    return np.sqrt(sum_of_squares)

In [42]:
def count_unique(series):
    """Return how many distinct values ``series`` contains."""
    distinct_values = np.unique(series)
    return distinct_values.size

In [43]:
# Load the order-book partition for stock 15 and preview its first rows.
book_train = pd.read_parquet(f"{data_dir}book_train.parquet/stock_id=15")
book_train.head()

Unnamed: 0,time_id,seconds_in_bucket,bid_price1,ask_price1,bid_price2,ask_price2,bid_size1,ask_size1,bid_size2,ask_size2
0,5,0,0.999519,0.999839,0.999454,0.999904,2,166,2,12
1,5,1,0.999711,1.000225,0.999647,1.000289,100,20,100,20
2,5,2,0.999775,1.000225,0.999711,1.000289,1,20,400,20
3,5,3,0.999839,1.000225,0.999775,1.000289,100,20,1,20
4,5,4,0.999839,1.000225,0.999711,1.000289,1,20,400,20


# book data 前処理

In [44]:
def preprocessor_book(file_path):
    """Build per-``time_id`` order-book features for a single stock.

    Reads one book parquet partition, derives row-level features (WAP of both
    book levels and their log returns, spreads, volumes) and aggregates them
    per ``time_id`` over the whole bucket and over the last 100, 150, ..., 500
    seconds of the bucket.

    Parameters
    ----------
    file_path : str
        Path to a partition whose name ends in ``stock_id=<id>``; the text
        after the last ``=`` is used as the stock id in ``row_id``.

    Returns
    -------
    pandas.DataFrame
        One row per ``time_id`` with a default integer index. Full-bucket
        columns are named ``<feature>_<stat>``; window columns are named
        ``<feature>_<stat>_<start>`` where ``<start>`` is the first second
        included in the window (500, 450, ..., 100). The last column is
        ``row_id`` (``"<stock_id>-<time_id>"``).
    """
    df = pd.read_parquet(file_path)

    # transform() returns a result aligned with df's own index. groupby.apply()
    # prepends the group key to the index on pandas >= 2.0, which makes the
    # column assignment fail.
    df['wap'] = calc_wap(df)
    df['log_return'] = df.groupby('time_id')['wap'].transform(log_return)
    df['wap2'] = calc_wap2(df)
    df['log_return2'] = df.groupby('time_id')['wap2'].transform(log_return)

    df['wap_balance'] = (df['wap'] - df['wap2']).abs()
    mid_price = (df['ask_price1'] + df['bid_price1']) / 2
    df['price_spread'] = (df['ask_price1'] - df['bid_price1']) / mid_price
    df['bid_spread'] = df['bid_price1'] - df['bid_price2']
    # NOTE(review): level 1 minus level 2, so this is negative whenever
    # ask_price2 > ask_price1. Kept as-is to preserve the feature values.
    df['ask_spread'] = df['ask_price1'] - df['ask_price2']
    df['total_volume'] = df['ask_size1'] + df['bid_size1'] + df['ask_size2'] + df['bid_size2']
    df['volume_imbalance'] = ((df['ask_size1'] + df['ask_size2']) - (df['bid_size1'] + df['bid_size2'])).abs()

    # String aggregation names pin the pandas implementations (sample std,
    # NaN-skipping mean/sum). Passing np.mean/np.std/np.sum is deprecated and
    # is slated to call the NumPy functions directly, which would change std.
    summary_stats = ['mean', 'std', 'sum']
    create_feature_dict = {
        'log_return': [realized_volatility, *summary_stats],
        'log_return2': [realized_volatility, *summary_stats],
    }
    for column in ('wap_balance', 'price_spread', 'bid_spread', 'ask_spread',
                   'volume_imbalance', 'total_volume', 'wap'):
        create_feature_dict[column] = list(summary_stats)

    def _aggregate(frame, suffix=''):
        # Aggregate per time_id and flatten the (feature, stat) column pairs.
        stats = frame.groupby('time_id').agg(create_feature_dict)
        stats.columns = ['_'.join(col) + suffix for col in stats.columns]
        return stats

    df_feature = _aggregate(df)

    last_seconds = [100, 150, 200, 250, 300, 350, 400, 450, 500]

    for last in last_seconds:
        # The code treats 600 as the bucket length, so the "last N seconds"
        # window starts at second 600 - N; that start value is the suffix.
        start = 600 - last
        in_window = df[df['seconds_in_bucket'] >= start]
        # Left join on the time_id index: a time_id with no rows in the window
        # keeps its row and gets NaN for that window's features.
        df_feature = df_feature.join(_aggregate(in_window, suffix=f'_{start}'), how='left')

    # Take the id after the last '=' and ignore trailing path separators so
    # 'stock_id=0/' or an earlier '=' in the path do not corrupt the id.
    stock_id = file_path.rstrip('/\\').rsplit('=', 1)[1]
    time_ids = df_feature.index
    df_feature = df_feature.reset_index(drop=True)
    df_feature['row_id'] = [f'{stock_id}-{time_id}' for time_id in time_ids]

    return df_feature

In [45]:
%%time
# Run the book feature extraction on the stock_id=0 partition (cell is timed).
file_path = f"{data_dir}book_train.parquet/stock_id=0"
preprocessor_book(file_path)

Wall time: 14.3 s


Unnamed: 0,log_return_realized_volatility,log_return_mean,log_return_std,log_return_sum,log_return2_realized_volatility,log_return2_mean,log_return2_std,log_return2_sum,wap_balance_mean,wap_balance_std,wap_balance_sum,price_spread_mean,price_spread_std,price_spread_sum,bid_spread_mean,bid_spread_std,bid_spread_sum,ask_spread_mean,ask_spread_std,ask_spread_sum,volume_imbalance_mean,volume_imbalance_std,volume_imbalance_sum,total_volume_mean,total_volume_std,total_volume_sum,wap_mean,wap_std,wap_sum,log_return_realized_volatility_500,log_return_mean_500,log_return_std_500,log_return_sum_500,log_return2_realized_volatility_500,log_return2_mean_500,log_return2_std_500,log_return2_sum_500,wap_balance_mean_500,wap_balance_std_500,wap_balance_sum_500,price_spread_mean_500,price_spread_std_500,price_spread_sum_500,bid_spread_mean_500,bid_spread_std_500,bid_spread_sum_500,ask_spread_mean_500,ask_spread_std_500,ask_spread_sum_500,volume_imbalance_mean_500,volume_imbalance_std_500,volume_imbalance_sum_500,total_volume_mean_500,total_volume_std_500,total_volume_sum_500,wap_mean_500,wap_std_500,wap_sum_500,log_return_realized_volatility_450,log_return_mean_450,log_return_std_450,log_return_sum_450,log_return2_realized_volatility_450,log_return2_mean_450,log_return2_std_450,log_return2_sum_450,wap_balance_mean_450,wap_balance_std_450,wap_balance_sum_450,price_spread_mean_450,price_spread_std_450,price_spread_sum_450,bid_spread_mean_450,bid_spread_std_450,bid_spread_sum_450,ask_spread_mean_450,ask_spread_std_450,ask_spread_sum_450,volume_imbalance_mean_450,volume_imbalance_std_450,volume_imbalance_sum_450,total_volume_mean_450,total_volume_std_450,total_volume_sum_450,wap_mean_450,wap_std_450,wap_sum_450,log_return_realized_volatility_400,log_return_mean_400,log_return_std_400,log_return_sum_400,log_return2_realized_volatility_400,log_return2_mean_400,log_return2_std_400,log_return2_sum_400,wap_balance_mean_400,wap_balance_std_400,wap_balance_sum_400,price_spread_mean_400,price_sp
read_std_400,price_spread_sum_400,bid_spread_mean_400,bid_spread_std_400,bid_spread_sum_400,ask_spread_mean_400,ask_spread_std_400,ask_spread_sum_400,volume_imbalance_mean_400,volume_imbalance_std_400,volume_imbalance_sum_400,total_volume_mean_400,total_volume_std_400,total_volume_sum_400,wap_mean_400,wap_std_400,wap_sum_400,log_return_realized_volatility_350,log_return_mean_350,log_return_std_350,log_return_sum_350,log_return2_realized_volatility_350,log_return2_mean_350,log_return2_std_350,log_return2_sum_350,wap_balance_mean_350,wap_balance_std_350,wap_balance_sum_350,price_spread_mean_350,price_spread_std_350,price_spread_sum_350,bid_spread_mean_350,bid_spread_std_350,bid_spread_sum_350,ask_spread_mean_350,ask_spread_std_350,ask_spread_sum_350,volume_imbalance_mean_350,volume_imbalance_std_350,volume_imbalance_sum_350,total_volume_mean_350,total_volume_std_350,total_volume_sum_350,wap_mean_350,wap_std_350,wap_sum_350,log_return_realized_volatility_300,log_return_mean_300,log_return_std_300,log_return_sum_300,log_return2_realized_volatility_300,log_return2_mean_300,log_return2_std_300,log_return2_sum_300,wap_balance_mean_300,wap_balance_std_300,wap_balance_sum_300,price_spread_mean_300,price_spread_std_300,price_spread_sum_300,bid_spread_mean_300,bid_spread_std_300,bid_spread_sum_300,ask_spread_mean_300,ask_spread_std_300,ask_spread_sum_300,volume_imbalance_mean_300,volume_imbalance_std_300,volume_imbalance_sum_300,total_volume_mean_300,total_volume_std_300,total_volume_sum_300,wap_mean_300,wap_std_300,wap_sum_300,log_return_realized_volatility_250,log_return_mean_250,log_return_std_250,log_return_sum_250,log_return2_realized_volatility_250,log_return2_mean_250,log_return2_std_250,log_return2_sum_250,wap_balance_mean_250,wap_balance_std_250,wap_balance_sum_250,price_spread_mean_250,price_spread_std_250,price_spread_sum_250,bid_spread_mean_250,bid_spread_std_250,bid_spread_sum_250,ask_spread_mean_250,ask_spread_std_250,ask_spread_sum_250,volume_imbalance_mean_250,
volume_imbalance_std_250,volume_imbalance_sum_250,total_volume_mean_250,total_volume_std_250,total_volume_sum_250,wap_mean_250,wap_std_250,wap_sum_250,log_return_realized_volatility_200,log_return_mean_200,log_return_std_200,log_return_sum_200,log_return2_realized_volatility_200,log_return2_mean_200,log_return2_std_200,log_return2_sum_200,wap_balance_mean_200,wap_balance_std_200,wap_balance_sum_200,price_spread_mean_200,price_spread_std_200,price_spread_sum_200,bid_spread_mean_200,bid_spread_std_200,bid_spread_sum_200,ask_spread_mean_200,ask_spread_std_200,ask_spread_sum_200,volume_imbalance_mean_200,volume_imbalance_std_200,volume_imbalance_sum_200,total_volume_mean_200,total_volume_std_200,total_volume_sum_200,wap_mean_200,wap_std_200,wap_sum_200,log_return_realized_volatility_150,log_return_mean_150,log_return_std_150,log_return_sum_150,log_return2_realized_volatility_150,log_return2_mean_150,log_return2_std_150,log_return2_sum_150,wap_balance_mean_150,wap_balance_std_150,wap_balance_sum_150,price_spread_mean_150,price_spread_std_150,price_spread_sum_150,bid_spread_mean_150,bid_spread_std_150,bid_spread_sum_150,ask_spread_mean_150,ask_spread_std_150,ask_spread_sum_150,volume_imbalance_mean_150,volume_imbalance_std_150,volume_imbalance_sum_150,total_volume_mean_150,total_volume_std_150,total_volume_sum_150,wap_mean_150,wap_std_150,wap_sum_150,log_return_realized_volatility_100,log_return_mean_100,log_return_std_100,log_return_sum_100,log_return2_realized_volatility_100,log_return2_mean_100,log_return2_std_100,log_return2_sum_100,wap_balance_mean_100,wap_balance_std_100,wap_balance_sum_100,price_spread_mean_100,price_spread_std_100,price_spread_sum_100,bid_spread_mean_100,bid_spread_std_100,bid_spread_sum_100,ask_spread_mean_100,ask_spread_std_100,ask_spread_sum_100,volume_imbalance_mean_100,volume_imbalance_std_100,volume_imbalance_sum_100,total_volume_mean_100,total_volume_std_100,total_volume_sum_100,wap_mean_100,wap_std_100,wap_sum_100,row_id
0,0.004499,7.613599e-06,0.000260,0.002292,0.006999,0.000008,0.000404,0.002325,0.000388,0.000295,0.117051,0.000852,0.000211,0.257255,0.000176,0.000162,0.053006,-0.000151,0.000126,-0.045557,134.894040,107.260583,40738,323.496689,138.101214,97696,1.003725,0.000693,303.125061,0.001459,-5.253940e-06,0.000208,-0.000263,0.003018,-0.000005,0.000431,-0.000272,0.000355,0.000247,0.017729,0.000781,0.000179,0.039065,0.000258,0.000158,0.012877,-0.000176,0.000131,-0.008792,144.820000,71.123833,7241,249.700000,100.559709,12485,1.003369,0.000546,50.168442,0.001721,-0.000005,0.000210,-0.000361,0.004114,0.000001,0.000503,0.000068,0.000366,0.000277,0.024868,0.000783,0.000181,0.053236,0.000262,0.000178,0.017790,-0.000166,0.000126,-0.011274,141.470588,84.467864,9620,263.941176,116.940077,17948,1.003482,0.000514,68.236749,0.002300,2.110692e-06,0.000234,0.000207,0.004589,-0.000008,0.000466,-0.000750,0.000390,0.000291,0.038194,0.000783,0.000181,0.076778,0.000214,0.000168,0.020996,-0.000191,0.000140,-0.018721,124.326531,82.090066,12184,262.489796,118.188932,25724,1.003633,0.000497,98.356007,0.002459,-2.722273e-07,0.000228,-0.000032,0.004700,2.294014e-06,0.000436,0.000268,0.000390,0.000283,0.045605,0.000761,0.000196,0.089035,0.000237,0.000177,0.027719,-0.000179,0.000136,-0.020997,130.829060,88.045275,15307,266.538462,117.329887,31185,1.003710,0.000504,117.434121,0.002953,1.131529e-06,0.000251,0.000157,0.004863,0.000002,0.000414,0.000274,0.000372,0.000273,0.051757,0.000822,0.000237,0.114272,0.000223,0.000173,0.030976,-0.000162,0.000131,-0.022548,137.158273,97.898813,19065,294.928058,136.527199,40995,1.003753,0.000487,139.521722,0.003146,-4.076705e-06,0.000249,-0.000656,0.005262,-2.030618e-06,0.000416,-0.000327,0.000370,0.000271,0.059532,0.000844,0.000241,0.135856,0.000207,0.000171,0.033251,-0.000166,0.000129,-0.026736,146.745342,110.699327,23626,298.236025,134.937370,48016,1.003762,0.000464,161.605654,0.003402,-1.392656e-06,0.000246,-0.000269,0.005802,0.000001,0.000419,0.000260,0.000379,0.0002
78,0.073062,0.000865,0.000238,0.167016,0.000205,0.000174,0.039560,-0.000155,0.000124,-0.029837,134.772021,106.866300,26011,321.455959,151.899636,62041,1.003836,0.000466,193.740261,0.003796,0.000001,0.000250,0.000276,0.006087,1.295471e-08,0.000400,0.000003,0.000397,0.000281,0.091997,0.000858,0.000221,0.199058,0.000188,0.000165,0.043697,-0.000147,0.000120,-0.034024,123.586207,103.533216,28672,327.431034,142.761068,75964,1.003832,0.000445,232.888919,0.004041,-0.000002,0.000250,-0.000396,0.006566,-3.291241e-07,0.000406,-0.000086,0.000404,0.000289,0.105931,0.000848,0.000214,0.222287,0.000184,0.000164,0.048196,-0.000151,0.000127,-0.039558,130.854962,107.857691,34284,332.167939,141.270190,87028,1.003875,0.000453,263.015170,0-5
1,0.001204,1.810239e-06,0.000086,0.000360,0.002476,0.000004,0.000176,0.000801,0.000212,0.000155,0.042312,0.000394,0.000157,0.078836,0.000142,0.000148,0.028358,-0.000135,0.000065,-0.027001,142.050000,102.139758,28410,411.450000,172.263581,82290,1.000239,0.000262,200.047768,0.000857,-3.608732e-07,0.000143,-0.000013,0.001435,0.000016,0.000239,0.000575,0.000277,0.000186,0.010265,0.000339,0.000163,0.012540,0.000296,0.000255,0.010942,-0.000148,0.000063,-0.005470,109.702703,97.631639,4059,425.810811,180.407846,15755,1.000610,0.000230,37.022563,0.000918,-0.000001,0.000126,-0.000059,0.001883,0.000009,0.000258,0.000488,0.000269,0.000175,0.014524,0.000348,0.000144,0.018812,0.000233,0.000239,0.012598,-0.000143,0.000066,-0.007729,97.685185,88.144569,5275,447.981481,177.264272,24191,1.000518,0.000235,54.027991,0.000934,8.459831e-07,0.000110,0.000062,0.001907,0.000003,0.000225,0.000237,0.000261,0.000170,0.019041,0.000367,0.000129,0.026790,0.000186,0.000220,0.013552,-0.000133,0.000063,-0.009736,96.136986,79.708203,7018,480.000000,167.075582,35040,1.000480,0.000217,73.035016,0.000976,6.782933e-07,0.000097,0.000069,0.001981,1.338194e-06,0.000197,0.000136,0.000251,0.000161,0.025584,0.000366,0.000122,0.037378,0.000167,0.000191,0.017015,-0.000125,0.000060,-0.012748,129.774510,111.295068,13237,492.970588,173.353382,50283,1.000419,0.000210,102.042731,0.000981,8.383753e-07,0.000092,0.000096,0.002009,0.000004,0.000188,0.000413,0.000239,0.000158,0.027445,0.000353,0.000121,0.040589,0.000164,0.000180,0.018873,-0.000123,0.000059,-0.014153,135.513043,110.256349,15584,484.521739,168.586713,55720,1.000397,0.000207,115.045656,0.001000,2.155868e-06,0.000087,0.000287,0.002032,4.969762e-06,0.000177,0.000661,0.000213,0.000164,0.028342,0.000340,0.000121,0.045255,0.000149,0.000172,0.019776,-0.000129,0.000058,-0.017164,135.090226,104.794348,17967,445.030075,189.128021,59189,1.000364,0.000211,133.048450,0.001014,2.180828e-06,0.000083,0.000331,0.002105,0.000006,0.000171,0.000950,0.000210,0.000165,0.031864,0
.000348,0.000115,0.052886,0.000139,0.000163,0.021131,-0.000123,0.000059,-0.018770,151.407895,108.481656,23014,438.921053,178.623008,66716,1.000332,0.000215,152.050502,0.001058,0.000002,0.000081,0.000298,0.002262,5.044579e-06,0.000172,0.000873,0.000205,0.000158,0.035454,0.000353,0.000112,0.061017,0.000141,0.000154,0.024394,-0.000127,0.000058,-0.022032,151.566474,104.576846,26221,419.277457,178.652395,72535,1.000301,0.000221,173.052001,0.001140,0.000003,0.000084,0.000561,0.002432,3.798911e-06,0.000178,0.000710,0.000213,0.000157,0.039800,0.000378,0.000143,0.070603,0.000146,0.000151,0.027304,-0.000129,0.000056,-0.024140,146.914439,102.961696,27473,420.112299,173.976587,78561,1.000264,0.000251,187.049275,0-11
2,0.002369,-1.109201e-05,0.000173,-0.002074,0.004801,-0.000008,0.000352,-0.001493,0.000331,0.000246,0.062228,0.000725,0.000164,0.136330,0.000197,0.000170,0.036955,-0.000198,0.000171,-0.037243,141.414894,108.891243,26586,416.351064,138.433034,78274,0.999542,0.000864,187.913849,0.000640,-4.641993e-05,0.000143,-0.000882,0.002509,-0.000026,0.000591,-0.000495,0.000403,0.000258,0.007657,0.000553,0.000092,0.010505,0.000335,0.000264,0.006367,-0.000247,0.000125,-0.004691,162.105263,131.846668,3080,450.421053,130.228140,8558,0.997783,0.000365,18.957882,0.001158,-0.000033,0.000173,-0.001469,0.002972,-0.000042,0.000451,-0.001831,0.000365,0.000282,0.016055,0.000605,0.000105,0.026608,0.000186,0.000217,0.008186,-0.000208,0.000168,-0.009143,156.113636,102.024670,6869,459.113636,116.212559,20201,0.998237,0.000541,43.922425,0.001179,-3.326700e-05,0.000163,-0.001697,0.003034,-0.000046,0.000427,-0.002332,0.000411,0.000299,0.020971,0.000625,0.000120,0.031877,0.000167,0.000207,0.008521,-0.000204,0.000164,-0.010387,152.509804,100.093231,7778,454.000000,115.120632,23154,0.998356,0.000586,50.916172,0.001284,-3.679629e-05,0.000159,-0.002318,0.003195,-2.535106e-05,0.000405,-0.001597,0.000418,0.000300,0.026317,0.000672,0.000155,0.042318,0.000145,0.000192,0.009143,-0.000223,0.000173,-0.014073,149.507937,102.797777,9419,468.015873,114.869981,29485,0.998576,0.000700,62.910286,0.001295,-3.810560e-05,0.000153,-0.002591,0.003196,-0.000023,0.000390,-0.001549,0.000431,0.000294,0.029308,0.000689,0.000162,0.046866,0.000141,0.000185,0.009622,-0.000249,0.000190,-0.016945,144.147059,101.873534,9802,455.235294,120.920736,30956,0.998685,0.000779,67.910601,0.001857,-1.896695e-05,0.000205,-0.001555,0.003633,-2.787335e-05,0.000403,-0.002286,0.000418,0.000279,0.034313,0.000713,0.000174,0.058453,0.000141,0.000172,0.011585,-0.000268,0.000211,-0.021971,129.231707,100.552707,10597,426.475610,135.297042,34971,0.998841,0.000803,81.904967,0.001940,-2.217922e-05,0.000193,-0.002240,0.003900,-0.000024,0.000389,-0.002434,0
.000396,0.000286,0.039989,0.000683,0.000174,0.068989,0.000146,0.000158,0.014792,-0.000263,0.000201,-0.026519,143.514851,116.258558,14495,440.544554,138.584092,44495,0.998944,0.000757,100.893294,0.002138,-0.000024,0.000195,-0.002854,0.004019,-2.509050e-05,0.000369,-0.002986,0.000373,0.000276,0.044347,0.000679,0.000163,0.080811,0.000161,0.000155,0.019100,-0.000241,0.000195,-0.028626,132.084034,114.924631,15718,428.537815,135.376048,50996,0.999126,0.000829,118.896016,0.002205,-0.000019,0.000183,-0.002715,0.004106,-1.841958e-05,0.000342,-0.002671,0.000345,0.000260,0.050096,0.000680,0.000149,0.098615,0.000162,0.000154,0.023552,-0.000212,0.000187,-0.030732,133.337931,106.949574,19334,400.620690,140.906641,58090,0.999359,0.000902,144.907030,0-16
3,0.002574,-2.376661e-05,0.000236,-0.002828,0.003637,-0.000017,0.000334,-0.002053,0.000380,0.000248,0.045611,0.000860,0.000280,0.103252,0.000190,0.000199,0.022764,-0.000108,0.000091,-0.013001,146.216667,121.533215,17546,435.266667,156.120334,52232,0.998832,0.000757,119.859781,0.000987,-5.288889e-05,0.000292,-0.000635,0.001360,-0.000071,0.000403,-0.000857,0.000202,0.000091,0.002419,0.001062,0.000090,0.012745,0.000131,0.000048,0.001573,-0.000050,0.000013,-0.000601,178.333333,112.091954,2140,539.166667,176.489934,6470,0.998074,0.000534,11.976885,0.000993,-0.000029,0.000239,-0.000526,0.001424,-0.000049,0.000342,-0.000882,0.000358,0.000253,0.006441,0.001058,0.000074,0.019047,0.000116,0.000046,0.002082,-0.000049,0.000011,-0.000879,146.000000,106.693624,2628,540.000000,153.413704,9720,0.998079,0.000430,17.965415,0.001003,-3.119674e-05,0.000211,-0.000718,0.001513,-0.000025,0.000322,-0.000574,0.000350,0.000250,0.008060,0.001050,0.000067,0.024143,0.000155,0.000147,0.003563,-0.000048,0.000010,-0.001111,153.826087,95.507569,3538,498.956522,156.965682,11476,0.998079,0.000379,22.955812,0.001776,-2.638866e-05,0.000269,-0.001161,0.002652,-8.569902e-06,0.000404,-0.000377,0.000293,0.000225,0.012904,0.000881,0.000281,0.038784,0.000177,0.000176,0.007773,-0.000079,0.000064,-0.003470,161.340909,102.690763,7099,437.386364,151.999574,19245,0.998391,0.000543,43.929225,0.001776,-2.224226e-05,0.000245,-0.001179,0.002713,-0.000008,0.000376,-0.000440,0.000331,0.000228,0.017525,0.000833,0.000278,0.044159,0.000158,0.000165,0.008375,-0.000095,0.000076,-0.005043,144.698113,101.135778,7669,418.169811,146.485459,22163,0.998436,0.000504,52.917110,0.001842,-1.889591e-05,0.000233,-0.001190,0.002872,-9.104604e-06,0.000365,-0.000574,0.000353,0.000228,0.022211,0.000824,0.000260,0.051898,0.000141,0.000157,0.008884,-0.000109,0.000089,-0.006848,133.079365,99.939224,8384,415.269841,141.298039,26162,0.998474,0.000472,62.903835,0.001855,-1.255555e-05,0.000225,-0.000866,0.002880,-0.000007,0.000349,-0.000456,0.000
339,0.000225,0.023407,0.000856,0.000269,0.059033,0.000133,0.000152,0.009162,-0.000114,0.000088,-0.007865,137.217391,101.947632,9468,424.782609,140.057013,29310,0.998472,0.000452,68.894597,0.002196,-0.000016,0.000245,-0.001290,0.003273,-1.372564e-05,0.000366,-0.001112,0.000362,0.000247,0.029323,0.000920,0.000296,0.074552,0.000170,0.000191,0.013789,-0.000108,0.000085,-0.008745,151.765432,124.293028,12293,424.234568,156.628404,34363,0.998464,0.000432,80.875601,0.002552,-0.000025,0.000251,-0.002525,0.003580,-2.040528e-05,0.000354,-0.002102,0.000363,0.000258,0.037411,0.000900,0.000283,0.092658,0.000213,0.000206,0.021932,-0.000114,0.000097,-0.011706,155.038835,124.004263,15969,434.048544,158.774929,44707,0.998587,0.000491,102.854482,0-31
4,0.001894,-1.057099e-08,0.000144,-0.000002,0.003257,-0.000002,0.000247,-0.000281,0.000254,0.000188,0.044783,0.000397,0.000130,0.069901,0.000191,0.000083,0.033565,-0.000109,0.000076,-0.019206,123.846591,102.407501,21797,343.221591,158.054066,60407,0.999619,0.000258,175.932865,0.001124,3.369865e-05,0.000279,0.000573,0.000780,-0.000010,0.000195,-0.000173,0.000360,0.000175,0.006128,0.000414,0.000072,0.007042,0.000195,0.000018,0.003310,-0.000140,0.000066,-0.002377,108.882353,112.990532,1851,349.352941,147.725227,5939,0.999454,0.000307,16.990718,0.001378,0.000011,0.000233,0.000397,0.000966,-0.000008,0.000163,-0.000298,0.000364,0.000203,0.013087,0.000519,0.000138,0.018700,0.000196,0.000024,0.007040,-0.000136,0.000066,-0.004895,117.000000,99.328028,4212,391.944444,123.180227,14110,0.999518,0.000257,35.982653,0.001435,1.431020e-05,0.000195,0.000787,0.001516,0.000008,0.000206,0.000433,0.000298,0.000208,0.016410,0.000469,0.000143,0.025789,0.000209,0.000071,0.011515,-0.000126,0.000083,-0.006946,85.618182,93.493413,4709,339.945455,129.296590,18697,0.999473,0.000243,54.971001,0.001512,5.518365e-06,0.000178,0.000403,0.001832,-3.276818e-06,0.000216,-0.000239,0.000247,0.000204,0.018038,0.000435,0.000150,0.031759,0.000204,0.000062,0.014872,-0.000123,0.000077,-0.008997,103.356164,97.637420,7545,373.465753,152.658759,27263,0.999463,0.000216,72.960818,0.001520,7.249930e-06,0.000162,0.000645,0.002188,-0.000002,0.000233,-0.000201,0.000252,0.000188,0.022397,0.000425,0.000140,0.037820,0.000191,0.000073,0.017016,-0.000120,0.000076,-0.010722,99.449438,93.029811,8851,407.584270,165.851509,36275,0.999488,0.000205,88.954468,0.001560,1.593930e-06,0.000158,0.000158,0.002414,-2.817140e-06,0.000244,-0.000279,0.000252,0.000188,0.024920,0.000425,0.000142,0.042110,0.000184,0.000076,0.018182,-0.000116,0.000074,-0.011514,106.767677,97.226417,10570,407.212121,163.304540,40314,0.999513,0.000218,98.951821,0.001571,2.401551e-06,0.000148,0.000274,0.002461,-0.000003,0.000231,-0.000310,0.000230,0.000186,0.0262
36,0.000414,0.000137,0.047192,0.000174,0.000076,0.019859,-0.000108,0.000073,-0.012260,116.956140,102.595726,13333,388.394737,164.649352,44277,0.999575,0.000259,113.951545,0.001609,0.000004,0.000139,0.000491,0.002927,2.213193e-06,0.000253,0.000299,0.000242,0.000193,0.032718,0.000395,0.000137,0.053347,0.000187,0.000088,0.025220,-0.000117,0.000080,-0.015757,131.474074,109.275622,17749,371.266667,162.610706,50121,0.999618,0.000259,134.948413,0.001617,0.000004,0.000132,0.000533,0.002975,4.033350e-06,0.000244,0.000605,0.000248,0.000187,0.037210,0.000389,0.000133,0.058384,0.000185,0.000086,0.027691,-0.000112,0.000079,-0.016736,126.686667,105.347118,19003,364.100000,156.327343,54615,0.999611,0.000246,149.941716,0-62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3825,0.002579,-7.215157e-06,0.000150,-0.002136,0.003821,-0.000007,0.000222,-0.002055,0.000212,0.000159,0.062912,0.000552,0.000202,0.163999,0.000083,0.000072,0.024713,-0.000182,0.000125,-0.054055,197.144781,129.451712,58552,374.235690,165.473374,111148,0.997938,0.000747,296.387479,0.001019,-5.243988e-06,0.000165,-0.000205,0.001348,-0.000005,0.000219,-0.000207,0.000179,0.000111,0.006996,0.000428,0.000085,0.016705,0.000087,0.000045,0.003381,-0.000169,0.000079,-0.006607,141.538462,94.836937,5520,239.487179,94.308528,9340,0.996622,0.000233,38.868268,0.001148,-0.000016,0.000143,-0.001050,0.002048,-0.000019,0.000255,-0.001227,0.000172,0.000141,0.011163,0.000425,0.000098,0.027610,0.000074,0.000039,0.004838,-0.000215,0.000103,-0.013995,227.153846,175.857473,14765,322.200000,153.142825,20943,0.996724,0.000301,64.787079,0.001412,-1.559898e-05,0.000148,-0.001420,0.002306,-0.000017,0.000242,-0.001518,0.000167,0.000160,0.015194,0.000447,0.000102,0.040700,0.000068,0.000034,0.006191,-0.000197,0.000118,-0.017949,220.857143,151.670298,20098,317.296703,145.085683,28874,0.996927,0.000419,90.720330,0.001556,-1.813159e-05,0.000142,-0.002176,0.002377,-1.904179e-05,0.000217,-0.002285,0.000163,0.000154,0.019509,0.000458,0.000113,0.054930,0.000064,0.000031,0.007700,-0.000179,0.000113,-0.021539,236.800000,143.151597,28416,316.416667,134.087929,37970,0.997233,0.000669,119.667973,0.001673,-1.303872e-05,0.000136,-0.001956,0.002573,-0.000011,0.000210,-0.001652,0.000193,0.000164,0.028973,0.000509,0.000150,0.076295,0.000062,0.000028,0.009261,-0.000169,0.000107,-0.025285,233.946667,139.068560,35092,350.560000,142.775530,52584,0.997519,0.000830,149.627778,0.001732,-1.106652e-05,0.000130,-0.001959,0.002714,-9.598848e-06,0.000204,-0.001699,0.000207,0.000166,0.036628,0.000528,0.000167,0.093441,0.000068,0.000033,0.011967,-0.000175,0.000115,-0.030955,219.858757,139.408847,38915,359.305085,144.263717,63597,0.997630,0.000809,176.580578,0.002247,-7.942849e-06,0.000158,-0.001620,0.003328,-0.000008,0.000233,-0
.001614,0.000223,0.000171,0.045543,0.000558,0.000193,0.113766,0.000067,0.000031,0.013580,-0.000176,0.000120,-0.035950,207.519608,137.125251,42334,341.882353,148.976759,69744,0.997725,0.000796,203.535859,0.002383,-0.000010,0.000158,-0.002261,0.003507,-1.210854e-05,0.000233,-0.002749,0.000223,0.000166,0.050565,0.000573,0.000218,0.130025,0.000066,0.000030,0.014881,-0.000171,0.000118,-0.038915,196.365639,137.189564,44575,349.916300,168.444647,79431,0.997784,0.000779,226.496946,0.002400,-0.000010,0.000152,-0.002480,0.003628,-9.036732e-06,0.000230,-0.002259,0.000218,0.000164,0.054467,0.000558,0.000217,0.139559,0.000071,0.000048,0.017846,-0.000171,0.000117,-0.042817,201.860000,134.492533,50465,365.420000,169.946376,91355,0.997863,0.000783,249.465819,0-32751
3826,0.002206,1.966770e-06,0.000154,0.000403,0.002847,0.000005,0.000199,0.000959,0.000267,0.000193,0.055028,0.000542,0.000147,0.111732,0.000092,0.000088,0.018874,-0.000172,0.000083,-0.035445,233.781553,153.128340,48159,621.131068,266.019708,127953,1.000310,0.000551,206.063903,0.000493,-1.568289e-05,0.000089,-0.000486,0.001155,-0.000009,0.000211,-0.000290,0.000234,0.000186,0.007242,0.000555,0.000015,0.017208,0.000068,0.000057,0.002103,-0.000203,0.000113,-0.006307,251.225806,155.965532,7788,573.354839,156.098590,17774,1.000577,0.000268,31.017879,0.001146,-0.000020,0.000170,-0.000917,0.001863,-0.000018,0.000277,-0.000842,0.000255,0.000166,0.011711,0.000572,0.000066,0.026312,0.000075,0.000061,0.003454,-0.000173,0.000114,-0.007959,234.217391,153.964919,10774,531.478261,201.290916,24448,1.000614,0.000278,46.028267,0.001288,-9.491476e-06,0.000170,-0.000551,0.001866,-0.000013,0.000247,-0.000742,0.000263,0.000170,0.015239,0.000555,0.000084,0.032162,0.000083,0.000075,0.004806,-0.000192,0.000109,-0.011113,250.672414,152.685480,14539,566.017241,215.469255,32829,1.000753,0.000382,58.043658,0.001485,2.674505e-06,0.000164,0.000222,0.002214,5.848296e-07,0.000245,0.000049,0.000255,0.000181,0.021171,0.000596,0.000129,0.049469,0.000079,0.000073,0.006558,-0.000182,0.000107,-0.015068,279.024096,178.784035,23159,610.831325,247.281503,50699,1.000803,0.000380,83.066633,0.001487,2.965882e-06,0.000149,0.000297,0.002255,0.000002,0.000227,0.000229,0.000300,0.000196,0.029996,0.000588,0.000118,0.058826,0.000074,0.000068,0.007409,-0.000177,0.000100,-0.017671,257.920000,176.388041,25792,668.640000,264.869952,66864,1.000682,0.000439,100.068151,0.001494,1.264079e-06,0.000136,0.000153,0.002282,5.584667e-06,0.000208,0.000676,0.000315,0.000188,0.038154,0.000580,0.000110,0.070235,0.000071,0.000063,0.008561,-0.000178,0.000092,-0.021527,259.223140,167.156289,31366,689.983471,256.979117,83488,1.000578,0.000458,121.069977,0.001496,1.254316e-06,0.000129,0.000169,0.002315,0.000004,0.000200,0.000595,0.000293,0
.000191,0.039555,0.000570,0.000117,0.076890,0.000082,0.000075,0.011014,-0.000180,0.000088,-0.024330,262.807407,160.655397,35479,694.985185,253.288903,93823,1.000543,0.000446,135.073360,0.001519,0.000002,0.000126,0.000237,0.002396,-1.181481e-07,0.000198,-0.000017,0.000286,0.000186,0.042064,0.000577,0.000115,0.084847,0.000079,0.000073,0.011615,-0.000181,0.000085,-0.026583,253.850340,159.936518,37316,707.782313,248.050412,104044,1.000528,0.000431,147.077579,0.001900,0.000005,0.000147,0.000803,0.002532,4.890253e-06,0.000196,0.000817,0.000272,0.000192,0.045444,0.000572,0.000133,0.095557,0.000076,0.000069,0.012667,-0.000178,0.000088,-0.029688,249.347305,156.099016,41641,671.000000,262.442443,112057,1.000494,0.000435,167.082500,0-32753
3827,0.002913,8.895445e-06,0.000213,0.001663,0.003266,0.000011,0.000239,0.002077,0.000237,0.000188,0.044629,0.000525,0.000244,0.098700,0.000202,0.000147,0.038039,-0.000083,0.000060,-0.015621,115.829787,105.146411,21776,343.734043,140.150429,64622,0.999552,0.000743,187.915689,0.000969,1.724415e-07,0.000155,0.000007,0.001231,0.000006,0.000197,0.000238,0.000258,0.000141,0.010303,0.000495,0.000110,0.019794,0.000135,0.000078,0.005391,-0.000098,0.000062,-0.003930,119.775000,81.662058,4791,367.475000,121.926625,14699,1.000347,0.000154,40.013864,0.001303,0.000007,0.000167,0.000413,0.001972,0.000004,0.000252,0.000238,0.000239,0.000139,0.014819,0.000470,0.000125,0.029162,0.000151,0.000090,0.009371,-0.000085,0.000055,-0.005240,120.048387,92.412374,7443,331.177419,122.523805,20533,1.000342,0.000188,62.021208,0.001511,8.775489e-06,0.000173,0.000676,0.002365,0.000007,0.000271,0.000564,0.000226,0.000158,0.017435,0.000470,0.000176,0.036216,0.000177,0.000138,0.013604,-0.000082,0.000050,-0.006299,109.246753,88.450872,8412,335.272727,117.648074,25816,1.000220,0.000318,77.016907,0.001849,1.283255e-05,0.000190,0.001219,0.002490,6.188400e-06,0.000257,0.000588,0.000204,0.000152,0.019406,0.000437,0.000177,0.041555,0.000180,0.000128,0.017080,-0.000077,0.000048,-0.007307,104.800000,84.542272,9956,326.378947,123.149034,31006,1.000217,0.000307,95.020621,0.001929,1.301021e-05,0.000190,0.001353,0.002646,0.000009,0.000261,0.000891,0.000216,0.000159,0.022432,0.000446,0.000177,0.046396,0.000191,0.000133,0.019901,-0.000075,0.000046,-0.007760,105.432692,82.634975,10965,326.759615,124.014444,33983,1.000111,0.000454,104.011571,0.001962,1.158121e-05,0.000186,0.001297,0.002826,1.389242e-05,0.000268,0.001556,0.000222,0.000159,0.024906,0.000453,0.000174,0.050681,0.000206,0.000139,0.023075,-0.000073,0.000045,-0.008164,107.607143,82.238541,12052,320.142857,123.500874,35856,1.000036,0.000517,112.004000,0.001963,1.191725e-05,0.000183,0.001382,0.002842,0.000011,0.000265,0.001253,0.000223,0.000157,0.025847,0.000
458,0.000173,0.053102,0.000213,0.000141,0.024688,-0.000072,0.000044,-0.008365,111.068966,84.957455,12884,318.155172,123.184740,36906,1.000001,0.000540,116.000109,0.002404,0.000006,0.000209,0.000799,0.003006,1.163567e-05,0.000261,0.001548,0.000244,0.000189,0.032494,0.000515,0.000234,0.068435,0.000215,0.000150,0.028567,-0.000084,0.000066,-0.011187,103.022556,84.414783,13702,313.142857,129.148560,41648,0.999855,0.000646,132.980752,0.002633,0.000014,0.000221,0.001972,0.003124,1.421911e-05,0.000262,0.002033,0.000254,0.000201,0.036384,0.000544,0.000251,0.077764,0.000211,0.000152,0.030230,-0.000082,0.000065,-0.011741,108.370629,100.688033,15497,321.027972,139.420225,45907,0.999814,0.000648,142.973458,0-32758
3828,0.003046,1.698933e-06,0.000174,0.000520,0.005105,0.000002,0.000292,0.000614,0.000245,0.000187,0.075224,0.000480,0.000145,0.147444,0.000113,0.000104,0.034710,-0.000166,0.000149,-0.050912,132.074919,95.735325,40547,385.429967,140.552333,118327,1.002357,0.000356,307.723687,0.001212,-1.107497e-05,0.000173,-0.000554,0.001893,-0.000010,0.000270,-0.000514,0.000239,0.000150,0.011932,0.000493,0.000109,0.024656,0.000111,0.000094,0.005527,-0.000175,0.000185,-0.008751,125.000000,94.481399,6250,410.000000,184.443122,20500,1.002203,0.000235,50.110131,0.001413,-0.000003,0.000163,-0.000225,0.002180,0.000004,0.000252,0.000300,0.000202,0.000150,0.015376,0.000519,0.000124,0.039426,0.000096,0.000082,0.007306,-0.000147,0.000169,-0.011154,121.394737,95.503589,9226,451.684211,168.076904,34328,1.002322,0.000278,76.176449,0.001617,-1.754265e-06,0.000160,-0.000181,0.002919,-0.000002,0.000289,-0.000246,0.000242,0.000165,0.024949,0.000503,0.000114,0.051850,0.000104,0.000078,0.010672,-0.000169,0.000172,-0.017453,115.728155,89.854042,11920,435.165049,165.821490,44822,1.002305,0.000249,103.237365,0.001970,5.536438e-07,0.000173,0.000072,0.003538,3.394218e-06,0.000311,0.000441,0.000264,0.000186,0.034375,0.000528,0.000132,0.068685,0.000098,0.000074,0.012740,-0.000179,0.000163,-0.023269,120.169231,90.822012,15622,416.830769,158.351984,54188,1.002312,0.000249,130.300567,0.002137,-2.744205e-06,0.000168,-0.000447,0.003934,-0.000003,0.000309,-0.000427,0.000269,0.000182,0.043832,0.000516,0.000124,0.084086,0.000096,0.000069,0.015576,-0.000175,0.000156,-0.028557,123.423313,88.622998,20118,394.588957,154.066273,64318,1.002277,0.000239,163.371123,0.002453,-6.832444e-07,0.000180,-0.000127,0.004279,-1.075748e-07,0.000315,-0.000020,0.000273,0.000197,0.050779,0.000526,0.000136,0.097855,0.000095,0.000066,0.017596,-0.000163,0.000151,-0.030240,123.564516,90.070595,22983,393.112903,151.498615,73119,1.002256,0.000250,186.419575,0.002550,-3.105771e-07,0.000178,-0.000064,0.004430,0.000001,0.000309,0.000244,0.000269
,0.000193,0.055414,0.000523,0.000135,0.107737,0.000093,0.000063,0.019230,-0.000153,0.000147,-0.031538,118.563107,89.135979,24424,387.029126,147.183142,79728,1.002256,0.000248,206.464721,0.002645,-0.000003,0.000179,-0.000576,0.004526,-1.276322e-06,0.000307,-0.000280,0.000267,0.000190,0.058440,0.000518,0.000139,0.113443,0.000095,0.000070,0.020769,-0.000148,0.000145,-0.032307,120.972603,90.168594,26493,389.273973,146.309737,85251,1.002281,0.000270,219.499522,0.002712,-0.000002,0.000173,-0.000597,0.004583,-2.902686e-06,0.000292,-0.000717,0.000254,0.000184,0.062746,0.000494,0.000148,0.122121,0.000096,0.000072,0.023701,-0.000149,0.000138,-0.036682,130.538462,97.623830,32243,388.846154,142.010537,96045,1.002337,0.000302,247.577120,0-32763


In [46]:
# Load the trade partition for stock 0 and preview its first 15 rows.
trade_train = pd.read_parquet(f"{data_dir}trade_train.parquet/stock_id=0")
trade_train.head(15)

Unnamed: 0,time_id,seconds_in_bucket,price,size,order_count
0,5,21,1.002301,326,12
1,5,46,1.002778,128,4
2,5,50,1.002818,55,1
3,5,57,1.003155,121,5
4,5,68,1.003646,4,1
5,5,78,1.003762,134,5
6,5,122,1.004207,102,3
7,5,127,1.004577,1,1
8,5,144,1.00437,6,1
9,5,147,1.003964,233,4


# trade data 前処理

In [47]:
def preprocessor_trade(file_path):
    """Build per-``time_id`` trade features for one stock partition.

    Reads the parquet data at ``file_path`` and uses its ``time_id``,
    ``price``, ``seconds_in_bucket``, ``size`` and ``order_count`` columns.
    For every ``time_id`` it aggregates:

    * ``log_return``        -> ``realized_volatility``
    * ``seconds_in_bucket`` -> ``count_unique``
    * ``size``              -> sum
    * ``order_count``       -> mean

    once over all rows and once per trailing window. A window keeps the rows
    with ``seconds_in_bucket >= 600 - last`` for ``last`` in 100, 150, ..., 500;
    its columns carry that start second as suffix (``_500`` = rows from second
    500 onwards).

    Parameters
    ----------
    file_path : str
        Path of the form ``.../stock_id=<id>``. The text after the first
        ``=`` is used as the stock id inside ``row_id``.

    Returns
    -------
    pandas.DataFrame
        One row per ``time_id``; feature columns are prefixed with ``trade_``
        and ``row_id`` holds ``"<stock_id>-<time_id>"``. A window without any
        row for a ``time_id`` yields NaN there (left merge).
    """
    df = pd.read_parquet(file_path)
    # ``transform`` keeps the original row index. ``apply`` prepends the group
    # key to the index on pandas >= 2.0, which makes this assignment fail.
    df['log_return'] = df.groupby('time_id')['price'].transform(log_return)

    # String aggregator names produce the same ``size_sum`` /
    # ``order_count_mean`` columns as np.sum / np.mean without the
    # FutureWarning newer pandas emits for NumPy callables.
    aggregate_dictionary = {
        'log_return': [realized_volatility],
        'seconds_in_bucket': [count_unique],
        'size': ['sum'],
        'order_count': ['mean'],
    }

    def _aggregate(frame):
        # Aggregate per time_id and flatten the two-level header into
        # "<column>_<aggregator>"; the key column becomes "time_id_".
        features = frame.groupby('time_id').agg(aggregate_dictionary).reset_index()
        features.columns = ['_'.join(col) for col in features.columns]
        return features

    df_feature = _aggregate(df)

    last_seconds = [100, 150, 200, 250, 300, 350, 400, 450, 500]

    for last in last_seconds:
        # 600 is presumably the bucket length in seconds -- TODO confirm.
        start_second = 600 - last

        # Log returns were computed on the full bucket above, so the first
        # return inside a window still refers to the trade just before it.
        df_feature_sec = _aggregate(df[df['seconds_in_bucket'] >= start_second])
        df_feature_sec = df_feature_sec.add_suffix('_' + str(start_second))

        # Left merge keeps every time_id of the full-bucket table.
        df_feature = pd.merge(df_feature, df_feature_sec, how='left', left_on='time_id_', right_on=f'time_id__{start_second}')
        df_feature = df_feature.drop([f'time_id__{start_second}'], axis=1)

    df_feature = df_feature.add_prefix('trade_')
    stock_id = file_path.split('=')[1]
    df_feature['row_id'] = df_feature['trade_time_id_'].apply(lambda x:f'{stock_id}-{x}')
    df_feature = df_feature.drop(['trade_time_id_'], axis=1)

    return df_feature

In [48]:
%%time
# Smoke-test preprocessor_trade on the stock_id=0 trade partition; the %%time cell magic above
# must stay on the first line of the cell and reports how long the run takes.
file_path = data_dir + "trade_train.parquet/stock_id=0"
preprocessor_trade(file_path)

Wall time: 6.33 s


Unnamed: 0,trade_log_return_realized_volatility,trade_seconds_in_bucket_count_unique,trade_size_sum,trade_order_count_mean,trade_log_return_realized_volatility_500,trade_seconds_in_bucket_count_unique_500,trade_size_sum_500,trade_order_count_mean_500,trade_log_return_realized_volatility_450,trade_seconds_in_bucket_count_unique_450,trade_size_sum_450,trade_order_count_mean_450,trade_log_return_realized_volatility_400,trade_seconds_in_bucket_count_unique_400,trade_size_sum_400,trade_order_count_mean_400,trade_log_return_realized_volatility_350,trade_seconds_in_bucket_count_unique_350,trade_size_sum_350,trade_order_count_mean_350,trade_log_return_realized_volatility_300,trade_seconds_in_bucket_count_unique_300,trade_size_sum_300,trade_order_count_mean_300,trade_log_return_realized_volatility_250,trade_seconds_in_bucket_count_unique_250,trade_size_sum_250,trade_order_count_mean_250,trade_log_return_realized_volatility_200,trade_seconds_in_bucket_count_unique_200,trade_size_sum_200,trade_order_count_mean_200,trade_log_return_realized_volatility_150,trade_seconds_in_bucket_count_unique_150,trade_size_sum_150,trade_order_count_mean_150,trade_log_return_realized_volatility_100,trade_seconds_in_bucket_count_unique_100,trade_size_sum_100,trade_order_count_mean_100,row_id
0,0.002006,40,3179,2.750000,0.000959,10.0,737.0,2.600000,0.001060,14.0,1042.0,2.642857,0.001121,16.0,1045.0,2.437500,0.001271,20.0,1584.0,2.600000,0.001308,21.0,1587.0,2.571429,0.001372,25.0,1796.0,2.400000,0.001666,27.0,1901.0,2.555556,0.001701,30.0,2069.0,2.433333,0.001852,34,2411,2.411765,0-5
1,0.000901,30,1289,1.900000,0.000451,7.0,546.0,2.000000,0.000501,10.0,828.0,2.200000,0.000510,11.0,829.0,2.090909,0.000557,14.0,873.0,2.142857,0.000587,16.0,900.0,2.250000,0.000755,20.0,1119.0,2.100000,0.000802,22.0,1124.0,2.045455,0.000813,24.0,1173.0,2.041667,0.000819,25,1174,2.000000,0-11
2,0.001961,25,2161,2.720000,0.000723,4.0,661.0,3.500000,0.001048,9.0,1085.0,3.666667,0.001048,10.0,1087.0,3.400000,0.001137,12.0,1189.0,3.166667,0.001137,12.0,1189.0,3.166667,0.001515,15.0,1482.0,3.000000,0.001575,18.0,1691.0,2.833333,0.001621,20.0,2010.0,2.950000,0.001875,23,2032,2.739130,0-16
3,0.001561,15,1962,3.933333,0.000327,2.0,509.0,5.000000,0.000802,3.0,514.0,3.666667,0.000802,3.0,514.0,3.666667,0.001050,8.0,1301.0,3.875000,0.001089,9.0,1556.0,5.111111,0.001090,10.0,1561.0,4.700000,0.001090,10.0,1561.0,4.700000,0.001401,11.0,1631.0,4.545455,0.001561,13,1933,4.384615,0-31
4,0.000871,22,1791,4.045455,0.000348,3.0,40.0,4.000000,0.000360,4.0,43.0,3.500000,0.000395,6.0,162.0,3.666667,0.000452,10.0,1216.0,5.200000,0.000453,11.0,1219.0,4.909091,0.000493,12.0,1451.0,4.916667,0.000498,14.0,1458.0,4.428571,0.000550,16.0,1570.0,4.500000,0.000551,18,1574,4.166667,0-62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3825,0.001519,52,3450,3.057692,0.000672,9.0,872.0,2.777778,0.000786,19.0,1159.0,2.947368,0.000911,28.0,1856.0,3.142857,0.001161,34.0,2255.0,3.205882,0.001162,35.0,2365.0,3.257143,0.001163,37.0,2396.0,3.243243,0.001257,39.0,2407.0,3.128205,0.001409,42.0,2957.0,3.238095,0.001448,45,2969,3.111111,0-32751
3826,0.001411,28,4547,3.892857,0.000268,2.0,201.0,2.000000,0.000750,5.0,1158.0,4.600000,0.000765,6.0,1401.0,5.166667,0.001035,9.0,2148.0,5.111111,0.001066,12.0,2161.0,4.250000,0.001204,15.0,2167.0,3.666667,0.001235,18.0,2493.0,3.555556,0.001284,19.0,2494.0,3.421053,0.001342,22,3285,3.454545,0-32753
3827,0.001521,36,4250,3.500000,0.000349,2.0,15.0,2.000000,0.000780,8.0,416.0,2.000000,0.000875,13.0,1149.0,2.692308,0.001138,20.0,1723.0,2.900000,0.001242,22.0,2294.0,3.727273,0.001243,23.0,2295.0,3.608696,0.001243,23.0,2295.0,3.608696,0.001375,27.0,2736.0,3.444444,0.001398,28,3337,3.607143,0-32758
3828,0.001794,53,3217,2.150943,0.000601,7.0,932.0,2.857143,0.001012,12.0,1415.0,2.666667,0.001070,16.0,1463.0,2.312500,0.001371,21.0,1602.0,2.047619,0.001404,25.0,1627.0,1.920000,0.001435,28.0,2117.0,2.107143,0.001622,33.0,2171.0,2.030303,0.001650,36.0,2296.0,2.055556,0.001686,41,2422,2.097561,0-32763


# 前処理データ結合関数

In [49]:
def preprocessor(list_stock_ids, is_train = True):
    """Build the merged book + trade feature table for several stocks in parallel.

    For every stock id, the book features (``preprocessor_book``) are
    left-joined with the trade features (``preprocessor_trade``) on
    ``row_id``; the per-stock tables are then stacked into one frame.

    Parameters
    ----------
    list_stock_ids : iterable
        Stock ids; each is converted with ``str()`` to build the
        ``.../stock_id=<id>`` partition paths under ``data_dir``.
    is_train : bool, default True
        Read ``book_train`` / ``trade_train`` when truthy, otherwise
        ``book_test`` / ``trade_test``.

    Returns
    -------
    pandas.DataFrame
        All per-stock tables concatenated with a fresh 0..n-1 index. An
        empty ``list_stock_ids`` gives an empty DataFrame.
    """
    # Materialise once so generators are supported and emptiness can be tested.
    stock_ids = list(list_stock_ids)
    if not stock_ids:
        # pd.concat([]) raises ValueError; nothing requested -> nothing returned.
        return pd.DataFrame()

    from joblib import Parallel, delayed  # parallel processing

    split = 'train' if is_train else 'test'

    def for_joblib(stock_id):
        # Worker: build and return the feature table of a single stock.
        file_path_book = data_dir + "book_" + split + ".parquet/stock_id=" + str(stock_id)
        file_path_trade = data_dir + "trade_" + split + ".parquet/stock_id=" + str(stock_id)

        # Left join keeps every book row even when a time_id has no trades.
        return pd.merge(preprocessor_book(file_path_book), preprocessor_trade(file_path_trade), on='row_id', how='left')

    # One task per stock; results come back as a list in input order.
    frames = Parallel(n_jobs=-1, verbose=1)(
        delayed(for_joblib)(stock_id) for stock_id in stock_ids
    )

    return pd.concat(frames, ignore_index=True)

In [50]:
# Quick end-to-end check: build the merged book + trade features for two stocks only.
list_stock_ids = [
    0,
    1,
]
preprocessor(list_stock_ids=list_stock_ids, is_train=True)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:   22.8s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:   22.8s finished


Unnamed: 0,log_return_realized_volatility,log_return_mean,log_return_std,log_return_sum,log_return2_realized_volatility,log_return2_mean,log_return2_std,log_return2_sum,wap_balance_mean,wap_balance_std,wap_balance_sum,price_spread_mean,price_spread_std,price_spread_sum,bid_spread_mean,bid_spread_std,bid_spread_sum,ask_spread_mean,ask_spread_std,ask_spread_sum,volume_imbalance_mean,volume_imbalance_std,volume_imbalance_sum,total_volume_mean,total_volume_std,total_volume_sum,wap_mean,wap_std,wap_sum,log_return_realized_volatility_500,log_return_mean_500,log_return_std_500,log_return_sum_500,log_return2_realized_volatility_500,log_return2_mean_500,log_return2_std_500,log_return2_sum_500,wap_balance_mean_500,wap_balance_std_500,wap_balance_sum_500,price_spread_mean_500,price_spread_std_500,price_spread_sum_500,bid_spread_mean_500,bid_spread_std_500,bid_spread_sum_500,ask_spread_mean_500,ask_spread_std_500,ask_spread_sum_500,volume_imbalance_mean_500,volume_imbalance_std_500,volume_imbalance_sum_500,total_volume_mean_500,total_volume_std_500,total_volume_sum_500,wap_mean_500,wap_std_500,wap_sum_500,log_return_realized_volatility_450,log_return_mean_450,log_return_std_450,log_return_sum_450,log_return2_realized_volatility_450,log_return2_mean_450,log_return2_std_450,log_return2_sum_450,wap_balance_mean_450,wap_balance_std_450,wap_balance_sum_450,price_spread_mean_450,price_spread_std_450,price_spread_sum_450,bid_spread_mean_450,bid_spread_std_450,bid_spread_sum_450,ask_spread_mean_450,ask_spread_std_450,ask_spread_sum_450,volume_imbalance_mean_450,volume_imbalance_std_450,volume_imbalance_sum_450,total_volume_mean_450,total_volume_std_450,total_volume_sum_450,wap_mean_450,wap_std_450,wap_sum_450,log_return_realized_volatility_400,log_return_mean_400,log_return_std_400,log_return_sum_400,log_return2_realized_volatility_400,log_return2_mean_400,log_return2_std_400,log_return2_sum_400,wap_balance_mean_400,wap_balance_std_400,wap_balance_sum_400,price_spread_mean_400,price_sp
read_std_400,price_spread_sum_400,bid_spread_mean_400,bid_spread_std_400,bid_spread_sum_400,ask_spread_mean_400,ask_spread_std_400,ask_spread_sum_400,volume_imbalance_mean_400,volume_imbalance_std_400,volume_imbalance_sum_400,total_volume_mean_400,total_volume_std_400,total_volume_sum_400,wap_mean_400,wap_std_400,wap_sum_400,log_return_realized_volatility_350,log_return_mean_350,log_return_std_350,log_return_sum_350,log_return2_realized_volatility_350,log_return2_mean_350,log_return2_std_350,log_return2_sum_350,wap_balance_mean_350,wap_balance_std_350,wap_balance_sum_350,price_spread_mean_350,price_spread_std_350,price_spread_sum_350,bid_spread_mean_350,bid_spread_std_350,bid_spread_sum_350,ask_spread_mean_350,ask_spread_std_350,ask_spread_sum_350,volume_imbalance_mean_350,volume_imbalance_std_350,volume_imbalance_sum_350,total_volume_mean_350,total_volume_std_350,total_volume_sum_350,wap_mean_350,wap_std_350,wap_sum_350,log_return_realized_volatility_300,log_return_mean_300,log_return_std_300,log_return_sum_300,log_return2_realized_volatility_300,...,log_return2_sum_250,wap_balance_mean_250,wap_balance_std_250,wap_balance_sum_250,price_spread_mean_250,price_spread_std_250,price_spread_sum_250,bid_spread_mean_250,bid_spread_std_250,bid_spread_sum_250,ask_spread_mean_250,ask_spread_std_250,ask_spread_sum_250,volume_imbalance_mean_250,volume_imbalance_std_250,volume_imbalance_sum_250,total_volume_mean_250,total_volume_std_250,total_volume_sum_250,wap_mean_250,wap_std_250,wap_sum_250,log_return_realized_volatility_200,log_return_mean_200,log_return_std_200,log_return_sum_200,log_return2_realized_volatility_200,log_return2_mean_200,log_return2_std_200,log_return2_sum_200,wap_balance_mean_200,wap_balance_std_200,wap_balance_sum_200,price_spread_mean_200,price_spread_std_200,price_spread_sum_200,bid_spread_mean_200,bid_spread_std_200,bid_spread_sum_200,ask_spread_mean_200,ask_spread_std_200,ask_spread_sum_200,volume_imbalance_mean_200,volume_imbalance_std_200,volume_imbal
ance_sum_200,total_volume_mean_200,total_volume_std_200,total_volume_sum_200,wap_mean_200,wap_std_200,wap_sum_200,log_return_realized_volatility_150,log_return_mean_150,log_return_std_150,log_return_sum_150,log_return2_realized_volatility_150,log_return2_mean_150,log_return2_std_150,log_return2_sum_150,wap_balance_mean_150,wap_balance_std_150,wap_balance_sum_150,price_spread_mean_150,price_spread_std_150,price_spread_sum_150,bid_spread_mean_150,bid_spread_std_150,bid_spread_sum_150,ask_spread_mean_150,ask_spread_std_150,ask_spread_sum_150,volume_imbalance_mean_150,volume_imbalance_std_150,volume_imbalance_sum_150,total_volume_mean_150,total_volume_std_150,total_volume_sum_150,wap_mean_150,wap_std_150,wap_sum_150,log_return_realized_volatility_100,log_return_mean_100,log_return_std_100,log_return_sum_100,log_return2_realized_volatility_100,log_return2_mean_100,log_return2_std_100,log_return2_sum_100,wap_balance_mean_100,wap_balance_std_100,wap_balance_sum_100,price_spread_mean_100,price_spread_std_100,price_spread_sum_100,bid_spread_mean_100,bid_spread_std_100,bid_spread_sum_100,ask_spread_mean_100,ask_spread_std_100,ask_spread_sum_100,volume_imbalance_mean_100,volume_imbalance_std_100,volume_imbalance_sum_100,total_volume_mean_100,total_volume_std_100,total_volume_sum_100,wap_mean_100,wap_std_100,wap_sum_100,row_id,trade_log_return_realized_volatility,trade_seconds_in_bucket_count_unique,trade_size_sum,trade_order_count_mean,trade_log_return_realized_volatility_500,trade_seconds_in_bucket_count_unique_500,trade_size_sum_500,trade_order_count_mean_500,trade_log_return_realized_volatility_450,trade_seconds_in_bucket_count_unique_450,trade_size_sum_450,trade_order_count_mean_450,trade_log_return_realized_volatility_400,trade_seconds_in_bucket_count_unique_400,trade_size_sum_400,trade_order_count_mean_400,trade_log_return_realized_volatility_350,trade_seconds_in_bucket_count_unique_350,trade_size_sum_350,trade_order_count_mean_350,trade_log_return_realized_volatility_30
0,trade_seconds_in_bucket_count_unique_300,trade_size_sum_300,trade_order_count_mean_300,trade_log_return_realized_volatility_250,trade_seconds_in_bucket_count_unique_250,trade_size_sum_250,trade_order_count_mean_250,trade_log_return_realized_volatility_200,trade_seconds_in_bucket_count_unique_200,trade_size_sum_200,trade_order_count_mean_200,trade_log_return_realized_volatility_150,trade_seconds_in_bucket_count_unique_150,trade_size_sum_150,trade_order_count_mean_150,trade_log_return_realized_volatility_100,trade_seconds_in_bucket_count_unique_100,trade_size_sum_100,trade_order_count_mean_100
0,0.004499,7.613599e-06,0.000260,0.002292,0.006999,0.000008,0.000404,0.002325,0.000388,0.000295,0.117051,0.000852,0.000211,0.257255,0.000176,0.000162,0.053006,-0.000151,0.000126,-0.045557,134.894040,107.260583,40738,323.496689,138.101214,97696,1.003725,0.000693,303.125061,0.001459,-5.253940e-06,0.000208,-0.000263,0.003018,-5.444991e-06,0.000431,-0.000272,0.000355,0.000247,0.017729,0.000781,0.000179,0.039065,0.000258,0.000158,0.012877,-0.000176,0.000131,-0.008792,144.820000,71.123833,7241,249.700000,100.559709,12485,1.003369,0.000546,50.168442,0.001721,-0.000005,0.000210,-0.000361,0.004114,0.000001,0.000503,0.000068,0.000366,0.000277,0.024868,0.000783,0.000181,0.053236,0.000262,0.000178,0.017790,-0.000166,0.000126,-0.011274,141.470588,84.467864,9620,263.941176,116.940077,17948,1.003482,0.000514,68.236749,0.002300,2.110692e-06,0.000234,0.000207,0.004589,-0.000008,0.000466,-0.000750,0.000390,0.000291,0.038194,0.000783,0.000181,0.076778,0.000214,0.000168,0.020996,-0.000191,0.000140,-0.018721,124.326531,82.090066,12184,262.489796,118.188932,25724,1.003633,0.000497,98.356007,0.002459,-2.722273e-07,0.000228,-0.000032,0.004700,0.000002,0.000436,0.000268,0.000390,0.000283,0.045605,0.000761,0.000196,0.089035,0.000237,0.000177,0.027719,-0.000179,0.000136,-0.020997,130.829060,88.045275,15307,266.538462,117.329887,31185,1.003710,0.000504,117.434121,0.002953,1.131529e-06,0.000251,0.000157,0.004863,...,-0.000327,0.000370,0.000271,0.059532,0.000844,0.000241,0.135856,0.000207,0.000171,0.033251,-0.000166,0.000129,-0.026736,146.745342,110.699327,23626,298.236025,134.937370,48016,1.003762,0.000464,161.605654,0.003402,-1.392656e-06,0.000246,-0.000269,0.005802,0.000001,0.000419,0.000260,0.000379,0.000278,0.073062,0.000865,0.000238,0.167016,0.000205,0.000174,0.039560,-0.000155,0.000124,-0.029837,134.772021,106.866300,26011,321.455959,151.899636,62041,1.003836,0.000466,193.740261,0.003796,0.000001,0.000250,0.000276,0.006087,1.295471e-08,0.000400,0.000003,0.000397,0.000281,0.091997,0.000858
,0.000221,0.199058,0.000188,0.000165,0.043697,-0.000147,0.000120,-0.034024,123.586207,103.533216,28672,327.431034,142.761068,75964,1.003832,0.000445,232.888919,0.004041,-1.509896e-06,0.000250,-0.000396,0.006566,-3.291241e-07,0.000406,-0.000086,0.000404,0.000289,0.105931,0.000848,0.000214,0.222287,0.000184,0.000164,0.048196,-0.000151,0.000127,-0.039558,130.854962,107.857691,34284,332.167939,141.270190,87028,1.003875,0.000453,263.015170,0-5,0.002006,40,3179,2.750000,0.000959,10.0,737.0,2.600000,0.001060,14.0,1042.0,2.642857,0.001121,16.0,1045.0,2.437500,0.001271,20.0,1584.0,2.600000,0.001308,21.0,1587.0,2.571429,0.001372,25.0,1796.0,2.400000,0.001666,27.0,1901.0,2.555556,0.001701,30.0,2069.0,2.433333,0.001852,34,2411,2.411765
1,0.001204,1.810239e-06,0.000086,0.000360,0.002476,0.000004,0.000176,0.000801,0.000212,0.000155,0.042312,0.000394,0.000157,0.078836,0.000142,0.000148,0.028358,-0.000135,0.000065,-0.027001,142.050000,102.139758,28410,411.450000,172.263581,82290,1.000239,0.000262,200.047768,0.000857,-3.608732e-07,0.000143,-0.000013,0.001435,1.554044e-05,0.000239,0.000575,0.000277,0.000186,0.010265,0.000339,0.000163,0.012540,0.000296,0.000255,0.010942,-0.000148,0.000063,-0.005470,109.702703,97.631639,4059,425.810811,180.407846,15755,1.000610,0.000230,37.022563,0.000918,-0.000001,0.000126,-0.000059,0.001883,0.000009,0.000258,0.000488,0.000269,0.000175,0.014524,0.000348,0.000144,0.018812,0.000233,0.000239,0.012598,-0.000143,0.000066,-0.007729,97.685185,88.144569,5275,447.981481,177.264272,24191,1.000518,0.000235,54.027991,0.000934,8.459831e-07,0.000110,0.000062,0.001907,0.000003,0.000225,0.000237,0.000261,0.000170,0.019041,0.000367,0.000129,0.026790,0.000186,0.000220,0.013552,-0.000133,0.000063,-0.009736,96.136986,79.708203,7018,480.000000,167.075582,35040,1.000480,0.000217,73.035016,0.000976,6.782933e-07,0.000097,0.000069,0.001981,0.000001,0.000197,0.000136,0.000251,0.000161,0.025584,0.000366,0.000122,0.037378,0.000167,0.000191,0.017015,-0.000125,0.000060,-0.012748,129.774510,111.295068,13237,492.970588,173.353382,50283,1.000419,0.000210,102.042731,0.000981,8.383753e-07,0.000092,0.000096,0.002009,...,0.000661,0.000213,0.000164,0.028342,0.000340,0.000121,0.045255,0.000149,0.000172,0.019776,-0.000129,0.000058,-0.017164,135.090226,104.794348,17967,445.030075,189.128021,59189,1.000364,0.000211,133.048450,0.001014,2.180828e-06,0.000083,0.000331,0.002105,0.000006,0.000171,0.000950,0.000210,0.000165,0.031864,0.000348,0.000115,0.052886,0.000139,0.000163,0.021131,-0.000123,0.000059,-0.018770,151.407895,108.481656,23014,438.921053,178.623008,66716,1.000332,0.000215,152.050502,0.001058,0.000002,0.000081,0.000298,0.002262,5.044579e-06,0.000172,0.000873,0.000205,0.000158,0.035454,0.000353,0.000112,0
.061017,0.000141,0.000154,0.024394,-0.000127,0.000058,-0.022032,151.566474,104.576846,26221,419.277457,178.652395,72535,1.000301,0.000221,173.052001,0.001140,2.997740e-06,0.000084,0.000561,0.002432,3.798911e-06,0.000178,0.000710,0.000213,0.000157,0.039800,0.000378,0.000143,0.070603,0.000146,0.000151,0.027304,-0.000129,0.000056,-0.024140,146.914439,102.961696,27473,420.112299,173.976587,78561,1.000264,0.000251,187.049275,0-11,0.000901,30,1289,1.900000,0.000451,7.0,546.0,2.000000,0.000501,10.0,828.0,2.200000,0.000510,11.0,829.0,2.090909,0.000557,14.0,873.0,2.142857,0.000587,16.0,900.0,2.250000,0.000755,20.0,1119.0,2.100000,0.000802,22.0,1124.0,2.045455,0.000813,24.0,1173.0,2.041667,0.000819,25,1174,2.000000
2,0.002369,-1.109201e-05,0.000173,-0.002074,0.004801,-0.000008,0.000352,-0.001493,0.000331,0.000246,0.062228,0.000725,0.000164,0.136330,0.000197,0.000170,0.036955,-0.000198,0.000171,-0.037243,141.414894,108.891243,26586,416.351064,138.433034,78274,0.999542,0.000864,187.913849,0.000640,-4.641993e-05,0.000143,-0.000882,0.002509,-2.604851e-05,0.000591,-0.000495,0.000403,0.000258,0.007657,0.000553,0.000092,0.010505,0.000335,0.000264,0.006367,-0.000247,0.000125,-0.004691,162.105263,131.846668,3080,450.421053,130.228140,8558,0.997783,0.000365,18.957882,0.001158,-0.000033,0.000173,-0.001469,0.002972,-0.000042,0.000451,-0.001831,0.000365,0.000282,0.016055,0.000605,0.000105,0.026608,0.000186,0.000217,0.008186,-0.000208,0.000168,-0.009143,156.113636,102.024670,6869,459.113636,116.212559,20201,0.998237,0.000541,43.922425,0.001179,-3.326700e-05,0.000163,-0.001697,0.003034,-0.000046,0.000427,-0.002332,0.000411,0.000299,0.020971,0.000625,0.000120,0.031877,0.000167,0.000207,0.008521,-0.000204,0.000164,-0.010387,152.509804,100.093231,7778,454.000000,115.120632,23154,0.998356,0.000586,50.916172,0.001284,-3.679629e-05,0.000159,-0.002318,0.003195,-0.000025,0.000405,-0.001597,0.000418,0.000300,0.026317,0.000672,0.000155,0.042318,0.000145,0.000192,0.009143,-0.000223,0.000173,-0.014073,149.507937,102.797777,9419,468.015873,114.869981,29485,0.998576,0.000700,62.910286,0.001295,-3.810560e-05,0.000153,-0.002591,0.003196,...,-0.002286,0.000418,0.000279,0.034313,0.000713,0.000174,0.058453,0.000141,0.000172,0.011585,-0.000268,0.000211,-0.021971,129.231707,100.552707,10597,426.475610,135.297042,34971,0.998841,0.000803,81.904967,0.001940,-2.217922e-05,0.000193,-0.002240,0.003900,-0.000024,0.000389,-0.002434,0.000396,0.000286,0.039989,0.000683,0.000174,0.068989,0.000146,0.000158,0.014792,-0.000263,0.000201,-0.026519,143.514851,116.258558,14495,440.544554,138.584092,44495,0.998944,0.000757,100.893294,0.002138,-0.000024,0.000195,-0.002854,0.004019,-2.509050e-05,0.000369,-0.002986,0.000373,0.000276,
0.044347,0.000679,0.000163,0.080811,0.000161,0.000155,0.019100,-0.000241,0.000195,-0.028626,132.084034,114.924631,15718,428.537815,135.376048,50996,0.999126,0.000829,118.896016,0.002205,-1.872567e-05,0.000183,-0.002715,0.004106,-1.841958e-05,0.000342,-0.002671,0.000345,0.000260,0.050096,0.000680,0.000149,0.098615,0.000162,0.000154,0.023552,-0.000212,0.000187,-0.030732,133.337931,106.949574,19334,400.620690,140.906641,58090,0.999359,0.000902,144.907030,0-16,0.001961,25,2161,2.720000,0.000723,4.0,661.0,3.500000,0.001048,9.0,1085.0,3.666667,0.001048,10.0,1087.0,3.400000,0.001137,12.0,1189.0,3.166667,0.001137,12.0,1189.0,3.166667,0.001515,15.0,1482.0,3.000000,0.001575,18.0,1691.0,2.833333,0.001621,20.0,2010.0,2.950000,0.001875,23,2032,2.739130
3,0.002574,-2.376661e-05,0.000236,-0.002828,0.003637,-0.000017,0.000334,-0.002053,0.000380,0.000248,0.045611,0.000860,0.000280,0.103252,0.000190,0.000199,0.022764,-0.000108,0.000091,-0.013001,146.216667,121.533215,17546,435.266667,156.120334,52232,0.998832,0.000757,119.859781,0.000987,-5.288889e-05,0.000292,-0.000635,0.001360,-7.138177e-05,0.000403,-0.000857,0.000202,0.000091,0.002419,0.001062,0.000090,0.012745,0.000131,0.000048,0.001573,-0.000050,0.000013,-0.000601,178.333333,112.091954,2140,539.166667,176.489934,6470,0.998074,0.000534,11.976885,0.000993,-0.000029,0.000239,-0.000526,0.001424,-0.000049,0.000342,-0.000882,0.000358,0.000253,0.006441,0.001058,0.000074,0.019047,0.000116,0.000046,0.002082,-0.000049,0.000011,-0.000879,146.000000,106.693624,2628,540.000000,153.413704,9720,0.998079,0.000430,17.965415,0.001003,-3.119674e-05,0.000211,-0.000718,0.001513,-0.000025,0.000322,-0.000574,0.000350,0.000250,0.008060,0.001050,0.000067,0.024143,0.000155,0.000147,0.003563,-0.000048,0.000010,-0.001111,153.826087,95.507569,3538,498.956522,156.965682,11476,0.998079,0.000379,22.955812,0.001776,-2.638866e-05,0.000269,-0.001161,0.002652,-0.000009,0.000404,-0.000377,0.000293,0.000225,0.012904,0.000881,0.000281,0.038784,0.000177,0.000176,0.007773,-0.000079,0.000064,-0.003470,161.340909,102.690763,7099,437.386364,151.999574,19245,0.998391,0.000543,43.929225,0.001776,-2.224226e-05,0.000245,-0.001179,0.002713,...,-0.000574,0.000353,0.000228,0.022211,0.000824,0.000260,0.051898,0.000141,0.000157,0.008884,-0.000109,0.000089,-0.006848,133.079365,99.939224,8384,415.269841,141.298039,26162,0.998474,0.000472,62.903835,0.001855,-1.255555e-05,0.000225,-0.000866,0.002880,-0.000007,0.000349,-0.000456,0.000339,0.000225,0.023407,0.000856,0.000269,0.059033,0.000133,0.000152,0.009162,-0.000114,0.000088,-0.007865,137.217391,101.947632,9468,424.782609,140.057013,29310,0.998472,0.000452,68.894597,0.002196,-0.000016,0.000245,-0.001290,0.003273,-1.372564e-05,0.000366,-0.001112,0.000362,0.000247,0.0293
23,0.000920,0.000296,0.074552,0.000170,0.000191,0.013789,-0.000108,0.000085,-0.008745,151.765432,124.293028,12293,424.234568,156.628404,34363,0.998464,0.000432,80.875601,0.002552,-2.451326e-05,0.000251,-0.002525,0.003580,-2.040528e-05,0.000354,-0.002102,0.000363,0.000258,0.037411,0.000900,0.000283,0.092658,0.000213,0.000206,0.021932,-0.000114,0.000097,-0.011706,155.038835,124.004263,15969,434.048544,158.774929,44707,0.998587,0.000491,102.854482,0-31,0.001561,15,1962,3.933333,0.000327,2.0,509.0,5.000000,0.000802,3.0,514.0,3.666667,0.000802,3.0,514.0,3.666667,0.001050,8.0,1301.0,3.875000,0.001089,9.0,1556.0,5.111111,0.001090,10.0,1561.0,4.700000,0.001090,10.0,1561.0,4.700000,0.001401,11.0,1631.0,4.545455,0.001561,13,1933,4.384615
4,0.001894,-1.057099e-08,0.000144,-0.000002,0.003257,-0.000002,0.000247,-0.000281,0.000254,0.000188,0.044783,0.000397,0.000130,0.069901,0.000191,0.000083,0.033565,-0.000109,0.000076,-0.019206,123.846591,102.407501,21797,343.221591,158.054066,60407,0.999619,0.000258,175.932865,0.001124,3.369865e-05,0.000279,0.000573,0.000780,-1.017439e-05,0.000195,-0.000173,0.000360,0.000175,0.006128,0.000414,0.000072,0.007042,0.000195,0.000018,0.003310,-0.000140,0.000066,-0.002377,108.882353,112.990532,1851,349.352941,147.725227,5939,0.999454,0.000307,16.990718,0.001378,0.000011,0.000233,0.000397,0.000966,-0.000008,0.000163,-0.000298,0.000364,0.000203,0.013087,0.000519,0.000138,0.018700,0.000196,0.000024,0.007040,-0.000136,0.000066,-0.004895,117.000000,99.328028,4212,391.944444,123.180227,14110,0.999518,0.000257,35.982653,0.001435,1.431020e-05,0.000195,0.000787,0.001516,0.000008,0.000206,0.000433,0.000298,0.000208,0.016410,0.000469,0.000143,0.025789,0.000209,0.000071,0.011515,-0.000126,0.000083,-0.006946,85.618182,93.493413,4709,339.945455,129.296590,18697,0.999473,0.000243,54.971001,0.001512,5.518365e-06,0.000178,0.000403,0.001832,-0.000003,0.000216,-0.000239,0.000247,0.000204,0.018038,0.000435,0.000150,0.031759,0.000204,0.000062,0.014872,-0.000123,0.000077,-0.008997,103.356164,97.637420,7545,373.465753,152.658759,27263,0.999463,0.000216,72.960818,0.001520,7.249930e-06,0.000162,0.000645,0.002188,...,-0.000279,0.000252,0.000188,0.024920,0.000425,0.000142,0.042110,0.000184,0.000076,0.018182,-0.000116,0.000074,-0.011514,106.767677,97.226417,10570,407.212121,163.304540,40314,0.999513,0.000218,98.951821,0.001571,2.401551e-06,0.000148,0.000274,0.002461,-0.000003,0.000231,-0.000310,0.000230,0.000186,0.026236,0.000414,0.000137,0.047192,0.000174,0.000076,0.019859,-0.000108,0.000073,-0.012260,116.956140,102.595726,13333,388.394737,164.649352,44277,0.999575,0.000259,113.951545,0.001609,0.000004,0.000139,0.000491,0.002927,2.213193e-06,0.000253,0.000299,0.000242,0.000193,0.032718,0.000395,0.000
137,0.053347,0.000187,0.000088,0.025220,-0.000117,0.000080,-0.015757,131.474074,109.275622,17749,371.266667,162.610706,50121,0.999618,0.000259,134.948413,0.001617,3.553627e-06,0.000132,0.000533,0.002975,4.033350e-06,0.000244,0.000605,0.000248,0.000187,0.037210,0.000389,0.000133,0.058384,0.000185,0.000086,0.027691,-0.000112,0.000079,-0.016736,126.686667,105.347118,19003,364.100000,156.327343,54615,0.999611,0.000246,149.941716,0-62,0.000871,22,1791,4.045455,0.000348,3.0,40.0,4.000000,0.000360,4.0,43.0,3.500000,0.000395,6.0,162.0,3.666667,0.000452,10.0,1216.0,5.200000,0.000453,11.0,1219.0,4.909091,0.000493,12.0,1451.0,4.916667,0.000498,14.0,1458.0,4.428571,0.000550,16.0,1570.0,4.500000,0.000551,18,1574,4.166667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7655,0.003723,-3.007246e-06,0.000213,-0.000920,0.004996,-0.000002,0.000286,-0.000669,0.000330,0.000262,0.101194,0.000597,0.000154,0.183425,0.000157,0.000118,0.048230,-0.000118,0.000078,-0.036274,125.013029,100.510754,38379,296.185668,136.093134,90929,1.000142,0.000396,307.043621,0.001061,-1.551967e-05,0.000167,-0.000636,0.001577,-7.713759e-07,0.000249,-0.000032,0.000223,0.000164,0.009149,0.000586,0.000127,0.024043,0.000136,0.000065,0.005575,-0.000105,0.000058,-0.004299,179.585366,88.411248,7363,333.439024,115.251041,13671,1.000057,0.000266,41.002339,0.001206,-0.000011,0.000164,-0.000593,0.001719,-0.000007,0.000234,-0.000369,0.000238,0.000169,0.013090,0.000584,0.000143,0.032100,0.000162,0.000094,0.008933,-0.000110,0.000069,-0.006046,144.818182,100.028716,7965,327.436364,103.552950,18009,1.000162,0.000301,55.008899,0.001584,-8.637874e-06,0.000177,-0.000700,0.001957,0.000002,0.000219,0.000152,0.000268,0.000185,0.021747,0.000605,0.000147,0.049028,0.000169,0.000101,0.013703,-0.000100,0.000060,-0.008128,132.111111,96.108532,10701,359.691358,109.037682,29135,1.000157,0.000269,81.012679,0.002076,-9.910199e-06,0.000202,-0.001050,0.002841,-0.000004,0.000277,-0.000470,0.000288,0.000204,0.030480,0.000630,0.000153,0.066762,0.000179,0.000110,0.018942,-0.000108,0.000070,-0.011488,128.528302,100.405768,13624,353.943396,123.634933,37518,1.000119,0.000300,106.012576,0.002212,1.521659e-06,0.000195,0.000198,0.002954,...,-0.000291,0.000293,0.000218,0.047805,0.000635,0.000175,0.103569,0.000163,0.000119,0.026600,-0.000108,0.000068,-0.017600,120.398773,93.264627,19625,304.018405,147.914326,49555,1.000079,0.000303,163.012945,0.002973,6.773769e-07,0.000212,0.000133,0.003814,0.000002,0.000272,0.000400,0.000277,0.000219,0.054587,0.000608,0.000178,0.119695,0.000152,0.000113,0.029959,-0.000121,0.000073,-0.023847,129.730964,102.989318,25557,291.416244,146.845328,57409,1.000000,0.000338,196.999918,0.003157,-0.000004,0.000205,-0.000934,0.004246,-3.584046e-06,0.000276,-0.000853,0.000326,0.000261,0.0
77587,0.000594,0.000168,0.141317,0.000164,0.000122,0.039027,-0.000115,0.000072,-0.027273,130.483193,101.720753,31055,299.264706,141.159639,71225,1.000092,0.000386,238.021882,0.003323,5.019906e-07,0.000204,0.000134,0.004750,-4.949621e-07,0.000292,-0.000132,0.000319,0.000260,0.084951,0.000588,0.000160,0.156496,0.000160,0.000124,0.042587,-0.000121,0.000082,-0.032176,133.338346,103.287610,35468,303.631579,139.650614,80766,1.000105,0.000383,266.027841,1-32751,0.001776,49,3249,2.775510,0.000753,9.0,392.0,3.222222,0.000890,12.0,803.0,3.333333,0.001028,16.0,1532.0,3.937500,0.001237,19.0,1837.0,3.894737,0.001280,23.0,1889.0,3.608696,0.001376,27.0,1902.0,3.296296,0.001462,31.0,2290.0,3.129032,0.001585,36.0,2693.0,3.027778,0.001655,41,2808,2.853659
7656,0.010829,2.896266e-05,0.000487,0.014279,0.012168,0.000029,0.000548,0.014143,0.000403,0.000333,0.199038,0.000922,0.000333,0.455691,0.000159,0.000120,0.078643,-0.000125,0.000087,-0.061925,254.006073,218.335158,125479,567.840081,299.637077,280513,1.007503,0.006260,497.706589,0.003772,-1.421354e-05,0.000419,-0.001166,0.004826,-3.104165e-05,0.000535,-0.002545,0.000418,0.000265,0.034253,0.000848,0.000206,0.069561,0.000258,0.000148,0.021176,-0.000116,0.000086,-0.009544,261.609756,224.107324,21452,537.609756,263.892784,44084,1.011380,0.001086,82.933142,0.004968,-0.000027,0.000453,-0.003253,0.006510,-0.000025,0.000594,-0.003019,0.000483,0.000311,0.058446,0.001004,0.000302,0.121513,0.000234,0.000142,0.028282,-0.000114,0.000086,-0.013792,247.347107,217.854222,29929,526.917355,274.752330,63757,1.011677,0.001062,122.412925,0.006736,-6.913655e-06,0.000534,-0.001106,0.007670,-0.000005,0.000608,-0.000732,0.000511,0.000351,0.081737,0.001110,0.000349,0.177596,0.000222,0.000133,0.035457,-0.000113,0.000084,-0.018041,235.212500,214.432097,37634,527.625000,265.685343,84420,1.011841,0.001038,161.894584,0.007866,-2.442783e-05,0.000560,-0.004837,0.009282,-0.000023,0.000661,-0.004537,0.000496,0.000351,0.098132,0.001105,0.000338,0.218791,0.000212,0.000139,0.042073,-0.000129,0.000103,-0.025634,229.429293,206.377045,45427,510.075758,264.564520,100995,1.012027,0.001304,200.381294,0.008499,-1.017934e-05,0.000549,-0.002453,0.009971,...,0.000215,0.000462,0.000340,0.129842,0.001072,0.000312,0.301263,0.000181,0.000131,0.050780,-0.000127,0.000093,-0.035596,223.918149,200.038863,62921,498.814947,254.176938,140167,1.012203,0.001377,284.429179,0.009946,8.471347e-06,0.000556,0.002719,0.011071,0.000009,0.000619,0.002804,0.000468,0.000367,0.150186,0.001037,0.000335,0.332876,0.000177,0.000132,0.056770,-0.000131,0.000093,-0.041933,229.679128,200.472925,73727,513.903427,263.518524,164963,1.011773,0.001836,324.779147,0.010290,0.000025,0.000540,0.009161,0.011509,2.479579e-05,0.000604,0.009026,0.000452,0.000
354,0.164685,0.000989,0.000348,0.360153,0.000169,0.000126,0.061578,-0.000131,0.000092,-0.047715,234.881868,197.822345,85497,541.458791,275.282526,197091,1.010809,0.003205,367.934584,0.010436,2.709498e-05,0.000520,0.010919,0.011705,2.575951e-05,0.000583,0.010381,0.000434,0.000349,0.174927,0.000967,0.000340,0.389600,0.000165,0.000123,0.066314,-0.000130,0.000091,-0.052381,247.285360,210.785891,99656,553.980149,277.365322,223254,1.009805,0.004330,406.951325,1-32753,0.008492,183,75903,7.874317,0.002413,27.0,10656.0,6.444444,0.003531,39.0,13829.0,6.410256,0.003885,51.0,19713.0,6.862745,0.005431,72.0,26847.0,8.236111,0.006310,88.0,30858.0,8.136364,0.006871,105.0,38345.0,8.333333,0.007641,126.0,49265.0,8.293651,0.007915,145.0,62044.0,8.586207,0.008042,153,65460,8.411765
7657,0.003135,5.514784e-06,0.000178,0.001721,0.004268,0.000005,0.000242,0.001602,0.000243,0.000168,0.076213,0.000648,0.000150,0.202752,0.000141,0.000064,0.044025,-0.000132,0.000070,-0.041168,163.645367,116.099173,51221,426.603834,151.330709,133527,1.000854,0.000564,313.267395,0.001541,1.047280e-05,0.000202,0.000618,0.001900,4.614660e-06,0.000249,0.000272,0.000294,0.000177,0.017358,0.000621,0.000129,0.036658,0.000109,0.000041,0.006429,-0.000157,0.000077,-0.009286,137.440678,98.647277,8109,409.474576,157.400692,24159,1.001224,0.000276,59.072213,0.001710,0.000005,0.000200,0.000333,0.002064,0.000007,0.000241,0.000542,0.000310,0.000181,0.022921,0.000640,0.000129,0.047362,0.000125,0.000053,0.009286,-0.000153,0.000072,-0.011340,131.040541,93.007356,9697,448.175676,172.875706,33165,1.001191,0.000276,74.088114,0.001737,3.838176e-06,0.000181,0.000357,0.002177,0.000009,0.000227,0.000852,0.000279,0.000179,0.025982,0.000634,0.000122,0.058958,0.000132,0.000057,0.012232,-0.000146,0.000072,-0.013572,131.526882,89.459803,12232,433.182796,166.261446,40286,1.001216,0.000253,93.113057,0.001819,2.895893e-07,0.000164,0.000036,0.002576,0.000004,0.000232,0.000543,0.000267,0.000179,0.033141,0.000612,0.000130,0.075904,0.000148,0.000072,0.018396,-0.000147,0.000075,-0.018215,128.153226,91.891332,15891,424.346774,150.160946,52619,1.001248,0.000244,124.154799,0.002108,4.481081e-06,0.000173,0.000668,0.003184,...,0.000399,0.000270,0.000182,0.049688,0.000621,0.000155,0.114179,0.000149,0.000068,0.027504,-0.000139,0.000071,-0.025626,158.559783,121.331217,29175,431.505435,137.228938,79397,1.001226,0.000237,184.225642,0.002363,5.505807e-06,0.000161,0.001195,0.003635,0.000006,0.000247,0.001359,0.000258,0.000179,0.056083,0.000620,0.000148,0.134527,0.000143,0.000066,0.030987,-0.000138,0.000071,-0.030002,154.732719,115.692970,33577,404.843318,147.061033,87851,1.001174,0.000296,217.254683,0.002417,0.000005,0.000158,0.001251,0.003720,5.527394e-06,0.000244,0.001293,0.000255,0.000176,0.059604,0.000626,0.000146
,0.146402,0.000145,0.000064,0.033845,-0.000138,0.000069,-0.032324,160.521368,115.695404,37562,404.965812,144.351171,94762,1.001122,0.000343,234.262478,0.002638,7.048599e-06,0.000163,0.001840,0.003806,6.635777e-06,0.000236,0.001732,0.000248,0.000172,0.064822,0.000632,0.000140,0.165065,0.000141,0.000064,0.036881,-0.000133,0.000067,-0.034737,159.084291,112.683289,41521,402.333333,145.181822,105009,1.001026,0.000435,261.267772,1-32758,0.001927,26,2239,2.615385,0.000987,4.0,387.0,2.750000,0.001134,5.0,487.0,2.400000,0.001355,7.0,654.0,2.714286,0.001480,9.0,855.0,2.444444,0.001567,11.0,980.0,2.727273,0.001622,15.0,1431.0,3.266667,0.001692,20.0,1734.0,2.700000,0.001804,21.0,1738.0,2.666667,0.001804,21,1738,2.666667
7658,0.003750,9.030349e-06,0.000180,0.003910,0.005773,0.000009,0.000278,0.003777,0.000199,0.000155,0.086215,0.000421,0.000201,0.182829,0.000190,0.000100,0.082293,-0.000231,0.000153,-0.100252,138.235023,110.170056,59994,526.317972,157.860523,228422,1.003032,0.001669,435.315910,0.001712,-1.158581e-05,0.000204,-0.000823,0.002597,-6.932860e-06,0.000310,-0.000492,0.000165,0.000120,0.011721,0.000380,0.000188,0.026960,0.000147,0.000061,0.010456,-0.000229,0.000159,-0.016236,173.126761,124.972332,12292,510.873239,164.203961,36272,1.003786,0.000355,71.268795,0.001906,-0.000011,0.000187,-0.001169,0.003578,-0.000013,0.000351,-0.001366,0.000183,0.000146,0.019256,0.000380,0.000168,0.039940,0.000166,0.000090,0.017467,-0.000216,0.000138,-0.022633,168.828571,118.741191,17727,539.476190,190.391807,56645,1.004019,0.000453,105.421962,0.002287,-3.279920e-06,0.000193,-0.000466,0.003912,-0.000002,0.000329,-0.000233,0.000209,0.000153,0.029612,0.000394,0.000165,0.055984,0.000166,0.000093,0.023617,-0.000235,0.000154,-0.033335,155.471831,112.824265,22077,526.260563,181.522547,74729,1.004099,0.000447,142.582034,0.002530,-1.032464e-05,0.000188,-0.001879,0.004197,-0.000010,0.000312,-0.001757,0.000196,0.000154,0.035646,0.000392,0.000159,0.071407,0.000162,0.000086,0.029399,-0.000241,0.000161,-0.043791,153.549451,109.904405,27946,516.351648,181.308039,93976,1.004251,0.000544,182.773591,0.002728,-3.615554e-07,0.000183,-0.000081,0.004435,...,0.000423,0.000198,0.000152,0.052124,0.000421,0.000178,0.110612,0.000174,0.000088,0.045882,-0.000241,0.000163,-0.063349,134.745247,106.219202,35438,524.920152,164.982665,138054,1.004173,0.000607,264.097496,0.003087,4.958993e-06,0.000179,0.001473,0.005227,0.000007,0.000304,0.002124,0.000200,0.000155,0.059342,0.000439,0.000208,0.130477,0.000180,0.000092,0.053509,-0.000239,0.000161,-0.070976,130.053872,105.149558,38626,524.074074,160.097968,155650,1.004028,0.000719,298.196244,0.003181,0.000007,0.000175,0.002373,0.005359,7.077217e-06,0.000296,0.002328,0.000193,0.00015
4,0.063542,0.000432,0.000205,0.142142,0.000183,0.000097,0.060151,-0.000245,0.000162,-0.080571,129.142857,102.496792,42488,526.638298,154.455791,173264,1.003818,0.000941,330.256001,0.003350,6.269822e-06,0.000174,0.002320,0.005515,7.898478e-06,0.000287,0.002922,0.000193,0.000153,0.071317,0.000434,0.000202,0.160444,0.000186,0.000098,0.068640,-0.000243,0.000159,-0.089920,127.564865,102.627447,47199,529.645946,156.267339,195969,1.003559,0.001155,371.316959,1-32763,0.002856,109,16648,2.935780,0.001448,23.0,2912.0,2.608696,0.001494,32.0,4427.0,2.656250,0.001684,37.0,5279.0,2.837838,0.001803,50.0,7810.0,2.820000,0.001919,57.0,8274.0,2.701754,0.002065,67.0,9427.0,2.626866,0.002478,76.0,10960.0,2.710526,0.002557,87.0,12238.0,2.643678,0.002759,93,13268,2.709677


# トレーニングデータ

In [51]:
# Load the training targets and collect the distinct stock ids they cover.
train = pd.read_csv(f'{data_dir}train.csv')
train_ids = train['stock_id'].unique()

In [52]:
%%time
# Run the notebook's `preprocessor` helper over every training stock id (train mode).
df_train = preprocessor(list_stock_ids=train_ids, is_train=True)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 112 out of 112 | elapsed:  7.2min finished


Wall time: 7min 17s


In [53]:
# Build the '<stock_id>-<time_id>' key, keep only key + target,
# then left-join the engineered features onto the targets.
train = train.assign(
    row_id=train['stock_id'].astype(str) + '-' + train['time_id'].astype(str)
)[['row_id', 'target']]
df_train = pd.merge(train, df_train, on='row_id', how='left')

In [54]:
# Preview the first rows of the merged training frame.
df_train.head()

Unnamed: 0,row_id,target,log_return_realized_volatility,log_return_mean,log_return_std,log_return_sum,log_return2_realized_volatility,log_return2_mean,log_return2_std,log_return2_sum,wap_balance_mean,wap_balance_std,wap_balance_sum,price_spread_mean,price_spread_std,price_spread_sum,bid_spread_mean,bid_spread_std,bid_spread_sum,ask_spread_mean,ask_spread_std,ask_spread_sum,volume_imbalance_mean,volume_imbalance_std,volume_imbalance_sum,total_volume_mean,total_volume_std,total_volume_sum,wap_mean,wap_std,wap_sum,log_return_realized_volatility_500,log_return_mean_500,log_return_std_500,log_return_sum_500,log_return2_realized_volatility_500,log_return2_mean_500,log_return2_std_500,log_return2_sum_500,wap_balance_mean_500,wap_balance_std_500,wap_balance_sum_500,price_spread_mean_500,price_spread_std_500,price_spread_sum_500,bid_spread_mean_500,bid_spread_std_500,bid_spread_sum_500,ask_spread_mean_500,ask_spread_std_500,ask_spread_sum_500,volume_imbalance_mean_500,volume_imbalance_std_500,volume_imbalance_sum_500,total_volume_mean_500,total_volume_std_500,total_volume_sum_500,wap_mean_500,wap_std_500,wap_sum_500,log_return_realized_volatility_450,log_return_mean_450,log_return_std_450,log_return_sum_450,log_return2_realized_volatility_450,log_return2_mean_450,log_return2_std_450,log_return2_sum_450,wap_balance_mean_450,wap_balance_std_450,wap_balance_sum_450,price_spread_mean_450,price_spread_std_450,price_spread_sum_450,bid_spread_mean_450,bid_spread_std_450,bid_spread_sum_450,ask_spread_mean_450,ask_spread_std_450,ask_spread_sum_450,volume_imbalance_mean_450,volume_imbalance_std_450,volume_imbalance_sum_450,total_volume_mean_450,total_volume_std_450,total_volume_sum_450,wap_mean_450,wap_std_450,wap_sum_450,log_return_realized_volatility_400,log_return_mean_400,log_return_std_400,log_return_sum_400,log_return2_realized_volatility_400,log_return2_mean_400,log_return2_std_400,log_return2_sum_400,wap_balance_mean_400,wap_balance_std_400,wap_balance_sum_400,price_spread_mea
n_400,price_spread_std_400,price_spread_sum_400,bid_spread_mean_400,bid_spread_std_400,bid_spread_sum_400,ask_spread_mean_400,ask_spread_std_400,ask_spread_sum_400,volume_imbalance_mean_400,volume_imbalance_std_400,volume_imbalance_sum_400,total_volume_mean_400,total_volume_std_400,total_volume_sum_400,wap_mean_400,wap_std_400,wap_sum_400,log_return_realized_volatility_350,log_return_mean_350,log_return_std_350,log_return_sum_350,log_return2_realized_volatility_350,log_return2_mean_350,log_return2_std_350,log_return2_sum_350,wap_balance_mean_350,wap_balance_std_350,wap_balance_sum_350,price_spread_mean_350,price_spread_std_350,price_spread_sum_350,bid_spread_mean_350,bid_spread_std_350,bid_spread_sum_350,ask_spread_mean_350,ask_spread_std_350,ask_spread_sum_350,volume_imbalance_mean_350,volume_imbalance_std_350,volume_imbalance_sum_350,total_volume_mean_350,total_volume_std_350,total_volume_sum_350,wap_mean_350,wap_std_350,wap_sum_350,log_return_realized_volatility_300,log_return_mean_300,log_return_std_300,...,log_return2_std_250,log_return2_sum_250,wap_balance_mean_250,wap_balance_std_250,wap_balance_sum_250,price_spread_mean_250,price_spread_std_250,price_spread_sum_250,bid_spread_mean_250,bid_spread_std_250,bid_spread_sum_250,ask_spread_mean_250,ask_spread_std_250,ask_spread_sum_250,volume_imbalance_mean_250,volume_imbalance_std_250,volume_imbalance_sum_250,total_volume_mean_250,total_volume_std_250,total_volume_sum_250,wap_mean_250,wap_std_250,wap_sum_250,log_return_realized_volatility_200,log_return_mean_200,log_return_std_200,log_return_sum_200,log_return2_realized_volatility_200,log_return2_mean_200,log_return2_std_200,log_return2_sum_200,wap_balance_mean_200,wap_balance_std_200,wap_balance_sum_200,price_spread_mean_200,price_spread_std_200,price_spread_sum_200,bid_spread_mean_200,bid_spread_std_200,bid_spread_sum_200,ask_spread_mean_200,ask_spread_std_200,ask_spread_sum_200,volume_imbalance_mean_200,volume_imbalance_std_200,volume_imbalance_sum_200,total_vo
lume_mean_200,total_volume_std_200,total_volume_sum_200,wap_mean_200,wap_std_200,wap_sum_200,log_return_realized_volatility_150,log_return_mean_150,log_return_std_150,log_return_sum_150,log_return2_realized_volatility_150,log_return2_mean_150,log_return2_std_150,log_return2_sum_150,wap_balance_mean_150,wap_balance_std_150,wap_balance_sum_150,price_spread_mean_150,price_spread_std_150,price_spread_sum_150,bid_spread_mean_150,bid_spread_std_150,bid_spread_sum_150,ask_spread_mean_150,ask_spread_std_150,ask_spread_sum_150,volume_imbalance_mean_150,volume_imbalance_std_150,volume_imbalance_sum_150,total_volume_mean_150,total_volume_std_150,total_volume_sum_150,wap_mean_150,wap_std_150,wap_sum_150,log_return_realized_volatility_100,log_return_mean_100,log_return_std_100,log_return_sum_100,log_return2_realized_volatility_100,log_return2_mean_100,log_return2_std_100,log_return2_sum_100,wap_balance_mean_100,wap_balance_std_100,wap_balance_sum_100,price_spread_mean_100,price_spread_std_100,price_spread_sum_100,bid_spread_mean_100,bid_spread_std_100,bid_spread_sum_100,ask_spread_mean_100,ask_spread_std_100,ask_spread_sum_100,volume_imbalance_mean_100,volume_imbalance_std_100,volume_imbalance_sum_100,total_volume_mean_100,total_volume_std_100,total_volume_sum_100,wap_mean_100,wap_std_100,wap_sum_100,trade_log_return_realized_volatility,trade_seconds_in_bucket_count_unique,trade_size_sum,trade_order_count_mean,trade_log_return_realized_volatility_500,trade_seconds_in_bucket_count_unique_500,trade_size_sum_500,trade_order_count_mean_500,trade_log_return_realized_volatility_450,trade_seconds_in_bucket_count_unique_450,trade_size_sum_450,trade_order_count_mean_450,trade_log_return_realized_volatility_400,trade_seconds_in_bucket_count_unique_400,trade_size_sum_400,trade_order_count_mean_400,trade_log_return_realized_volatility_350,trade_seconds_in_bucket_count_unique_350,trade_size_sum_350,trade_order_count_mean_350,trade_log_return_realized_volatility_300,trade_seconds_in_bucket_co
unt_unique_300,trade_size_sum_300,trade_order_count_mean_300,trade_log_return_realized_volatility_250,trade_seconds_in_bucket_count_unique_250,trade_size_sum_250,trade_order_count_mean_250,trade_log_return_realized_volatility_200,trade_seconds_in_bucket_count_unique_200,trade_size_sum_200,trade_order_count_mean_200,trade_log_return_realized_volatility_150,trade_seconds_in_bucket_count_unique_150,trade_size_sum_150,trade_order_count_mean_150,trade_log_return_realized_volatility_100,trade_seconds_in_bucket_count_unique_100,trade_size_sum_100,trade_order_count_mean_100
0,0-5,0.004136,0.004499,7.613599e-06,0.00026,0.002292,0.006999,8e-06,0.000404,0.002325,0.000388,0.000295,0.117051,0.000852,0.000211,0.257255,0.000176,0.000162,0.053006,-0.000151,0.000126,-0.045557,134.89404,107.260583,40738,323.496689,138.101214,97696,1.003725,0.000693,303.125061,0.001459,-5.25394e-06,0.000208,-0.000263,0.003018,-5e-06,0.000431,-0.000272,0.000355,0.000247,0.017729,0.000781,0.000179,0.039065,0.000258,0.000158,0.012877,-0.000176,0.000131,-0.008792,144.82,71.123833,7241.0,249.7,100.559709,12485.0,1.003369,0.000546,50.168442,0.001721,-5e-06,0.00021,-0.000361,0.004114,1e-06,0.000503,6.8e-05,0.000366,0.000277,0.024868,0.000783,0.000181,0.053236,0.000262,0.000178,0.01779,-0.000166,0.000126,-0.011274,141.470588,84.467864,9620,263.941176,116.940077,17948,1.003482,0.000514,68.236749,0.0023,2.110692e-06,0.000234,0.000207,0.004589,-8e-06,0.000466,-0.00075,0.00039,0.000291,0.038194,0.000783,0.000181,0.076778,0.000214,0.000168,0.020996,-0.000191,0.00014,-0.018721,124.326531,82.090066,12184,262.489796,118.188932,25724,1.003633,0.000497,98.356007,0.002459,-2.722273e-07,0.000228,-3.2e-05,0.0047,2e-06,0.000436,0.000268,0.00039,0.000283,0.045605,0.000761,0.000196,0.089035,0.000237,0.000177,0.027719,-0.000179,0.000136,-0.020997,130.82906,88.045275,15307,266.538462,117.329887,31185,1.00371,0.000504,117.434121,0.002953,1.131529e-06,0.000251,...,0.000416,-0.000327,0.00037,0.000271,0.059532,0.000844,0.000241,0.135856,0.000207,0.000171,0.033251,-0.000166,0.000129,-0.026736,146.745342,110.699327,23626,298.236025,134.93737,48016,1.003762,0.000464,161.605654,0.003402,-1e-06,0.000246,-0.000269,0.005802,1e-06,0.000419,0.00026,0.000379,0.000278,0.073062,0.000865,0.000238,0.167016,0.000205,0.000174,0.03956,-0.000155,0.000124,-0.029837,134.772021,106.8663,26011,321.455959,151.899636,62041,1.003836,0.000466,193.740261,0.003796,1e-06,0.00025,0.000276,0.006087,1.295471e-08,0.0004,3e-06,0.000397,0.000281,0.091997,0.000858,0.000221,0.199058,0.000188,0.000165,0.043697,-0.000147,0.00012,-
0.034024,123.586207,103.533216,28672,327.431034,142.761068,75964,1.003832,0.000445,232.888919,0.004041,-2e-06,0.00025,-0.000396,0.006566,-3.291241e-07,0.000406,-8.6e-05,0.000404,0.000289,0.105931,0.000848,0.000214,0.222287,0.000184,0.000164,0.048196,-0.000151,0.000127,-0.039558,130.854962,107.857691,34284,332.167939,141.27019,87028,1.003875,0.000453,263.01517,0.002006,40.0,3179.0,2.75,0.000959,10.0,737.0,2.6,0.00106,14.0,1042.0,2.642857,0.001121,16.0,1045.0,2.4375,0.001271,20.0,1584.0,2.6,0.001308,21.0,1587.0,2.571429,0.001372,25.0,1796.0,2.4,0.001666,27.0,1901.0,2.555556,0.001701,30.0,2069.0,2.433333,0.001852,34.0,2411.0,2.411765
1,0-11,0.001445,0.001204,1.810239e-06,8.6e-05,0.00036,0.002476,4e-06,0.000176,0.000801,0.000212,0.000155,0.042312,0.000394,0.000157,0.078836,0.000142,0.000148,0.028358,-0.000135,6.5e-05,-0.027001,142.05,102.139758,28410,411.45,172.263581,82290,1.000239,0.000262,200.047768,0.000857,-3.608732e-07,0.000143,-1.3e-05,0.001435,1.6e-05,0.000239,0.000575,0.000277,0.000186,0.010265,0.000339,0.000163,0.01254,0.000296,0.000255,0.010942,-0.000148,6.3e-05,-0.00547,109.702703,97.631639,4059.0,425.810811,180.407846,15755.0,1.00061,0.00023,37.022563,0.000918,-1e-06,0.000126,-5.9e-05,0.001883,9e-06,0.000258,0.000488,0.000269,0.000175,0.014524,0.000348,0.000144,0.018812,0.000233,0.000239,0.012598,-0.000143,6.6e-05,-0.007729,97.685185,88.144569,5275,447.981481,177.264272,24191,1.000518,0.000235,54.027991,0.000934,8.459831e-07,0.00011,6.2e-05,0.001907,3e-06,0.000225,0.000237,0.000261,0.00017,0.019041,0.000367,0.000129,0.02679,0.000186,0.00022,0.013552,-0.000133,6.3e-05,-0.009736,96.136986,79.708203,7018,480.0,167.075582,35040,1.00048,0.000217,73.035016,0.000976,6.782933e-07,9.7e-05,6.9e-05,0.001981,1e-06,0.000197,0.000136,0.000251,0.000161,0.025584,0.000366,0.000122,0.037378,0.000167,0.000191,0.017015,-0.000125,6e-05,-0.012748,129.77451,111.295068,13237,492.970588,173.353382,50283,1.000419,0.00021,102.042731,0.000981,8.383753e-07,9.2e-05,...,0.000177,0.000661,0.000213,0.000164,0.028342,0.00034,0.000121,0.045255,0.000149,0.000172,0.019776,-0.000129,5.8e-05,-0.017164,135.090226,104.794348,17967,445.030075,189.128021,59189,1.000364,0.000211,133.04845,0.001014,2e-06,8.3e-05,0.000331,0.002105,6e-06,0.000171,0.00095,0.00021,0.000165,0.031864,0.000348,0.000115,0.052886,0.000139,0.000163,0.021131,-0.000123,5.9e-05,-0.01877,151.407895,108.481656,23014,438.921053,178.623008,66716,1.000332,0.000215,152.050502,0.001058,2e-06,8.1e-05,0.000298,0.002262,5.044579e-06,0.000172,0.000873,0.000205,0.000158,0.035454,0.000353,0.000112,0.061017,0.000141,0.000154,0.024394,-0.000127,5.8e-05,-0.022032,151.56647
4,104.576846,26221,419.277457,178.652395,72535,1.000301,0.000221,173.052001,0.00114,3e-06,8.4e-05,0.000561,0.002432,3.798911e-06,0.000178,0.00071,0.000213,0.000157,0.0398,0.000378,0.000143,0.070603,0.000146,0.000151,0.027304,-0.000129,5.6e-05,-0.02414,146.914439,102.961696,27473,420.112299,173.976587,78561,1.000264,0.000251,187.049275,0.000901,30.0,1289.0,1.9,0.000451,7.0,546.0,2.0,0.000501,10.0,828.0,2.2,0.00051,11.0,829.0,2.090909,0.000557,14.0,873.0,2.142857,0.000587,16.0,900.0,2.25,0.000755,20.0,1119.0,2.1,0.000802,22.0,1124.0,2.045455,0.000813,24.0,1173.0,2.041667,0.000819,25.0,1174.0,2.0
2,0-16,0.002168,0.002369,-1.109201e-05,0.000173,-0.002074,0.004801,-8e-06,0.000352,-0.001493,0.000331,0.000246,0.062228,0.000725,0.000164,0.13633,0.000197,0.00017,0.036955,-0.000198,0.000171,-0.037243,141.414894,108.891243,26586,416.351064,138.433034,78274,0.999542,0.000864,187.913849,0.00064,-4.641993e-05,0.000143,-0.000882,0.002509,-2.6e-05,0.000591,-0.000495,0.000403,0.000258,0.007657,0.000553,9.2e-05,0.010505,0.000335,0.000264,0.006367,-0.000247,0.000125,-0.004691,162.105263,131.846668,3080.0,450.421053,130.22814,8558.0,0.997783,0.000365,18.957882,0.001158,-3.3e-05,0.000173,-0.001469,0.002972,-4.2e-05,0.000451,-0.001831,0.000365,0.000282,0.016055,0.000605,0.000105,0.026608,0.000186,0.000217,0.008186,-0.000208,0.000168,-0.009143,156.113636,102.02467,6869,459.113636,116.212559,20201,0.998237,0.000541,43.922425,0.001179,-3.3267e-05,0.000163,-0.001697,0.003034,-4.6e-05,0.000427,-0.002332,0.000411,0.000299,0.020971,0.000625,0.00012,0.031877,0.000167,0.000207,0.008521,-0.000204,0.000164,-0.010387,152.509804,100.093231,7778,454.0,115.120632,23154,0.998356,0.000586,50.916172,0.001284,-3.679629e-05,0.000159,-0.002318,0.003195,-2.5e-05,0.000405,-0.001597,0.000418,0.0003,0.026317,0.000672,0.000155,0.042318,0.000145,0.000192,0.009143,-0.000223,0.000173,-0.014073,149.507937,102.797777,9419,468.015873,114.869981,29485,0.998576,0.0007,62.910286,0.001295,-3.81056e-05,0.000153,...,0.000403,-0.002286,0.000418,0.000279,0.034313,0.000713,0.000174,0.058453,0.000141,0.000172,0.011585,-0.000268,0.000211,-0.021971,129.231707,100.552707,10597,426.47561,135.297042,34971,0.998841,0.000803,81.904967,0.00194,-2.2e-05,0.000193,-0.00224,0.0039,-2.4e-05,0.000389,-0.002434,0.000396,0.000286,0.039989,0.000683,0.000174,0.068989,0.000146,0.000158,0.014792,-0.000263,0.000201,-0.026519,143.514851,116.258558,14495,440.544554,138.584092,44495,0.998944,0.000757,100.893294,0.002138,-2.4e-05,0.000195,-0.002854,0.004019,-2.50905e-05,0.000369,-0.002986,0.000373,0.000276,0.044347,0.000679,0.000163,0.080811,
0.000161,0.000155,0.0191,-0.000241,0.000195,-0.028626,132.084034,114.924631,15718,428.537815,135.376048,50996,0.999126,0.000829,118.896016,0.002205,-1.9e-05,0.000183,-0.002715,0.004106,-1.841958e-05,0.000342,-0.002671,0.000345,0.00026,0.050096,0.00068,0.000149,0.098615,0.000162,0.000154,0.023552,-0.000212,0.000187,-0.030732,133.337931,106.949574,19334,400.62069,140.906641,58090,0.999359,0.000902,144.90703,0.001961,25.0,2161.0,2.72,0.000723,4.0,661.0,3.5,0.001048,9.0,1085.0,3.666667,0.001048,10.0,1087.0,3.4,0.001137,12.0,1189.0,3.166667,0.001137,12.0,1189.0,3.166667,0.001515,15.0,1482.0,3.0,0.001575,18.0,1691.0,2.833333,0.001621,20.0,2010.0,2.95,0.001875,23.0,2032.0,2.73913
3,0-31,0.002195,0.002574,-2.376661e-05,0.000236,-0.002828,0.003637,-1.7e-05,0.000334,-0.002053,0.00038,0.000248,0.045611,0.00086,0.00028,0.103252,0.00019,0.000199,0.022764,-0.000108,9.1e-05,-0.013001,146.216667,121.533215,17546,435.266667,156.120334,52232,0.998832,0.000757,119.859781,0.000987,-5.288889e-05,0.000292,-0.000635,0.00136,-7.1e-05,0.000403,-0.000857,0.000202,9.1e-05,0.002419,0.001062,9e-05,0.012745,0.000131,4.8e-05,0.001573,-5e-05,1.3e-05,-0.000601,178.333333,112.091954,2140.0,539.166667,176.489934,6470.0,0.998074,0.000534,11.976885,0.000993,-2.9e-05,0.000239,-0.000526,0.001424,-4.9e-05,0.000342,-0.000882,0.000358,0.000253,0.006441,0.001058,7.4e-05,0.019047,0.000116,4.6e-05,0.002082,-4.9e-05,1.1e-05,-0.000879,146.0,106.693624,2628,540.0,153.413704,9720,0.998079,0.00043,17.965415,0.001003,-3.119674e-05,0.000211,-0.000718,0.001513,-2.5e-05,0.000322,-0.000574,0.00035,0.00025,0.00806,0.00105,6.7e-05,0.024143,0.000155,0.000147,0.003563,-4.8e-05,1e-05,-0.001111,153.826087,95.507569,3538,498.956522,156.965682,11476,0.998079,0.000379,22.955812,0.001776,-2.638866e-05,0.000269,-0.001161,0.002652,-9e-06,0.000404,-0.000377,0.000293,0.000225,0.012904,0.000881,0.000281,0.038784,0.000177,0.000176,0.007773,-7.9e-05,6.4e-05,-0.00347,161.340909,102.690763,7099,437.386364,151.999574,19245,0.998391,0.000543,43.929225,0.001776,-2.224226e-05,0.000245,...,0.000365,-0.000574,0.000353,0.000228,0.022211,0.000824,0.00026,0.051898,0.000141,0.000157,0.008884,-0.000109,8.9e-05,-0.006848,133.079365,99.939224,8384,415.269841,141.298039,26162,0.998474,0.000472,62.903835,0.001855,-1.3e-05,0.000225,-0.000866,0.00288,-7e-06,0.000349,-0.000456,0.000339,0.000225,0.023407,0.000856,0.000269,0.059033,0.000133,0.000152,0.009162,-0.000114,8.8e-05,-0.007865,137.217391,101.947632,9468,424.782609,140.057013,29310,0.998472,0.000452,68.894597,0.002196,-1.6e-05,0.000245,-0.00129,0.003273,-1.372564e-05,0.000366,-0.001112,0.000362,0.000247,0.029323,0.00092,0.000296,0.074552,0.00017,0.000191,0.013789,-0.00
0108,8.5e-05,-0.008745,151.765432,124.293028,12293,424.234568,156.628404,34363,0.998464,0.000432,80.875601,0.002552,-2.5e-05,0.000251,-0.002525,0.00358,-2.040528e-05,0.000354,-0.002102,0.000363,0.000258,0.037411,0.0009,0.000283,0.092658,0.000213,0.000206,0.021932,-0.000114,9.7e-05,-0.011706,155.038835,124.004263,15969,434.048544,158.774929,44707,0.998587,0.000491,102.854482,0.001561,15.0,1962.0,3.933333,0.000327,2.0,509.0,5.0,0.000802,3.0,514.0,3.666667,0.000802,3.0,514.0,3.666667,0.00105,8.0,1301.0,3.875,0.001089,9.0,1556.0,5.111111,0.00109,10.0,1561.0,4.7,0.00109,10.0,1561.0,4.7,0.001401,11.0,1631.0,4.545455,0.001561,13.0,1933.0,4.384615
4,0-62,0.001747,0.001894,-1.057099e-08,0.000144,-2e-06,0.003257,-2e-06,0.000247,-0.000281,0.000254,0.000188,0.044783,0.000397,0.00013,0.069901,0.000191,8.3e-05,0.033565,-0.000109,7.6e-05,-0.019206,123.846591,102.407501,21797,343.221591,158.054066,60407,0.999619,0.000258,175.932865,0.001124,3.369865e-05,0.000279,0.000573,0.00078,-1e-05,0.000195,-0.000173,0.00036,0.000175,0.006128,0.000414,7.2e-05,0.007042,0.000195,1.8e-05,0.00331,-0.00014,6.6e-05,-0.002377,108.882353,112.990532,1851.0,349.352941,147.725227,5939.0,0.999454,0.000307,16.990718,0.001378,1.1e-05,0.000233,0.000397,0.000966,-8e-06,0.000163,-0.000298,0.000364,0.000203,0.013087,0.000519,0.000138,0.0187,0.000196,2.4e-05,0.00704,-0.000136,6.6e-05,-0.004895,117.0,99.328028,4212,391.944444,123.180227,14110,0.999518,0.000257,35.982653,0.001435,1.43102e-05,0.000195,0.000787,0.001516,8e-06,0.000206,0.000433,0.000298,0.000208,0.01641,0.000469,0.000143,0.025789,0.000209,7.1e-05,0.011515,-0.000126,8.3e-05,-0.006946,85.618182,93.493413,4709,339.945455,129.29659,18697,0.999473,0.000243,54.971001,0.001512,5.518365e-06,0.000178,0.000403,0.001832,-3e-06,0.000216,-0.000239,0.000247,0.000204,0.018038,0.000435,0.00015,0.031759,0.000204,6.2e-05,0.014872,-0.000123,7.7e-05,-0.008997,103.356164,97.63742,7545,373.465753,152.658759,27263,0.999463,0.000216,72.960818,0.00152,7.24993e-06,0.000162,...,0.000244,-0.000279,0.000252,0.000188,0.02492,0.000425,0.000142,0.04211,0.000184,7.6e-05,0.018182,-0.000116,7.4e-05,-0.011514,106.767677,97.226417,10570,407.212121,163.30454,40314,0.999513,0.000218,98.951821,0.001571,2e-06,0.000148,0.000274,0.002461,-3e-06,0.000231,-0.00031,0.00023,0.000186,0.026236,0.000414,0.000137,0.047192,0.000174,7.6e-05,0.019859,-0.000108,7.3e-05,-0.01226,116.95614,102.595726,13333,388.394737,164.649352,44277,0.999575,0.000259,113.951545,0.001609,4e-06,0.000139,0.000491,0.002927,2.213193e-06,0.000253,0.000299,0.000242,0.000193,0.032718,0.000395,0.000137,0.053347,0.000187,8.8e-05,0.02522,-0.000117,8e-05,-0.015757,131.4
74074,109.275622,17749,371.266667,162.610706,50121,0.999618,0.000259,134.948413,0.001617,4e-06,0.000132,0.000533,0.002975,4.03335e-06,0.000244,0.000605,0.000248,0.000187,0.03721,0.000389,0.000133,0.058384,0.000185,8.6e-05,0.027691,-0.000112,7.9e-05,-0.016736,126.686667,105.347118,19003,364.1,156.327343,54615,0.999611,0.000246,149.941716,0.000871,22.0,1791.0,4.045455,0.000348,3.0,40.0,4.0,0.00036,4.0,43.0,3.5,0.000395,6.0,162.0,3.666667,0.000452,10.0,1216.0,5.2,0.000453,11.0,1219.0,4.909091,0.000493,12.0,1451.0,4.916667,0.000498,14.0,1458.0,4.428571,0.00055,16.0,1570.0,4.5,0.000551,18.0,1574.0,4.166667


# テストデータ

In [55]:
# Load the test rows and collect the distinct stock ids they cover.
test = pd.read_csv(f'{data_dir}test.csv')
test_ids = test['stock_id'].unique()

In [56]:
%%time
# Run the notebook's `preprocessor` helper over every test stock id (test mode).
df_test = preprocessor(list_stock_ids= test_ids, is_train = False)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Wall time: 251 ms


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.1s finished


In [57]:
# Left-join the engineered features onto the test rows by row_id.
df_test = pd.merge(test, df_test, on='row_id', how='left')

# ターゲットエンコーディング

In [58]:
from sklearn.model_selection import KFold

# The stock id is the part of row_id before the first '-'.
for _frame in (df_train, df_test):
    _frame['stock_id'] = _frame['row_id'].apply(lambda row_id: row_id.split('-')[0])

# Test rows: encode each stock with its mean target over the whole training set.
stock_id_target_mean = df_train.groupby('stock_id')['target'].mean()
df_test['stock_id_target_enc'] = df_test['stock_id'].map(stock_id_target_mean)

# Training rows: out-of-fold encoding, so each row's encoding is computed
# from the other folds only and never from its own target.
kf = KFold(n_splits=10, shuffle=True, random_state=55)
tmp = np.full(df_train.shape[0], np.nan)
for fit_idx, enc_idx in kf.split(df_train):
    fold_target_mean = df_train.iloc[fit_idx].groupby('stock_id')['target'].mean()
    tmp[enc_idx] = df_train['stock_id'].iloc[enc_idx].map(fold_target_mean)
df_train['stock_id_target_enc'] = tmp

ModuleNotFoundError: No module named 'sklearn.utils'

In [None]:
# Preview the training frame after adding the target-encoding column.
df_train.head()

In [None]:
# Preview the test frame after adding the target-encoding column.
df_test.head()

In [None]:
# Flag controlling whether feature importances are computed and displayed.
# The competition's test data is hidden, so importances are only shown when
# running locally; that case is detected by the test frame having exactly 3 rows.
DO_FEAT_IMP = len(df_test) == 3

# LightGBM

In [None]:
import lightgbm as lgbm

In [None]:
# ref https://www.kaggle.com/corochann/permutation-importance-for-feature-selection-part1
def calc_model_importance(model, feature_names=None, importance_type='gain'):
    """Return a model's feature importances as a one-column DataFrame.

    Args:
        model: Object exposing ``feature_importance(importance_type=...)``.
        feature_names: Labels used as the frame's index (default: None).
        importance_type: Forwarded unchanged to ``model.feature_importance``.

    Returns:
        DataFrame with a single ``'importance'`` column, sorted ascending
        by importance.
    """
    raw_importance = model.feature_importance(importance_type=importance_type)
    importance_df = pd.DataFrame(
        raw_importance, index=feature_names, columns=['importance']
    )
    return importance_df.sort_values('importance')

def plot_importance(importance_df, title='', save_filepath=None, figsize=(8, 12)):
    """Draw a horizontal bar chart of an importance frame.

    Args:
        importance_df: DataFrame to plot with ``DataFrame.plot.barh``.
        title: Plot title; skipped when empty.
        save_filepath: When given, the figure is saved to this path;
            otherwise it is shown.
        figsize: Figure size passed to ``plt.subplots``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    importance_df.plot.barh(ax=ax)
    if title:
        ax.set_title(title)
    fig.tight_layout()
    if save_filepath is not None:
        fig.savefig(save_filepath)
    else:
        plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [None]:
# stock_id was extracted from row_id as a string; convert it to int in both frames.
for _frame in (df_train, df_test):
    _frame['stock_id'] = _frame['stock_id'].astype(int)

In [None]:
# Target vector, and the feature matrix with the key and target columns removed.
y = df_train['target']
X = df_train.drop(columns=['row_id', 'target'])

In [None]:
def rmspe(y_true, y_pred):
    """Root mean squared percentage error: sqrt(mean(((y_true - y_pred) / y_true) ** 2))."""
    relative_error = (y_true - y_pred) / y_true
    return np.sqrt(np.mean(np.square(relative_error)))

def feval_RMSPE(preds, lgbm_train):
    """Custom evaluation function returning RMSPE in ``(name, value, is_higher_better)`` form.

    Args:
        preds: Predicted values.
        lgbm_train: Dataset-like object whose ``get_label()`` returns the true values.

    Returns:
        Tuple ``('RMSPE', score rounded to 5 decimals, False)``.
    """
    score = rmspe(y_true=lgbm_train.get_label(), y_pred=preds)
    is_higher_better = False
    return 'RMSPE', round(score, 5), is_higher_better

# LightGBM training parameters.
params = dict(
    objective='rmse',
    metric='rmse',
    boosting_type='gbdt',
    early_stopping_rounds=30,
    learning_rate=0.01,
    lambda_l1=1,
    lambda_l2=1,
    feature_fraction=0.8,
    bagging_fraction=0.8,
)

In [None]:
# 5-fold shuffled cross-validation splitter with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=55)

# Accumulators for the cross-validation run.
models, gain_importance_list, split_importance_list = [], [], []
scores = 0.0
oof = pd.DataFrame()

In [None]:
%%time
# Cross-validated LightGBM training: one model per fold, with the mean
# validation RMSPE accumulated in `scores`.
n_folds = kf.get_n_splits()
for fold, (trn_idx, val_idx) in enumerate(kf.split(X, y)):
    print(f'Fold: {fold+1}')
    # kf.split yields positional indices, so rows are selected by position (.iloc);
    # label-based .loc only gives the same rows on a default RangeIndex.
    X_train, y_train = X.iloc[trn_idx], y.iloc[trn_idx]
    X_valid, y_valid = X.iloc[val_idx], y.iloc[val_idx]

    # To optimise RMSPE, the RMSE loss is weighted by 1 / y_i^2 (see the discussion below).
    # https://www.kaggle.com/c/optiver-realized-volatility-prediction/discussion/250324
    weights = 1/np.square(y_train)
    lgbm_train = lgbm.Dataset(X_train, y_train, weight=weights)

    weights = 1/np.square(y_valid)
    lgbm_valid = lgbm.Dataset(X_valid, y_valid, reference=lgbm_train, weight=weights)

    model = lgbm.train(params=params,
                  train_set=lgbm_train,
                  valid_sets=[lgbm_train, lgbm_valid],
                  num_boost_round=5000,
                  feval=feval_RMSPE,
                  verbose_eval=100,
                  categorical_feature = ['stock_id']
                 )

    y_pred = model.predict(X_valid, num_iteration=model.best_iteration)

    # Keep the unrounded fold score for averaging; round only for display.
    fold_rmspe = rmspe(y_true = y_valid, y_pred = y_pred)
    RMSPE = round(fold_rmspe, 3)
    print(f'Performance of the　prediction: , RMSPE: {RMSPE}')

    # Keep scores and models; divide by the splitter's fold count rather than a literal 5.
    scores += fold_rmspe / n_folds
    models.append(model)
    print("*" * 100)

    # --- calc model feature importance ---
    if DO_FEAT_IMP:
        feature_names = X_train.columns.values.tolist()
        gain_importance_df = calc_model_importance(
            model, feature_names=feature_names, importance_type='gain')
        gain_importance_list.append(gain_importance_df)

        split_importance_df = calc_model_importance(
            model, feature_names=feature_names, importance_type='split')
        split_importance_list.append(split_importance_df)

In [None]:
# Display the RMSPE score accumulated across the cross-validation folds.
scores

In [None]:
def calc_mean_importance(importance_df_list):
    """Average per-fold feature importances, aligned by feature label.

    Each input frame is indexed by feature and may list the features in a
    different row order (e.g. each fold sorted by its own importance), so the
    values are aligned on the index before averaging instead of being averaged
    row-by-row.

    Args:
        importance_df_list: Non-empty list of DataFrames with an
            ``'importance'`` column. Index labels are assumed to be unique
            within each frame.

    Returns:
        A copy of the first frame, restricted to its own features, whose
        ``'importance'`` column holds the mean across all frames, sorted
        ascending by that mean.

    Raises:
        IndexError: If ``importance_df_list`` is empty.
    """
    first_df = importance_df_list[0]
    # axis=1 concatenation aligns rows on the index labels, not on position.
    per_fold = pd.concat(
        [df['importance'] for df in importance_df_list], axis=1
    )
    mean_df = first_df.copy()
    # Series assignment aligns on mean_df's index, so labels stay attached to their values.
    mean_df['importance'] = per_fold.mean(axis=1)
    return mean_df.sort_values('importance')

In [None]:
if DO_FEAT_IMP:
    # Average the per-fold gain importances, plot them, then export them as CSV
    # with the feature labels in a 'feature_names' column.
    mean_gain_df = calc_mean_importance(gain_importance_list)
    plot_importance(mean_gain_df, title='Model feature importance by gain')
    mean_gain_df = mean_gain_df.rename_axis('feature_names').reset_index()
    mean_gain_df.to_csv('gain_importance_mean.csv', index=False)

In [None]:
if DO_FEAT_IMP:
    # Average the per-fold split importances, plot them, then export them as CSV
    # with the feature labels in a 'feature_names' column.
    mean_split_df = calc_mean_importance(split_importance_list)
    plot_importance(mean_split_df, title='Model feature importance by split')
    mean_split_df = mean_split_df.rename_axis('feature_names').reset_index()
    mean_split_df.to_csv('split_importance_mean.csv', index=False)

In [None]:
# List the test frame's columns.
df_test.columns

In [None]:
# List the training frame's columns.
df_train.columns

In [None]:
# Test feature matrix (identifier columns removed) and the submission skeleton
# that holds only row_id.
X_test = df_test.drop(columns=['time_id', 'row_id'])
y_pred = df_test.loc[:, ['row_id']]
X_test

In [None]:
# Average the fold models' predictions on the test features.
# Columns are selected via X (the frame the models were trained on), in its column
# order, rather than via X_valid, which only exists as a leftover of the training loop.
feature_columns = X.columns
target = np.zeros(len(X_test))
for model in models:
    pred = model.predict(X_test[feature_columns], num_iteration=model.best_iteration)
    target += pred / len(models)

y_pred = y_pred.assign(target=target)
y_pred

In [None]:
# Write the predictions to submission.csv without the index column.
y_pred.to_csv('submission.csv',index = False)