In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')


  from pandas.core import (


In [2]:
def log_return(list_stock_prices):
    return np.log(list_stock_prices).diff()
def realized_volatility(series):
    return np.sqrt(np.sum(series**2))
def flatten_name(prefix, src_names):
    ret = []
    for c in src_names:
        if c[0] in ['time_id', 'stock_id']:
            ret.append(c[0])
        else:
            ret.append('.'.join([prefix] + list(c)))
    return ret

In [15]:
def preprocessor_book(stock_id):
        
        book_dtypes = {
            'stock_id':np.uint16, 
            'time_id': np.uint16,
            'seconds_in_bucket': np.uint16,
            'bid_price1': np.float32,
            'ask_price1': np.float32,
            'bid_price2': np.float32,
            'ask_price2': np.float32,
            'bid_size1': np.uint32,
            'ask_size1': np.uint32,
            'bid_size2': np.uint32,
            'ask_size2': np.uint32,
        }

        dir_path=f"individual_book_train/stock_{stock_id}.csv"
        data=pd.read_csv(dir_path)
        for column, dtype in book_dtypes.items():
            data[column] = data[column].astype(dtype)
        

        return data

In [25]:
def feature_generator(stock_id,forecast_period=60):
    
    def difference(series):
        return np.max(series)-np.min(series)
    
    data=preprocessor_book(stock_id=stock_id)
    data['wap1'] =(data['bid_price1'] * data['ask_size1'] + data['ask_price1'] * data['bid_size1'])/(data['bid_size1'] + data['ask_size1'])
    data['log_return1'] = data.groupby('time_id')['wap1'].transform(log_return)
    data['wap2'] = (data['bid_price2'] * data['ask_size2'] + data['ask_price2'] * data['bid_size2'])/(data['bid_size2'] + data['ask_size2'])
    data['log_return2'] = data.groupby('time_id')['wap2'].transform(log_return)

    data['wap_balance'] = abs(data['wap1'] - data['wap2'])
    data["BidAskSpread"]=(data['ask_price1'] / data['bid_price1'])-1
    
    data['bid_spread'] = data['bid_price1'] - data['bid_price2']
    data['ask_spread'] = data['ask_price1'] - data['ask_price2']
    data['total_volume'] = (data['ask_size1'] + data['ask_size2']) + (data['bid_size1'] + data['bid_size2'])
    data['volume_imbalance'] = abs((data['ask_size1'] + data['ask_size2']) - (data['bid_size1'] + data['bid_size2']))
    data['log_return_ask1'] = data.groupby(['time_id'])['ask_price1'].transform(log_return)
    data['log_return_ask2'] = data.groupby(['time_id'])['ask_price2'].transform(log_return)
    data['log_return_bid1'] = data.groupby(['time_id'])['bid_price1'].transform(log_return)
    data['log_return_bid2'] = data.groupby(['time_id'])['bid_price2'].transform(log_return)

    data_test=data[data["seconds_in_bucket"]>=600-forecast_period]
    data_train=data[data["seconds_in_bucket"]<600-forecast_period]
    #dict for aggregate
    create_feature_dict = {
    'seconds_in_bucket': ['count'],
    'wap1': [np.nanmean, np.nanstd,difference],
    'wap2': [np.nanmean, np.nanstd,difference],
    'log_return1': [np.nansum, realized_volatility],
    'log_return2': [np.nansum, realized_volatility],
    'log_return_ask1': [np.nansum, realized_volatility],
    'log_return_ask2': [np.nansum, realized_volatility],
    'log_return_bid1': [np.nansum, realized_volatility],
    'log_return_bid2': [np.nansum, realized_volatility],
    'wap_balance': [np.nanmean, np.nanstd],
    'BidAskSpread': [np.nanmax, np.nanmean,difference],
    'bid_spread': [np.nanmax, np.nanmean,difference],
    'ask_spread': [np.nanmax, np.nanmean,difference],
    'total_volume': [np.nansum, np.nanmean,difference],
    'volume_imbalance': [np.nansum, np.nanmean,np.nanmax]
}


    df_feature = pd.DataFrame(data_train.groupby(['time_id'],sort=False).agg(create_feature_dict)).reset_index()
    df_feature.columns = flatten_name('book', df_feature.columns)
    df_feature["missing_values"]=pd.DataFrame(data_train.groupby(['time_id'],sort=False).apply(lambda x: 540-len(x))).reset_index().iloc[:,1]
    for start_time in [250,350,450]:
        d = data_train[(data_train['seconds_in_bucket'] > start_time)].groupby('time_id').agg(create_feature_dict).reset_index(drop=False)
        d.columns = flatten_name(f'book_{start_time/60}to_9', d.columns)
        df_feature = pd.merge(df_feature, d, on='time_id', how='left')
    create_target_dict = {
        'log_return1':[realized_volatility],
            }
    target = pd.DataFrame(data_test.dropna(subset=["log_return1"]).groupby(['time_id'],sort=False).agg(create_target_dict)).reset_index()
    target.columns = flatten_name('book', target.columns)
    df_feature = pd.merge(df_feature,target,how='left',left_on='time_id',right_on=f'time_id')
    df_feature.rename(columns={'book.log_return1.realized_volatility_y': 'target'}, inplace=True)
    df_feature["stock_id"]=stock_id
    return df_feature

In [26]:
df_feat=feature_generator(1)
df_feat

Unnamed: 0,time_id,book.seconds_in_bucket.count,book.wap1.nanmean,book.wap1.nanstd,book.wap1.difference,book.wap2.nanmean,book.wap2.nanstd,book.wap2.difference,book.log_return1.nansum,book.log_return1.realized_volatility_x,book.log_return2.nansum,book.log_return2.realized_volatility,book.log_return_ask1.nansum,book.log_return_ask1.realized_volatility,book.log_return_ask2.nansum,book.log_return_ask2.realized_volatility,book.log_return_bid1.nansum,book.log_return_bid1.realized_volatility,book.log_return_bid2.nansum,book.log_return_bid2.realized_volatility,book.wap_balance.nanmean,book.wap_balance.nanstd,book.BidAskSpread.nanmax,book.BidAskSpread.nanmean,book.BidAskSpread.difference,book.bid_spread.nanmax,book.bid_spread.nanmean,book.bid_spread.difference,book.ask_spread.nanmax,book.ask_spread.nanmean,book.ask_spread.difference,book.total_volume.nansum,book.total_volume.nanmean,book.total_volume.difference,book.volume_imbalance.nansum,book.volume_imbalance.nanmean,book.volume_imbalance.nanmax,missing_values,book_4.166666666666667to_9.seconds_in_bucket.count,book_4.166666666666667to_9.wap1.nanmean,book_4.166666666666667to_9.wap1.nanstd,book_4.166666666666667to_9.wap1.difference,book_4.166666666666667to_9.wap2.nanmean,book_4.166666666666667to_9.wap2.nanstd,book_4.166666666666667to_9.wap2.difference,book_4.166666666666667to_9.log_return1.nansum,book_4.166666666666667to_9.log_return1.realized_volatility,book_4.166666666666667to_9.log_return2.nansum,book_4.166666666666667to_9.log_return2.realized_volatility,book_4.166666666666667to_9.log_return_ask1.nansum,book_4.166666666666667to_9.log_return_ask1.realized_volatility,book_4.166666666666667to_9.log_return_ask2.nansum,book_4.166666666666667to_9.log_return_ask2.realized_volatility,book_4.166666666666667to_9.log_return_bid1.nansum,book_4.166666666666667to_9.log_return_bid1.realized_volatility,book_4.166666666666667to_9.log_return_bid2.nansum,book_4.166666666666667to_9.log_return_bid2.realized_volatility,book_4.166666666666667to_9.wap_balance.nanmean,book_4.166666666666667to_9.wap_balance.nanstd,book_4.166666666666667to_9.BidAskSpread.nanmax,book_4.166666666666667to_9.BidAskSpread.nanmean,book_4.166666666666667to_9.BidAskSpread.difference,book_4.166666666666667to_9.bid_spread.nanmax,book_4.166666666666667to_9.bid_spread.nanmean,book_4.166666666666667to_9.bid_spread.difference,book_4.166666666666667to_9.ask_spread.nanmax,book_4.166666666666667to_9.ask_spread.nanmean,book_4.166666666666667to_9.ask_spread.difference,book_4.166666666666667to_9.total_volume.nansum,book_4.166666666666667to_9.total_volume.nanmean,book_4.166666666666667to_9.total_volume.difference,book_4.166666666666667to_9.volume_imbalance.nansum,book_4.166666666666667to_9.volume_imbalance.nanmean,book_4.166666666666667to_9.volume_imbalance.nanmax,book_5.833333333333333to_9.seconds_in_bucket.count,book_5.833333333333333to_9.wap1.nanmean,book_5.833333333333333to_9.wap1.nanstd,book_5.833333333333333to_9.wap1.difference,book_5.833333333333333to_9.wap2.nanmean,book_5.833333333333333to_9.wap2.nanstd,book_5.833333333333333to_9.wap2.difference,book_5.833333333333333to_9.log_return1.nansum,book_5.833333333333333to_9.log_return1.realized_volatility,book_5.833333333333333to_9.log_return2.nansum,book_5.833333333333333to_9.log_return2.realized_volatility,book_5.833333333333333to_9.log_return_ask1.nansum,book_5.833333333333333to_9.log_return_ask1.realized_volatility,book_5.833333333333333to_9.log_return_ask2.nansum,book_5.833333333333333to_9.log_return_ask2.realized_volatility,book_5.833333333333333to_9.log_return_bid1.nansum,book_5.833333333333333to_9.log_return_bid1.realized_volatility,book_5.833333333333333to_9.log_return_bid2.nansum,book_5.833333333333333to_9.log_return_bid2.realized_volatility,book_5.833333333333333to_9.wap_balance.nanmean,book_5.833333333333333to_9.wap_balance.nanstd,book_5.833333333333333to_9.BidAskSpread.nanmax,book_5.833333333333333to_9.BidAskSpread.nanmean,book_5.833333333333333to_9.BidAskSpread.difference,book_5.833333333333333to_9.bid_spread.nanmax,book_5.833333333333333to_9.bid_spread.nanmean,book_5.833333333333333to_9.bid_spread.difference,book_5.833333333333333to_9.ask_spread.nanmax,book_5.833333333333333to_9.ask_spread.nanmean,book_5.833333333333333to_9.ask_spread.difference,book_5.833333333333333to_9.total_volume.nansum,book_5.833333333333333to_9.total_volume.nanmean,book_5.833333333333333to_9.total_volume.difference,book_5.833333333333333to_9.volume_imbalance.nansum,book_5.833333333333333to_9.volume_imbalance.nanmean,book_5.833333333333333to_9.volume_imbalance.nanmax,book_7.5to_9.seconds_in_bucket.count,book_7.5to_9.wap1.nanmean,book_7.5to_9.wap1.nanstd,book_7.5to_9.wap1.difference,book_7.5to_9.wap2.nanmean,book_7.5to_9.wap2.nanstd,book_7.5to_9.wap2.difference,book_7.5to_9.log_return1.nansum,book_7.5to_9.log_return1.realized_volatility,book_7.5to_9.log_return2.nansum,book_7.5to_9.log_return2.realized_volatility,book_7.5to_9.log_return_ask1.nansum,book_7.5to_9.log_return_ask1.realized_volatility,book_7.5to_9.log_return_ask2.nansum,book_7.5to_9.log_return_ask2.realized_volatility,book_7.5to_9.log_return_bid1.nansum,book_7.5to_9.log_return_bid1.realized_volatility,book_7.5to_9.log_return_bid2.nansum,book_7.5to_9.log_return_bid2.realized_volatility,book_7.5to_9.wap_balance.nanmean,book_7.5to_9.wap_balance.nanstd,book_7.5to_9.BidAskSpread.nanmax,book_7.5to_9.BidAskSpread.nanmean,book_7.5to_9.BidAskSpread.difference,book_7.5to_9.bid_spread.nanmax,book_7.5to_9.bid_spread.nanmean,book_7.5to_9.bid_spread.difference,book_7.5to_9.ask_spread.nanmax,book_7.5to_9.ask_spread.nanmean,book_7.5to_9.ask_spread.difference,book_7.5to_9.total_volume.nansum,book_7.5to_9.total_volume.nanmean,book_7.5to_9.total_volume.difference,book_7.5to_9.volume_imbalance.nansum,book_7.5to_9.volume_imbalance.nanmean,book_7.5to_9.volume_imbalance.nanmax,target,stock_id
0,5,520,1.003659,0.001203,0.005658,1.003692,0.001242,0.005632,0.002215,0.006001,0.002142,0.007367,0.001898,0.003313,0.002029,0.003248,0.002227,0.003361,2.226747e-03,0.003455,0.000271,0.000204,0.001176,0.000675,0.000915,0.000525,0.000114,0.000459,-0.000066,-0.000105,0.000328,152873,293.986538,1983,858993489841,1.651911e+09,4294967295,20,278,1.003994,0.000803,0.003524,1.004065,0.000822,0.003652,-0.003418,0.004146,-0.003128,0.005143,-0.003068,0.002413,-0.003003,0.002442,-0.002874,0.002489,-0.002874,0.002537,0.000256,0.000195,0.001176,0.000637,0.000915,0.000394,0.000100,0.000328,-0.000066,-0.000103,0.000328,75224,270.589928,663,330712501768,1.189613e+09,4294967293,179,1.003659,0.000557,0.002332,1.003755,0.000647,0.002361,-0.000364,0.003323,-0.000805,0.004118,-0.000327,0.001742,-0.000196,0.001917,-0.000262,0.002047,-0.000196,0.002152,0.000282,0.000214,0.001176,0.000655,0.000850,0.000394,0.000102,0.000328,-0.000066,-0.000103,0.000328,48593,271.469274,663,188978576461,1.055746e+09,4294967292,86,1.003947,0.000468,0.001939,1.004121,0.000597,0.001933,-0.001260,0.002283,-0.001090,0.002715,-0.001111,0.001282,-0.000980,0.001381,-0.001046,0.001257,-0.001046,0.001284,0.000308,0.000229,0.001176,0.000683,0.000784,0.000328,0.000084,0.000262,-0.000066,-0.000085,0.000262,23279,270.686047,596,103079220407,1.198596e+09,4294967291,0.001727,1
1,11,338,1.001762,0.000370,0.001784,1.001766,0.000403,0.001941,-0.000298,0.002148,-0.000549,0.002916,0.000067,0.001544,0.000067,0.001491,-0.000134,0.001583,-1.335472e-04,0.001407,0.000230,0.000167,0.000802,0.000481,0.000602,0.000334,0.000117,0.000268,-0.000067,-0.000107,0.000268,108429,320.795858,830,652835027215,1.931465e+09,4294967295,202,174,1.001855,0.000290,0.001588,1.001840,0.000385,0.001680,-0.000166,0.001581,-0.000318,0.002084,0.000334,0.001162,0.000334,0.001062,0.000334,0.001099,0.000334,0.001050,0.000215,0.000135,0.000801,0.000438,0.000601,0.000334,0.000131,0.000268,-0.000067,-0.000102,0.000268,54794,314.908046,755,313532615702,1.801912e+09,4294967295,109,1.001933,0.000322,0.001261,1.001976,0.000407,0.001503,-0.000031,0.001378,0.000117,0.001769,0.000267,0.000958,0.000267,0.000915,0.000267,0.000939,0.000334,0.000898,0.000225,0.000148,0.000801,0.000458,0.000601,0.000334,0.000142,0.000268,-0.000067,-0.000099,0.000268,33043,303.146789,755,137438959781,1.260908e+09,4294967282,56,1.001890,0.000294,0.001070,1.001835,0.000312,0.001284,-0.001065,0.001140,-0.001190,0.000997,-0.000734,0.000770,-0.000734,0.000734,-0.000868,0.000586,-0.000734,0.000571,0.000212,0.000173,0.000801,0.000538,0.000401,0.000334,0.000142,0.000268,-0.000067,-0.000104,0.000268,17966,320.821429,755,34359744932,6.135669e+08,4294967276,0.000837,1
2,16,312,1.000972,0.000893,0.003273,1.000947,0.000900,0.003458,0.001881,0.002356,0.001916,0.002840,0.001810,0.002264,0.001810,0.002350,0.002001,0.002085,2.001286e-03,0.002268,0.000087,0.000073,0.000667,0.000293,0.000572,0.000286,0.000117,0.000191,-0.000095,-0.000118,0.000191,188854,605.301282,1148,648540056198,2.078654e+09,4294967291,228,176,1.001660,0.000486,0.002074,1.001639,0.000495,0.002325,0.000728,0.001954,0.000943,0.002243,0.000857,0.001828,0.000857,0.001896,0.000857,0.001768,0.000952,0.001914,0.000084,0.000067,0.000572,0.000258,0.000476,0.000286,0.000118,0.000191,-0.000095,-0.000114,0.000095,109377,621.460227,1148,399431956591,2.269500e+09,4294967290,113,1.001935,0.000321,0.001461,1.001907,0.000355,0.001698,0.000440,0.001600,0.000455,0.001832,0.000381,0.001398,0.000285,0.001421,0.000571,0.001431,0.000571,0.001558,0.000085,0.000065,0.000571,0.000269,0.000476,0.000286,0.000122,0.000191,-0.000095,-0.000117,0.000095,68677,607.761062,736,279172869793,2.470556e+09,4294967290,59,1.002106,0.000228,0.001068,1.002087,0.000280,0.001329,-0.000534,0.000904,-0.000537,0.001136,-0.000476,0.000887,-0.000476,0.000975,-0.000285,0.000898,-0.000190,0.000989,0.000087,0.000071,0.000381,0.000235,0.000286,0.000191,0.000115,0.000095,-0.000095,-0.000132,0.000095,37681,638.661017,541,124554052261,2.111086e+09,4294967290,0.000886,1
3,31,152,0.997548,0.000609,0.002284,0.997471,0.000639,0.002682,-0.002105,0.003376,-0.002041,0.004092,-0.002108,0.001678,-0.002035,0.001777,-0.002109,0.002886,-2.036914e-03,0.002939,0.000341,0.000238,0.001676,0.000796,0.001240,0.000726,0.000188,0.000653,-0.000073,-0.000154,0.000508,65168,428.736842,1272,292057778338,1.921433e+09,4294967295,388,52,0.996946,0.000499,0.001568,0.996794,0.000349,0.001681,-0.001403,0.002262,-0.001039,0.002220,-0.001236,0.001071,-0.001164,0.001084,-0.001092,0.002178,-0.001092,0.002257,0.000357,0.000278,0.001676,0.000941,0.001167,0.000726,0.000234,0.000653,-0.000073,-0.000176,0.000435,23628,454.384615,1205,111669148652,2.147484e+09,4294967295,32,0.996860,0.000503,0.001435,0.996685,0.000244,0.001001,-0.000478,0.001606,0.000096,0.001763,-0.000509,0.000759,-0.000800,0.000759,0.000364,0.001047,0.000437,0.001070,0.000412,0.000303,0.001384,0.000764,0.000875,0.000726,0.000322,0.000653,-0.000073,-0.000150,0.000363,11884,371.375000,491,60129544118,1.879048e+09,4294967295,21,0.996978,0.000556,0.001435,0.996734,0.000184,0.000855,-0.000006,0.001313,0.000081,0.000953,-0.000218,0.000724,-0.000364,0.000621,0.000000,0.000763,0.000073,0.000858,0.000478,0.000300,0.000728,0.000676,0.000219,0.000726,0.000377,0.000653,-0.000073,-0.000121,0.000218,8861,421.952381,458,42949674569,2.045223e+09,4294967295,0.001161,1
4,62,201,1.000698,0.000379,0.001978,1.000654,0.000466,0.002938,0.000467,0.002834,-0.000034,0.004271,0.000924,0.001670,0.001065,0.002064,0.000782,0.000755,7.823546e-04,0.000982,0.000324,0.000251,0.001137,0.000725,0.000853,0.000427,0.000118,0.000356,-0.000071,-0.000206,0.000569,54268,269.990050,811,382252092924,1.901752e+09,4294967295,339,109,1.000700,0.000302,0.001216,1.000596,0.000479,0.002161,-0.000407,0.001970,-0.000464,0.003684,0.000071,0.001330,0.000071,0.001636,-0.000426,0.000461,-0.000497,0.000624,0.000350,0.000255,0.001137,0.000733,0.000782,0.000142,0.000108,0.000071,-0.000071,-0.000240,0.000569,24101,221.110092,617,103079226109,9.456810e+08,4294967293,79,1.000671,0.000339,0.001216,1.000486,0.000490,0.002161,-0.000511,0.001686,-0.000377,0.003293,-0.000497,0.001125,-0.000852,0.001294,-0.000355,0.000455,-0.000426,0.000619,0.000384,0.000261,0.001137,0.000768,0.000782,0.000142,0.000122,0.000071,-0.000071,-0.000247,0.000569,17425,220.569620,617,64424518147,8.155002e+08,4294967293,38,1.000510,0.000208,0.000681,1.000279,0.000219,0.001114,-0.000135,0.000883,0.000052,0.001816,0.000639,0.000623,0.000284,0.000550,0.000000,0.000142,0.000000,0.000284,0.000295,0.000236,0.000995,0.000810,0.000355,0.000142,0.000127,0.000071,-0.000071,-0.000122,0.000142,7073,186.131579,468,8589938417,2.260510e+08,4294967278,0.000629,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3825,32751,285,1.000159,0.000403,0.001515,1.000012,0.000379,0.001596,-0.000363,0.003685,-0.000377,0.004903,0.000067,0.001482,0.000000,0.001528,0.000067,0.001811,-2.910383e-11,0.002243,0.000343,0.000266,0.001075,0.000605,0.000941,0.000470,0.000159,0.000403,-0.000067,-0.000118,0.000336,84393,296.115789,691,605590388609,2.124879e+09,4294967295,255,140,1.000107,0.000307,0.001267,1.000049,0.000311,0.001309,0.000554,0.002699,0.000229,0.003334,0.000403,0.001045,0.000403,0.000789,0.000470,0.001507,0.000403,0.001547,0.000315,0.000224,0.001075,0.000656,0.000941,0.000470,0.000168,0.000403,-0.000067,-0.000106,0.000269,42581,304.150000,691,261993006979,1.871379e+09,4294967287,83,1.000166,0.000302,0.001267,0.999995,0.000313,0.001309,-0.000254,0.001992,-0.000561,0.002647,0.000000,0.000930,0.000000,0.000630,0.000134,0.001028,0.000000,0.001062,0.000323,0.000211,0.000941,0.000662,0.000672,0.000403,0.000193,0.000336,-0.000067,-0.000104,0.000269,30770,370.722892,663,128849021180,1.552398e+09,4294967282,33,1.000318,0.000246,0.000856,1.000181,0.000232,0.000920,-0.000035,0.001082,-0.000077,0.001428,0.000000,0.000355,-0.000067,0.000178,0.000067,0.000648,0.000269,0.000685,0.000293,0.000183,0.000941,0.000635,0.000538,0.000336,0.000183,0.000269,-0.000067,-0.000100,0.000202,11473,347.666667,515,30064773901,9.110538e+08,4294967282,0.000533,1
3826,32753,441,1.007107,0.006508,0.021141,1.006926,0.006420,0.021146,0.016325,0.010446,0.016294,0.011507,0.016293,0.008475,0.016292,0.008679,0.016517,0.008397,1.651777e-02,0.008242,0.000400,0.000344,0.001999,0.000929,0.001860,0.000627,0.000148,0.000557,-0.000070,-0.000127,0.000627,251528,570.358277,1892,622770310860,1.412178e+09,4294967295,99,227,1.012540,0.001259,0.006201,1.012309,0.001230,0.006854,0.002076,0.008367,0.002127,0.009736,0.001996,0.006326,0.001996,0.006465,0.001998,0.006570,0.001998,0.006673,0.000471,0.000361,0.001860,0.001121,0.001654,0.000627,0.000165,0.000557,-0.000070,-0.000130,0.000627,110653,487.458150,1341,309237673511,1.362281e+09,4294967295,144,1.012459,0.001127,0.005584,1.012253,0.001099,0.006199,-0.002474,0.007322,-0.001841,0.008379,-0.002678,0.005126,-0.002678,0.005368,-0.002269,0.005506,-0.001926,0.005630,0.000519,0.000385,0.001860,0.001191,0.001654,0.000627,0.000198,0.000557,-0.000070,-0.000136,0.000627,71533,496.756944,1341,249108115839,1.729917e+09,4294967293,67,1.012341,0.000578,0.002755,1.012242,0.000669,0.002845,-0.001333,0.004064,-0.000037,0.005102,-0.001031,0.003097,-0.001031,0.003353,-0.000482,0.003435,-0.000275,0.003563,0.000513,0.000349,0.001655,0.001106,0.001241,0.000557,0.000221,0.000488,-0.000070,-0.000116,0.000348,34171,510.014925,1264,141733924645,2.115432e+09,4294967293,0.002855,1
3827,32758,283,1.000810,0.000571,0.002151,1.000815,0.000587,0.002377,0.001492,0.002914,0.001112,0.004106,0.001071,0.001416,0.001160,0.001886,0.001250,0.001703,1.249984e-03,0.001754,0.000240,0.000168,0.000983,0.000660,0.000626,0.000268,0.000145,0.000179,-0.000089,-0.000124,0.000357,123753,437.289753,797,949187739097,3.354020e+09,4294967289,257,154,1.001217,0.000240,0.000988,1.001216,0.000306,0.001582,0.000235,0.001914,-0.000091,0.003130,0.000178,0.000927,0.000267,0.001367,0.000000,0.001305,0.000089,0.001296,0.000269,0.000185,0.000982,0.000637,0.000625,0.000268,0.000159,0.000179,-0.000089,-0.000128,0.000179,69623,452.097403,790,506806124627,3.290949e+09,4294967289,94,1.001241,0.000252,0.000988,1.001244,0.000252,0.001155,-0.000192,0.001405,0.000052,0.002299,-0.000178,0.000655,-0.000089,0.001012,-0.000268,0.000879,-0.000089,0.000923,0.000265,0.000183,0.000892,0.000636,0.000446,0.000268,0.000163,0.000179,-0.000089,-0.000130,0.000179,42845,455.797872,732,266287968975,2.832851e+09,4294967289,44,1.001136,0.000298,0.000988,1.001272,0.000219,0.000964,0.000105,0.001261,0.000051,0.001705,-0.000267,0.000321,-0.000178,0.000455,-0.000089,0.000557,-0.000089,0.000612,0.000333,0.000188,0.000892,0.000710,0.000357,0.000268,0.000142,0.000179,-0.000089,-0.000122,0.000179,23391,531.613636,732,81604379653,1.854645e+09,4294967281,0.001155,1
3828,32763,388,1.002958,0.001747,0.006537,1.002969,0.001777,0.006824,0.004229,0.003468,0.004146,0.005344,0.004174,0.003845,0.004418,0.004539,0.004175,0.002620,4.298436e-03,0.003102,0.000201,0.000158,0.001227,0.000423,0.001105,0.000615,0.000194,0.000492,-0.000123,-0.000228,0.000615,205326,529.190722,945,627065240314,1.616148e+09,4294967295,152,216,1.004287,0.000593,0.002686,1.004301,0.000667,0.003116,0.000791,0.002445,0.000866,0.004271,0.000490,0.002964,0.000857,0.003372,0.000735,0.001732,0.000736,0.002035,0.000201,0.000156,0.000858,0.000424,0.000736,0.000492,0.000179,0.000369,-0.000123,-0.000237,0.000615,114344,529.370370,945,347892361420,1.610613e+09,4294967294,135,1.004445,0.000444,0.002116,1.004434,0.000549,0.002088,-0.001286,0.002072,-0.000942,0.003556,-0.001224,0.001912,-0.000857,0.002788,-0.001347,0.001465,-0.001347,0.001788,0.000202,0.000162,0.000735,0.000390,0.000613,0.000492,0.000164,0.000369,-0.000123,-0.000235,0.000615,70294,520.696296,945,188978570006,1.399841e+09,4294967290,58,1.004298,0.000320,0.001159,1.004245,0.000356,0.001429,-0.000455,0.001201,0.000316,0.002513,-0.000490,0.000995,-0.000122,0.001572,-0.000490,0.001054,-0.000245,0.001397,0.000179,0.000156,0.000612,0.000363,0.000490,0.000492,0.000172,0.000369,-0.000123,-0.000182,0.000492,33046,569.758621,880,60129547090,1.036716e+09,4294967268,0.001426,1


In [27]:
df_feat["target"].isna().sum()

0

In [19]:
from tqdm import tqdm
non_tradable=[]
for i in tqdm(range(0,127)):
    try:
        dir_path=f"individual_book_train/stock_{i}.csv"
        data=pd.read_csv(dir_path)
    except:
        non_tradable.append(i)

tradable_id=[i for i in range(0,127) if i not in non_tradable]
tradable_id[:15]

100%|██████████| 127/127 [01:21<00:00,  1.55it/s]


[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 46,
 47,
 48,
 50,
 51,
 52,
 53,
 55,
 56,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 66,
 67,
 68,
 69,
 70,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 118,
 119,
 120,
 122,
 123,
 124,
 125,
 126]

In [28]:
def preprocessor(list_stock_ids):
    from joblib import Parallel, delayed 
    df = pd.DataFrame()
    
    def for_joblib(stock_id):
    
            
        df_tmp = feature_generator(stock_id=stock_id)
     
        return pd.concat([df,df_tmp])
    
    df = Parallel(n_jobs=-1, verbose=1)(
        delayed(for_joblib)(stock_id) for stock_id in list_stock_ids
        )

    df =  pd.concat(df,ignore_index = True)
    return df

In [29]:
final_df=preprocessor(tradable_id)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   37.1s
[Parallel(n_jobs=-1)]: Done 112 out of 112 | elapsed:  3.8min finished


In [30]:
final_df.isna().sum().value_counts()

0     111
7      33
13      3
78      1
Name: count, dtype: int64

In [31]:
final_df

Unnamed: 0,time_id,book.seconds_in_bucket.count,book.wap1.nanmean,book.wap1.nanstd,book.wap1.difference,book.wap2.nanmean,book.wap2.nanstd,book.wap2.difference,book.log_return1.nansum,book.log_return1.realized_volatility_x,book.log_return2.nansum,book.log_return2.realized_volatility,book.log_return_ask1.nansum,book.log_return_ask1.realized_volatility,book.log_return_ask2.nansum,book.log_return_ask2.realized_volatility,book.log_return_bid1.nansum,book.log_return_bid1.realized_volatility,book.log_return_bid2.nansum,book.log_return_bid2.realized_volatility,book.wap_balance.nanmean,book.wap_balance.nanstd,book.BidAskSpread.nanmax,book.BidAskSpread.nanmean,book.BidAskSpread.difference,book.bid_spread.nanmax,book.bid_spread.nanmean,book.bid_spread.difference,book.ask_spread.nanmax,book.ask_spread.nanmean,book.ask_spread.difference,book.total_volume.nansum,book.total_volume.nanmean,book.total_volume.difference,book.volume_imbalance.nansum,book.volume_imbalance.nanmean,book.volume_imbalance.nanmax,missing_values,book_4.166666666666667to_9.seconds_in_bucket.count,book_4.166666666666667to_9.wap1.nanmean,book_4.166666666666667to_9.wap1.nanstd,book_4.166666666666667to_9.wap1.difference,book_4.166666666666667to_9.wap2.nanmean,book_4.166666666666667to_9.wap2.nanstd,book_4.166666666666667to_9.wap2.difference,book_4.166666666666667to_9.log_return1.nansum,book_4.166666666666667to_9.log_return1.realized_volatility,book_4.166666666666667to_9.log_return2.nansum,book_4.166666666666667to_9.log_return2.realized_volatility,book_4.166666666666667to_9.log_return_ask1.nansum,book_4.166666666666667to_9.log_return_ask1.realized_volatility,book_4.166666666666667to_9.log_return_ask2.nansum,book_4.166666666666667to_9.log_return_ask2.realized_volatility,book_4.166666666666667to_9.log_return_bid1.nansum,book_4.166666666666667to_9.log_return_bid1.realized_volatility,book_4.166666666666667to_9.log_return_bid2.nansum,book_4.166666666666667to_9.log_return_bid2.realized_volatility,book_4.166666666666667to_9.wap_balance.nanmean,book_4.166666666666667to_9.wap_balance.nanstd,book_4.166666666666667to_9.BidAskSpread.nanmax,book_4.166666666666667to_9.BidAskSpread.nanmean,book_4.166666666666667to_9.BidAskSpread.difference,book_4.166666666666667to_9.bid_spread.nanmax,book_4.166666666666667to_9.bid_spread.nanmean,book_4.166666666666667to_9.bid_spread.difference,book_4.166666666666667to_9.ask_spread.nanmax,book_4.166666666666667to_9.ask_spread.nanmean,book_4.166666666666667to_9.ask_spread.difference,book_4.166666666666667to_9.total_volume.nansum,book_4.166666666666667to_9.total_volume.nanmean,book_4.166666666666667to_9.total_volume.difference,book_4.166666666666667to_9.volume_imbalance.nansum,book_4.166666666666667to_9.volume_imbalance.nanmean,book_4.166666666666667to_9.volume_imbalance.nanmax,book_5.833333333333333to_9.seconds_in_bucket.count,book_5.833333333333333to_9.wap1.nanmean,book_5.833333333333333to_9.wap1.nanstd,book_5.833333333333333to_9.wap1.difference,book_5.833333333333333to_9.wap2.nanmean,book_5.833333333333333to_9.wap2.nanstd,book_5.833333333333333to_9.wap2.difference,book_5.833333333333333to_9.log_return1.nansum,book_5.833333333333333to_9.log_return1.realized_volatility,book_5.833333333333333to_9.log_return2.nansum,book_5.833333333333333to_9.log_return2.realized_volatility,book_5.833333333333333to_9.log_return_ask1.nansum,book_5.833333333333333to_9.log_return_ask1.realized_volatility,book_5.833333333333333to_9.log_return_ask2.nansum,book_5.833333333333333to_9.log_return_ask2.realized_volatility,book_5.833333333333333to_9.log_return_bid1.nansum,book_5.833333333333333to_9.log_return_bid1.realized_volatility,book_5.833333333333333to_9.log_return_bid2.nansum,book_5.833333333333333to_9.log_return_bid2.realized_volatility,book_5.833333333333333to_9.wap_balance.nanmean,book_5.833333333333333to_9.wap_balance.nanstd,book_5.833333333333333to_9.BidAskSpread.nanmax,book_5.833333333333333to_9.BidAskSpread.nanmean,book_5.833333333333333to_9.BidAskSpread.difference,book_5.833333333333333to_9.bid_spread.nanmax,book_5.833333333333333to_9.bid_spread.nanmean,book_5.833333333333333to_9.bid_spread.difference,book_5.833333333333333to_9.ask_spread.nanmax,book_5.833333333333333to_9.ask_spread.nanmean,book_5.833333333333333to_9.ask_spread.difference,book_5.833333333333333to_9.total_volume.nansum,book_5.833333333333333to_9.total_volume.nanmean,book_5.833333333333333to_9.total_volume.difference,book_5.833333333333333to_9.volume_imbalance.nansum,book_5.833333333333333to_9.volume_imbalance.nanmean,book_5.833333333333333to_9.volume_imbalance.nanmax,book_7.5to_9.seconds_in_bucket.count,book_7.5to_9.wap1.nanmean,book_7.5to_9.wap1.nanstd,book_7.5to_9.wap1.difference,book_7.5to_9.wap2.nanmean,book_7.5to_9.wap2.nanstd,book_7.5to_9.wap2.difference,book_7.5to_9.log_return1.nansum,book_7.5to_9.log_return1.realized_volatility,book_7.5to_9.log_return2.nansum,book_7.5to_9.log_return2.realized_volatility,book_7.5to_9.log_return_ask1.nansum,book_7.5to_9.log_return_ask1.realized_volatility,book_7.5to_9.log_return_ask2.nansum,book_7.5to_9.log_return_ask2.realized_volatility,book_7.5to_9.log_return_bid1.nansum,book_7.5to_9.log_return_bid1.realized_volatility,book_7.5to_9.log_return_bid2.nansum,book_7.5to_9.log_return_bid2.realized_volatility,book_7.5to_9.wap_balance.nanmean,book_7.5to_9.wap_balance.nanstd,book_7.5to_9.BidAskSpread.nanmax,book_7.5to_9.BidAskSpread.nanmean,book_7.5to_9.BidAskSpread.difference,book_7.5to_9.bid_spread.nanmax,book_7.5to_9.bid_spread.nanmean,book_7.5to_9.bid_spread.difference,book_7.5to_9.ask_spread.nanmax,book_7.5to_9.ask_spread.nanmean,book_7.5to_9.ask_spread.difference,book_7.5to_9.total_volume.nansum,book_7.5to_9.total_volume.nanmean,book_7.5to_9.total_volume.difference,book_7.5to_9.volume_imbalance.nansum,book_7.5to_9.volume_imbalance.nanmean,book_7.5to_9.volume_imbalance.nanmax,target,stock_id
0,5,274,1.003792,0.000671,0.003486,1.003724,0.000767,0.003735,0.002150,0.004323,0.001810,0.006440,0.002113,0.002230,0.002319,0.002469,2.115126e-03,0.002391,0.001806,0.002729,0.000392,0.000298,0.001394,0.000864,0.001034,0.000672,0.000173,0.000621,-0.000052,-0.000153,0.000517,91752,334.861314,728,644245095236,2.351259e+09,4294967295,266,133,1.003906,0.000273,0.001341,1.003819,0.000446,0.002061,-0.000798,0.002888,-0.000843,0.004493,-0.000360,0.001620,-0.000154,0.001450,-0.000361,0.001459,-0.000155,0.001972,0.000375,0.000272,0.001340,0.000867,0.000980,0.000672,0.000208,0.000621,-0.000052,-0.000173,0.000466,42072,316.330827,663,304942682262,2.292802e+09,4294967294,88,1.003912,0.000262,0.001115,1.003821,0.000482,0.002061,-0.000169,0.002119,-0.000247,0.003819,-0.000257,0.001423,-0.000051,0.001266,-0.000206,0.001117,-0.000258,0.001930,0.000405,0.000287,0.001135,0.000767,0.000774,0.000672,0.000247,0.000621,-0.000052,-0.000196,0.000414,24937,283.375000,525,240518167681,2.733161e+09,4294967294,40.0,1.003765,0.000179,0.000671,1.003662,0.000498,0.001956,-5.033757e-04,0.001188,-0.000448,0.003069,0.000155,0.001104,0.000360,0.001051,-0.000206,0.000831,-0.000464,0.001697,0.000380,0.000283,0.001135,0.000816,0.000774,0.000620,0.000304,0.000569,-0.000052,-0.000189,0.000362,12004.0,300.100000,525.0,9.019431e+10,2.254858e+09,4.294967e+09,0.001246,0
1,11,178,1.000179,0.000199,0.001040,1.000170,0.000257,0.001491,0.000720,0.000918,0.001262,0.002110,0.000702,0.000920,0.000802,0.000706,1.154305e-03,0.000795,0.001054,0.001080,0.000194,0.000141,0.000904,0.000397,0.000753,0.000401,0.000106,0.000351,-0.000050,-0.000136,0.000301,71571,402.084270,830,485331289793,2.726580e+09,4294967292,362,111,1.000293,0.000122,0.000705,1.000280,0.000230,0.001366,0.000647,0.000627,0.001122,0.001564,0.000752,0.000565,0.000802,0.000610,0.000903,0.000572,0.000803,0.001004,0.000186,0.000144,0.000552,0.000334,0.000402,0.000401,0.000092,0.000351,-0.000050,-0.000130,0.000251,48470,436.666667,830,283467833766,2.553764e+09,4294967270,79,1.000337,0.000114,0.000595,1.000300,0.000250,0.001366,0.000433,0.000587,0.000596,0.001497,0.000502,0.000507,0.000702,0.000572,0.000502,0.000454,0.000502,0.000909,0.000223,0.000142,0.000552,0.000367,0.000402,0.000401,0.000093,0.000351,-0.000050,-0.000125,0.000251,39217,496.417722,830,223338292965,2.827067e+09,4294967270,31.0,1.000382,0.000131,0.000582,1.000329,0.000316,0.001366,4.384232e-04,0.000466,0.000598,0.001319,0.000351,0.000446,0.000602,0.000431,0.000401,0.000369,0.000401,0.000843,0.000212,0.000141,0.000552,0.000333,0.000301,0.000401,0.000094,0.000351,-0.000050,-0.000159,0.000251,12809.0,413.193548,499.0,8.589934e+10,2.770947e+09,4.294967e+09,0.000779,0
2,16,180,0.999636,0.000756,0.003423,0.999773,0.000750,0.003979,-0.002223,0.002346,-0.002469,0.004511,-0.002347,0.001677,-0.002347,0.002671,-2.205301e-03,0.001457,-0.002542,0.001961,0.000326,0.000244,0.001150,0.000732,0.000719,0.000670,0.000189,0.000622,-0.000048,-0.000195,0.000670,74847,415.816667,626,429496727291,2.386093e+09,4294967295,360,74,0.998994,0.000687,0.002736,0.999223,0.000746,0.003788,-0.001704,0.001829,-0.003262,0.003241,-0.001486,0.001391,-0.001868,0.002383,-0.001630,0.001203,-0.002207,0.001485,0.000415,0.000281,0.001150,0.000728,0.000670,0.000670,0.000117,0.000622,-0.000048,-0.000268,0.000670,31544,426.270270,582,107374186336,1.451003e+09,4294967295,55,0.998743,0.000581,0.002370,0.999082,0.000797,0.003414,-0.002467,0.001242,-0.002573,0.002741,-0.002347,0.001157,-0.002682,0.001714,-0.002109,0.001133,-0.002686,0.001410,0.000413,0.000306,0.000958,0.000686,0.000479,0.000670,0.000113,0.000622,-0.000048,-0.000217,0.000622,26058,473.781818,476,64424513656,1.171355e+09,4294967279,35.0,0.998397,0.000408,0.001629,0.998649,0.000654,0.003323,-1.632301e-03,0.001111,-0.002809,0.002477,-0.001677,0.001002,-0.001533,0.001365,-0.001678,0.001047,-0.002303,0.001357,0.000339,0.000285,0.000815,0.000610,0.000336,0.000670,0.000149,0.000622,-0.000048,-0.000197,0.000622,16327.0,466.485714,476.0,3.006478e+10,8.589936e+08,4.294967e+09,0.000323,0
3,31,113,0.998899,0.000720,0.002397,0.998677,0.000637,0.002609,-0.001717,0.002399,-0.001039,0.003378,-0.001435,0.001133,-0.001388,0.001273,-1.806316e-03,0.001880,-0.001807,0.002504,0.000391,0.000250,0.001624,0.000846,0.001299,0.000694,0.000192,0.000648,-0.000046,-0.000112,0.000416,48457,428.823009,737,214748371689,1.900428e+09,4294967294,427,56,0.998564,0.000389,0.001413,0.998488,0.000438,0.002065,-0.000079,0.001589,0.000441,0.002535,-0.000093,0.000926,-0.000046,0.000998,0.000000,0.001584,-0.000046,0.001927,0.000371,0.000232,0.001624,0.000789,0.001299,0.000648,0.000139,0.000602,-0.000046,-0.000116,0.000278,22387,399.767857,497,146028887761,2.607659e+09,4294967294,37,0.998514,0.000468,0.001413,0.998633,0.000455,0.002065,-0.000050,0.001512,0.000637,0.002283,0.000371,0.000861,0.000278,0.000937,-0.000093,0.001582,-0.000139,0.001924,0.000310,0.000237,0.001624,0.000839,0.001299,0.000648,0.000180,0.000602,-0.000046,-0.000085,0.000231,15470,418.108108,490,94489280640,2.553764e+09,4294967286,11.0,0.998290,0.000239,0.000584,0.998746,0.000090,0.000331,5.852527e-04,0.000344,0.000132,0.000455,0.000000,0.000113,0.000046,0.000139,0.000000,0.000000,0.000000,0.000066,0.000456,0.000272,0.001066,0.001028,0.000093,0.000093,0.000088,0.000046,-0.000046,-0.000050,0.000046,5945.0,540.454545,287.0,3.435974e+10,3.123613e+09,4.294967e+09,0.000931,0
4,62,165,0.999628,0.000248,0.000918,0.999624,0.000323,0.001147,-0.000845,0.001587,-0.000403,0.003193,-0.000466,0.000935,-0.000373,0.000978,-2.798411e-04,0.001231,-0.000233,0.001284,0.000245,0.000185,0.000793,0.000397,0.000700,0.000466,0.000190,0.000420,-0.000047,-0.000107,0.000373,57293,347.230303,711,459561489111,2.785221e+09,4294967295,375,88,0.999518,0.000195,0.000791,0.999617,0.000285,0.000947,-0.000685,0.001167,-0.000402,0.002328,-0.000280,0.000688,-0.000187,0.000722,-0.000420,0.000911,-0.000560,0.001018,0.000234,0.000181,0.000793,0.000429,0.000607,0.000466,0.000182,0.000420,-0.000047,-0.000113,0.000373,37200,422.727273,628,219043328804,2.489129e+09,4294967293,62,0.999461,0.000182,0.000783,0.999608,0.000278,0.000947,-0.000440,0.001102,-0.000362,0.001717,-0.000186,0.000666,-0.000187,0.000669,-0.000280,0.000794,-0.000187,0.000795,0.000221,0.000196,0.000793,0.000442,0.000607,0.000466,0.000205,0.000420,-0.000047,-0.000120,0.000373,24149,389.500000,628,180388623371,2.909494e+09,4294967293,25.0,0.999536,0.000200,0.000779,0.999821,0.000206,0.000589,-4.457768e-04,0.000909,-0.000421,0.000724,-0.000186,0.000280,-0.000233,0.000291,-0.000280,0.000280,-0.000187,0.000187,0.000350,0.000210,0.000793,0.000575,0.000467,0.000280,0.000194,0.000093,-0.000047,-0.000134,0.000140,10996.0,439.840000,293.0,8.589934e+10,3.435974e+09,4.294967e+09,0.001035,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
428927,32751,278,0.999618,0.000491,0.002485,0.999648,0.000610,0.002838,-0.000841,0.003425,-0.000905,0.005712,-0.001306,0.002077,-0.001371,0.001926,-1.176235e-03,0.002112,-0.000980,0.002334,0.000366,0.000277,0.001570,0.000870,0.001178,0.000686,0.000088,0.000653,-0.000033,-0.000212,0.000555,115411,415.147482,871,790273953089,2.842712e+09,4294967295,262,139,0.999305,0.000356,0.001551,0.999223,0.000427,0.001821,0.000555,0.002819,-0.000348,0.004414,0.000980,0.001418,0.000490,0.001420,0.000621,0.001402,0.000589,0.001602,0.000324,0.000275,0.001570,0.000921,0.001145,0.000686,0.000093,0.000653,-0.000033,-0.000161,0.000425,56361,405.474820,695,425201743811,3.059005e+09,4294967294,84,0.999375,0.000263,0.001010,0.999306,0.000294,0.001484,-0.000419,0.001533,-0.000776,0.002380,-0.000065,0.001155,-0.000065,0.000847,0.000458,0.000801,0.000425,0.001017,0.000300,0.000218,0.001406,0.000838,0.000981,0.000359,0.000067,0.000327,-0.000033,-0.000173,0.000425,33153,394.678571,695,261992996395,3.118964e+09,4294967294,43.0,0.999324,0.000254,0.000941,0.999145,0.000307,0.001484,4.032303e-07,0.001003,-0.000810,0.002038,-0.000065,0.000763,-0.000033,0.000585,-0.000098,0.000614,-0.000131,0.000912,0.000349,0.000241,0.001210,0.000759,0.000785,0.000359,0.000087,0.000327,-0.000033,-0.000138,0.000359,15587.0,362.488372,695.0,1.417339e+11,3.296138e+09,4.294967e+09,0.001374,126
428928,32753,191,1.002089,0.000853,0.003050,1.002215,0.000929,0.003032,0.002199,0.003394,0.002444,0.004442,0.002407,0.001988,0.002441,0.002185,2.614460e-03,0.001886,0.002614,0.002213,0.000301,0.000226,0.001135,0.000694,0.000859,0.000724,0.000132,0.000689,-0.000034,-0.000131,0.000483,45680,239.162304,649,631360166596,3.305551e+09,4294967295,349,99,1.002808,0.000442,0.001883,1.003029,0.000404,0.001833,0.001666,0.002857,0.001887,0.003047,0.001546,0.001764,0.001581,0.001935,0.001650,0.001564,0.001822,0.001959,0.000305,0.000228,0.001135,0.000719,0.000791,0.000724,0.000148,0.000689,-0.000034,-0.000157,0.000483,21595,218.131313,649,335007434615,3.383913e+09,4294967295,52,1.003070,0.000352,0.001464,1.003269,0.000278,0.001226,0.000528,0.002388,0.000358,0.002396,0.000687,0.001638,0.000378,0.001455,0.000722,0.001235,0.001031,0.001410,0.000299,0.000242,0.001135,0.000696,0.000791,0.000483,0.000200,0.000448,-0.000034,-0.000127,0.000483,7050,135.576923,634,150323851676,2.890843e+09,4294967295,22.0,1.003288,0.000303,0.000817,1.003396,0.000239,0.000771,3.951282e-04,0.001227,0.000116,0.001448,0.000515,0.000632,0.000446,0.000446,0.000584,0.000660,0.000584,0.000923,0.000299,0.000285,0.000859,0.000609,0.000516,0.000483,0.000111,0.000448,-0.000034,-0.000135,0.000310,3972.0,180.545455,634.0,4.294967e+10,1.952258e+09,4.294967e+09,0.002307,126
428929,32758,224,1.000989,0.000398,0.002092,1.000916,0.000512,0.002298,0.000237,0.002919,0.001449,0.005683,0.000345,0.001505,0.000591,0.001816,1.085473e-03,0.002543,0.000888,0.002733,0.000374,0.000288,0.001283,0.000737,0.001135,0.000790,0.000175,0.000741,-0.000049,-0.000181,0.000444,76979,343.656250,925,880468243061,3.930662e+09,4294967295,316,141,1.001149,0.000358,0.001588,1.001095,0.000486,0.001709,0.000781,0.002584,0.001076,0.005034,0.000838,0.001219,0.000937,0.001560,0.001530,0.001905,0.001333,0.002201,0.000397,0.000305,0.001283,0.000700,0.001135,0.000790,0.000226,0.000741,-0.000049,-0.000185,0.000395,47257,335.156028,925,545460815197,3.868516e+09,4294967295,116,1.001210,0.000326,0.001588,1.001133,0.000498,0.001670,0.000082,0.002402,0.000055,0.004694,-0.000197,0.000885,0.000049,0.001376,0.000247,0.001815,0.000049,0.002054,0.000401,0.000305,0.001283,0.000679,0.001135,0.000790,0.000244,0.000741,-0.000049,-0.000191,0.000395,38433,331.318966,925,459561473157,3.961737e+09,4294967295,54.0,1.001409,0.000148,0.000811,1.001290,0.000469,0.001326,1.372473e-04,0.000958,0.000940,0.002340,0.000000,0.000406,0.000246,0.000706,0.000937,0.001191,0.000740,0.001224,0.000408,0.000341,0.001135,0.000588,0.000790,0.000790,0.000354,0.000741,-0.000049,-0.000195,0.000346,17383.0,321.907407,902.0,2.276333e+11,4.215431e+09,4.294967e+09,0.001096,126
428930,32763,362,1.001871,0.000430,0.002401,1.001862,0.000472,0.002472,0.000914,0.003602,0.000812,0.005224,0.000658,0.002666,0.000394,0.003036,7.235296e-04,0.002592,0.000658,0.002723,0.000233,0.000180,0.001117,0.000540,0.000985,0.000790,0.000143,0.000724,-0.000066,-0.000134,0.000461,153026,422.723757,983,476741396028,1.316965e+09,4294967295,178,187,1.001949,0.000397,0.001903,1.001952,0.000411,0.001985,-0.000352,0.002696,-0.000282,0.003513,-0.000329,0.002336,-0.000394,0.002414,-0.000329,0.001981,-0.000263,0.001956,0.000221,0.000173,0.001117,0.000524,0.000985,0.000790,0.000156,0.000724,-0.000066,-0.000109,0.000329,82803,442.796791,842,244813151175,1.309161e+09,4294967295,121,1.001866,0.000382,0.001903,1.001877,0.000426,0.001985,-0.001220,0.001980,-0.001283,0.002221,-0.001248,0.001668,-0.001248,0.001904,-0.000723,0.001432,-0.000789,0.001485,0.000182,0.000133,0.001117,0.000488,0.000985,0.000461,0.000126,0.000395,-0.000066,-0.000114,0.000329,59136,488.727273,678,111669164540,9.228857e+08,4294967295,62.0,1.001935,0.000324,0.001879,1.001897,0.000380,0.001777,-2.559906e-04,0.001574,-0.000432,0.001554,-0.000263,0.001468,-0.000263,0.001616,-0.000329,0.001045,-0.000395,0.000952,0.000183,0.000136,0.001117,0.000482,0.000985,0.000461,0.000140,0.000395,-0.000066,-0.000130,0.000329,29564.0,476.838710,615.0,3.865471e+10,6.234631e+08,4.294967e+09,0.000654,126


In [14]:
final_df.to_csv("merge_regression_data.csv")

In [24]:
final_df.to_pickle("merge_regression_data.pkl")