In [1]:
import psycopg2
import pandas as pd
import numpy as np
import re
from datetime import datetime
import pytz
import re
from operator import itemgetter

# Let pandas render up to 999 columns when a frame is displayed.
pd.set_option('display.max_columns', 999)

# Feature names accepted by get_batch_data (it asserts membership in this list).
# The list is assembled group by group; element order is significant only for
# readability, membership is what callers rely on.
VALID_FEATURES = (
    # identifier and time fields
    ['pair_id', 'open_time', 'close_time', 'dow', 'tod']
    # candle prices
    + ['open', 'high', 'low', 'close']
    # trade count and volume fields
    + ['number_of_trades', 'volume', 'quote_asset_volume',
       'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']
    # ma / sup / res, each with the suffixes 14, 30 and 90
    + [f'{prefix}{suffix}' for prefix in ('ma', 'sup', 'res') for suffix in (14, 30, 90)]
    # atr / rsi, each as the plain name plus _diff and _ma14 variants
    + [f'{base}{variant}' for base in ('atr', 'rsi') for variant in ('', '_diff', '_ma14')]
    # trend flags
    + ['trend_up', 'trend_up3', 'trend_up14', 'trend_up30']
    # cs_* flags
    + ['cs_ss', 'cs_ssr', 'cs_hm', 'cs_hmr', 'cs_brh', 'cs_buh', 'cs_ebu', 'cs_ebr']
)

# Name used both as the cache sub-directory and as the prefix of the raw pickle files.
prepath = 't4'

In [2]:
# local postgres connection only
# The connection string is produced by the project-local `hidden` module (not shown
# in this notebook) -- presumably so that credentials stay out of the notebook itself.
import hidden
sql_string = hidden.psycopg2(hidden.secrets())
print('PostgreSQL connection data taken from hidden.py')

# Make the connection and cursor
# NOTE(review): only the connection is created here; no cursor is opened in this cell.
# `conn` is the module-level connection that get_batch_data passes to pandas.
# connect_timeout=3 bounds how long the connection attempt may take (seconds).
conn = psycopg2.connect(sql_string, connect_timeout=3)

PostgreSQL connection data taken from hidden.py


In [3]:
# These are only used for caching
import hashlib
import pickle

def get_batch_data(base_coin, quote_coin, start_time, end_time, columns, batch_size=30000, extra_rows=0, use_cache=True):
    """Load one batch of candle features for a trading pair, optionally via a pickle cache.

    Each entry of ``columns`` is either ``<feature>`` or ``<feature>_<shift>`` where
    ``<feature>`` must be listed in ``VALID_FEATURES`` and ``<shift>`` is a 1-3 digit
    number of rows to look back: the query result is ordered newest-first and the
    feature is shifted by ``-shift``, so ``close_3`` on a row holds the ``close`` of the
    row three candles earlier.

    Args:
        base_coin: value matched against ``pairs.coin1``.
        quote_coin: value matched against ``pairs.coin2``.
        start_time: inclusive lower bound on ``open_time`` for the batch.
        end_time: inclusive upper bound on ``open_time``. When ``use_cache`` is true it
            must be a ``'%Y-%m-%d'`` string because it is parsed to decide cacheability.
        columns: iterable of column names as described above.
        batch_size: maximum number of candles fetched inside the time range.
        extra_rows: additional candles fetched before ``start_time`` on top of the
            largest requested shift; when non-zero an ``extra_df`` is returned.
        use_cache: allow reading/writing ``../data/{prepath}/<md5>.pkl``. The cache is
            only used when ``end_time`` lies in the past (UTC).

    Returns:
        Tuple ``(df, ref_df, extra_df, batch_close_time)``:
        ``df`` holds the requested columns for rows inside the time range, indexed by
        ``candle_open_time`` ascending; ``ref_df`` holds open/high/low/close for the
        same range indexed by ``open_time``; ``extra_df`` is ``None`` when
        ``extra_rows == 0``, otherwise all fetched rows plus a boolean ``is_extra``
        column; ``batch_close_time`` is the maximum ``close_time`` fetched.

    Raises:
        ValueError: a column name does not match the ``<feature>[_<shift>]`` pattern.
        AssertionError: the parsed feature is not in ``VALID_FEATURES``.
    """
    import os  # local import: only needed to create the cache directory

    # `columns` is walked twice (parsing and cache key), so materialise it once.
    columns = list(columns)

    # Validate everything up front so bad input fails before any database work.
    name_pattern = re.compile('^(?P<feature>[a-z][a-z0-9]*(?:_[a-z][a-z0-9]*)*)(?:_(?P<shift>[0-9]{1,3}))?$')
    column_info = []
    for name in columns:
        parsed = name_pattern.match(name)
        if parsed is None:
            raise ValueError(f"Invalid column name: {name!r}")
        feature, shift = parsed.groups()
        assert feature in VALID_FEATURES, f"Invalid feature: {feature} for {name}"
        column_info.append((name, feature, 0 if shift is None else int(shift)))
    max_lookback = max((shift for _, _, shift in column_info), default=0)

    # The key layout is kept exactly as before so existing cache files remain valid.
    s = base_coin+quote_coin+f"{start_time}"+f"{end_time}"+"".join(columns)+str(batch_size)+str(extra_rows)
    h = hashlib.md5(s.encode('utf-8')).hexdigest()
    cache_path = f'../data/{prepath}/{h}.pkl'

    # Only cache ranges that end in the past; a range reaching "now" can still grow.
    should_use_cache = use_cache and (datetime.strptime(end_time, '%Y-%m-%d') < datetime.now(pytz.timezone('UTC')).replace(tzinfo=None))
    if should_use_cache:
        try:
            with open(cache_path, 'rb') as fp:
                print(f"Using cache file: {cache_path}")
                # NOTE: unpickling can execute arbitrary code; this must only ever read
                # files written by this function into the local cache directory.
                return pickle.load(fp)
        except FileNotFoundError:
            print("No cache found")
        except Exception as exc:
            # Best effort: a damaged or incompatible cache file is rebuilt from the
            # database. KeyboardInterrupt/SystemExit are deliberately not swallowed.
            print(f"Cache file {cache_path} could not be read ({exc!r}); rebuilding")

    # Values are bound by the driver (pyformat placeholders) rather than spliced into
    # the SQL text. The first sub-select fetches the lookback rows before start_time,
    # the second the batch itself.
    sql = """
select
    f.*, open_time, open, high, low, close, volume, close_time, quote_asset_volume, number_of_trades, taker_buy_base_asset_volume, taker_buy_quote_asset_volume
from
    (
        (select * from (select id as the_pair from pairs p where p.coin1=%(base_coin)s and p.coin2=%(quote_coin)s) z inner join candlestick_15m on the_pair=pair_id where close_time notnull and open_time < %(start_time)s order by open_time desc limit %(lookback_rows)s)
            union all
        (select * from (select id as the_pair from pairs p where p.coin1=%(base_coin)s and p.coin2=%(quote_coin)s) z inner join candlestick_15m on the_pair=pair_id where close_time notnull and open_time between %(start_time)s and %(end_time)s order by open_time limit %(batch_size)s)
    ) cm
inner join
    features f on f.pair_id = cm.pair_id and f.candle_open_time = cm.open_time
order by
    open_time desc
"""
    query_params = {
        'base_coin': base_coin,
        'quote_coin': quote_coin,
        'start_time': start_time,
        'end_time': end_time,
        # int() keeps numpy integers (which the driver cannot adapt) working.
        'lookback_rows': int(max_lookback + extra_rows),
        'batch_size': int(batch_size),
    }
    base_df = pd.read_sql_query(sql, conn, params=query_params)

    # Build all requested columns in one go instead of inserting them one at a time,
    # which fragments the frame when hundreds of columns are requested.
    # Rows are newest-first, so shifting by -k pulls the value from k candles earlier.
    shifted = {'candle_open_time': base_df['candle_open_time']}
    for name, feature, shift in column_info:
        shifted[name] = base_df[feature].shift(-shift)
    df = pd.DataFrame(shifted, index=base_df.index)

    if extra_rows == 0:
        extra_df = None
    else:
        # Keep every fetched row and flag the ones outside the requested range.
        extra_df = df.copy()
        extra_df['is_extra'] = ~extra_df['candle_open_time'].between(start_time, end_time)
        extra_df = extra_df.set_index('candle_open_time').sort_index()

    # Drop the lookback rows; they only existed to feed the shifted columns.
    df = df[df['candle_open_time'].between(start_time, end_time)]
    df = df.set_index('candle_open_time').sort_index()

    ref_df = base_df[['open_time','open', 'high', 'low', 'close']].copy()
    ref_df = ref_df[ref_df['open_time'].between(start_time, end_time)]
    ref_df = ref_df.set_index('open_time').sort_index()

    batch_close_time = base_df['close_time'].max()

    if should_use_cache:
        print(f"Saving cache to: {cache_path}")
        # A fresh checkout may not have the cache directory yet.
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        with open(cache_path, 'wb') as fp:
            pickle.dump((df, ref_df, extra_df, batch_close_time), fp, protocol=4)

    return df, ref_df, extra_df, batch_close_time

In [4]:
# Requested once, without any lookback suffix.
static_columns = ['open']

# Requested once per lookback step, as "<name>_<step>".
repeat_columns = [
    'high', 'low', 'close',
    'rsi',
    'trend_up3', 'trend_up14',
    'cs_ss', 'cs_ssr',
    'cs_hm', 'cs_hmr',
    'cs_ebu', 'cs_ebr',
]

# Full column request: the static columns followed by every repeated feature at
# steps 0..23, grouped by feature.
columns = static_columns + [
    f"{feature}_{step}"
    for feature in repeat_columns
    for step in range(24)
]

In [5]:
#columns

In [6]:
#len(repeat_columns)

In [7]:
# Numeric pair_id per "<base><quote>" symbol. The three pairs listed first get
# 0, 1, 2; the same coins in the opposite order get -1, -2, -3.
mapping = dict(
    ETHBTC=0,
    BTCUSDT=1,
    ETHUSDT=2,
    BTCETH=-1,
    USDTBTC=-2,
    USDTETH=-3,
)

In [8]:
from itertools import permutations

# Fetch the raw, not-yet-processed frame for every ordered pair of the three coins
# and store it as one pickle per pair; this is meant to be the same data the
# simulator receives.

for a, b in permutations(['ETH', 'BTC', 'USDT'], 2):
    df, ref_df, _, _ = get_batch_data(
        a,
        b,
        start_time='2018-01-01',
        end_time='2021-08-07',
        columns=columns,
        batch_size=500000,
        extra_rows=0,
        use_cache=True,
    )
    df = df.astype(float)

    raw_path = f'../data/{prepath}_{a}_{b}.pkl'
    with open(raw_path, 'wb') as fp:
        pickle.dump(df, fp, protocol=4)

Using cache file: ../data/t4/18286b0cdf17618dcef3abe64553d1c3.pkl
Using cache file: ../data/t4/a982a909803ec8b7ca4388a04d3c245c.pkl
Using cache file: ../data/t4/8c4308c7bcf0dd7b87615dddc7959f43.pkl
Using cache file: ../data/t4/82c7aca7ca8c83fea53c3f75cea04ded.pkl
Using cache file: ../data/t4/eb955af59f73f18597e288971c1e1b95.pkl
Using cache file: ../data/t4/b4a7412a0032e36e55f96287d222edb2.pkl


In [9]:
# additional ETL here, like normalization, scaling and other stuff, also create rows for target use

def build_training_frame(raw_df, pair_id, n_lags=24, n_horizons=4):
    """Turn one pair's raw feature frame into scaled ``tc2x_*`` inputs plus the label ``y``.

    Args:
        raw_df: float frame indexed by ``candle_open_time`` (ascending) containing
            ``open`` and the ``<feature>_<lag>`` columns requested earlier in the notebook.
        pair_id: value written to the ``pair_id`` index level.
        n_lags: number of lag steps available for the ``cs_*`` flag columns.
        n_horizons: number of future candles inspected for the label.

    Returns:
        Frame indexed by ``(pair_id, candle_open_time)`` holding only the ``tc2x_*``
        columns and ``y``; rows containing any NaN are dropped.
    """
    # Collect every derived column first and build the frame once. Inserting
    # hundreds of columns one by one fragments the frame (PerformanceWarning), and
    # assigning onto a column-subset slice raises SettingWithCopyWarning.
    derived = {}
    for c in raw_df.columns:
        if c.startswith("trend_up"):
            # (x - 0.5) * 2: a 0/1 flag becomes -1/+1.
            derived['tc2x_' + c.replace('_up', '')] = (raw_df[c] - 0.5) * 2
        elif c.startswith("rsi_"):
            # (x - 50) / 50: centres the value on 0.
            derived['tc2x_' + c] = (raw_df[c] - 50) / 50
        elif c.startswith(("high_", "low_", "close_")):
            # Price relative to the current open, scaled by 30.
            derived['tc2x_' + c] = ((raw_df[c] / raw_df['open']) - 1) * 30

    # Each pair of flag columns is merged into one signed column: first minus second.
    flag_pairs = [('tc2x_ss','cs_ss','cs_ssr'),('tc2x_hm','cs_hm','cs_hmr'),('tc2x_eb','cs_ebu','cs_ebr')]
    for newp, p1, p2 in flag_pairs:
        for i in range(n_lags):
            derived[f"{newp}_{i}"] = raw_df[f"{p1}_{i}"] - raw_df[f"{p2}_{i}"]

    # Label: fraction of the next n_horizons candles whose close is below the
    # current open. Rows without a full set of future candles are dropped here and
    # therefore end up NaN in `y`, which removes them in the final dropna().
    future_returns = pd.concat(
        [(raw_df['close_0'].shift(-i) / raw_df['open']) - 1 for i in range(1, n_horizons + 1)],
        axis=1,
    ).dropna()
    derived['y'] = (future_returns < 0).astype(int).sum(axis=1) / n_horizons

    out = pd.DataFrame(derived, index=raw_df.index)
    out['pair_id'] = pair_id
    return out.reset_index().set_index(['pair_id','candle_open_time']).dropna()


dfs = []

for a,b in permutations(['ETH','BTC','USDT'],2):
    # NOTE: pickle.load must only be used on the files written by the download cell.
    with open(f'../data/{prepath}_{a}_{b}.pkl', 'rb') as fp:
        raw = pickle.load(fp).astype(float)

    dfs.append(build_training_frame(raw, mapping[f"{a}{b}"]))

df = pd.concat(dfs)
# Release the per-pair frames now that they are combined.
dfs = []

In [10]:
# Display the combined frame for a visual check of the columns and values.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,tc2x_high_0,tc2x_high_1,tc2x_high_2,tc2x_high_3,tc2x_high_4,tc2x_high_5,tc2x_high_6,tc2x_high_7,tc2x_high_8,tc2x_high_9,tc2x_high_10,tc2x_high_11,tc2x_high_12,tc2x_high_13,tc2x_high_14,tc2x_high_15,tc2x_high_16,tc2x_high_17,tc2x_high_18,tc2x_high_19,tc2x_high_20,tc2x_high_21,tc2x_high_22,tc2x_high_23,tc2x_low_0,tc2x_low_1,tc2x_low_2,tc2x_low_3,tc2x_low_4,tc2x_low_5,tc2x_low_6,tc2x_low_7,tc2x_low_8,tc2x_low_9,tc2x_low_10,tc2x_low_11,tc2x_low_12,tc2x_low_13,tc2x_low_14,tc2x_low_15,tc2x_low_16,tc2x_low_17,tc2x_low_18,tc2x_low_19,tc2x_low_20,tc2x_low_21,tc2x_low_22,tc2x_low_23,tc2x_close_0,tc2x_close_1,tc2x_close_2,tc2x_close_3,tc2x_close_4,tc2x_close_5,tc2x_close_6,tc2x_close_7,tc2x_close_8,tc2x_close_9,tc2x_close_10,tc2x_close_11,tc2x_close_12,tc2x_close_13,tc2x_close_14,tc2x_close_15,tc2x_close_16,tc2x_close_17,tc2x_close_18,tc2x_close_19,tc2x_close_20,tc2x_close_21,tc2x_close_22,tc2x_close_23,tc2x_rsi_0,tc2x_rsi_1,tc2x_rsi_2,tc2x_rsi_3,tc2x_rsi_4,tc2x_rsi_5,tc2x_rsi_6,tc2x_rsi_7,tc2x_rsi_8,tc2x_rsi_9,tc2x_rsi_10,tc2x_rsi_11,tc2x_rsi_12,tc2x_rsi_13,tc2x_rsi_14,tc2x_rsi_15,tc2x_rsi_16,tc2x_rsi_17,tc2x_rsi_18,tc2x_rsi_19,tc2x_rsi_20,tc2x_rsi_21,tc2x_rsi_22,tc2x_rsi_23,tc2x_trend3_0,tc2x_trend3_1,tc2x_trend3_2,tc2x_trend3_3,tc2x_trend3_4,tc2x_trend3_5,tc2x_trend3_6,tc2x_trend3_7,tc2x_trend3_8,tc2x_trend3_9,tc2x_trend3_10,tc2x_trend3_11,tc2x_trend3_12,tc2x_trend3_13,tc2x_trend3_14,tc2x_trend3_15,tc2x_trend3_16,tc2x_trend3_17,tc2x_trend3_18,tc2x_trend3_19,tc2x_trend3_20,tc2x_trend3_21,tc2x_trend3_22,tc2x_trend3_23,tc2x_trend14_0,tc2x_trend14_1,tc2x_trend14_2,tc2x_trend14_3,tc2x_trend14_4,tc2x_trend14_5,tc2x_trend14_6,tc2x_trend14_7,tc2x_trend14_8,tc2x_trend14_9,tc2x_trend14_10,tc2x_trend14_11,tc2x_trend14_12,tc2x_trend14_13,tc2x_trend14_14,tc2x_trend14_15,tc2x_trend14_16,tc2x_trend14_17,tc2x_trend14_18,tc2x_trend14_19,tc2x_trend14_20,tc2x_trend14_21,tc2x_trend14_22,tc2x_trend14_23,tc2x_ss_0,tc2x_ss_1,tc2x_ss_2,tc2x_ss_3,tc2x_ss_4,tc2x
_ss_5,tc2x_ss_6,tc2x_ss_7,tc2x_ss_8,tc2x_ss_9,tc2x_ss_10,tc2x_ss_11,tc2x_ss_12,tc2x_ss_13,tc2x_ss_14,tc2x_ss_15,tc2x_ss_16,tc2x_ss_17,tc2x_ss_18,tc2x_ss_19,tc2x_ss_20,tc2x_ss_21,tc2x_ss_22,tc2x_ss_23,tc2x_hm_0,tc2x_hm_1,tc2x_hm_2,tc2x_hm_3,tc2x_hm_4,tc2x_hm_5,tc2x_hm_6,tc2x_hm_7,tc2x_hm_8,tc2x_hm_9,tc2x_hm_10,tc2x_hm_11,tc2x_hm_12,tc2x_hm_13,tc2x_hm_14,tc2x_hm_15,tc2x_hm_16,tc2x_hm_17,tc2x_hm_18,tc2x_hm_19,tc2x_hm_20,tc2x_hm_21,tc2x_hm_22,tc2x_hm_23,tc2x_eb_0,tc2x_eb_1,tc2x_eb_2,tc2x_eb_3,tc2x_eb_4,tc2x_eb_5,tc2x_eb_6,tc2x_eb_7,tc2x_eb_8,tc2x_eb_9,tc2x_eb_10,tc2x_eb_11,tc2x_eb_12,tc2x_eb_13,tc2x_eb_14,tc2x_eb_15,tc2x_eb_16,tc2x_eb_17,tc2x_eb_18,tc2x_eb_19,tc2x_eb_20,tc2x_eb_21,tc2x_eb_22,tc2x_eb_23,y
pair_id,candle_open_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1
0,2018-01-01 00:00:00,0.163476,0.038070,-0.013996,-0.106931,-0.132124,-0.091255,-0.216101,-0.216101,-0.271526,-0.300638,-0.292800,-0.363901,-0.325831,-0.367260,-0.218900,-0.153398,-0.180831,-0.143321,-0.216101,-0.218900,-0.245773,-0.202105,-0.202665,-0.202665,-0.062703,-0.134363,-0.279924,-0.212742,-0.443959,-0.284403,-0.349905,-0.349345,-0.437241,-0.415407,-0.441720,-0.490427,-0.496025,-0.524017,-0.475311,-0.277684,-0.362781,-0.288881,-0.353824,-0.382936,-0.474191,-0.327511,-0.327511,-0.358862,0.157877,0.000000,-0.076139,-0.156757,-0.199306,-0.221700,-0.263688,-0.279924,-0.326951,-0.382376,-0.358862,-0.412608,-0.440040,-0.417087,-0.401411,-0.254171,-0.264248,-0.252491,-0.216661,-0.270406,-0.341507,-0.245213,-0.236816,-0.262569,0.348764,0.218204,0.141091,0.048851,-0.003989,-0.032009,-0.084716,-0.104979,-0.163432,-0.234926,-0.215946,-0.283530,-0.318257,-0.303618,-0.294005,-0.197361,-0.207869,-0.200266,-0.177936,-0.225601,-0.289710,-0.239695,-0.235335,-0.255844,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.00
0,2018-01-01 00:15:00,0.098580,0.007240,-0.117516,-0.169312,-0.261766,-0.286828,-0.246171,-0.370370,-0.370370,-0.425508,-0.454470,-0.446672,-0.517405,-0.479532,-0.520746,-0.373155,-0.307992,-0.335283,-0.297967,-0.370370,-0.373155,-0.399889,-0.356447,-0.357004,-0.155945,-0.217767,-0.289056,-0.433862,-0.367029,-0.597048,-0.438318,-0.503481,-0.502924,-0.590365,-0.568644,-0.594820,-0.643275,-0.648844,-0.676692,-0.628237,-0.431635,-0.516291,-0.442774,-0.507380,-0.536341,-0.627123,-0.481203,-0.481203,-0.064049,0.001671,-0.155388,-0.231133,-0.311334,-0.353662,-0.375940,-0.417711,-0.433862,-0.480646,-0.535784,-0.512392,-0.565859,-0.593150,-0.570315,-0.554720,-0.408243,-0.418268,-0.406572,-0.370927,-0.424394,-0.495127,-0.399332,-0.390977,0.254367,0.348764,0.218204,0.141091,0.048851,-0.003989,-0.032009,-0.084716,-0.104979,-0.163432,-0.234926,-0.215946,-0.283530,-0.318257,-0.303618,-0.294005,-0.197361,-0.207869,-0.200266,-0.177936,-0.225601,-0.289710,-0.239695,-0.235335,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.75
0,2018-01-01 00:30:00,0.117209,0.162977,0.071442,-0.053581,-0.105488,-0.198140,-0.223256,-0.182512,-0.306977,-0.306977,-0.362233,-0.391256,-0.383442,-0.454326,-0.416372,-0.457674,-0.309767,-0.244465,-0.271814,-0.234419,-0.306977,-0.309767,-0.336558,-0.293023,-0.062512,-0.092093,-0.154047,-0.225488,-0.370605,-0.303628,-0.534140,-0.375070,-0.440372,-0.439814,-0.527442,-0.505674,-0.531907,-0.580465,-0.586047,-0.613953,-0.565395,-0.368372,-0.453209,-0.379535,-0.444279,-0.473302,-0.564279,-0.418047,-0.008372,0.000000,0.065860,-0.091535,-0.167442,-0.247814,-0.290233,-0.312558,-0.354419,-0.370605,-0.417488,-0.472744,-0.449302,-0.502884,-0.530233,-0.507349,-0.491721,-0.344930,-0.354977,-0.343256,-0.307535,-0.361116,-0.432000,-0.336000,0.242462,0.254367,0.348764,0.218204,0.141091,0.048851,-0.003989,-0.032009,-0.084716,-0.104979,-0.163432,-0.234926,-0.215946,-0.283530,-0.318257,-0.303618,-0.294005,-0.197361,-0.207869,-0.200266,-0.177936,-0.225601,-0.289710,-0.239695,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
0,2018-01-01 00:45:00,0.071463,0.126177,0.171958,0.080396,-0.044664,-0.096587,-0.189266,-0.214389,-0.173633,-0.298135,-0.298135,-0.353408,-0.382439,-0.374623,-0.445528,-0.407563,-0.448878,-0.300927,-0.235605,-0.262962,-0.225556,-0.298135,-0.300927,-0.327725,-0.185916,-0.053597,-0.083188,-0.145159,-0.216623,-0.361782,-0.294785,-0.525366,-0.366249,-0.431570,-0.431012,-0.518666,-0.496892,-0.523132,-0.571705,-0.577288,-0.605203,-0.556631,-0.359549,-0.444411,-0.370715,-0.435478,-0.464510,-0.555514,0.018424,0.000558,0.008933,0.074813,-0.082629,-0.158559,-0.238955,-0.281386,-0.303718,-0.345591,-0.361782,-0.408680,-0.463952,-0.440503,-0.494101,-0.521458,-0.498567,-0.482934,-0.336100,-0.346150,-0.334425,-0.298694,-0.352291,-0.423196,0.258627,0.242462,0.254367,0.348764,0.218204,0.141091,0.048851,-0.003989,-0.032009,-0.084716,-0.104979,-0.163432,-0.234926,-0.215946,-0.283530,-0.318257,-0.303618,-0.294005,-0.197361,-0.207869,-0.200266,-0.177936,-0.225601,-0.289710,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
0,2018-01-01 01:00:00,0.076354,0.018949,0.073568,0.119269,0.027867,-0.096976,-0.148807,-0.241324,-0.266404,-0.225719,-0.350004,-0.350004,-0.405179,-0.434161,-0.426358,-0.497139,-0.459241,-0.500483,-0.352790,-0.287583,-0.314892,-0.277551,-0.350004,-0.352790,-0.043472,-0.237980,-0.105893,-0.135431,-0.197295,-0.268633,-0.413539,-0.346660,-0.576837,-0.417998,-0.483206,-0.482648,-0.570149,-0.548413,-0.574608,-0.623096,-0.628669,-0.656536,-0.608048,-0.411310,-0.496024,-0.422457,-0.487107,-0.516088,-0.015048,-0.033997,-0.051832,-0.043472,0.022293,-0.134874,-0.210671,-0.290927,-0.333284,-0.355577,-0.397377,-0.413539,-0.460355,-0.515531,-0.492123,-0.545627,-0.572936,-0.550085,-0.534480,-0.387902,-0.397934,-0.386230,-0.350561,-0.404065,0.276296,0.258627,0.242462,0.254367,0.348764,0.218204,0.141091,0.048851,-0.003989,-0.032009,-0.084716,-0.104979,-0.163432,-0.234926,-0.215946,-0.283530,-0.318257,-0.303618,-0.294005,-0.197361,-0.207869,-0.200266,-0.177936,-0.225601,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
-2,2021-08-06 22:00:00,0.000000,0.025456,-0.076368,-0.038184,-0.101824,-0.127280,-0.140008,-0.012728,-0.241833,-0.216377,-0.280017,-0.241833,-0.241833,-0.190921,-0.241833,-0.152737,-0.152737,-0.280017,-0.203649,-0.101824,-0.025456,0.140008,0.063640,0.000000,-0.140008,-0.165465,-0.190921,-0.152737,-0.229105,-0.254561,-0.305473,-0.305473,-0.381841,-0.381841,-0.394569,-0.407297,-0.343657,-0.330929,-0.356385,-0.318201,-0.394569,-0.661858,-0.509122,-0.381841,-0.178193,-0.127280,-0.178193,-0.305473,-0.127280,0.000000,-0.127280,-0.089096,-0.114552,-0.190921,-0.241833,-0.152737,-0.280017,-0.330929,-0.369113,-0.292745,-0.318201,-0.254561,-0.292745,-0.241833,-0.152737,-0.330929,-0.496394,-0.343657,-0.101824,-0.063640,-0.012728,-0.063640,-0.018596,-0.282815,-0.124226,-0.261995,-0.146302,-0.333910,-0.189688,-0.290084,-0.304288,-0.336753,-0.340253,-0.301561,-0.363528,-0.271656,-0.397327,-0.247729,-0.363122,-0.396869,-0.511011,-0.333457,-0.426580,-0.316782,-0.406549,-0.358871,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,1.00
-2,2021-08-06 22:15:00,0.000000,0.127823,0.153387,0.051129,0.089476,0.025565,0.000000,-0.012782,0.115040,-0.115040,-0.089476,-0.153387,-0.115040,-0.115040,-0.063911,-0.115040,-0.025565,-0.025565,-0.153387,-0.076694,0.025565,0.102258,0.268428,0.191734,-0.178952,-0.012782,-0.038347,-0.063911,-0.025565,-0.102258,-0.127823,-0.178952,-0.178952,-0.255646,-0.255646,-0.268428,-0.281210,-0.217299,-0.204516,-0.230081,-0.191734,-0.268428,-0.536856,-0.383468,-0.255646,-0.051129,0.000000,-0.051129,-0.051129,0.000000,0.127823,0.000000,0.038347,0.012782,-0.063911,-0.115040,-0.025565,-0.153387,-0.204516,-0.242863,-0.166170,-0.191734,-0.127823,-0.166170,-0.115040,-0.025565,-0.204516,-0.370686,-0.217299,0.025565,0.063911,0.115040,-0.348766,-0.018596,-0.282815,-0.124226,-0.261995,-0.146302,-0.333910,-0.189688,-0.290084,-0.304288,-0.336753,-0.340253,-0.301561,-0.363528,-0.271656,-0.397327,-0.247729,-0.363122,-0.396869,-0.511011,-0.333457,-0.426580,-0.316782,-0.406549,-1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,1.00
-2,2021-08-06 22:30:00,0.076825,0.051216,0.179257,0.204866,0.102433,0.140845,0.076825,0.051216,0.038412,0.166453,-0.064020,-0.038412,-0.102433,-0.064020,-0.064020,-0.012804,-0.064020,0.025608,0.025608,-0.102433,-0.025608,0.076825,0.153649,0.320102,0.000000,-0.128041,0.038412,0.012804,-0.012804,0.025608,-0.051216,-0.076825,-0.128041,-0.128041,-0.204866,-0.204866,-0.217670,-0.230474,-0.166453,-0.153649,-0.179257,-0.140845,-0.217670,-0.486556,-0.332907,-0.204866,0.000000,0.051216,0.025608,0.000000,0.051216,0.179257,0.051216,0.089629,0.064020,-0.012804,-0.064020,0.025608,-0.102433,-0.153649,-0.192061,-0.115237,-0.140845,-0.076825,-0.115237,-0.064020,0.025608,-0.153649,-0.320102,-0.166453,0.076825,0.115237,-0.056772,-0.348766,-0.018596,-0.282815,-0.124226,-0.261995,-0.146302,-0.333910,-0.189688,-0.290084,-0.304288,-0.336753,-0.340253,-0.301561,-0.363528,-0.271656,-0.397327,-0.247729,-0.363122,-0.396869,-0.511011,-0.333457,-0.426580,-0.316782,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.75
-2,2021-08-06 22:45:00,0.051173,0.051173,0.025586,0.153518,0.179104,0.076759,0.115139,0.051173,0.025586,0.012793,0.140725,-0.089552,-0.063966,-0.127932,-0.089552,-0.089552,-0.038380,-0.089552,0.000000,0.000000,-0.127932,-0.051173,0.051173,0.127932,-0.102345,-0.025586,-0.153518,0.012793,-0.012793,-0.038380,0.000000,-0.076759,-0.102345,-0.153518,-0.153518,-0.230277,-0.230277,-0.243070,-0.255864,-0.191898,-0.179104,-0.204691,-0.166311,-0.243070,-0.511727,-0.358209,-0.230277,-0.025586,-0.089552,0.000000,-0.025586,0.025586,0.153518,0.025586,0.063966,0.038380,-0.038380,-0.089552,0.000000,-0.127932,-0.179104,-0.217484,-0.140725,-0.166311,-0.102345,-0.140725,-0.089552,0.000000,-0.179104,-0.345416,-0.191898,0.051173,-0.322570,-0.056772,-0.348766,-0.018596,-0.282815,-0.124226,-0.261995,-0.146302,-0.333910,-0.189688,-0.290084,-0.304288,-0.336753,-0.340253,-0.301561,-0.363528,-0.271656,-0.397327,-0.247729,-0.363122,-0.396869,-0.511011,-0.333457,-0.426580,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.00


In [11]:
# Label distribution: quantiles of y at 21 evenly spaced points from 0.0 to 1.0 (5% steps).
df['y'].quantile(np.linspace(0.0, 1.0, num=21))

0.00    0.00
0.05    0.00
0.10    0.00
0.15    0.00
0.20    0.00
0.25    0.00
0.30    0.00
0.35    0.25
0.40    0.25
0.45    0.50
0.50    0.50
0.55    0.50
0.60    0.75
0.65    0.75
0.70    1.00
0.75    1.00
0.80    1.00
0.85    1.00
0.90    1.00
0.95    1.00
1.00    1.00
Name: y, dtype: float64

In [12]:
# Persist the complete combined frame, index included.
full_path = f'../data/{prepath}/full.pkl'
with open(full_path, 'wb') as fp:
    pickle.dump(df, fp, protocol=4)

In [13]:
# Training split: rows whose second index level is before 2021-01-01.
# reset_index(drop=True) discards the index, so only the columns are stored.
train_path = f'../data/{prepath}/train.pkl'
with open(train_path, 'wb') as fp:
    train_rows = df[df.index.get_level_values(1) < '2021-01-01']
    pickle.dump(train_rows.reset_index(drop=True), fp, protocol=4)

In [14]:
# Test split: rows whose second index level is on or after 2021-01-01.
# reset_index(drop=True) discards the index, so only the columns are stored.
test_path = f'../data/{prepath}/test.pkl'
with open(test_path, 'wb') as fp:
    test_rows = df[df.index.get_level_values(1) >= '2021-01-01']
    pickle.dump(test_rows.reset_index(drop=True), fp, protocol=4)

In [15]:
# Class balance check: number of rows per distinct label value.
df['y'].value_counts()

0.00    233065
1.00    229130
0.25    100488
0.75    100177
0.50     91045
Name: y, dtype: int64