In [1]:
import pandas as pd 
import numpy as np
import public_timeseries_testing_util as optiver2023
from torch.nn.utils.rnn import pack_padded_sequence, pack_sequence, unpack_sequence, unpad_sequence
import torch
from tqdm.notebook import trange,tqdm
import torch.nn as nn 
import torch.optim as optim
import wandb
import torch_classes
import torch_classes_v2
from model_saver import model_saver_wandb as model_saver
import training_testing
from itertools import combinations
import gc
from sklearn.decomposition import PCA
import sys
import lightgbm as lgb
import time

In [2]:
# Tell wandb which notebook to snapshot for code saving.
# `%env` stores its argument as literal text and `__file__` is not defined in a notebook
# kernel, so the value is resolved in Python instead of through the magic.
import os

# VS Code's Jupyter extension publishes the notebook path under this global; when it is
# absent the variable is left untouched and can be exported manually before launch.
_notebook_path = globals().get("__vsc_ipynb_file__")
if _notebook_path:
    os.environ["WANDB_NOTEBOOK_NAME"] = os.path.basename(_notebook_path)

env: "WANDB_NOTEBOOK_NAME"=os.path.basename(__file__)


In [3]:
# Build the local test environment from public_timeseries_testing_util and take its
# test iterator.
# NOTE(review): iter_test is not consumed by any of the cells visible in this notebook.
env = optiver2023.make_env()
iter_test = env.iter_test()

In [4]:
# Use the first CUDA device when torch can see one, otherwise stay on the CPU
# (other indices such as cuda:1, cuda:2 could be chosen here as well).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Running on the GPU" if device.type == "cuda" else "Running on the CPU")

Running on the GPU


In [5]:
# Load the training set from the working directory.
train = pd.read_csv('train.csv')
# Not the last expression of the cell, so this head() is evaluated but never displayed.
train.head()
# Number of rows per date_id (this is what the cell displays).
train.date_id.value_counts()

date_id
480    11000
353    11000
363    11000
362    11000
360    11000
       ...  
4      10560
2      10505
1      10505
3      10505
0      10505
Name: count, Length: 481, dtype: int64

In [6]:
import importlib

In [7]:
lgbm_columns = ['stock_id', 'seconds_in_bucket', 'imbalance_size',
       'imbalance_buy_sell_flag', 'reference_price', 'matched_size',
       'far_price', 'near_price', 'bid_price', 'bid_size', 'ask_price',
       'ask_size', 'wap', 'overall_medvol', 'first5min_medvol',
       'last5min_medvol', 'bid_plus_ask_sizes', 'imbalance_ratio', 'imb_s1',
       'imb_s2', 'ask_x_size', 'bid_x_size', 'ask_minus_bid',
       'bid_price_over_ask_price', 'reference_price_minus_far_price',
       'reference_price_times_far_price', 'reference_price_times_near_price',
       'reference_price_minus_ask_price', 'reference_price_times_ask_price',
       'reference_price_ask_price_imb', 'reference_price_minus_bid_price',
       'reference_price_times_bid_price', 'reference_price_bid_price_imb',
       'reference_price_minus_wap', 'reference_price_times_wap',
       'reference_price_wap_imb', 'far_price_minus_near_price',
       'far_price_times_near_price', 'far_price_minus_ask_price',
       'far_price_times_ask_price', 'far_price_minus_bid_price',
       'far_price_times_bid_price', 'far_price_times_wap', 'far_price_wap_imb',
       'near_price_minus_ask_price', 'near_price_times_ask_price',
       'near_price_ask_price_imb', 'near_price_minus_bid_price',
       'near_price_times_bid_price', 'near_price_bid_price_imb',
       'near_price_minus_wap', 'near_price_wap_imb',
       'ask_price_minus_bid_price', 'ask_price_times_bid_price',
       'ask_price_minus_wap', 'ask_price_times_wap', 'ask_price_wap_imb',
       'bid_price_minus_wap', 'bid_price_times_wap', 'bid_price_wap_imb',
       'reference_price_far_price_near_price_imb2',
       'reference_price_far_price_ask_price_imb2',
       'reference_price_far_price_bid_price_imb2',
       'reference_price_far_price_wap_imb2',
       'reference_price_near_price_ask_price_imb2',
       'reference_price_near_price_bid_price_imb2',
       'reference_price_near_price_wap_imb2',
       'reference_price_ask_price_bid_price_imb2',
       'reference_price_ask_price_wap_imb2',
       'reference_price_bid_price_wap_imb2',
       'far_price_near_price_ask_price_imb2',
       'far_price_near_price_bid_price_imb2', 'far_price_near_price_wap_imb2',
       'far_price_ask_price_bid_price_imb2', 'far_price_ask_price_wap_imb2',
       'far_price_bid_price_wap_imb2', 'near_price_ask_price_bid_price_imb2',
       'near_price_ask_price_wap_imb2', 'near_price_bid_price_wap_imb2',
       'ask_price_bid_price_wap_imb2', 'pca_prices']
weights = [
    0.004, 0.001, 0.002, 0.006, 0.004, 0.004, 0.002, 0.006, 0.006, 0.002, 0.002, 0.008,
    0.006, 0.002, 0.008, 0.006, 0.002, 0.006, 0.004, 0.002, 0.004, 0.001, 0.006, 0.004,
    0.002, 0.002, 0.004, 0.002, 0.004, 0.004, 0.001, 0.001, 0.002, 0.002, 0.006, 0.004,
    0.004, 0.004, 0.006, 0.002, 0.002, 0.04 , 0.002, 0.002, 0.004, 0.04 , 0.002, 0.001,
    0.006, 0.004, 0.004, 0.006, 0.001, 0.004, 0.004, 0.002, 0.006, 0.004, 0.006, 0.004,
    0.006, 0.004, 0.002, 0.001, 0.002, 0.004, 0.002, 0.008, 0.004, 0.004, 0.002, 0.004,
    0.006, 0.002, 0.004, 0.004, 0.002, 0.004, 0.004, 0.004, 0.001, 0.002, 0.002, 0.008,
    0.02 , 0.004, 0.006, 0.002, 0.02 , 0.002, 0.002, 0.006, 0.004, 0.002, 0.001, 0.02,
    0.006, 0.001, 0.002, 0.004, 0.001, 0.002, 0.006, 0.006, 0.004, 0.006, 0.001, 0.002,
    0.004, 0.006, 0.006, 0.001, 0.04 , 0.006, 0.002, 0.004, 0.002, 0.002, 0.006, 0.002,
    0.002, 0.004, 0.006, 0.006, 0.002, 0.002, 0.008, 0.006, 0.004, 0.002, 0.006, 0.002,
    0.004, 0.006, 0.002, 0.004, 0.001, 0.004, 0.002, 0.004, 0.008, 0.006, 0.008, 0.002,
    0.004, 0.002, 0.001, 0.004, 0.004, 0.004, 0.006, 0.008, 0.004, 0.001, 0.001, 0.002,
    0.006, 0.004, 0.001, 0.002, 0.006, 0.004, 0.006, 0.008, 0.002, 0.002, 0.004, 0.002,
    0.04 , 0.002, 0.002, 0.004, 0.002, 0.002, 0.006, 0.02 , 0.004, 0.002, 0.006, 0.02,
    0.001, 0.002, 0.006, 0.004, 0.006, 0.004, 0.004, 0.004, 0.004, 0.002, 0.004, 0.04,
    0.002, 0.008, 0.002, 0.004, 0.001, 0.004, 0.006, 0.004,
]

In [8]:
# One row per entry of `weights`, keyed by its position in the list (stock_id 0..len-1).
# The ids are derived from len(weights) so the frame can never disagree with the list
# length; a hard-coded upper bound only works while zip() silently truncates it.
weights_df = pd.DataFrame({'stock_id': range(len(weights)), 'index_weight': weights})

In [9]:
# Attach each stock's index_weight. merge() defaults to an inner join, so rows whose
# stock_id is missing from weights_df would be dropped.
# NOTE(review): later cells shift by row position inside date_id groups, so the row order
# this merge produces matters - confirm it is the order those cells expect.
train = train.merge(weights_df,on='stock_id')

In [10]:
# Size-weighted price recomputed from the book: bid price weighted by ask size plus
# ask price weighted by bid size, divided by the combined size.
train['wap_calc'] = (train['bid_price']*train['ask_size']+train['ask_price']*train['bid_size'])/(train['ask_size']+train['bid_size'])

In [11]:
train.columns

Index(['stock_id', 'date_id', 'seconds_in_bucket', 'imbalance_size',
       'imbalance_buy_sell_flag', 'reference_price', 'matched_size',
       'far_price', 'near_price', 'bid_price', 'bid_size', 'ask_price',
       'ask_size', 'wap', 'target', 'time_id', 'row_id', 'index_weight',
       'wap_calc'],
      dtype='object')

In [12]:
def generate_prev_race(df_in, df_g, rolling_window=10, factor=''):
    """Return a copy of ``df_in`` extended with per-group shifted and first-value columns.

    Args:
        df_in: frame to extend; it is copied, never modified.
        df_g: groupby object the shifts and transforms are read from.
        rolling_window: accepted but not used.
        factor: accepted but not used.

    Columns written, in this order:
        * ``wap_t-60`` / ``target_t-60``: ``wap`` / ``target`` shifted 6 rows within the group.
        * ``initial_wap`` / ``initial_bid_size`` / ``initial_ask_size``: group-wise
          ``'first'`` of ``wap_calc`` / ``bid_size`` / ``ask_size``.
        * ``<col>_t-60`` for bid/ask price, bid/ask size and wap: ``shift(-1)``,
          i.e. the value of the following row of the group.
        * ``<col>_t10`` for the same columns: ``shift(1)``, i.e. the previous row.

    NOTE(review): ``wap`` is in the ``_t-60`` loop, so the 6-row lag first stored in
    ``wap_t-60`` is replaced by the next-row value. Confirm this is intended; the
    column name suggests a lag.
    """
    book_cols = ['bid_price', 'ask_price', 'bid_size', 'ask_size', 'wap']
    first_value_sources = {
        'initial_wap': 'wap_calc',
        'initial_bid_size': 'bid_size',
        'initial_ask_size': 'ask_size',
    }

    out = df_in.copy()

    # Six-row lags; the wap one is overwritten below but still fixes the column position.
    for source in ('wap', 'target'):
        out[source + '_t-60'] = df_g[source].shift(6)

    for new_name, source in first_value_sources.items():
        out[new_name] = df_g[source].transform('first')

    # Next-row values, then previous-row values, for every book column.
    for suffix, step in (('_t-60', -1), ('_t10', 1)):
        for name in book_cols:
            out[name + suffix] = df_g[name].shift(step)

    return out

In [13]:
def generate_index(df_in, df_g, rolling_window=10, factor=''):
    """Return a copy of ``df_in`` with an ``index_wap`` column.

    ``index_wap`` is the mean of ``wap_weighted`` over each group of ``df_g``,
    broadcast back to every row of that group. ``rolling_window`` and ``factor``
    are accepted but not used.
    """
    group_mean = df_g['wap_weighted'].transform('mean')
    return df_in.copy().assign(index_wap=group_mean)

def _lag_by_time_step(df, df_g, column, periods=6,
                      date_col='date_id', time_col='seconds_in_bucket'):
    """Lag ``column`` by ``periods`` time steps within each date, whatever the row order.

    The frame holds one row per stock for every (date, time) step, so a positional
    ``shift`` inside a date group only lands on "``periods`` steps ago" when the rows
    happen to be stored stock-by-stock, and even then it leaks the tail of the previous
    stock into the first ``periods`` rows of the next one. Here the value is looked up
    per unique (date, time) step instead.

    Assumes ``column`` is constant across the rows of one (date, time) step (the first
    row of each step is used). When the key columns are not present in ``df`` the
    original positional group shift is returned unchanged.

    Returns:
        Values aligned with the rows of ``df`` (NaN where no step ``periods`` back exists).
    """
    required = (date_col, time_col, column)
    if any(name not in df.columns for name in required):
        return df_g[column].shift(periods)

    keys = [date_col, time_col]
    per_step = (
        df[keys + [column]]
        .drop_duplicates(subset=keys)
        .sort_values(keys)
    )
    per_step['_lagged'] = per_step.groupby(date_col)[column].shift(periods)

    # A left merge keeps the rows of `df` in place, so the result can be assigned positionally.
    aligned = df[keys].merge(
        per_step[keys + ['_lagged']], on=keys, how='left', validate='many_to_one'
    )
    return aligned['_lagged'].to_numpy()


def generate_index_2(df_in, df_g, rolling_window=10, factor=''):
    """Return a copy of ``df_in`` with the lagged and the initial index wap.

    Adds ``index_wap_t-60`` (``index_wap`` six time steps earlier on the same date) and
    ``index_wap_init`` (group-wise ``'first'`` of ``index_wap`` taken from ``df_g``).
    ``rolling_window`` and ``factor`` are accepted but not used.
    """
    df = df_in.copy()
    df['index_wap_t-60'] = _lag_by_time_step(df, df_g, 'index_wap')
    df['index_wap_init'] = df_g['index_wap'].transform('first')
    return df


def generate_index_3(df_in, df_g, rolling_window=10, factor=''):
    """Return a copy of ``df_in`` whose ``index_wap_t-60`` is the lagged index move.

    ``index_wap_t-60`` is set (or replaced) with ``index_wap_move_to_init`` six time
    steps earlier on the same date. ``rolling_window`` and ``factor`` are accepted but
    not used.
    """
    df = df_in.copy()
    df['index_wap_t-60'] = _lag_by_time_step(df, df_g, 'index_wap_move_to_init')
    return df

In [14]:
# Each row's wap scaled by its stock's index weight.
train['wap_weighted'] = train['wap']*train['index_weight']
# Per (stock, day) shifted and first-value columns from generate_prev_race.
train_g = train.groupby(['stock_id','date_id'])
train = generate_prev_race(train,train_g)
# Ratio of the current wap to the 'wap_t-60' column.
train['delta_wap'] = train['wap']/train['wap_t-60']

# index_wap: generate_index averages wap_weighted over rows sharing (seconds_in_bucket, date_id).
train_g = train.groupby(['seconds_in_bucket','date_id'])
train = generate_index(train,train_g)


# Stock move relative to its first wap_calc of the (stock, day) group.
train['wap_move_to_init'] = train['wap_calc']/train['initial_wap']
# Adds 'index_wap_t-60' and 'index_wap_init' per date.
train_g = train.groupby(['date_id'])
train = generate_index_2(train,train_g)

# Index move relative to index_wap_init; generate_index_3 then replaces
# 'index_wap_t-60' with the lag of this ratio.
train['index_wap_move_to_init'] = train['index_wap']/train['index_wap_init']
train_g = train.groupby(['date_id'])
train = generate_index_3(train,train_g)

In [15]:
# Re-runs the generate_prev_race step and the delta_wap ratio that an earlier cell
# already executed; the shifted columns are recomputed from the unchanged source columns.
train_g = train.groupby(['stock_id','date_id'])
train = generate_prev_race(train,train_g)
train['delta_wap'] = train['wap']/train['wap_t-60']

In [16]:
# Re-runs the index_wap computation and the delta_wap ratio that an earlier cell
# already executed.
train_g = train.groupby(['seconds_in_bucket','date_id'])
train = generate_index(train,train_g)
train['delta_wap'] = (train['wap']/train['wap_t-60'])

In [17]:
# Re-runs the per-date index steps from an earlier cell. Both calls share one groupby
# object built from `train` before generate_index_2 returns its new copy.
train_g = train.groupby(['date_id'])
train = generate_index_2(train,train_g)
train = generate_index_3(train,train_g)
train['delta_wap'] = (train['wap']/train['wap_t-60'])

In [18]:
train.columns

Index(['stock_id', 'date_id', 'seconds_in_bucket', 'imbalance_size',
       'imbalance_buy_sell_flag', 'reference_price', 'matched_size',
       'far_price', 'near_price', 'bid_price', 'bid_size', 'ask_price',
       'ask_size', 'wap', 'target', 'time_id', 'row_id', 'index_weight',
       'wap_calc', 'wap_weighted', 'wap_t-60', 'target_t-60', 'initial_wap',
       'initial_bid_size', 'initial_ask_size', 'bid_price_t-60',
       'ask_price_t-60', 'bid_size_t-60', 'ask_size_t-60', 'bid_price_t10',
       'ask_price_t10', 'bid_size_t10', 'ask_size_t10', 'wap_t10', 'delta_wap',
       'index_wap', 'wap_move_to_init', 'index_wap_t-60', 'index_wap_init',
       'index_wap_move_to_init'],
      dtype='object')

In [19]:
# Difference between the stock's wap ratio and the index ratio, scaled by 10,000 and
# negated, to compare against the shifted target.
# NOTE(review): 'wap_t-60' holds the next row's wap at this point (generate_prev_race
# overwrites the 6-row lag with shift(-1)) while 'index_wap_t-60' is a 6-row lag -
# confirm the two sides are meant to use different offsets.
train['target_calc'] = -((train['wap_t-60']/train['wap'])-(train['index_wap_t-60']/train['index_wap_move_to_init']))*10000
# Residual between the target shifted by 6 rows and the reconstruction above.
train['target_delta'] = train['target_t-60']-train['target_calc']

In [20]:
train_stock_0 = train[train['stock_id']==0].dropna(subset='bid_size_t-60').copy()
train_stock_0.head(20)

Unnamed: 0,stock_id,date_id,seconds_in_bucket,imbalance_size,imbalance_buy_sell_flag,reference_price,matched_size,far_price,near_price,bid_price,...,ask_size_t10,wap_t10,delta_wap,index_wap,wap_move_to_init,index_wap_t-60,index_wap_init,index_wap_move_to_init,target_calc,target_delta
0,0,0,0,3180602.69,1,0.999812,13380276.64,,,0.999812,...,,,1.000108,0.005031,1.0,,0.005031,1.0,,
1,0,0,10,1299772.7,1,1.000026,15261106.63,,,0.999812,...,8493.03,1.0,1.00005,0.005033,0.999892,,0.005031,1.000356,,
2,0,0,20,1299772.7,1,0.999919,15261106.63,,,0.999812,...,23519.16,0.999892,0.999757,0.005034,0.999842,,0.005031,1.000525,,
3,0,0,30,1299772.7,1,1.000133,15261106.63,,,1.000026,...,12131.6,0.999842,0.999768,0.005034,1.000085,,0.005031,1.000547,,
4,0,0,40,1218204.43,1,1.000455,15342674.9,,,1.000241,...,46203.3,1.000085,0.999883,0.005035,1.000317,,0.005031,1.000635,,
5,0,0,50,1218204.43,1,1.000455,15342674.9,,,1.000348,...,26610.45,1.000317,0.999917,0.005035,1.000435,,0.005031,1.000668,,
6,0,0,60,1218204.43,1,1.000562,15342674.9,,,1.000455,...,9897.22,1.000434,1.000096,0.005036,1.000517,1.0,0.005031,1.000815,-7.187475,4.157771
7,0,0,70,1264494.89,1,1.000455,15352380.96,,,1.000348,...,10085.04,1.000517,1.000273,0.005036,1.000422,1.000356,0.005031,1.000843,-2.138261,2.528076
8,0,0,80,1189832.86,1,1.000241,15427043.0,,,1.000133,...,17366.82,1.000421,0.999722,0.005033,1.000148,1.000525,0.005031,1.000409,-1.622903,5.842912
9,0,0,90,1189272.89,1,1.000562,15427602.97,,,1.000348,...,61984.4,1.000148,1.000165,0.005033,1.000427,1.000547,0.005031,1.000345,3.671524,1.778725


In [21]:
train[['seconds_in_bucket', 'imbalance_size',
       'imbalance_buy_sell_flag', 'reference_price', 'matched_size',  
       'far_price', 'near_price', 'bid_price', 'bid_size', 'ask_price', 'ask_size', 
        'wap', 'index_weight','wap_calc','initial_wap','wap_weighted', 'index_wap', 'index_wap_init', 'index_wap_move_to_init']].isna().sum()

seconds_in_bucket                0
imbalance_size                 220
imbalance_buy_sell_flag          0
reference_price                220
matched_size                   220
far_price                  2894342
near_price                 2857180
bid_price                      220
bid_size                         0
ask_price                      220
ask_size                         0
wap                            220
index_weight                     0
wap_calc                       220
initial_wap                    220
wap_weighted                   220
index_wap                        0
index_wap_init                   0
index_wap_move_to_init           0
dtype: int64

In [22]:
# train_stock_0.to_csv('train_with_new_vars_0stock.csv')

In [23]:
# Per-stock volume table; its row index is treated as the stock_id when feat_eng
# merges it with right_index=True.
median_vol = pd.read_csv("archive/MedianVolV2.csv")
median_vol.index.name = "stock_id"
median_vol = median_vol[['overall_medvol', "first5min_medvol", "last5min_medvol"]]
# Per-stock typical book size: median bid size plus median ask size.
median_sizes = train.groupby('stock_id')['bid_size'].median() + train.groupby('stock_id')['ask_size'].median()
# Per-stock size dispersion: standard deviation of bid size plus that of ask size
# (this must use .std(); with .median() it would just duplicate median_sizes).
std_sizes = train.groupby('stock_id')['bid_size'].std() + train.groupby('stock_id')['ask_size'].std()

In [24]:
# Current bid price minus the 'bid_price_t-60' column, which generate_prev_race fills
# with the following row's bid price (shift(-1)).
train['bid_price_target'] = train['bid_price']-train['bid_price_t-60']
# Overwrites 'bid_price_t-60' with that difference scaled by 10,000.
# NOTE(review): not idempotent - re-running this cell would difference against the
# already-scaled values.
train['bid_price_t-60'] = train['bid_price_target']*10_000

In [25]:
# Current wap minus the 'wap_t-60' column from generate_prev_race.
train['wap_target'] = train['wap']-train['wap_t-60']
# The scaled difference goes to a new 'wap_price_t-60' column; unlike the bid/ask
# cells, 'wap_t-60' itself is left as a raw price.
# NOTE(review): confirm the different column name is intentional.
train['wap_price_t-60'] = train['wap_target']*10_000

In [26]:
# Current ask price minus the 'ask_price_t-60' column, which generate_prev_race fills
# with the following row's ask price (shift(-1)).
train['ask_price_target'] = train['ask_price']-train['ask_price_t-60']
# Overwrites 'ask_price_t-60' with that difference scaled by 10,000.
# NOTE(review): not idempotent - re-running this cell would difference against the
# already-scaled values.
train['ask_price_t-60'] = train['ask_price_target']*10_000

In [27]:
train[['bid_price_t-60','bid_price']].head(10)

Unnamed: 0,bid_price_t-60,bid_price
0,0.0,0.999812
1,0.0,0.999812
2,-2.14,0.999812
3,-2.15,1.000026
4,-1.07,1.000241
5,-1.07,1.000348
6,1.07,1.000455
7,2.15,1.000348
8,-2.15,1.000133
9,1.07,1.000348


In [28]:
# For each price, the change since the previous row of the same (stock, day) group,
# scaled by 10,000; rows without a previous value (NaN difference) become 0.
targets = ['wap', 'bid_price', 'ask_price']
for i in targets:
    train[f'{i}_prev_move'] = (train[f'{i}']-train[f'{i}_t10']).fillna(0)*10000

In [29]:
def feat_eng(df):
    """Build the engineered feature frame from a raw/augmented order-book frame.

    Reads the module-level ``median_vol`` table (left-merged on ``stock_id`` against
    its row index) and ``std_sizes`` series; every other input comes from ``df``
    itself, so the function gives the same answer for any frame, not only the global
    training frame.

    Args:
        df: frame with at least ``stock_id``, bid/ask size and price,
            ``imbalance_size``, ``matched_size``, ``reference_price``, ``far_price``,
            ``near_price`` and ``wap``. ``row_id`` / ``time_id`` are discarded when
            present. The input is not modified.

    Returns:
        A new frame with the size, imbalance, pairwise-price and triple-price
        features added, a fixed set of them dropped again, and +/-inf replaced by 0.
    """
    cols = [c for c in df.columns if c not in ['row_id', 'time_id']]
    df = df[cols]
    df = df.merge(median_vol, how = "left", left_on = "stock_id", right_index = True)

    # Both operands must come from this frame; reaching for a global frame here would
    # silently align on index and give wrong sizes for any other input.
    df['bid_plus_ask_sizes'] = df['bid_size'] + df['ask_size']
    df['std_size'] = df['stock_id'].map(std_sizes.to_dict())
    df['imbalance_ratio'] = df['imbalance_size'] / df['matched_size']

    df['imb_s1'] = df.eval('(bid_size-ask_size)/(bid_size+ask_size)')
    df['imb_s2'] = df.eval('(imbalance_size-matched_size)/(matched_size+imbalance_size)')

    df['ask_x_size'] = df.eval('ask_size*ask_price')
    df['bid_x_size'] = df.eval('bid_size*bid_price')

    df['ask_minus_bid'] = df['ask_x_size'] - df['bid_x_size']

    df["bid_size_over_ask_size"] = df["bid_size"].div(df["ask_size"])
    df["bid_price_over_ask_price"] = df["bid_price"].div(df["ask_price"])

    prices = ['reference_price','far_price', 'near_price', 'ask_price', 'bid_price', 'wap']

    # Pairwise difference, product and normalised imbalance of every price pair.
    for c in combinations(prices, 2):
        df[f'{c[0]}_minus_{c[1]}'] = (df[f'{c[0]}'] - df[f'{c[1]}']).astype(np.float32)
        df[f'{c[0]}_times_{c[1]}'] = (df[f'{c[0]}'] * df[f'{c[1]}']).astype(np.float32)
        df[f'{c[0]}_{c[1]}_imb'] = df.eval(f'({c[0]}-{c[1]})/({c[0]}+{c[1]})')

    # For every price triple: (max - mid) / (mid - min), where mid is the remaining value.
    for c in combinations(prices, 3):
        max_ = df[list(c)].max(axis=1)
        min_ = df[list(c)].min(axis=1)
        mid_ = df[list(c)].sum(axis=1)-min_-max_

        df[f'{c[0]}_{c[1]}_{c[2]}_imb2'] = (max_-mid_)/(mid_-min_)

    df = df.drop(columns=[
        'reference_price_far_price_imb',
        'reference_price_minus_near_price',
        'reference_price_near_price_imb',
        'far_price_near_price_imb',
        'far_price_ask_price_imb',
        'far_price_bid_price_imb',
        'far_price_minus_wap',
        'std_size',
        'bid_size_over_ask_size',
        'ask_price_bid_price_imb',
        'near_price_times_wap'
    ])

    # Divisions by zero above produce +/-inf; neutralise them (NaN is left as is).
    df = df.replace([np.inf, -np.inf], 0)

    return df

In [30]:
# Raw target values taken from the full training frame.
# NOTE(review): X later loses rows through dropna(subset='wap_t-60'), after which this
# array is no longer row-aligned with X - confirm y is not used against the filtered X.
y = train['target'].values
X = feat_eng(train)
# Price-like feature columns fed to the PCA: name contains 'price' but neither
# 'target' nor '60'.
prices = [c for c in X.columns if ('price' in c) and ('target' not in c) and ('60' not in c)]
print(prices)

['reference_price', 'far_price', 'near_price', 'bid_price', 'ask_price', 'bid_price_t10', 'ask_price_t10', 'bid_price_prev_move', 'ask_price_prev_move', 'bid_price_over_ask_price', 'reference_price_minus_far_price', 'reference_price_times_far_price', 'reference_price_times_near_price', 'reference_price_minus_ask_price', 'reference_price_times_ask_price', 'reference_price_ask_price_imb', 'reference_price_minus_bid_price', 'reference_price_times_bid_price', 'reference_price_bid_price_imb', 'reference_price_minus_wap', 'reference_price_times_wap', 'reference_price_wap_imb', 'far_price_minus_near_price', 'far_price_times_near_price', 'far_price_minus_ask_price', 'far_price_times_ask_price', 'far_price_minus_bid_price', 'far_price_times_bid_price', 'far_price_times_wap', 'far_price_wap_imb', 'near_price_minus_ask_price', 'near_price_times_ask_price', 'near_price_ask_price_imb', 'near_price_minus_bid_price', 'near_price_times_bid_price', 'near_price_bid_price_imb', 'near_price_minus_wap', 'n

In [31]:
# Compress the price-like columns into n_comp principal components.
n_comp = 10
pca_prices = PCA(n_components=n_comp,copy=False)
# Missing prices are filled with 1 before fitting.
pca_prices_items = pca_prices.fit_transform(X[prices].fillna(1))
print('done')

done


In [32]:
# lgbm = lgb.Booster(model_file='lgbm_model.lgb')
# lgbm_preds = lgbm.predict(X[lgbm_columns])
# X['lgbm_preds'] = lgbm_preds

# del pca_prices

In [33]:
pca = pd.DataFrame(pca_prices_items,columns=[f'pca_{x}' for x in range(0,n_comp)])
pca.columns

Index(['pca_0', 'pca_1', 'pca_2', 'pca_3', 'pca_4', 'pca_5', 'pca_6', 'pca_7',
       'pca_8', 'pca_9'],
      dtype='object')

In [34]:
# join() aligns on the index: `pca` carries a default 0..n-1 index, so this assumes X
# still has its original positional index - TODO confirm.
X = X.join(pca)
# Drop rows without a 'wap_t-60' value. reset_index() is called without drop=True, so
# the old index is kept as a new 'index' column.
X = X.dropna(subset='wap_t-60').reset_index()

In [38]:
nans = X.isna().sum()

In [36]:
sys.getsizeof(X) / (1024 ** 3)

4.2721025086939335

In [35]:
# gc.collect()
# sys.getsizeof(train) / (1024 ** 3)

In [41]:
for x in X.columns:
    print(x)

index
stock_id
date_id
seconds_in_bucket
imbalance_size
imbalance_buy_sell_flag
reference_price
matched_size
far_price
near_price
bid_price
bid_size
ask_price
ask_size
wap
target
index_weight
wap_calc
wap_weighted
wap_t-60
target_t-60
initial_wap
initial_bid_size
initial_ask_size
bid_price_t-60
ask_price_t-60
bid_size_t-60
ask_size_t-60
bid_price_t10
ask_price_t10
bid_size_t10
ask_size_t10
wap_t10
delta_wap
index_wap
wap_move_to_init
index_wap_t-60
index_wap_init
index_wap_move_to_init
target_calc
target_delta
bid_price_target
wap_target
wap_price_t-60
ask_price_target
wap_prev_move
bid_price_prev_move
ask_price_prev_move
overall_medvol
first5min_medvol
last5min_medvol
bid_plus_ask_sizes
imbalance_ratio
imb_s1
imb_s2
ask_x_size
bid_x_size
ask_minus_bid
bid_price_over_ask_price
reference_price_minus_far_price
reference_price_times_far_price
reference_price_times_near_price
reference_price_minus_ask_price
reference_price_times_ask_price
reference_price_ask_price_imb
reference_price_minus

In [36]:
del train
# stop

In [42]:
# Columns packed, in this order, into the per-row feature vector.
stat_cols = [
    # raw order-book fields
    'seconds_in_bucket', 'imbalance_size', 'imbalance_buy_sell_flag',
    'reference_price', 'matched_size',
    #    'far_price', 'near_price',
    'bid_price', 'bid_size', 'ask_price', 'ask_size', 'wap',
    # index-related fields
    'index_weight', 'wap_calc', 'initial_wap', 'wap_weighted',
    'index_wap', 'index_wap_init', 'index_wap_move_to_init',
    # previous-row moves
    'wap_prev_move', 'bid_price_prev_move', 'ask_price_prev_move',
    # volume and size features
    'overall_medvol', 'first5min_medvol', 'last5min_medvol',
    'bid_plus_ask_sizes', 'imbalance_ratio', 'imb_s1', 'imb_s2',
    'ask_x_size', 'bid_x_size', 'ask_minus_bid',
    'bid_price_over_ask_price',
    # pairwise price features
    'reference_price_minus_far_price',
    'reference_price_times_far_price',
    'reference_price_times_near_price',
    'reference_price_minus_ask_price',
    'reference_price_times_ask_price',
    'reference_price_ask_price_imb',
    'reference_price_minus_bid_price',
    'reference_price_times_bid_price',
    'reference_price_bid_price_imb',
    'reference_price_minus_wap',
    'reference_price_times_wap',
    'reference_price_wap_imb',
    'far_price_minus_near_price',
    'far_price_times_near_price',
    'far_price_minus_ask_price',
    'far_price_times_ask_price',
    'far_price_minus_bid_price',
    'far_price_times_bid_price',
    'far_price_times_wap',
    'far_price_wap_imb',
    'near_price_minus_ask_price',
    'near_price_times_ask_price',
    'near_price_ask_price_imb',
    'near_price_minus_bid_price',
    'near_price_times_bid_price',
    'near_price_bid_price_imb',
    'near_price_minus_wap',
    'near_price_wap_imb',
    'ask_price_minus_bid_price',
    'ask_price_times_bid_price',
    'ask_price_minus_wap',
    'ask_price_times_wap',
    'ask_price_wap_imb',
    'bid_price_minus_wap',
    'bid_price_times_wap',
    'bid_price_wap_imb',
    # triple price features
    'reference_price_far_price_near_price_imb2',
    'reference_price_far_price_ask_price_imb2',
    'reference_price_far_price_bid_price_imb2',
    'reference_price_far_price_wap_imb2',
    'reference_price_near_price_ask_price_imb2',
    'reference_price_near_price_bid_price_imb2',
    'reference_price_near_price_wap_imb2',
    'reference_price_ask_price_bid_price_imb2',
    'reference_price_ask_price_wap_imb2',
    'reference_price_bid_price_wap_imb2',
    'far_price_near_price_ask_price_imb2',
    'far_price_near_price_bid_price_imb2',
    'far_price_near_price_wap_imb2',
    'far_price_ask_price_bid_price_imb2',
    'far_price_ask_price_wap_imb2',
    'far_price_bid_price_wap_imb2',
    'near_price_ask_price_bid_price_imb2',
    'near_price_ask_price_wap_imb2',
    'near_price_bid_price_wap_imb2',
    'ask_price_bid_price_wap_imb2',
    # principal components of the price block
    'pca_0', 'pca_1', 'pca_2', 'pca_3', 'pca_4',
    'pca_5', 'pca_6', 'pca_7', 'pca_8', 'pca_9',
]

# Columns removed from the frame once they have been packed: every stat column except
# the sizes, wap and previous-row moves, which stay available as ordinary columns.
stat_cols_to_drop = [
    col for col in stat_cols
    if col not in {
        'bid_size', 'ask_size', 'wap',
        'wap_prev_move', 'bid_price_prev_move', 'ask_price_prev_move',
    }
]

In [43]:
# Pack the stat columns of every row into one array per row: NaNs become -1, then the
# 2-D matrix is split into len(X) equal pieces along axis 0, so each cell of
# X['stats'] holds a (1, len(stat_cols)) array.
X['stats'] = np.split(np.nan_to_num(X[stat_cols].to_numpy(),nan=-1),indices_or_sections=len(X))

In [44]:
X.drop(columns=stat_cols_to_drop,inplace=True)

In [45]:
sys.getsizeof(X) / (1024 ** 3)

1.9540559388697147

In [46]:
importlib.reload(torch_classes)
trading_data = torch_classes.TradingData(X)
hidden_size = 64
# trading_data.generate_batches()

100%|██████████| 200/200 [04:13<00:00,  1.27s/it]


In [47]:
trading_data.generate_batches()

Length of train: 385, Length of test 96


100%|██████████| 385/385 [00:00<00:00, 3729.63it/s]
100%|██████████| 95/95 [00:00<00:00, 1534.98it/s]


In [48]:
# Release memory once the batches have been generated: clear data_daily for every entry
# of stocksDict except the first one iterated (model_pipeline still reads
# stocksDict[0].data_daily to size the model input).
for i,stocks in enumerate(trading_data.stocksDict.values()):
    if i==0:
        continue
    else:
        stocks.data_daily = []
trading_data.train_batches = []
# del train
# Drop the feature frame and rebind X to an empty list so the name still exists.
del X
X = []
torch.cuda.empty_cache()
# gc.collect()
# del pca_prices

In [49]:
del pca, pca_prices_items 

In [50]:
importlib.reload(torch_classes)
importlib.reload(training_testing)

<module 'training_testing' from 'c:\\Users\\Nick\\Documents\\GitHub\\OptiverKaggle\\training_testing.py'>

In [51]:
# Lookup from optimizer name to torch.optim class.
# NOTE(review): model_pipeline does not read this dict, and its keys ('RMSProp', 'Adam',
# 'AdamW') do not match the spellings used in the sweep config ("RMSprop", 'adam',
# "adamW") - a direct lookup with the swept values would fail.
optim_dict = {
    'RMSProp':optim.RMSprop,
    "Adam":optim.Adam,
    "AdamW":optim.AdamW,
    'SGD':optim.SGD,
    
}

In [52]:
def model_pipeline(trading_df=None, config=None):
    """Train one GRUNetV2 model inside a wandb run and return it.

    Args:
        trading_df: data container providing ``stocksDict`` and ``reset_hidden``.
            When omitted (for example when called by ``wandb.agent``) the notebook's
            current global ``trading_data`` is used.
        config: run configuration passed to ``wandb.init``; inside the run the
            effective ``wandb.config`` is used, which a sweep may have overridden.
            Reads ``hidden_size``, ``num_layers``, ``learning_rate`` and, optionally,
            ``optimizer`` (case-insensitive; defaults to Adagrad when absent).

    Returns:
        The trained model.

    Raises:
        ValueError: if ``optimizer`` names an optimizer that is not supported.
    """
    if trading_df is None:
        # Resolved at call time so a re-created `trading_data` is picked up.
        trading_df = trading_data

    optimizers = {
        'adagrad': optim.Adagrad,
        'adam': optim.Adam,
        'adamw': optim.AdamW,
        'rmsprop': optim.RMSprop,
        'sgd': optim.SGD,
    }

    # NOTE(review): the project name is spelled "Optviver"; kept unchanged so runs keep
    # landing in the existing wandb project.
    with wandb.init(project="Optviver", config=config,save_code=True):
        wandb.define_metric("val_epoch_loss_l1", summary="min")
        wandb.define_metric("epoch_l1_loss", summary="min")
        config = wandb.config

        # Width of one feature vector, taken from the first stock's first daily sample.
        input_size = len(trading_df.stocksDict[0].data_daily[0][0])

        model = torch_classes.GRUNetV2(input_size,config['hidden_size'],num_layers=config['num_layers']).to(device)

        # Honour the optimizer chosen by the run/sweep instead of always using Adagrad.
        optimizer_name = str(config.get('optimizer', 'adagrad')).lower()
        if optimizer_name not in optimizers:
            raise ValueError(f"Unsupported optimizer: {optimizer_name!r}")
        optimizer = optimizers[optimizer_name](model.parameters(), lr=config['learning_rate'])

        print(model)
        trading_df.reset_hidden(hidden_size=config['hidden_size'],num_layers=config['num_layers'])
        criterion = nn.L1Loss()
        training_testing.train_model(trading_df,model,config,optimizer,criterion)
    return(model)



In [57]:
importlib.reload(torch_classes)
importlib.reload(training_testing)

<module 'training_testing' from 'c:\\Users\\Nick\\Documents\\GitHub\\OptiverKaggle\\training_testing.py'>

In [54]:
trading_data.reset_hidden(16,num_layers=5)

In [58]:
config_static = {'learning_rate':0.0001, 'hidden_size':256, 'num_layers':2, 'batch_norm':1,  'epochs':500}
config = config_static

In [59]:
model = model_pipeline(trading_data, config_static)

GRUNetV2(
  (gru): GRU(256, 256, num_layers=2, dropout=0.3)
  (relu): ReLU()
  (batch_norm): BatchNorm1d(97, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  (drop): Dropout(p=0.5, inplace=False)
  (fc0): Linear(in_features=97, out_features=256, bias=True)
  (fc1): Linear(in_features=256, out_features=256, bias=True)
  (fc_ask_price): Linear(in_features=256, out_features=1, bias=True)
  (fc_bid_price): Linear(in_features=256, out_features=1, bias=True)
  (fc_wap_price): Linear(in_features=256, out_features=1, bias=True)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=8, bias=True)
)


  0%|          | 0/500 [00:00<?, ?it/s]

created path


Traceback (most recent call last):
  File "C:\Users\Nick\AppData\Local\Temp\ipykernel_23220\957133994.py", line 18, in model_pipeline
    training_testing.train_model(trading_df,model,config,optimizer,criterion)
  File "c:\Users\Nick\Documents\GitHub\OptiverKaggle\training_testing.py", line 71, in train_model
    wandb.log({"loss_ask": torch.mean(loss_ask).item(), })
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\wandb\sdk\wandb_run.py", line 419, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\wandb\sdk\wandb_run.py", line 370, in wrapper_fn
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\wandb\sdk\wandb_run.py", line 360, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\wandb\s

0,1
L1_loss_ask_epoch,█▇▆▆▅▅▄▄▃▃▂▂▁▁
L1_loss_bid_epoch,▁▂▃▄▅▅▆▆▇▇▇▇██
L1_loss_wap_epoch,█▇▅▄▂▂▁▂▂▂▂▂▁▁
epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
epoch_l1_loss,█▇▆▅▅▄▄▃▃▂▂▁▁▁
epoch_loss,█▇▆▅▅▄▄▃▃▂▂▁▁▁
loss_1,█▅▆▇▆▆▅▃▅▅▃▄▁▆
loss_ask,▂█▂▂█▂▂▁▁▃▁▁▃▂▂█▁▂█▁▃▁▁▃▁▁█▁▂█▁▂█▁▃▁▁▃▁▂
output_sd,▅▃█▃▅▅▄▄▅▆▅▃▁▂
relu_sum,█▇▇▆▅▅▄▄▄▃▂▂▁▁

0,1
L1_loss_ask_epoch,2.32894
L1_loss_bid_epoch,2.27896
L1_loss_wap_epoch,2.60557
epoch,13.0
epoch_loss,9.10157
loss_1,7.92916
loss_ask,2.90038
output_sd,0.49521
relu_sum,1120418.875
val_epoch_loss,7.21347


KeyboardInterrupt: 

In [None]:
# Random-search sweep that minimises the logged "val_epoch_loss".
sweep_config = {"method": "random"}

metric = {"name": "val_epoch_loss", "goal": "minimize"}

sweep_config["metric"] = metric


# Search space. NOTE(review): f0_layer_size / f1_layer_size are not read in the
# model_pipeline body of this notebook - confirm train_model or the model consumes them.
parameters_dict = {
    "optimizer": {"values": ["adamW", 'adam', 'SGD', 'RMSprop']},
    "f0_layer_size": {"values": [128]},
    "f1_layer_size": {"values": [64]},
    "num_layers": {"values": [2]},
    'hidden_size':{'values':[128,256,512]},
    'learning_rate': {'values':[0.001,0.0005,0.0001,0.00005,0.00001]},
    'epochs':{'value':500}
    # 'batch_norm':{'values':[0,1,2]}
}

sweep_config["parameters"] = parameters_dict

# Register the sweep and run up to 100 trials in this kernel; the agent calls
# model_pipeline with no arguments.
sweep_id = wandb.sweep(sweep_config, project="Optiver Sweeps")
# CUDA_LAUNCH_BLOCKING=1
wandb.agent(sweep_id, function=model_pipeline, count=100)

Create sweep with ID: efecmkf8
Sweep URL: https://wandb.ai/nickojelly/Optiver%20Sweeps/sweeps/efecmkf8


[34m[1mwandb[0m: Agent Starting Run: 5c7g930t with config:
[34m[1mwandb[0m: 	epochs: 500
[34m[1mwandb[0m: 	f0_layer_size: 128
[34m[1mwandb[0m: 	f1_layer_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GRUNetV2(
  (gru): GRU(85, 256, num_layers=2, dropout=0.3)
  (relu): ReLU()
  (batch_norm): BatchNorm1d(85, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  (drop): Dropout(p=0.5, inplace=False)
  (relu0): ReLU()
  (fc0): Linear(in_features=256, out_features=256, bias=True)
  (fc_ask_price): Linear(in_features=256, out_features=1, bias=True)
  (fc_bid_price): Linear(in_features=256, out_features=1, bias=True)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=8, bias=True)
)


  0%|          | 0/500 [00:00<?, ?it/s]

time_periods=49
len_val=95
9800
9800
9800
9800
9751
9751
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9751
9751
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
9800
created path
time_periods=49
len_

In [None]:
stocks_hidden,targets = trading_data.fetch_daily_data(day=4)

In [None]:
len(targets)

In [None]:
stocks_hidden,targets = trading_data.fetch_daily_data(day=1)

stocks_hidden = [torch.stack(x) for x in stocks_hidden]

X = torch.cat(stocks_hidden,dim=-1)

In [None]:
stocks_hidden[0]

In [None]:
# Scratch: for each of the 200 stock ids, append entry 1 of its lgbm predictions to its
# hidden states and collect entry 1 of its targets.
stock_hidden = []
stock_targets = []

for i in range(0,200):
    # print(i)
    
    try:
        stock_lgbm = trading_data.stocksDict[i].lgbm_pred_daily[1]
        stock_targets.append(torch.stack(trading_data.stocksDict[i].target_daily[1]))
    except KeyError as e:
        # Missing entry: use a zero target vector of length 55.
        # NOTE(review): stock_lgbm is not reassigned on this path, so the append below
        # reuses the previous stock's predictions (or raises NameError on the first pass).
        stock_targets.append(torch.zeros(55,device='cuda:0'))
        

    stock_hidden.append([torch.cat((x,y.reshape(1)),0) for x,y in zip(trading_data.stocksDict[i].hidden_all,stock_lgbm)])

In [None]:
stocks_hidden_og = stocks_hidden
X1 = torch.cat(stocks_hidden_og,dim=-1)

In [None]:
stocks_hidden_og?

In [None]:
X1?

In [None]:
X1.shape

In [None]:
stocks_hidden_og[0] == stocks_hidden[0]

In [None]:
# Scratch: same loop as an earlier cell - for each of the 200 stock ids, append entry 1
# of its lgbm predictions to its hidden states and collect entry 1 of its targets.
stock_hidden = []
stock_targets = []

for i in range(0,200):
    # print(i)
    
    try:
        stock_lgbm = trading_data.stocksDict[i].lgbm_pred_daily[1]
        stock_targets.append(torch.stack(trading_data.stocksDict[i].target_daily[1]))
    except KeyError as e:
        # Missing entry: use a zero target vector of length 55.
        # NOTE(review): stock_lgbm is not reassigned on this path, so the append below
        # reuses the previous stock's predictions (or raises NameError on the first pass).
        stock_targets.append(torch.zeros(55,device='cuda:0'))
        

    stock_hidden.append([torch.cat((x,y.reshape(1)),0) for x,y in zip(trading_data.stocksDict[i].hidden_all,stock_lgbm)])



In [None]:
stock_hidden[0]

In [None]:
# Scratch: variant that stacks entry 1 of the lgbm predictions into a column and
# concatenates it to hidden_all along dim 1 in a single call per stock.
stock_hidden = []
stock_targets = []

for i in range(0,200):
    # print(i)
    
    try:
        stock_lgbm = torch.stack(trading_data.stocksDict[i].lgbm_pred_daily[1]).reshape(-1,1)
        stock_targets.append(torch.stack(trading_data.stocksDict[i].target_daily[1]))
    except KeyError as e:
        # Missing entry: use a zero target vector of length 55.
        # NOTE(review): stock_lgbm is not reassigned on this path, so the concatenation
        # below reuses the previous stock's predictions (or raises NameError on the first pass).
        stock_targets.append(torch.zeros(55,device='cuda:0'))
        
    # print(stock_lgbm.shape)
    # print(trading_data.stocksDict[i].hidden_all.shape)
    stock_hidden.append([torch.cat((trading_data.stocksDict[i].hidden_all,stock_lgbm),dim=1)])

In [None]:
stocks_hidden_og = stocks_hidden
X1 = torch.cat(stocks_hidden_og,dim=-1)

In [None]:
X1?

In [None]:
stock_lgbm = torch.stack(trading_data.stocksDict[1].lgbm_pred_daily[1])
stock_lgbm.shape

In [None]:
hidden = trading_data.stocksDict[1].hidden_all
hidden.shape

In [None]:
torch.cat([hidden,stock_lgbm],dim=1)

In [None]:
len(stock_targets)

In [None]:
stocks_hidden = [torch.stack(x) for x in stocks_hidden]
X = torch.cat(stocks_hidden,dim=-1)
Y = torch.stack(targets).transpose(0,1).to('cuda:0')

In [None]:
output, relu = model(X)

In [None]:
output.shape

In [None]:
Y.shape

In [None]:
# Wider random-search space than the earlier sweep cell: more layer counts and hidden
# sizes, a continuous learning-rate range and a batch_norm switch.
sweep_config = {"method": "random"}

metric = {"name": "val_epoch_loss", "goal": "minimize"}

sweep_config["metric"] = metric


# NOTE(review): unlike the earlier sweep and config_static, this space has no 'epochs'
# entry - confirm train_model does not require it.
parameters_dict = {
    "optimizer": {"values": ["adamW", 'adam', 'SGD', 'RMSprop']},
    "f0_layer_size": {"values": [128]},
    "f1_layer_size": {"values": [64]},
    "num_layers": {"values": [2,3,4,5]},
    'hidden_size':{'values':[8,16,32,64,128,256,512,1024]},
    'learning_rate': {'max': 0.001, 'min': 0.00001},
    'batch_norm':{'values':[0,1,2]}
}

sweep_config["parameters"] = parameters_dict

In [None]:
sweep_id = wandb.sweep(sweep_config, project="Optiver Sweeps")
# CUDA_LAUNCH_BLOCKING=1
wandb.agent(sweep_id, function=model_pipeline, count=100)