In [34]:
import os

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"
import pandas as pd
import numpy as np
import utils.public_timeseries_testing_util as optiver2023
from torch.nn.utils.rnn import (
    pack_padded_sequence,
    pack_sequence,
    unpack_sequence,
    unpad_sequence,
)
import torch
from tqdm.notebook import trange, tqdm
import torch.nn as nn
import torch.optim as optim
import wandb
import utils.torch_classes
from utils.model_saver import model_saver_wandb as model_saver
import utils.training_testing
from itertools import combinations
import gc
from sklearn.decomposition import PCA
import sys
import lightgbm as lgb
import time
from sklearn.preprocessing import OneHotEncoder
import utils.training_testing_double
from utils.conts import *
import importlib

In [35]:
%env "WANDB_NOTEBOOK_NAME" os.path.basename(__file__)

env: "WANDB_NOTEBOOK_NAME"=os.path.basename(__file__)


In [36]:
env = optiver2023.make_env()
iter_test = env.iter_test()

In [37]:
torch.__version__

'2.1.1'

In [38]:
if torch.cuda.is_available():
    device = torch.device(
        "cuda:0"
    )  # you can continue going on here, like cuda:1 cuda:2....etc.
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU


In [39]:
train = pd.read_csv("data/train.csv")
train.head()
train.date_id.value_counts()

date_id
480    11000
353    11000
363    11000
362    11000
360    11000
       ...  
4      10560
2      10505
1      10505
3      10505
0      10505
Name: count, Length: 481, dtype: int64

In [40]:
lgbm_columns = [
    "stock_id",
    "seconds_in_bucket",
    "imbalance_size",
    "imbalance_buy_sell_flag",
    "reference_price",
    "matched_size",
    "far_price",
    "near_price",
    "bid_price",
    "bid_size",
    "ask_price",
    "ask_size",
    "wap",
    "overall_medvol",
    "first5min_medvol",
    "last5min_medvol",
    "bid_plus_ask_sizes",
    "imbalance_ratio",
    "imb_s1",
    "imb_s2",
    "ask_x_size",
    "bid_x_size",
    "ask_minus_bid",
    "bid_price_over_ask_price",
    "reference_price_minus_far_price",
    "reference_price_times_far_price",
    "reference_price_times_near_price",
    "reference_price_minus_ask_price",
    "reference_price_times_ask_price",
    "reference_price_ask_price_imb",
    "reference_price_minus_bid_price",
    "reference_price_times_bid_price",
    "reference_price_bid_price_imb",
    "reference_price_minus_wap",
    "reference_price_times_wap",
    "reference_price_wap_imb",
    "far_price_minus_near_price",
    "far_price_times_near_price",
    "far_price_minus_ask_price",
    "far_price_times_ask_price",
    "far_price_minus_bid_price",
    "far_price_times_bid_price",
    "far_price_times_wap",
    "far_price_wap_imb",
    "near_price_minus_ask_price",
    "near_price_times_ask_price",
    "near_price_ask_price_imb",
    "near_price_minus_bid_price",
    "near_price_times_bid_price",
    "near_price_bid_price_imb",
    "near_price_minus_wap",
    "near_price_wap_imb",
    "ask_price_minus_bid_price",
    "ask_price_times_bid_price",
    "ask_price_minus_wap",
    "ask_price_times_wap",
    "ask_price_wap_imb",
    "bid_price_minus_wap",
    "bid_price_times_wap",
    "bid_price_wap_imb",
    "reference_price_far_price_near_price_imb2",
    "reference_price_far_price_ask_price_imb2",
    "reference_price_far_price_bid_price_imb2",
    "reference_price_far_price_wap_imb2",
    "reference_price_near_price_ask_price_imb2",
    "reference_price_near_price_bid_price_imb2",
    "reference_price_near_price_wap_imb2",
    "reference_price_ask_price_bid_price_imb2",
    "reference_price_ask_price_wap_imb2",
    "reference_price_bid_price_wap_imb2",
    "far_price_near_price_ask_price_imb2",
    "far_price_near_price_bid_price_imb2",
    "far_price_near_price_wap_imb2",
    "far_price_ask_price_bid_price_imb2",
    "far_price_ask_price_wap_imb2",
    "far_price_bid_price_wap_imb2",
    "near_price_ask_price_bid_price_imb2",
    "near_price_ask_price_wap_imb2",
    "near_price_bid_price_wap_imb2",
    "ask_price_bid_price_wap_imb2",
    "pca_prices",
]

weights = [
    0.004,
    0.001,
    0.002,
    0.006,
    0.004,
    0.004,
    0.002,
    0.006,
    0.006,
    0.002,
    0.002,
    0.008,
    0.006,
    0.002,
    0.008,
    0.006,
    0.002,
    0.006,
    0.004,
    0.002,
    0.004,
    0.001,
    0.006,
    0.004,
    0.002,
    0.002,
    0.004,
    0.002,
    0.004,
    0.004,
    0.001,
    0.001,
    0.002,
    0.002,
    0.006,
    0.004,
    0.004,
    0.004,
    0.006,
    0.002,
    0.002,
    0.04,
    0.002,
    0.002,
    0.004,
    0.04,
    0.002,
    0.001,
    0.006,
    0.004,
    0.004,
    0.006,
    0.001,
    0.004,
    0.004,
    0.002,
    0.006,
    0.004,
    0.006,
    0.004,
    0.006,
    0.004,
    0.002,
    0.001,
    0.002,
    0.004,
    0.002,
    0.008,
    0.004,
    0.004,
    0.002,
    0.004,
    0.006,
    0.002,
    0.004,
    0.004,
    0.002,
    0.004,
    0.004,
    0.004,
    0.001,
    0.002,
    0.002,
    0.008,
    0.02,
    0.004,
    0.006,
    0.002,
    0.02,
    0.002,
    0.002,
    0.006,
    0.004,
    0.002,
    0.001,
    0.02,
    0.006,
    0.001,
    0.002,
    0.004,
    0.001,
    0.002,
    0.006,
    0.006,
    0.004,
    0.006,
    0.001,
    0.002,
    0.004,
    0.006,
    0.006,
    0.001,
    0.04,
    0.006,
    0.002,
    0.004,
    0.002,
    0.002,
    0.006,
    0.002,
    0.002,
    0.004,
    0.006,
    0.006,
    0.002,
    0.002,
    0.008,
    0.006,
    0.004,
    0.002,
    0.006,
    0.002,
    0.004,
    0.006,
    0.002,
    0.004,
    0.001,
    0.004,
    0.002,
    0.004,
    0.008,
    0.006,
    0.008,
    0.002,
    0.004,
    0.002,
    0.001,
    0.004,
    0.004,
    0.004,
    0.006,
    0.008,
    0.004,
    0.001,
    0.001,
    0.002,
    0.006,
    0.004,
    0.001,
    0.002,
    0.006,
    0.004,
    0.006,
    0.008,
    0.002,
    0.002,
    0.004,
    0.002,
    0.04,
    0.002,
    0.002,
    0.004,
    0.002,
    0.002,
    0.006,
    0.02,
    0.004,
    0.002,
    0.006,
    0.02,
    0.001,
    0.002,
    0.006,
    0.004,
    0.006,
    0.004,
    0.004,
    0.004,
    0.004,
    0.002,
    0.004,
    0.04,
    0.002,
    0.008,
    0.002,
    0.004,
    0.001,
    0.004,
    0.006,
    0.004,
]

In [41]:
weights_df = pd.DataFrame(
    data=list(zip(range(0, 201), weights)), columns=["stock_id", "index_weight"]
)

In [42]:
train = train.merge(weights_df, on="stock_id")

In [43]:
train["wap_calc"] = (
    train["bid_price"] * train["ask_size"] + train["ask_price"] * train["bid_size"]
) / (train["ask_size"] + train["bid_size"])

In [44]:
train.columns

Index(['stock_id', 'date_id', 'seconds_in_bucket', 'imbalance_size',
       'imbalance_buy_sell_flag', 'reference_price', 'matched_size',
       'far_price', 'near_price', 'bid_price', 'bid_size', 'ask_price',
       'ask_size', 'wap', 'target', 'time_id', 'row_id', 'index_weight',
       'wap_calc'],
      dtype='object')

In [45]:
def generate_prev_race(df_in, df_g, rolling_window=10, factor=""):
    df = df_in.copy()
    original_cols = df_in.columns
    df[f"wap_t-60"] = df_g["wap"].shift(6)
    df[f"target_t-60"] = df_g["target"].shift(6)
    df[f"initial_wap"] = df_g["wap_calc"].transform("first")
    df[f"initial_bid_size"] = df_g["bid_size"].transform("first")
    df[f"initial_ask_size"] = df_g["ask_size"].transform("first")
    cols = ["bid_price", "ask_price", "bid_size", "ask_size", "wap"]
    for i in cols:
        df[f"{i}_t-60"] = df_g[i].shift(-6)
    for i in cols:
        df[f"{i}_t10"] = df_g[i].shift(1)

    return df

In [46]:
def generate_index(df_in, df_g, rolling_window=10, factor=""):
    df = df_in.copy()
    df[f"index_wap"] = df_g["wap_weighted"].transform("mean")
    return df


def generate_index_2(df_in, df_g, rolling_window=10, factor=""):
    df = df_in.copy()
    df[f"index_wap_t-60"] = df_g["index_wap"].shift(6)
    df[f"index_wap_init"] = df_g["index_wap"].transform("first")
    return df


def generate_index_3(df_in, df_g, rolling_window=10, factor=""):
    df = df_in.copy()
    df[f"index_wap_t-60"] = df_g["index_wap_move_to_init"].shift(6)
    return df

In [47]:
train["wap_weighted"] = train["wap"] * train["index_weight"]
train_g = train.groupby(["stock_id", "date_id"])
train = generate_prev_race(train, train_g)
train["delta_wap"] = train["wap"] / train["wap_t-60"]

train_g = train.groupby(["seconds_in_bucket", "date_id"])
train = generate_index(train, train_g)


train["wap_move_to_init"] = train["wap_calc"] / train["initial_wap"]
train_g = train.groupby(["date_id"])
train = generate_index_2(train, train_g)

train["index_wap_move_to_init"] = train["index_wap"] / train["index_wap_init"]
train_g = train.groupby(["date_id"])
train = generate_index_3(train, train_g)

In [48]:
train.columns

Index(['stock_id', 'date_id', 'seconds_in_bucket', 'imbalance_size',
       'imbalance_buy_sell_flag', 'reference_price', 'matched_size',
       'far_price', 'near_price', 'bid_price', 'bid_size', 'ask_price',
       'ask_size', 'wap', 'target', 'time_id', 'row_id', 'index_weight',
       'wap_calc', 'wap_weighted', 'wap_t-60', 'target_t-60', 'initial_wap',
       'initial_bid_size', 'initial_ask_size', 'bid_price_t-60',
       'ask_price_t-60', 'bid_size_t-60', 'ask_size_t-60', 'bid_price_t10',
       'ask_price_t10', 'bid_size_t10', 'ask_size_t10', 'wap_t10', 'delta_wap',
       'index_wap', 'wap_move_to_init', 'index_wap_t-60', 'index_wap_init',
       'index_wap_move_to_init'],
      dtype='object')

In [49]:
train["target_calc"] = (
    -(
        (train["wap_t-60"] / train["wap"])
        - (train["index_wap_t-60"] / train["index_wap_move_to_init"])
    )
    * 10000
)
train["target_delta"] = train["target_t-60"] - train["target_calc"]

In [50]:
train_stock_0 = train[train["stock_id"] == 0].dropna(subset="bid_size_t-60").copy()
train_stock_0.head(20)

Unnamed: 0,stock_id,date_id,seconds_in_bucket,imbalance_size,imbalance_buy_sell_flag,reference_price,matched_size,far_price,near_price,bid_price,...,ask_size_t10,wap_t10,delta_wap,index_wap,wap_move_to_init,index_wap_t-60,index_wap_init,index_wap_move_to_init,target_calc,target_delta
0,0,0,0,3180602.69,1,0.999812,13380276.64,,,0.999812,...,,,0.999483,0.005031,1.0,,0.005031,1.0,,
1,0,0,10,1299772.7,1,1.000026,15261106.63,,,0.999812,...,8493.03,1.0,0.999471,0.005033,0.999892,,0.005031,1.000356,,
2,0,0,20,1299772.7,1,0.999919,15261106.63,,,0.999812,...,23519.16,0.999892,0.999694,0.005034,0.999842,,0.005031,1.000525,,
3,0,0,30,1299772.7,1,1.000133,15261106.63,,,1.000026,...,12131.6,0.999842,0.999659,0.005034,1.000085,,0.005031,1.000547,,
4,0,0,40,1218204.43,1,1.000455,15342674.9,,,1.000241,...,46203.3,1.000085,1.000056,0.005035,1.000317,,0.005031,1.000635,,
5,0,0,50,1218204.43,1,1.000455,15342674.9,,,1.000348,...,26610.45,1.000317,1.000392,0.005035,1.000435,,0.005031,1.000668,,
6,0,0,60,1218204.43,1,1.000562,15342674.9,,,1.000455,...,9897.22,1.000434,1.000622,0.005036,1.000517,1.0,0.005031,1.000815,-1.930193,-1.099511
7,0,0,70,1264494.89,1,1.000455,15352380.96,,,1.000348,...,10085.04,1.000517,1.000459,0.005036,1.000422,1.000356,0.005031,1.000843,-0.279044,0.668858
8,0,0,80,1189832.86,1,1.000241,15427043.0,,,1.000133,...,17366.82,1.000421,1.000322,0.005033,1.000148,1.000525,0.005031,1.000409,4.376209,-0.1562
9,0,0,90,1189272.89,1,1.000562,15427602.97,,,1.000348,...,61984.4,1.000148,1.000639,0.005033,1.000427,1.000547,0.005031,1.000345,8.409505,-2.959257


In [51]:
train[
    [
        "seconds_in_bucket",
        "imbalance_size",
        "imbalance_buy_sell_flag",
        "reference_price",
        "matched_size",
        "far_price",
        "near_price",
        "bid_price",
        "bid_size",
        "ask_price",
        "ask_size",
        "wap",
        "index_weight",
        "wap_calc",
        "initial_wap",
        "wap_weighted",
        "index_wap",
        "index_wap_init",
        "index_wap_move_to_init",
    ]
].isna().sum()

seconds_in_bucket                0
imbalance_size                 220
imbalance_buy_sell_flag          0
reference_price                220
matched_size                   220
far_price                  2894342
near_price                 2857180
bid_price                      220
bid_size                         0
ask_price                      220
ask_size                         0
wap                            220
index_weight                     0
wap_calc                       220
initial_wap                    220
wap_weighted                   220
index_wap                        0
index_wap_init                   0
index_wap_move_to_init           0
dtype: int64

In [52]:
# train_stock_0.to_csv('train_with_new_vars_0stock.csv')

In [53]:
median_vol = pd.read_csv("archive/MedianVolV2.csv")
median_vol.index.name = "stock_id"
median_vol = median_vol[["overall_medvol", "first5min_medvol", "last5min_medvol"]]
median_sizes = (
    train.groupby("stock_id")["bid_size"].median()
    + train.groupby("stock_id")["ask_size"].median()
)
std_sizes = (
    train.groupby("stock_id")["bid_size"].median()
    + train.groupby("stock_id")["ask_size"].median()
)

In [54]:
train["bid_price_target"] = train["bid_price"] - train["bid_price_t-60"]
train["bid_price_t-60"] = train["bid_price_target"] * 10_000

In [55]:
train["wap_target"] = train["wap"] - train["wap_t-60"]
train["wap_price_t-60"] = train["wap_target"] * 10_000

In [56]:
targets = ["wap", "bid_price", "ask_price"]
for i in targets:
    train[f"{i}_prev_move"] = (train[f"{i}"] - train[f"{i}_t10"]).fillna(0) * 10000

In [57]:
train["ask_price_target"] = train["ask_price"] - train["ask_price_t-60"]
train["ask_price_t-60"] = train["ask_price_target"] * 10_000

In [58]:
train[["bid_price_t-60", "bid_price"]].head(10)

Unnamed: 0,bid_price_t-60,bid_price
0,-6.43,0.999812
1,-5.36,0.999812
2,-3.21,0.999812
3,-3.22,1.000026
4,0.0,1.000241
5,3.22,1.000348
6,6.43,1.000455
7,5.36,1.000348
8,4.28,1.000133
9,6.43,1.000348


In [59]:
def feat_eng(df):
    cols = [c for c in df.columns if c not in ["row_id", "time_id"]]
    df = df[cols]
    df = df.merge(median_vol, how="left", left_on="stock_id", right_index=True)

    df["bid_plus_ask_sizes"] = df["bid_size"] + train["ask_size"]
    #     df['median_size'] = df['stock_id'].map(median_sizes.to_dict())
    df["std_size"] = df["stock_id"].map(std_sizes.to_dict())
    #     df['high_volume'] = np.where(df['bid_plus_ask_sizes'] > df['median_size'], 1, 0)
    df["imbalance_ratio"] = df["imbalance_size"] / df["matched_size"]

    df["imb_s1"] = df.eval("(bid_size-ask_size)/(bid_size+ask_size)")
    df["imb_s2"] = df.eval(
        "(imbalance_size-matched_size)/(matched_size+imbalance_size)"
    )

    df["ask_x_size"] = df.eval("ask_size*ask_price")
    df["bid_x_size"] = df.eval("bid_size*bid_price")

    df["ask_minus_bid"] = df["ask_x_size"] - df["bid_x_size"]

    df["bid_size_over_ask_size"] = df["bid_size"].div(df["ask_size"])
    df["bid_price_over_ask_price"] = df["bid_price"].div(df["ask_price"])

    prices = [
        "reference_price",
        "far_price",
        "near_price",
        "ask_price",
        "bid_price",
        "wap",
    ]

    for c in combinations(prices, 2):
        df[f"{c[0]}_minus_{c[1]}"] = (df[f"{c[0]}"] - df[f"{c[1]}"]).astype(np.float32)
        df[f"{c[0]}_times_{c[1]}"] = (df[f"{c[0]}"] * df[f"{c[1]}"]).astype(np.float32)
        df[f"{c[0]}_{c[1]}_imb"] = df.eval(f"({c[0]}-{c[1]})/({c[0]}+{c[1]})")

    for c in combinations(prices, 3):
        max_ = df[list(c)].max(axis=1)
        min_ = df[list(c)].min(axis=1)
        mid_ = df[list(c)].sum(axis=1) - min_ - max_

        df[f"{c[0]}_{c[1]}_{c[2]}_imb2"] = (max_ - mid_) / (mid_ - min_)

    df.drop(
        columns=[
            # 'date_id',
            "reference_price_far_price_imb",
            "reference_price_minus_near_price",
            "reference_price_near_price_imb",
            "far_price_near_price_imb",
            "far_price_ask_price_imb",
            "far_price_bid_price_imb",
            "far_price_minus_wap",
            "std_size",
            "bid_size_over_ask_size",
            "ask_price_bid_price_imb",
            "near_price_times_wap",
        ],
        inplace=True,
    )

    # gc.collect()

    df.replace([np.inf, -np.inf], 0, inplace=True)

    return df

In [60]:
y = train["target"].values
X = feat_eng(train)
prices = [
    c for c in X.columns if ("price" in c) and ("target" not in c) and ("60" not in c)
]
print(prices)

['reference_price', 'far_price', 'near_price', 'bid_price', 'ask_price', 'bid_price_t10', 'ask_price_t10', 'bid_price_prev_move', 'ask_price_prev_move', 'bid_price_over_ask_price', 'reference_price_minus_far_price', 'reference_price_times_far_price', 'reference_price_times_near_price', 'reference_price_minus_ask_price', 'reference_price_times_ask_price', 'reference_price_ask_price_imb', 'reference_price_minus_bid_price', 'reference_price_times_bid_price', 'reference_price_bid_price_imb', 'reference_price_minus_wap', 'reference_price_times_wap', 'reference_price_wap_imb', 'far_price_minus_near_price', 'far_price_times_near_price', 'far_price_minus_ask_price', 'far_price_times_ask_price', 'far_price_minus_bid_price', 'far_price_times_bid_price', 'far_price_times_wap', 'far_price_wap_imb', 'near_price_minus_ask_price', 'near_price_times_ask_price', 'near_price_ask_price_imb', 'near_price_minus_bid_price', 'near_price_times_bid_price', 'near_price_bid_price_imb', 'near_price_minus_wap', 'n

In [61]:
prices = [
    c for c in X.columns if ("price" in c) and ("target" not in c) and ("60" not in c)
]
# prices = [c for c in train.columns if 'price' in c]
pca_prices = PCA(n_components=1)
X["pca_prices"] = pca_prices.fit_transform(X[prices].fillna(1))

In [62]:
X.dtypes

stock_id                                 int64
date_id                                  int64
seconds_in_bucket                        int64
imbalance_size                         float64
imbalance_buy_sell_flag                  int64
                                        ...   
near_price_ask_price_bid_price_imb2    float64
near_price_ask_price_wap_imb2          float64
near_price_bid_price_wap_imb2          float64
ask_price_bid_price_wap_imb2           float64
pca_prices                             float64
Length: 115, dtype: object

In [63]:
lgbm = lgb.Booster(model_file="data/lgbm_model_new_t60.lgb")
X_train = X[[c for c in X.columns if ("target" not in c) and ("60" not in c)]].drop(
    columns=["delta_wap", "date_id"]
)
lgbm_preds = lgbm.predict(X_train)
X["lgbm_preds"] = lgbm_preds

del pca_prices

In [64]:
lgbm.feature_name()

['stock_id',
 'seconds_in_bucket',
 'imbalance_size',
 'imbalance_buy_sell_flag',
 'reference_price',
 'matched_size',
 'far_price',
 'near_price',
 'bid_price',
 'bid_size',
 'ask_price',
 'ask_size',
 'wap',
 'index_weight',
 'wap_calc',
 'wap_weighted',
 'initial_wap',
 'initial_bid_size',
 'initial_ask_size',
 'bid_price_t10',
 'ask_price_t10',
 'bid_size_t10',
 'ask_size_t10',
 'wap_t10',
 'index_wap',
 'wap_move_to_init',
 'index_wap_init',
 'index_wap_move_to_init',
 'wap_prev_move',
 'bid_price_prev_move',
 'ask_price_prev_move',
 'overall_medvol',
 'first5min_medvol',
 'last5min_medvol',
 'bid_plus_ask_sizes',
 'imbalance_ratio',
 'imb_s1',
 'imb_s2',
 'ask_x_size',
 'bid_x_size',
 'ask_minus_bid',
 'bid_price_over_ask_price',
 'reference_price_minus_far_price',
 'reference_price_times_far_price',
 'reference_price_times_near_price',
 'reference_price_minus_ask_price',
 'reference_price_times_ask_price',
 'reference_price_ask_price_imb',
 'reference_price_minus_bid_price',
 'r

In [65]:
# X = X.join(pca)
X = X.dropna(subset="wap_t-60").reset_index()

In [66]:
del train

In [67]:
X["stats"] = np.split(
    np.nan_to_num(X[stat_cols].to_numpy(), nan=-1), indices_or_sections=len(X)
)

In [68]:
sys.getsizeof(X) / (1024**3)

4.189441833645105

In [69]:
X.pca_prices.value_counts().head(20)

pca_prices
-1.384228e+11    408262
-1.384228e+11    382449
-1.384228e+11    119352
-1.384228e+11    112550
-1.384228e+11    102265
-1.384228e+11     94369
-1.384228e+11     91487
-1.384228e+11     89059
-1.384228e+11     79446
-1.384228e+11     70583
-1.384228e+11     63254
-1.384228e+11     57635
-1.384228e+11     56887
-1.384228e+11     50057
-1.384228e+11     44411
-1.384228e+11     39646
-1.384228e+11     35161
-1.384228e+11     31039
-1.384228e+11     27870
-1.384228e+11     24789
Name: count, dtype: int64

In [70]:
X["wap_category"] = X["wap_price_t-60"].apply(
    lambda x: 0
    if x < -10
    else (
        1
        if x < -5
        else (
            2
            if x < -1.5
            else (3 if x < 1.5 else (4 if x < 5 else (5 if x < 10 else 6)))
        )
    )
)

In [71]:
X[["wap_category", "wap_price_t-60"]]

Unnamed: 0,wap_category,wap_price_t-60
0,1,-5.17
1,1,-5.29
2,2,-3.06
3,2,-3.41
4,3,0.56
...,...,...
4666363,6,10.05
4666364,4,4.36
4666365,5,5.80
4666366,2,-2.66


In [72]:
# X["stats"] = X["stats"].apply(lambda x: x.reshape(-1))
# print(X["stats"].head(10))
# X.to_feather('train_data_with_features.fth')

0    [0.0, 3180602.69, 1.0, 0.999812, 13380276.64, ...
1    [10.0, 1299772.7, 1.0, 1.000026, 15261106.63, ...
2    [20.0, 1299772.7, 1.0, 0.999919, 15261106.63, ...
3    [30.0, 1299772.7, 1.0, 1.000133, 15261106.63, ...
4    [40.0, 1218204.43, 1.0, 1.000455, 15342674.9, ...
5    [50.0, 1218204.43, 1.0, 1.000455, 15342674.9, ...
6    [60.0, 1218204.43, 1.0, 1.000562, 15342674.9, ...
7    [70.0, 1264494.89, 1.0, 1.000455, 15352380.96,...
8    [80.0, 1189832.86, 1.0, 1.000241, 15427043.0, ...
9    [90.0, 1189272.89, 1.0, 1.000562, 15427602.97,...
Name: stats, dtype: object


In [73]:
X = pd.read_feather('train_data_with_features.fth')

In [74]:
X["wap_category"] = pd.qcut(X["wap_price_t-60"], q=5)
X["target_category"] = pd.qcut(X["target"], q=5)

In [75]:
weights = (X["wap_category"].value_counts(sort=False).reset_index().sort_values("wap_category"))

In [76]:
weights["norm_count"] = 1 - (weights["count"] / weights["count"].sum())
weights
weight = torch.tensor(weights["norm_count"].to_numpy(), device="cuda:0")
weight

tensor([0.8000, 0.7998, 0.8000, 0.8001, 0.8001], device='cuda:0',
       dtype=torch.float64)

In [77]:
means_target = X.groupby('target_category')['target'].median().reset_index().reset_index(names='original_index').rename(columns={'target':'mean_target'})
means_target['target_cat_name'] = means_target['original_index'].astype(str)+means_target['target_category'].astype(str)
means = X.groupby('wap_category')['wap_price_t-60'].median().reset_index().reset_index(names='original_index').rename(columns={'wap_price_t-60':'mean_wap'})
means['wap_cat_name'] = means['original_index'].astype(str)+means['wap_category'].astype(str)
means = means.merge(means_target,on='original_index')
means

  means_target = X.groupby('target_category')['target'].median().reset_index().reset_index(names='original_index').rename(columns={'target':'mean_target'})
  means = X.groupby('wap_category')['wap_price_t-60'].median().reset_index().reset_index(names='original_index').rename(columns={'wap_price_t-60':'mean_wap'})


Unnamed: 0,original_index,wap_category,mean_wap,wap_cat_name,target_category,mean_target,target_cat_name
0,0,"(-379.691, -6.98]",-11.92,"0(-379.691, -6.98]","(-385.291, -5.9]",-9.940267,"0(-385.291, -5.9]"
1,1,"(-6.98, -1.79]",-4.02,"1(-6.98, -1.79]","(-5.9, -1.67]",-3.499985,"1(-5.9, -1.67]"
2,2,"(-1.79, 1.82]",0.01,"2(-1.79, 1.82]","(-1.67, 1.56]",-0.050068,"2(-1.67, 1.56]"
3,3,"(1.82, 7.05]",4.08,"3(1.82, 7.05]","(1.56, 5.76]",3.390312,"3(1.56, 5.76]"
4,4,"(7.05, 392.99]",11.96,"4(7.05, 392.99]","(5.76, 387.779]",9.800196,"4(5.76, 387.779]"


In [78]:
ohe = OneHotEncoder(sparse_output=False)
ohe_out = ohe.fit_transform(
    X["wap_category"].to_numpy().reshape(-1, 1),
)
X["wap_target_OHE"] = [x for x in ohe_out]

ohe = OneHotEncoder(sparse_output=False)
ohe_out = ohe.fit_transform(
    X["target_category"].to_numpy().reshape(-1, 1),
)
X["target_OHE"] = [x for x in ohe_out]

In [79]:
X["target_OHE"][0]

array([0., 1., 0., 0., 0.])

In [80]:
import pandas as pd
import numpy as np

# Example DataFrame
data = {'one_hot_encoded': [
    np.array([0., 1., 0., 0., 0.]),
    np.array([1., 0., 0., 0., 0.]),
    np.array([0., 0., 0., 1., 0.]),
    np.array([0., 0., 1., 0., 0.]),
    np.array([0., 0., 0., 0., 1.])
]}
df = pd.DataFrame(data)

# Function to blur the one-hot encoded vectors
def blur_vector(vector, blur_factor=0.2):
    idx = np.argmax(vector)  # Get the index of the one-hot class
    blurred_vector = np.zeros_like(vector)
    
    blurred_vector[idx] += (1 - blur_factor)  # Highest probability to the original class
    
    # Distribute the blur_factor to the neighbors without wrapping around
    for i in range(1, len(vector)):
        left = idx - i
        right = idx + i
        
        if left >= 0:
            blurred_vector[left] += blur_factor / i
        if right < len(vector):
            blurred_vector[right] += blur_factor / i
        if left < 0 and right >= len(vector):
            break
    
    return blurred_vector

# Apply the blur_vector function to the 'one_hot_encoded' column
df['blurred_encoded'] = df['one_hot_encoded'].apply(blur_vector)

print(df)

             one_hot_encoded                             blurred_encoded
0  [0.0, 1.0, 0.0, 0.0, 0.0]   [0.2, 0.8, 0.2, 0.1, 0.06666666666666667]
1  [1.0, 0.0, 0.0, 0.0, 0.0]  [0.8, 0.2, 0.1, 0.06666666666666667, 0.05]
2  [0.0, 0.0, 0.0, 1.0, 0.0]   [0.06666666666666667, 0.1, 0.2, 0.8, 0.2]
3  [0.0, 0.0, 1.0, 0.0, 0.0]                   [0.1, 0.2, 0.8, 0.2, 0.1]
4  [0.0, 0.0, 0.0, 0.0, 1.0]  [0.05, 0.06666666666666667, 0.1, 0.2, 0.8]


In [81]:
X["target_OHE"] = X["target_OHE"].apply(blur_vector)
X["wap_target_OHE"] = X["wap_target_OHE"].apply(blur_vector)

In [82]:
importlib.reload(utils.torch_classes)
trading_data = utils.torch_classes.TradingData(X,means)
hidden_size = 64
# trading_data.generate_batches()

  self.stocksDict[stock_id].wap_daily_ohe[day] = torch.tensor(stock_daily_data['wap_target_OHE'].to_list(), requires_grad=False, device='cuda:0')
 25%|██▌       | 24089/95232 [00:47<02:08, 553.86it/s]

In [None]:
trading_data.generate_batches()

Length of train: 385, Length of test 96


100%|██████████| 385/385 [00:00<00:00, 387.98it/s]
100%|██████████| 95/95 [00:00<00:00, 456.26it/s]


In [None]:
# for i,stocks in enumerate(trading_data.stocksDict.values()):
#     if i==0:
#         continue
#     else:
#         stocks.data_daily = []
# trading_data.train_batches = []
# del train
# del X
# X = []
torch.cuda.empty_cache()
gc.collect()
# del pca_prices

0

In [None]:
# del pca, pca_prices_items

In [None]:
importlib.reload(utils.torch_classes)
importlib.reload(utils.training_testing)

<module 'utils.training_testing' from 'c:\\Users\\Nick\\Documents\\GitHub\\OptiverKaggle\\utils\\training_testing.py'>

In [None]:
optim_dict = {
    "RMSProp": optim.RMSprop,
    "Adam": optim.Adam,
    "RAdam": optim.RAdam,
    "NAdam": optim.NAdam,
    "AdamW": optim.AdamW,
    "SGD": optim.SGD,
    "Rprop": optim.Rprop,
}

In [None]:
def model_pipeline(
    trading_df=trading_data, config=None, prev_model_file=None, prev_model_version=450
):
    trading_df = trading_data
    with wandb.init(project="Optviver_new", config=config, save_code=True):
        wandb.define_metric("val_epoch_loss_l1", summary="min")
        wandb.define_metric("epoch_l1_loss", summary="min")
        wandb.define_metric("Accuracy", summary="max")
        config['prev_model_file'] = prev_model_file
        config = wandb.config

        input_size = len(trading_df.stocksDict[0].data_daily[0][0])

        target_size_ohe = len(trading_df.stocksDict[0].target_daily_ohe[0][0])

        print(target_size_ohe)
        
        model = utils.torch_classes.GRUNetV4(
            input_size,
            config["hidden_size"],
            num_layers=config["num_layers"],
            fc0_size=config["fc0_size"],
            target_size=target_size_ohe
        ).to("cuda:0")

        wandb.watch(model, log='all') 
        optimizer = optim_dict[config['optim']](model.parameters(), lr=config['learning_rate'], weight_decay=0.01)
        
        config = wandb.config
        print(config)
        print(config['ohe_targets'])
        if prev_model_file != None:
            model_name = prev_model_file
            config['prev_model_file'] = prev_model_file            
            model_loc = f"models/{model_name}/{model_name}_{prev_model_version}.pt"
            model_data = torch.load(model_loc, map_location=torch.device("cuda:0"))
            print(model_data["model_state_dict"].keys())
            print(model_data.keys())

            # del_keys = ['fc_final.weight', 'fc_final.bias', 'fc_wap0.weight']
            # [model_data['model_state_dict'].pop(k) for k in del_keys]
            model.load_state_dict(model_data["model_state_dict"], strict=False)
            # optimizer.load_state_dict(model_data["optim"])

        print(model)
        trading_df.reset_hidden(
            hidden_size=config["hidden_size"], num_layers=config["num_layers"]
        )
        criterion = nn.CrossEntropyLoss()
        print(criterion)
        print(optimizer)
        output = utils.training_testing_double.train_model(
            trading_df, model, config, optimizer, criterion
        )

    return output


# arroios

In [None]:
importlib.reload(utils.torch_classes)
importlib.reload(utils.training_testing)
importlib.reload(utils.training_testing_double)

<module 'utils.training_testing_double' from 'c:\\Users\\Nick\\Documents\\GitHub\\OptiverKaggle\\utils\\training_testing_double.py'>

In [None]:
config_static = {
    "learning_rate": 0.00005,
    "hidden_size": 256,
    "num_layers": 2,
    "batch_norm": 1,
    "epochs": 5000,
    "mini_batches": 20,
    "fc0_size": 256,
    "note": "GRUNetV4, no detaching, wap, 20 mini batch, upping hidden size",
    'optim': 'RMSProp',
    'ohe_targets':means,
}
config = config_static
torch.cuda.empty_cache()
trading_data.detach_hidden()

In [None]:
for s in trading_data.stocksDict.values():
    s.hidden_out = torch.zeros(49,5)

In [None]:
x = torch.rand(49,1000)
x.view((-1,200,5)).shape

NameError: name 'torch' is not defined

In [None]:
CUDA_LAUNCH_BLOCKING = 1
# output = model_pipeline(trading_data, config_static,)
output = model_pipeline(trading_data, config_static)

5
{'learning_rate': 5e-05, 'hidden_size': 256, 'num_layers': 2, 'batch_norm': 1, 'epochs': 5000, 'mini_batches': 20, 'fc0_size': 256, 'note': 'GRUNetV4, no detaching, wap, 20 mini batch, upping hidden size', 'optim': 'RMSProp', 'ohe_targets': '   original_index       wap_category  mean_wap        wap_cat_name  \\\n0               0  (-379.691, -6.98]    -11.92  0(-379.691, -6.98]   \n1               1     (-6.98, -1.79]     -4.02     1(-6.98, -1.79]   \n2               2      (-1.79, 1.82]      0.01      2(-1.79, 1.82]   \n3               3       (1.82, 7.05]      4.08       3(1.82, 7.05]   \n4               4     (7.05, 392.99]     11.96     4(7.05, 392.99]   \n\n    target_category  mean_target    target_cat_name  \n0  (-385.291, -5.9]    -9.940267  0(-385.291, -5.9]  \n1     (-5.9, -1.67]    -3.499985     1(-5.9, -1.67]  \n2     (-1.67, 1.56]    -0.050068     2(-1.67, 1.56]  \n3      (1.56, 5.76]     3.390312      3(1.56, 5.76]  \n4   (5.76, 387.779]     9.800196   4(5.76, 387.779] 

  0%|          | 0/5000 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "C:\Users\Nick\AppData\Local\Temp\ipykernel_18904\241991133.py", line 52, in model_pipeline
    output = utils.training_testing_double.train_model(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\Documents\GitHub\OptiverKaggle\utils\training_testing_double.py", line 109, in train_model
    output_wap_ohe, output_wap, hidden, _, x_h = model(new_x, hidden_in)
                                                 ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\torch\nn\modules\module.py", line 1568, in _call_impl
    result = forward_call(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\Documents\GitHub\OptiverKaggle\utils\torch_classes.py", lin

VBox(children=(Label(value='57.480 MB of 57.480 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
Accuracy_wap,▂▁▅▆▆▆▇▇▇▇█▇███████████████▇▇▇▇▇▇▇▇▇▇▇▇▇
L1_loss_wap_epoch,▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▂▂▃▃▃▅▆▆▇▇▇███▆▇▆▅▅
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch_loss,▅█▆▅▁▄▇▅▇▅▅▁▄▁▅▇▅▄▁▄▂▅█▅▅▁▅▁▅▇▄▄▂▅▂▃▆▅▂▃
loss_1,█▁█▄▅▅▅▇▄▅▆▄▅▅▅▄▅▄▄▄▅▅▄▅▆▂▄▅▆▃▂▃▆▆▆▃▆▆▂▂
losst_to_zero,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
output_sd,█▇▂▄▃▃▂▃▃▃▃▃▄▃▃▃▃▂▃▂▂▂▃▂▂▃▂▂▁▃▄▃▁▁▁▂▁▁▃▃
relu_sum,██▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▃▂▂▂▁▁▁▁▁▁▁
target_calc_loss,▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▇▇███████▇▇▇▆▆
train_class_tgt_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Accuracy_wap,0.26455
L1_loss_wap_epoch,6.18971
epoch,67.0
epoch_loss,11.54975
loss_1,12.04104
losst_to_zero,6.17443
output_sd,0.32217
relu_sum,756514.875
target_calc_loss,6.47479
train_class_tgt_loss,0.0


KeyboardInterrupt: 

In [None]:
output_all,Y_ohe_target,stock_ids = output

NameError: name 'output' is not defined

In [None]:
start = [output.grad_fn.next_functions[0][0]]
outs = []
while start:
    fn = start.pop(0)
    print(fn)
    if fn:
        outs.append(fn)
        next_fns = fn.next_functions
        start.extend([x[0] for x in next_fns])
        print(start)


In [None]:
for fn in outs:
    print(fn)

In [None]:
torch.cat([s.hidden_out for s in trading_data.stocksDict.values()],dim=1).shape

torch.Size([49, 25600])

In [None]:
model = utils.torch_classes.GRUNetV3(
    22,
    config["hidden_size"],
    num_layers=config["num_layers"],
    fc0_size=config["fc0_size"],
    target_size=5
).to("cuda:0")

In [None]:
print(model.named_parameters())

<generator object Module.named_parameters at 0x0000026BB89ACD40>


In [None]:
optimizer = optim.SGD(, lr=0.001)

In [None]:
optimizer.param_groups

[{'params': [Parameter containing:
   tensor([[ 3.7291e-01, -4.4458e-01, -4.3755e-02,  3.4391e-01, -7.2737e-02],
           [ 1.2280e-01,  1.3427e-01,  2.1452e-01, -3.4860e-01,  1.2834e-01],
           [ 3.8206e-01, -4.4528e-01, -1.8777e-01, -2.8338e-02, -2.6217e-01],
           [ 2.9334e-01,  1.8038e-01,  1.3834e-01,  1.9175e-01,  2.0782e-01],
           [-8.5392e-02,  3.2079e-01, -1.3422e-01, -4.0403e-01, -2.3458e-01],
           [ 3.2337e-01,  3.2368e-01, -1.1684e-01,  3.6930e-01, -3.0831e-01],
           [ 2.1491e-01,  1.5638e-01, -2.3402e-01, -1.5434e-02, -2.3061e-01],
           [ 2.8756e-01, -4.8865e-02,  8.7331e-02, -1.9059e-01,  2.7535e-02],
           [ 3.6005e-01, -3.6371e-01, -2.7036e-02,  5.9889e-03, -2.7516e-01],
           [-3.1839e-01,  3.7880e-01,  1.8474e-01, -2.0867e-01,  5.1393e-03],
           [-4.0443e-01, -1.5904e-01,  2.0367e-01, -3.6570e-01,  2.4082e-01],
           [ 1.7567e-01, -2.0604e-01, -2.8393e-01,  2.9547e-01, -2.7353e-01],
           [-1.4293e-03,  8.7

In [None]:
for x in model.named_parameters():
    print(x)

('gru.weight_ih_l0', Parameter containing:
tensor([[ 0.0692,  0.0354,  0.0277,  ...,  0.0724,  0.0817, -0.0029],
        [ 0.0584, -0.0322, -0.0534,  ..., -0.0696, -0.0301, -0.0261],
        [ 0.0235,  0.0013, -0.0771,  ...,  0.0026, -0.0435,  0.0021],
        ...,
        [-0.0506,  0.0410, -0.0296,  ...,  0.0301, -0.0777,  0.0147],
        [ 0.0097, -0.0110, -0.0794,  ...,  0.0062, -0.0580,  0.0042],
        [-0.0327,  0.0709,  0.0339,  ...,  0.0064,  0.0483,  0.0750]],
       device='cuda:0', requires_grad=True))
('gru.weight_hh_l0', Parameter containing:
tensor([[-0.0731,  0.0470,  0.0003,  ...,  0.0390, -0.0785, -0.0132],
        [-0.0131,  0.0352, -0.0494,  ..., -0.0209, -0.0540,  0.0480],
        [-0.0019,  0.0267,  0.0391,  ...,  0.0851, -0.0083, -0.0700],
        ...,
        [ 0.0197, -0.0308, -0.0673,  ...,  0.0164,  0.0050,  0.0471],
        [-0.0039, -0.0608, -0.0284,  ..., -0.0233,  0.0607,  0.0054],
        [ 0.0729, -0.0014, -0.0598,  ...,  0.0774,  0.0224, -0.0415]],
 

In [None]:
new_x = trading_data.train_batches[0]
stocks = [
    trading_data.stocksDict[x] for x in trading_data.stock_batches[0]
]  # Stocks for the Day
hidden_in = torch.stack([x.hidden for x in stocks]).transpose(0, 1)

In [None]:
output, output_wap,output_target, hidden, _,x_h  = model(new_x, hidden_in)

In [None]:
output[0]

tensor([[ 6.3343e-01,  8.8778e-01, -4.2058e-01, -6.4730e-01,  8.7384e-01],
        [-6.2712e-02,  6.1765e-01,  1.7410e-02, -3.4411e-01,  2.0902e-01],
        [-6.1046e-01, -1.9967e-01,  6.1882e-02,  4.8858e-01,  3.1453e-01],
        [-1.6122e-01,  2.8558e-01,  1.4333e-01,  7.7996e-01,  1.7925e-01],
        [-1.0556e+00, -2.4617e-02,  1.4076e+00,  3.1731e-01, -3.9524e-01],
        [-7.3603e-02,  3.2032e-01,  8.8867e-02, -4.2863e-01, -1.4289e-01],
        [ 3.0614e-01,  1.5331e-02, -5.9832e-01, -4.5656e-01, -3.0634e-01],
        [ 5.7669e-01,  2.6611e-01,  3.3720e-01, -1.9185e-01, -2.4626e-01],
        [-5.8149e-01,  1.2545e-01, -4.5483e-01, -1.1380e+00,  1.2437e-01],
        [ 3.2330e-01,  4.1436e-01, -1.2951e-01,  2.2822e-01,  2.1477e-01],
        [-5.2429e-01,  1.6436e-01, -4.9772e-02, -2.0800e-01,  2.0340e-01],
        [ 5.5443e-02, -3.1750e-03, -5.7174e-02,  1.7509e-01, -2.1420e-02],
        [ 1.3214e-01,  1.8137e-01,  6.0655e-01,  1.6703e-01, -1.2482e-01],
        [-2.4990e-01,  2.

In [None]:
x_h[0].shape

torch.Size([49, 128])

In [None]:
hidden.shape

torch.Size([2, 191, 128])

In [None]:
x_h[:,0,:]

tensor([[ 0.0715, -0.3007, -0.3952,  ...,  0.1994,  0.2760,  0.6914],
        [-0.5203, -0.3244,  0.1136,  ...,  0.2139, -0.2839, -0.0521],
        [-0.1306, -0.4088, -0.4979,  ...,  0.1097,  0.1139,  0.3685],
        ...,
        [ 0.2355, -0.4255, -0.3770,  ...,  0.0821,  0.4428,  0.7968],
        [ 0.3527, -0.0424, -0.3906,  ...,  0.2032,  0.5162,  0.7078],
        [ 0.1563, -0.3637, -0.5328,  ...,  0.2227,  0.5381,  0.7269]],
       device='cuda:0', grad_fn=<SliceBackward0>)

In [None]:
x_h.transpose(0,1).view(49,-1).shape

torch.Size([49, 24448])

In [None]:
x = torch.rand((10,5,8))
x

tensor([[[0.9971, 0.1248, 0.6241, 0.2072, 0.3945, 0.6816, 0.8236, 0.6424],
         [0.5264, 0.4756, 0.9043, 0.3493, 0.4849, 0.1512, 0.5880, 0.7806],
         [0.9071, 0.7058, 0.7836, 0.3517, 0.7901, 0.9152, 0.0402, 0.9141],
         [0.3636, 0.0574, 0.8023, 0.4773, 0.7985, 0.3932, 0.4654, 0.4337],
         [0.3068, 0.9018, 0.9335, 0.4926, 0.0184, 0.2860, 0.7080, 0.1708]],

        [[0.7209, 0.0880, 0.2006, 0.6237, 0.2939, 0.5592, 0.2364, 0.0083],
         [0.9159, 0.3644, 0.0816, 0.0738, 0.0552, 0.1786, 0.9659, 0.3292],
         [0.2573, 0.9850, 0.7967, 0.9886, 0.5799, 0.7047, 0.8740, 0.2954],
         [0.3082, 0.5769, 0.9567, 0.2306, 0.0250, 0.8337, 0.5929, 0.0734],
         [0.7718, 0.4955, 0.1319, 0.7638, 0.3982, 0.7447, 0.3115, 0.8503]],

        [[0.6962, 0.2655, 0.0194, 0.1229, 0.9939, 0.5296, 0.1092, 0.2704],
         [0.0443, 0.9956, 0.8027, 0.8369, 0.6567, 0.2214, 0.7674, 0.7787],
         [0.5406, 0.5198, 0.1325, 0.6316, 0.1649, 0.2789, 0.7535, 0.1267],
         [0.3095, 0.5

In [None]:
x.transpose(0,1).reshape(5,-1)

tensor([[0.9971, 0.1248, 0.6241, 0.2072, 0.3945, 0.6816, 0.8236, 0.6424, 0.7209,
         0.0880, 0.2006, 0.6237, 0.2939, 0.5592, 0.2364, 0.0083, 0.6962, 0.2655,
         0.0194, 0.1229, 0.9939, 0.5296, 0.1092, 0.2704, 0.7649, 0.5629, 0.0060,
         0.5607, 0.1823, 0.8384, 0.7839, 0.7994, 0.5819, 0.8299, 0.5122, 0.4393,
         0.9424, 0.3924, 0.2382, 0.8192, 0.9183, 0.3582, 0.5947, 0.2193, 0.3878,
         0.0769, 0.0901, 0.7007, 0.9879, 0.1685, 0.9008, 0.5366, 0.3259, 0.1680,
         0.4694, 0.8939, 0.9315, 0.4870, 0.4023, 0.7973, 0.8637, 0.0342, 0.7653,
         0.2355, 0.2494, 0.7771, 0.4415, 0.3413, 0.5383, 0.6422, 0.1188, 0.8422,
         0.2287, 0.7353, 0.8100, 0.9965, 0.3683, 0.5774, 0.8625, 0.4009],
        [0.5264, 0.4756, 0.9043, 0.3493, 0.4849, 0.1512, 0.5880, 0.7806, 0.9159,
         0.3644, 0.0816, 0.0738, 0.0552, 0.1786, 0.9659, 0.3292, 0.0443, 0.9956,
         0.8027, 0.8369, 0.6567, 0.2214, 0.7674, 0.7787, 0.5683, 0.0966, 0.9478,
         0.0184, 0.0397, 0.2665, 0.

In [None]:
# torch.cuda.memory._dump_snapshot("my_snapshot.pickle")

In [None]:
torch.rand(10).to("cuda:0")

RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
stop

In [None]:
output

In [None]:
sweep_config = {"method": "random"}

metric = {"name": "val_epoch_loss", "goal": "minimize"}

sweep_config["metric"] = metric


parameters_dict = {
    "optimizer": {"values": ["adamW", "adam", "SGD", "RMSprop"]},
    "f0_layer_size": {"values": [128]},
    "f1_layer_size": {"values": [64]},
    "num_layers": {"values": [2]},
    "hidden_size": {"values": [128, 256, 512]},
    "learning_rate": {"values": [0.001, 0.0005, 0.0001, 0.00005, 0.00001]},
    "epochs": {"value": 500}
    # 'batch_norm':{'values':[0,1,2]}
}

sweep_config["parameters"] = parameters_dict

sweep_id = wandb.sweep(sweep_config, project="Optiver Sweeps")
# CUDA_LAUNCH_BLOCKING=1
wandb.agent(sweep_id, function=model_pipeline, count=100)

Create sweep with ID: upepev1k
Sweep URL: https://wandb.ai/nickojelly/Optiver%20Sweeps/sweeps/upepev1k


[34m[1mwandb[0m: Agent Starting Run: t85ko0dj with config:
[34m[1mwandb[0m: 	epochs: 500
[34m[1mwandb[0m: 	f0_layer_size: 128
[34m[1mwandb[0m: 	f1_layer_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: SGD
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Traceback (most recent call last):
  File "C:\Users\Nick\AppData\Local\Temp\ipykernel_18468\3034068021.py", line 10, in model_pipeline
    model = utils.torch_classes.GRUNetV3(input_size,config['hidden_size'],num_layers=config['num_layers']).to('cuda:0')
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\torch\nn\modules\module.py", line 1160, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\torch\nn\modules\module.py", line 810, in _apply
    module._apply(fn)
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\torch\nn\modules\rnn.py", line 213, in _apply
    ret = super()._apply(fn, recurse)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nick\.conda\envs\python311\Lib\site-packages\torch\nn\modules\module.py", line 833, in _apply
    param_applied = fn(pa

VBox(children=(Label(value='0.066 MB of 0.066 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
