In [1]:
import os

# Report the working directory, then list every file found under the Kaggle
# input mount so the dataset paths used later in the notebook can be checked.
print("Current DIR: ", os.getcwd())

for folder, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

Current DIR:  /kaggle/working
/kaggle/input/optiver-trading-at-the-close/public_timeseries_testing_util.py
/kaggle/input/optiver-trading-at-the-close/train.csv
/kaggle/input/optiver-trading-at-the-close/example_test_files/sample_submission.csv
/kaggle/input/optiver-trading-at-the-close/example_test_files/revealed_targets.csv
/kaggle/input/optiver-trading-at-the-close/example_test_files/test.csv
/kaggle/input/optiver-trading-at-the-close/optiver2023/competition.cpython-310-x86_64-linux-gnu.so
/kaggle/input/optiver-trading-at-the-close/optiver2023/__init__.py
/kaggle/input/feature-engineer/feature_engineer.py
/kaggle/input/model-fcn-2layer/model_fcn_2layer.pth


In [2]:
# Copy feature_engineer.py from the input dataset into /kaggle/working (the
# current working directory), presumably so that the later
# `from feature_engineer import *` can resolve the module from there.
! cp /kaggle/input/feature-engineer/feature_engineer.py /kaggle/working/feature_engineer.py

In [3]:
import optiver2023
import pandas as pd
from time import time
from feature_engineer import * 
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from torch.optim import Adam, SGD
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

import warnings 
warnings.filterwarnings('ignore')

In [4]:
# Create the competition environment and obtain its test iterator. The
# inference loop further down unpacks every item yielded by `iter_test` as
# (test, revealed_targets, sample_prediction) and submits via `env.predict`.
env = optiver2023.make_env()
iter_test = env.iter_test()

In [5]:
class MyDataset(Dataset):
    """Dataset that pairs each feature row with the target at the same index.

    Args:
        features: container supporting 2-D indexing of the form ``[idx, :]``
            (for example a 2-D tensor or array); stored as ``self.X``.
        target: container supporting ``len()`` and ``[idx]``; stored as
            ``self.y``. Its length defines the size of the dataset.
    """

    def __init__(self, features, target):
        self.X, self.y = features, target

    def __len__(self):
        # The sample count is taken from the target container, not the features.
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(feature_row, target_value)`` pair stored at ``idx``."""
        feature_row = self.X[idx, :]
        target_value = self.y[idx]
        return feature_row, target_value
    

class NeuralNetwork(nn.Module):
    """Fully connected regressor: 128 inputs -> 64 hidden units (ReLU) -> 1 output."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # The attribute name and the position of each layer inside the
        # Sequential (0, 1, 2) determine the parameter names in the state_dict,
        # so they are kept fixed for compatibility with saved weights.
        layers = [
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten every sample to one dimension and run it through the stack.

        Returns a tensor with one output value per sample (last dimension 1).
        """
        return self.linear_relu_stack(self.flatten(x))


def predict(model, pred_loader):
    """Run ``model`` over every batch of ``pred_loader`` and collect the outputs.

    Args:
        model: module called on each feature batch; it is switched to
            evaluation mode before inference.
        pred_loader: iterable yielding ``(features, target)`` pairs. The
            target element is ignored; features are cast to float32.

    Returns:
        A Python list with one entry per sample, each entry being the model
        output for that row converted with ``tolist()`` (so a model that emits
        shape ``(batch, 1)`` yields a list of one-element lists).
    """
    model.eval()
    collected = []
    # Inference only: no gradients are needed.
    with torch.no_grad():
        for batch_features, _unused_target in pred_loader:
            batch_output = model(batch_features.float())
            collected += batch_output.tolist()

    return collected

In [6]:
# Build the network and restore the trained weights.
MODEL_WEIGHTS_PATH = "/kaggle/input/model-fcn-2layer/model_fcn_2layer.pth"

model = NeuralNetwork()

# map_location="cpu": inference in this notebook runs on CPU tensors, and a
# checkpoint saved from a GPU would otherwise fail to load on a CPU-only kernel.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
state_dict = torch.load(MODEL_WEIGHTS_PATH, map_location="cpu")

# Kept as the last expression so the cell still displays the key-matching result.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [7]:
# --- Inference loop -----------------------------------------------------------
# For every batch served by the competition API: extend the rolling cache,
# rebuild the features, run the network and submit the predictions.

# Number of most recent rows kept per stock in the cache between iterations.
CACHE_ROWS_PER_STOCK = 21
# Batch size used when feeding the feature matrix to the network.
PREDICT_BATCH_SIZE = 2048
# Factor applied to the mean seconds-per-iteration to obtain the printed
# "hours" estimate.
# NOTE(review): presumably (expected number of iterations) / 3600 - confirm.
HOURS_PER_ITERATION_SECOND = 1.146
# Columns of the generated feature frame that are not fed to the model.
NON_FEATURE_COLS = ['target', 'date_id', 'dow', 'time_id', 'currently_scored']
# Feature columns that are passed to the model without standardisation.
UNSCALED_COLS = ['stock_id', 'seconds_in_bucket', 'imbalance_buy_sell_flag', 'session_label', 'seconds', 'minute']


def _global_stock_id_feats(frame):
    """Return per-stock size/price aggregates of ``frame`` as a dict of Series.

    The frame is grouped by ``stock_id`` once and the grouped columns are
    reused for all six aggregates.
    """
    by_stock = frame.groupby("stock_id")
    bid_size, ask_size = by_stock["bid_size"], by_stock["ask_size"]
    bid_price, ask_price = by_stock["bid_price"], by_stock["ask_price"]
    return {
        "median_size": bid_size.median() + ask_size.median(),
        "std_size": bid_size.std() + ask_size.std(),
        "ptp_size": bid_size.max() - bid_size.min(),
        "median_price": bid_price.median() + ask_price.median(),
        "std_price": bid_price.std() + ask_price.std(),
        # NOTE(review): bid max minus *ask* min, unlike ptp_size (bid - bid).
        # Left unchanged because the features used to train the model are not
        # visible here - confirm against the training pipeline.
        "ptp_price": bid_price.max() - ask_price.min(),
    }


counter = 0
cache = pd.DataFrame()
qps = []  # despite the name, holds the wall-clock seconds spent per iteration

for (test, revealed_targets, sample_prediction) in iter_test:
    now_time = time()

    # Append the new batch, then compute the per-stock aggregates on the
    # cache as it is before trimming.
    cache = pd.concat([cache, test], ignore_index=True, axis=0)
    global_stock_id_feats = _global_stock_id_feats(cache)

    # After the first iteration keep only the most recent rows of each stock.
    if counter > 0:
        cache = (
            cache.groupby(['stock_id'])
            .tail(CACHE_ROWS_PER_STOCK)
            .sort_values(by=['date_id', 'seconds_in_bucket', 'stock_id'])
            .reset_index(drop=True)
        )

    # Features are generated on the whole cache; the last len(test) rows are
    # taken as the rows of the current batch.
    df_feat = generate_all_features(cache, global_stock_id_feats)[-len(test):]

    feat_cols = [col for col in df_feat.columns if col not in NON_FEATURE_COLS]
    # Explicit copy so the scaled columns are written into an independent
    # frame rather than into a slice of the generated features.
    df_feat = df_feat[feat_cols].copy()

    # NOTE(review): the scaler is re-fitted on every batch, so the scaling
    # statistics come from the current batch only - confirm this matches how
    # the model inputs were scaled during training.
    scaler = StandardScaler()
    standarize_cols = [col for col in feat_cols if col not in UNSCALED_COLS]
    df_feat[standarize_cols] = scaler.fit_transform(df_feat[standarize_cols])

    # MyDataset requires a target; a dummy vector of ones is supplied and
    # ignored by predict().
    target = np.ones(df_feat.shape[0])
    pred_dataset = MyDataset(
        torch.from_numpy(df_feat.values), torch.from_numpy(target)
    )

    prediction_loader = DataLoader(pred_dataset, batch_size=PREDICT_BATCH_SIZE, shuffle=False)
    y_pred = predict(model, prediction_loader)

    # predict() returns one single-element list per row ([[v], [v], ...]).
    # Assigning that directly would store Python lists in the column, so it is
    # flattened to a 1-D float array first.
    # The zero-sum adjustment and clipping used by an earlier LightGBM variant
    # of this loop are not applied to these predictions.
    sample_prediction['target'] = np.asarray(y_pred, dtype=np.float64).reshape(-1)
    env.predict(sample_prediction)  # submit the predictions to the environment
    counter += 1
    qps.append(time() - now_time)

    # Every 10 iterations report the mean time per iteration and the
    # extrapolated total runtime.
    if counter % 10 == 0:
        print(counter, 'qps:', np.mean(qps))

        time_cost = HOURS_PER_ITERATION_SECOND * np.mean(qps)
        print(f"The code will take approximately {np.round(time_cost, 4)} hours to reason about")


This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
10 qps: 0.8892535448074341
The code will take approximately 1.0191 hours to reason about
20 qps: 0.7506235003471374
The code will take approximately 0.8602 hours to reason about
30 qps: 0.7147401094436645
The code will take approximately 0.8191 hours to reason about
40 qps: 0.6962623953819275
The code will take approximately 0.7979 hours to reason about
50 qps: 0.6850412654876709
The code will take approximately 0.7851 hours to reason about
60 qps: 0.6777941068013509
The code will take approximately 0.7768 hours to reason about
70 qps: 0.6723435265677316
The code will take approximately 0.7705 hours to reason about
80 qps: 0.6688307762145996
The code will take approximately 0.7665 hours to reason about
90 qps: 0.6654219680362278
The code will take approximately 0.7626 hours to reason about
100 qps: 0.662209460735321
The code will take approximately 0.7589 hours t