In [114]:
import numpy as np
import torch 
import torch.nn as nn
import pandas as pd

In [115]:
class VolatilityModel(nn.Module):
    def __init__(self, num_stocks, feature_dim):
        super().__init__()
        self.num_stocks = num_stocks
        self.feature_dim = feature_dim
        
        self.stock_embedding = nn.Embedding(self.num_stocks, 8)
        self.gru = nn.GRU(feature_dim=self.feature_dim, hidden_size=64, num_layers=1, batch_first=True)
        
        self.regressor = nn.Sequential(
            nn.Linear(64 + 8, 128),
            nn.GELU(),
            nn.Linear(128, 1)
        )

In [116]:
# (seq_len, features_dim) 

In [126]:
class SimpleMLP(nn.Module):
    def __init__(self, input_dim, hidden_dims: list, output_dim: int, dropout=0.10):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_layers = nn.ModuleList()
        
        fan_in = self.input_dim
        for hidden_dim in hidden_dims:
            self.hidden_layers.append(nn.Linear(fan_in, hidden_dim))
            fan_in = hidden_dim
        self.hidden_layers.append(nn.Linear(fan_in, self.output_dim))
        
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(p=dropout)
        
    @staticmethod
    def criterion(pred, y):
        loss = torch.sqrt(torch.mean((1 - (pred / y))**2))
        return loss
        
    def forward(self, x):
        for layer in self.hidden_layers[:-1]:
            x = layer(x)
            x = self.gelu(x)
            x = self.dropout(x)
            
        x = self.hidden_layers[-1](x)
        return x

In [127]:
df = pd.read_csv('../baseline2.csv', index_col=False).iloc[:, 1:]
df.shape

(428910, 9)

In [128]:
df.head()

Unnamed: 0,stock_id,time_id,sigma,target,size,spread,time_diff,log_time_diff,timefunc
0,0,5,0.004499,0.004136,3179,7.922559,15,2.70805,0.310975
1,0,11,0.001204,0.001445,1287,4.118409,23,3.135494,0.723982
2,0,16,0.002369,0.002168,2161,6.476585,35,3.555348,0.971598
3,0,31,0.002574,0.002195,1962,7.627233,28,3.332205,0.849035
4,0,62,0.001894,0.001747,1791,4.302926,25,3.218876,0.779443


In [129]:
X = df.drop(columns=['stock_id', 'time_id', 'target'])
y = df['target']

X = X.to_numpy()
y = y.to_numpy()

X = torch.tensor(X).to(torch.float)
y = torch.tensor(y).to(torch.float) * 10000

In [130]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [131]:
from torch.utils.data import TensorDataset, DataLoader

dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=256, shuffle=True, pin_memory=True)

In [132]:
INPUT_DIM = X_train.shape[1]
OUTPUT_DIM = 1
HIDDEN_DIMS = [256, 256, 256]
DROPOUT_RATE = 0.1

In [133]:
from torch.optim import Adam

In [134]:
lr = 1e-4
criterion = SimpleMLP.criterion
model = SimpleMLP(INPUT_DIM, HIDDEN_DIMS, OUTPUT_DIM, DROPOUT_RATE)
optimizer = Adam(model.parameters(), lr=lr)
num_epochs = 100
device = 'cuda'

model = model.to(device)
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0
    for i, (batch_x, batch_y) in enumerate(dataloader):
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        preds = model(batch_x)
        loss = criterion(preds, batch_y)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()
    print(f'Epoch: {epoch+1}/{num_epochs}, Loss: {epoch_loss / len(dataloader)}.')

Epoch: 1/100, Loss: 7.025533907632166.
Epoch: 2/100, Loss: 1.1457408933742645.
Epoch: 3/100, Loss: 0.953823443404958.
Epoch: 4/100, Loss: 0.9366568410956619.
Epoch: 5/100, Loss: 0.9248734044548649.
Epoch: 6/100, Loss: 0.9108504899662879.
Epoch: 7/100, Loss: 0.8898163611190164.
Epoch: 8/100, Loss: 0.8656721675600426.
Epoch: 9/100, Loss: 0.8207883940180409.
Epoch: 10/100, Loss: 0.800071074836384.
Epoch: 11/100, Loss: 0.7555407028781399.
Epoch: 12/100, Loss: 0.732449693967832.
Epoch: 13/100, Loss: 0.7089717487271912.
Epoch: 14/100, Loss: 0.7027563287818903.
Epoch: 15/100, Loss: 0.6863737767263636.
Epoch: 16/100, Loss: 0.6715523175211827.
Epoch: 17/100, Loss: 0.6613546261406941.
Epoch: 18/100, Loss: 0.6538321728318774.
Epoch: 19/100, Loss: 0.6530761738751558.
Epoch: 20/100, Loss: 0.6397975632986144.
Epoch: 21/100, Loss: 0.6437754068331963.
Epoch: 22/100, Loss: 0.6369801189453188.
Epoch: 23/100, Loss: 0.6292841480704584.
Epoch: 24/100, Loss: 0.6218728180433013.
Epoch: 25/100, Loss: 0.619318