In [None]:
import os
import gc
import json
import pickle
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
import torch.nn as nn
import random
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

In [None]:
def seed_everything(s):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU plus all CUDA devices) with the same value ``s``, then asks cuDNN
    to use deterministic algorithms.

    Args:
        s: Integer seed shared by all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(s)
    torch.backends.cudnn.deterministic = True


seed_everything(10)

Dataset Modified : 
https://www.kaggle.com/narendra/optiver-features-dataset/log?scriptVersionId=71337174


1. Version 9 --> Added a penalization loss for negative outputs
2. Version 10 --> Added features for individual buckets & made time slices 30 seconds
3. Version 13 --> Added std features for price movements
4. Version 24 --> Added rv1, rv2 to the dataset

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

In [None]:
%%time
# Competition training table (contains the `target` column used by OptiverDataset).
train=pd.read_csv('../input/optiver-realized-volatility-prediction/train.csv')
# NOTE(review): read_pickle can execute arbitrary code embedded in the file;
# only load pickles from a trusted source.
train_features=pd.read_pickle('../input/optiverdataset/features.pkl')
# No `on=` is given, so pandas joins on every column name the two frames share
# (inner join by default). Presumably the id columns of train.csv -- TODO confirm
# and consider passing `on=` explicitly.
train=train.merge(train_features)

# The features frame is no longer needed once merged; release its memory.
del train_features
gc.collect()

train.head()

In [None]:
# List the column names of the merged training frame.
train.columns

CONFIG

In [None]:
class config:
    """Notebook-wide constants shared by the dataset, the model and the trainer."""

    # Number of epochs handed to the Trainer.
    epochs = 30
    # Width of each bucket's feature vector (columns of the per-sample matrix).
    num_features = 26
    # Number of bucket rows in the per-sample matrix built by OptiverDataset.
    num_buckets = 20

# Dataset

In [None]:
class OptiverDataset(torch.utils.data.Dataset):
    """Map-style dataset that turns one DataFrame row into a padded feature matrix.

    Each row must expose ``bucket_num`` (a sequence of bucket indices),
    ``bucket_features`` (one feature vector per entry of ``bucket_num``) and
    ``target``. Buckets that do not appear in ``bucket_num`` stay all-zero.
    """

    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(X_norm, y)`` for the row at position ``idx``.

        ``X_norm`` has shape ``(config.num_buckets, config.num_features)``;
        every column is divided by its maximum over the buckets (a column
        whose maximum is exactly 0 is left unscaled). ``y`` is the row's
        target as a float32 scalar tensor.
        """
        record = self.df.iloc[idx]
        target = torch.tensor(record.target, dtype=torch.float32)

        features = torch.zeros((config.num_buckets, config.num_features), dtype=torch.float32)
        per_bucket = record.bucket_features
        for pos, bucket_id in enumerate(record.bucket_num):
            # Scatter the pos-th feature vector into the row of its bucket id.
            features[bucket_id] = torch.tensor(per_bucket[pos], dtype=torch.float32)

        # Column-wise maximum, with zeros replaced by one to avoid dividing by zero.
        col_max = features.max(dim=0).values.view(1, -1)
        col_max = torch.where(col_max == 0, torch.ones_like(col_max), col_max)
        scaled = features * torch.reciprocal(col_max)
        return (scaled, target)

# Model

In [None]:
class FFN(nn.Module):
    """Pre-activation feed-forward block: BatchNorm -> SiLU -> Dropout(0.1) -> Linear.

    Input and output both have ``sz`` features.
    """

    def __init__(self, sz):
        super().__init__()
        self.linear = nn.Linear(sz, sz)
        self.bn = nn.BatchNorm1d(sz)
        self.silu = nn.SiLU()
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Apply the four stages in order and return the result."""
        for stage in (self.bn, self.silu, self.dropout, self.linear):
            x = stage(x)
        return x
    
class OptiverModel(nn.Module):
    """GRU regressor over a ``(batch, num_buckets, num_features)`` input.

    Pipeline: BatchNorm over the bucket axis -> 2-layer GRU -> two stacked
    FFN blocks on the last layer's final hidden state -> linear head.

    Args:
        num_buckets: Sequence length / BatchNorm channel count. Defaults to
            ``config.num_buckets``.
        num_features: Feature width of each bucket (GRU input size). Defaults
            to ``config.num_features``.
        hidden_size: GRU hidden size and FFN width.
    """

    def __init__(self, num_buckets=None, num_features=None, hidden_size=150):
        super().__init__()
        if num_buckets is None:
            num_buckets = config.num_buckets
        if num_features is None:
            num_features = config.num_features

        # BatchNorm1d's second positional parameter is `eps`, not a second
        # dimension; passing num_features there set eps=26 and all but
        # disabled the normalisation. Only the channel count is given now.
        self.pre_bn = nn.BatchNorm1d(num_buckets)
        self.gru = nn.GRU(num_features, hidden_size, 2, batch_first=True, dropout=0.1)

        self.ffn1 = FFN(hidden_size)
        self.ffn2 = FFN(hidden_size)
        self.out = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return ``(features, predictions)``.

        ``features`` is the ``(batch, hidden_size)`` output of the second FFN
        block; ``predictions`` is the flattened ``(batch,)`` output of the
        linear head.
        """
        x = self.pre_bn(x)
        _, h = self.gru(x)
        # h is (num_layers, batch, hidden); take the top layer. No squeeze:
        # squeezing dim 0 would drop the batch axis when batch size is 1.
        h = h[-1]

        y = self.ffn1(h)
        # Feed ffn1's output forward (previously ffn2 was applied to `h`,
        # so ffn1 never influenced the result).
        y = self.ffn2(y)

        yout = self.out(y).view(-1)
        return y, yout

In [None]:
# Build the network on the selected device and print its layer summary.
model = OptiverModel().to(device)
print(model)

In [None]:
# Preview the first rows of `train` again before it is split.
train.head()

In [None]:
# Random 80/20 split by index label; the fixed random_state keeps it reproducible.
train_idx = train.sample(frac=0.8, random_state=20).index
in_train = train.index.isin(train_idx)
val_idx = train.index[~in_train]

print(len(train_idx), len(val_idx))

# Boolean masks keep both subsets in the original row order.
train_df = train[in_train].copy()
val_df = train[train.index.isin(val_idx)].copy()

In [None]:
train_dataset = OptiverDataset(train_df)
val_dataset = OptiverDataset(val_df)

# Training batches are reshuffled every epoch; the incomplete final batch is dropped.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=512,
    shuffle=True,
    drop_last=True,
    pin_memory=True,
)

# Validation keeps every sample and a fixed order.
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=2048,
    shuffle=False,
    drop_last=False,
)

print(len(train_dataloader), len(val_dataloader))

# Losses

In [None]:
def MSE(y, yhat):
    """Mean squared error between targets ``y`` and predictions ``yhat``."""
    return torch.square(y - yhat).mean()

def RMSE(y, yhat):
    """Root mean squared error between targets ``y`` and predictions ``yhat``."""
    squared_err = (y - yhat) ** 2
    return torch.mean(squared_err).sqrt()

def RMSPE(y, yhat):
    """Root mean squared percentage error: RMSE of ``(y - yhat) / y``.

    The error is expressed relative to the target, so targets equal to zero
    produce non-finite results.
    """
    rel_err = y - yhat
    rel_err /= y  # in-place on the fresh difference tensor; inputs are untouched
    return rel_err.square().mean().sqrt()

# Trainer

In [None]:
class Trainer:
    """Train and validate a model whose forward pass returns ``(features, predictions)``.

    The model is optimised with AdamW on the RMSE loss under a one-cycle
    learning-rate schedule. After every epoch the validation RMSE and RMSPE
    are computed, and the model is saved to ``best_rmse.pt`` /
    ``best_rmspe.pt`` whenever the corresponding metric improves.

    Args:
        epochs: Number of training epochs (also sizes the LR schedule).
        model: ``nn.Module`` returning a ``(features, predictions)`` tuple.
        train_dataloader: Iterable of ``(X, y)`` training batches.
        val_dataloader: Iterable of ``(X, y)`` validation batches.
    """

    def __init__(self, epochs, model, train_dataloader, val_dataloader):
        self.epochs=epochs
        self.model=model
        self.train_dataloader=train_dataloader
        self.val_dataloader=val_dataloader

        self.optimizer=torch.optim.AdamW(model.parameters(), lr=7e-5, weight_decay=1e-5)
        # The scheduler is stepped once per batch, hence steps_per_epoch.
        self.schedular=torch.optim.lr_scheduler.OneCycleLR(self.optimizer,
                                                           max_lr=7e-5,
                                                           epochs=self.epochs,
                                                           steps_per_epoch=len(train_dataloader))

        # Best validation metrics seen so far (None until the first epoch ends).
        self.best_rmse=None
        self.best_rmspe=None

    def _model_device(self):
        """Return the device that currently holds the model's parameters."""
        return next(self.model.parameters()).device

    def evaluate(self):
        """Compute ``(rmse, rmspe)`` over the whole validation loader.

        The model is switched to eval mode for the pass, so dropout is off and
        BatchNorm uses (and does not update) its running statistics. The
        previous train/eval mode is restored afterwards.
        """
        was_training=self.model.training
        self.model.eval()
        dev=self._model_device()

        ytrue=[]
        ypred=[]
        try:
            with torch.no_grad():
                for X, y in self.val_dataloader:
                    _, yhat=self.model(X.to(dev))
                    ypred.extend(yhat.cpu().tolist())
                    ytrue.extend(y.cpu().tolist())
        finally:
            self.model.train(was_training)

        ytrue=torch.tensor(ytrue)
        ypred=torch.tensor(ypred)
        eval_rmse=RMSE(ytrue, ypred)
        eval_rmspe=RMSPE(ytrue, ypred)
        return (eval_rmse.item(), eval_rmspe.item())

    def train_ops(self, X, y):
        """Run one optimisation step on a batch and return ``(rmse, rmspe)`` as floats.

        Only the RMSE is back-propagated; the RMSPE is computed on detached
        tensors purely for logging.
        """
        _, yhat=self.model(X)
        rmse_loss=RMSE(y, yhat)
        rmspe_loss=RMSPE(y.detach(), yhat.detach())

        self.optimizer.zero_grad()
        rmse_loss.backward()

        # Clip the global gradient norm to 1 before the update.
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
        self.optimizer.step()
        self.schedular.step()

        return (rmse_loss.item(), rmspe_loss.item())

    def train(self):
        """Train for ``self.epochs`` epochs, validating and checkpointing after each."""
        for e in range(self.epochs):
            train_epoch_rmse=[]
            train_epoch_rmspe=[]

            self.model.train()
            dev=self._model_device()
            for X, y in self.train_dataloader:
                X=X.to(dev)
                y=y.to(dev)

                rmse_loss, rmspe_loss=self.train_ops(X, y)
                train_epoch_rmse.append(rmse_loss)
                train_epoch_rmspe.append(rmspe_loss)

            (eval_rmse, eval_rmspe) = self.evaluate()

            # Record the new best before saving; otherwise every epoch would
            # overwrite the "best" checkpoint.
            if (self.best_rmse is None) or (self.best_rmse > eval_rmse):
                self.best_rmse=eval_rmse
                torch.save(self.model, 'best_rmse.pt')
            if (self.best_rmspe is None) or (self.best_rmspe > eval_rmspe):
                self.best_rmspe=eval_rmspe
                torch.save(self.model, 'best_rmspe.pt')

            print("epoch:{} - Train RMSE Loss:{:.4f} | Train RMSPE Loss:{:.4f}".format(e, np.mean(train_epoch_rmse),
                                                                                       np.mean(train_epoch_rmspe)))
            print("Val RMSE:{:.4f} | Val RMSPE:{:.4f}".format(eval_rmse, eval_rmspe))

    def lr_range_test(self):
        """Sweep the learning rate upwards and record the RMSE loss at each step.

        Starts at 1e-6 and multiplies the rate by 1.02 after every batch,
        stopping once it exceeds 1e-3 or after 50 passes over the training
        loader. Uses its own optimizer, but updates ``self.model`` in place.

        Returns:
            ``(lrs, losses)``: two lists of equal length, one entry per step.
        """
        min_lr=1e-6
        max_lr=1e-3
        optimizer=torch.optim.AdamW(self.model.parameters(), lr=min_lr, weight_decay=1e-5)
        scheduler=torch.optim.lr_scheduler.StepLR(optimizer, 1, 1.02)
        dev=self._model_device()

        losses=[]
        lrs=[]
        self.model.train()
        reached_max=False
        for _pass in range(50):
            for i, (X, y) in enumerate(self.train_dataloader):
                # Batches must live on the same device as the model.
                X=X.to(dev)
                y=y.to(dev)

                optimizer.zero_grad()
                _, yhat=self.model(X)
                loss=RMSE(y, yhat)
                loss.backward()

                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
                optimizer.step()
                scheduler.step()

                losses.append(loss.item())
                lrs.append(scheduler.get_last_lr()[0])

                if i%10==0:
                    print(i, '-->', lrs[-1], losses[-1])
                if lrs[-1] > max_lr:
                    reached_max=True
                    break
            if reached_max:
                break
        return lrs, losses

In [None]:
%%time
# Start from a freshly initialised network so earlier cells cannot leak weights.
model = OptiverModel().to(device)

trainer = Trainer(
    config.epochs,
    model,
    train_dataloader,
    val_dataloader,
)
trainer.train()

In [None]:
#model=OptiverModel()
#print(model)
#trainer=Trainer(5, model, train_dataloader, val_dataloader)
#lrs, losses=trainer.lr_range_test()

In [None]:
#lmt=390
#plt.plot(lrs[:lmt], losses[:lmt])
#plt.xticks(rotation=45)
#plt.show()