# Jane Street: Neural Network baseline

In [None]:
# Imports
import time, os
from typing import Dict
import gc
import numpy as np
import pandas as pd
from numba import njit
import janestreet
from tempfile import gettempdir
import matplotlib.pyplot as plt
from tqdm import tqdm
from prettytable import PrettyTable
from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch import nn, optim
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.utils.data import TensorDataset, Dataset

import seaborn as sns
import sklearn
from sklearn.preprocessing import StandardScaler, minmax_scale
from sklearn.decomposition import PCA
import xgboost as xgb
from sklearn.impute import SimpleImputer


# **Configuration**

In [None]:
# Run configuration shared by the cells below.
cfg = dict(
    model_params=dict(
        model_architecture='nn',
        model_name="nn_output",  # prefix of the training-metrics CSV file name
        lr=1e-3,  # learning rate handed to the Adam optimizer
        # Checkpoint to resume from; an empty string means "start from scratch".
        # e.g. '../input/jane-street-neural-network-1200000/nn_model_state_20000.pth'
        weight_path='',
        train=True,  # gates both the DataLoader construction and the training loop
        validate=False,
    ),
    train_params=dict(
        max_num_steps=10000,  # number of optimizer steps per run
        batch_size=4096,  # mini-batch size used by the training DataLoader
    ),
)


# Preprocessing

In [None]:
%%time

print('Loading...')
train = pd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv')
# Every column whose name contains 'feature' is used as a model input.
features = [col for col in train.columns if 'feature' in col]

print('Filling...')
# The fill values are the column means over ALL rows (computed before the
# zero-weight rows are dropped); the first feature column is left out.
f_mean = train[features[1:]].mean()
train = train[train['weight'] > 0].reset_index(drop=True)
train[features[1:]] = train[features[1:]].fillna(f_mean)
# Binary target: 1 when the response is strictly positive, else 0.
train['action'] = (train['resp'] > 0).astype('int')

print('Converting...')
# Persist the fill values so they can be reloaded later.
np.save('f_mean.npy', f_mean.values)

print('Scaling...')
# Standardize all feature columns; the fitted scaler is reused on test rows.
scaler = StandardScaler()
train_featues_norm = scaler.fit_transform(train[features])

print('Finish.')

In [None]:
class trainData(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        X_data: Indexable, sized container of feature rows.
        y_data: Indexable, sized container of labels, aligned with ``X_data``.

    Raises:
        ValueError: If ``X_data`` and ``y_data`` have different lengths.
    """

    def __init__(self, X_data, y_data):
        # Fail early: a length mismatch would otherwise only show up as an
        # IndexError in the middle of an epoch, or as silently unused labels.
        if len(X_data) != len(y_data):
            raise ValueError(
                f"X_data and y_data must have the same length, "
                f"got {len(X_data)} and {len(y_data)}"
            )
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X_data)




In [None]:
if cfg["model_params"]["train"]:
    # Standardized features and binary labels, both converted to float32 tensors.
    train_features = torch.tensor(train_featues_norm.astype(np.float32))
    train_target = torch.tensor(train['action'].values.astype(np.float32))

    # Wrap the tensors in a dataset and serve them in shuffled mini-batches.
    train_data = trainData(train_features, train_target)
    train_dataloader = DataLoader(
        dataset=train_data,
        batch_size=cfg["train_params"]["batch_size"],
        shuffle=True,
    )

    # Number of mini-batches in one pass over the data.
    print(len(train_dataloader))

# Training

In [None]:
def binary_acc(y_pred, y_test):
    """Return the accuracy of thresholded predictions as a whole-number percentage.

    Predictions greater than or equal to 0.5 count as class 1, all others as
    class 0.

    Args:
        y_pred: Tensor of predicted probabilities.
        y_test: Tensor of 0/1 targets with the same number of elements.

    Returns:
        A 0-dim float tensor holding the percentage of correct predictions,
        rounded to the nearest integer (NaN when ``y_test`` is empty).
    """
    # Align shapes first: comparing an (N, 1) prediction with an (N,) target
    # would broadcast to (N, N) and silently inflate the count of matches.
    if y_pred.shape != y_test.shape and y_pred.numel() == y_test.numel():
        y_pred = y_pred.reshape(y_test.shape)

    predicted = (y_pred >= 0.5).to(y_test.dtype)
    correct_results_sum = (predicted == y_test).sum().float()
    # Divide by the element count so the value is a true fraction whatever the
    # target's rank; for 1-D targets this equals y_test.shape[0].
    acc = correct_results_sum / y_test.numel()
    return torch.round(acc * 100)

In [None]:
# MLP architecture used for the binary action prediction
class JSMMLP(nn.Module):
    """Fully connected network that outputs one probability per input row.

    Seven hidden stages of Linear -> ReLU -> BatchNorm1d (widths 1000, 2000,
    2000, 2000, 1000, 500, 100) are followed by a single sigmoid output unit.
    Dropout is applied after hidden stages four to seven.

    Args:
        input_dim: Number of input features per row (default 130).
        dropout_p: Drop probability of the shared dropout layer (default 0.25).
    """

    def __init__(self, input_dim=130, dropout_p=0.25):
        super().__init__()
        # Attribute names and creation order are kept stable so that existing
        # checkpoints (state_dict keys) continue to load.
        self.fc1 = nn.Linear(input_dim, 1000)
        self.batchnorm1 = nn.BatchNorm1d(1000)
        self.fc2 = nn.Linear(1000, 2000)
        self.batchnorm2 = nn.BatchNorm1d(2000)
        self.fc3 = nn.Linear(2000, 2000)
        self.batchnorm3 = nn.BatchNorm1d(2000)
        self.fc4 = nn.Linear(2000, 2000)
        self.batchnorm4 = nn.BatchNorm1d(2000)
        self.fc5 = nn.Linear(2000, 1000)
        self.batchnorm5 = nn.BatchNorm1d(1000)
        self.fc6 = nn.Linear(1000, 500)
        self.batchnorm6 = nn.BatchNorm1d(500)
        self.fc7 = nn.Linear(500, 100)
        self.batchnorm7 = nn.BatchNorm1d(100)
        self.fc_out = nn.Linear(100, 1)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, 1)`` probabilities."""
        # Stages 1-3: no dropout.
        x = self.batchnorm1(F.relu(self.fc1(x)))
        x = self.batchnorm2(F.relu(self.fc2(x)))
        x = self.batchnorm3(F.relu(self.fc3(x)))
        # Stages 4-7: the same dropout layer follows each batch norm.
        x = self.dropout(self.batchnorm4(F.relu(self.fc4(x))))
        x = self.dropout(self.batchnorm5(F.relu(self.fc5(x))))
        x = self.dropout(self.batchnorm6(F.relu(self.fc6(x))))
        x = self.dropout(self.batchnorm7(F.relu(self.fc7(x))))
        # The sigmoid squashes the single logit into the (0, 1) range.
        return torch.sigmoid(self.fc_out(x))

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Build the network on the selected device together with its loss and optimizer.
model = JSMMLP().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=cfg["model_params"]["lr"])

# Resume from a checkpoint when a path is configured; otherwise start at step 0.
weight_path = cfg["model_params"]["weight_path"]
iteration = 0
if weight_path != '':
    model_state = torch.load(weight_path, map_location=device)
    model.load_state_dict(model_state['state_dict'])
    optimizer.load_state_dict(model_state['optimizer'])
    iteration = model_state['iteration']

# No learning-rate scheduler is active; the candidates below are disabled.
#scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
#--scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)


In [None]:
if cfg["model_params"]["train"]:
    # Train for `max_num_steps` optimizer steps, continuing from `iteration`.
    progress_bar = tqdm( range(iteration, iteration+cfg["train_params"]["max_num_steps"]))
    # Snapshot interval for the results table. The optional 'log_every' key
    # defaults to the previously hard-coded 10000; with the default settings
    # (start at 0, 10000 steps) no multiple is reached, so the table is empty.
    log_every = cfg["train_params"].get("log_every", 10000)
    losses_train = []
    acc_train = []
    iterations = []
    metrics = []
    times = []
    model_name = cfg["model_params"]["model_name"]
    start = time.time()

    # Running totals give the averages in O(1) per step instead of re-averaging
    # the ever-growing history lists on every iteration.
    loss_total = 0.0
    acc_total = 0.0

    # Loop-invariant: nothing inside the loop leaves training mode.
    model.train()
    torch.set_grad_enabled(True)

    tr_it = iter(train_dataloader)
    for i in progress_bar:
        # Restart the loader whenever a pass over the data is exhausted.
        try:
            x_data, y_data = next(tr_it)
        except StopIteration:
            tr_it = iter(train_dataloader)
            x_data, y_data = next(tr_it)

        # Move data to device
        inputs = x_data.to(device)
        targets = y_data.to(device)

        # Forward pass; the model output is reshaped to match the targets.
        output = model(inputs).view(targets.shape)

        loss = criterion(output, targets)
        acc = binary_acc(output, targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # step the scheduler
        #--scheduler.step()

        # Read each scalar back once and reuse it below.
        loss_value = loss.item()
        acc_value = acc.item()
        losses_train.append(loss_value)
        acc_train.append(acc_value)
        loss_total += loss_value
        acc_total += acc_value
        steps_done = len(losses_train)
        loss_avg = loss_total / steps_done
        acc_avg = acc_total / steps_done

        progress_bar.set_description(f"loss: {loss_value} loss(avg): {loss_avg} \
        | Acc: {acc_value} Acc(avg): {acc_avg}")

        # Periodic snapshot of the running average loss and the elapsed minutes.
        if log_every > 0 and i != 0 and i % log_every == 0:
            iterations.append(i)
            metrics.append(loss_avg)
            times.append((time.time()-start)/60)

    results = pd.DataFrame({"iterations": iterations, 'metrics (avg)': metrics, 'elapsed time (min)': times})
    results.to_csv(f'{model_name}_{cfg["train_params"]["max_num_steps"]}.csv', index=False)
    print(f"Total training time is {(time.time()-start)/60} mins")
    print(results.head())

# Example Test Prediction Analysis

In [None]:
# Reload the per-feature fill values as a plain NumPy array. Prefer the file
# written by this notebook's preprocessing step (np.save('f_mean.npy', ...)) so
# that test-time imputation matches the values used on the training data; fall
# back to the externally provided copy when the local file is absent.
_local_f_mean = Path('f_mean.npy')
f_mean = np.load(_local_f_mean if _local_f_mean.exists() else '../input/jane-street-keras-model/f_mean.npy')

In [None]:
# Score the example test file to sanity-check the prediction distribution.
example_test = pd.read_csv('../input/jane-street-market-prediction/example_test.csv')
example_test = example_test.query('weight > 0').reset_index(drop = True)
example_test_features = example_test.loc[:, features].values

# Impute missing values in every column but the first: NaNs are zeroed and
# then replaced by adding the stored fill value at exactly those positions.
_tail = example_test_features[:, 1:]
if np.isnan(_tail.sum()):
    example_test_features[:, 1:] = np.nan_to_num(_tail) + np.isnan(_tail) * f_mean

#Scaling
test_featues_norm = scaler.transform(example_test_features)
test_featues_norm = torch.tensor(test_featues_norm.astype(np.float32)).to(device)

# Inference only: evaluation mode and no gradient tracking.
model.eval()
with torch.no_grad():
    y_test_pred = model(test_featues_norm)
    y_pred_tag = y_test_pred.cpu().numpy()

# Summary statistics of the predicted probabilities: min, max, mean, std.
for _stat in (y_pred_tag.min(), y_pred_tag.max(), y_pred_tag.mean(), y_pred_tag.std()):
    print(_stat)

plt.hist(y_pred_tag, bins = 100)
plt.show()

# Submitting

In [None]:
# Return `array` with its NaN entries replaced by `values`; decorated with
# numba's `njit`. The array is returned unchanged when its sum is not NaN.
@njit
def fast_fillna(array, values):
    # A NaN sum is used as a cheap signal that a fill may be needed.
    if np.isnan(array.sum()):
        array = np.where(np.isnan(array), values, array)
    return array

# Calls fast_fillna once on the first training row (filling with 0) and writes
# the result back. NOTE(review): presumably a warm-up call to trigger the JIT
# compilation ahead of the submission loop - confirm; the only other use of
# fast_fillna in the visible code is commented out.
train.loc[0, features[1:]] = fast_fillna(train.loc[0, features[1:]].values, 0)

In [None]:
# Create the competition environment and obtain its test-data iterator.
# NOTE(review): the janestreet API presumably allows this only once per
# session - confirm against the competition documentation.
env = janestreet.make_env()
env_iter = env.iter_test()

In [None]:

# Switch to evaluation mode once, instead of on every iteration of the loop.
model.eval()

# Predict an action for each (test_df, pred_df) pair yielded by the environment.
for (test_df, pred_df) in tqdm(env_iter):
    if test_df['weight'].item() > 0:
        test_features = test_df.loc[:, features].values
        # Impute missing values in every column but the first: NaNs are zeroed
        # and then replaced by adding the stored fill value at those positions.
        if np.isnan(test_features[:,1:].sum()):
            test_features[:,1:] = np.nan_to_num(test_features[:,1:]) + np.isnan(test_features[:,1:])*f_mean
            #test_features[0, :] = fast_fillna(test_features[0, :], f_mean)

        #Scaling
        test_featues_norm = scaler.transform(test_features)

        with torch.no_grad():
            y_test_pred = model(torch.tensor(test_featues_norm.astype(np.float32)).to(device))
            y_test_pred = y_test_pred.cpu().numpy()
            # Act (1) when the predicted probability is at least 0.5.
            pred_df.action = np.where(y_test_pred >= 0.5, 1, 0).astype(int)
    else:
        # Rows with non-positive weight get action 0 without running the model.
        pred_df.action = 0
    env.predict(pred_df)