## JSMP: PyTorch Model - Inference

This notebook uses PyTorch to run inference with a pre-trained autoencoder-based model.<br/>

I will release training code soon.

## Importing Libraries 📗

In [None]:
import os
import gc
import sys
import time
import tqdm
import random
import numpy as np
import pandas as pd
import seaborn as sns
import datatable as dt
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

import pickle

from colorama import Fore, Back, Style
y_ = Fore.YELLOW
r_ = Fore.RED
g_ = Fore.GREEN
b_ = Fore.BLUE
m_ = Fore.MAGENTA
c_ = Fore.CYAN
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import roc_auc_score
from sklearn.impute import SimpleImputer
from sklearn.model_selection import KFold, StratifiedKFold

In [None]:
# Directory holding the competition files. The trailing slash matters because
# file names are appended with plain string concatenation.
folder_path = '../input/jane-street-market-prediction/'
sample_csv_path = folder_path + 'example_sample_submission.csv'
test_csv_path = folder_path + 'example_test.csv'

# Example submission and example test rows shipped with the competition data.
sample = pd.read_csv(sample_csv_path)
test_data = pd.read_csv(test_csv_path)

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), and configures cuDNN for reproducible behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Both CUDA calls are silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, so it is
    # disabled explicitly alongside the deterministic flag.
    torch.backends.cudnn.benchmark = False


seed_everything(seed=42)

In [None]:
# Names of the input columns: feature_0 ... feature_129.
features = [f'feature_{i}' for i in range(130)]
n_features = len(features)

# Hyper-parameters and layer sizes shared by the notebook. Only 'nfolds',
# 'input_size1', 'input_size2' and 'output_size' are read by the visible cells.
config = dict(
    epochs=100,
    train_batch_size=1024,
    valid_batch_size=1024,
    test_batch_size=64,
    nfolds=5,
    learning_rate=0.0005,

    encoder_input=n_features,
    input_size1=n_features,  # raw input
    input_size2=128,  # encoded input
    output_size=5,
)

# Dataset that holds the trained weights and the per-feature training means.
data_path = '../input/jsmp-pytorch-bottelneck-model-train'
train_mean_frame = pd.read_csv(f"{data_path}/train_data_mean.csv")
# NOTE(review): train_data_mean is not referenced again in the visible cells.
train_data_mean = train_mean_frame.to_numpy()

In [None]:
class GaussianNoise(nn.Module):
    """Add input-relative Gaussian noise while the module is in training mode.

    The noise for each element is drawn from a standard normal distribution
    and multiplied by ``sigma * x``. In eval mode, or when ``sigma`` is 0, the
    input is returned unchanged.

    Args:
        device: Device on which the ``noise`` template tensor is created.
        sigma: Relative standard deviation of the noise.
        is_relative_detach: When True the scale is computed from
            ``x.detach()``, so no gradient flows through the noise scale.
    """

    def __init__(self,device,sigma=0.1, is_relative_detach=True):
        super().__init__()
        self.sigma = sigma
        self.is_relative_detach = is_relative_detach
        # Kept as a plain attribute (neither buffer nor parameter) so the
        # module's state_dict stays empty and saved checkpoints still load.
        self.noise = torch.tensor(0,dtype=torch.float).to(device)

    def forward(self, x):
        """Return ``x`` with relative Gaussian noise added (training mode only)."""
        if not self.training or self.sigma == 0:
            return x

        scale = self.sigma * x.detach() if self.is_relative_detach else self.sigma * x
        # Sample on the input's own device rather than on the device captured
        # at construction time, so the layer keeps working after the model or
        # its inputs are moved elsewhere.
        sampled_noise = torch.randn(x.shape, dtype=self.noise.dtype, device=x.device) * scale
        return x + sampled_noise
    
class Autoencoder(nn.Module):
    """Fully connected autoencoder: input -> 768 -> 768 -> 128 -> 768 -> 768 -> input.

    Every layer is ``BatchNorm1d -> Linear`` followed by ReLU, except the last
    decoder layer which has no activation. Attribute names (``layer1`` ...
    ``layer6``) and the order inside each ``nn.Sequential`` define the
    state_dict keys and must stay as they are for saved weights to load.

    Args:
        input_size: Number of input (and reconstructed) features.
        gaussian_noise: Module applied to the input at the start of ``forward``.
        noise_level: Probability that a column is selected by ``swap_noise``.
    """

    def __init__(self,input_size,gaussian_noise,noise_level= 0.1):
        super().__init__()

        self.noise_level = noise_level
        self.gaussian_noise = gaussian_noise

        #encoder
        self.layer1 = self.batch_linear(input_size,768,nn.ReLU)
        self.layer2 = self.batch_linear(768,768,nn.ReLU)
        self.layer3 = self.batch_linear(768,128,nn.ReLU)

        #decoder
        self.layer4 = self.batch_linear(128,768,nn.ReLU)
        self.layer5 = self.batch_linear(768,768,nn.ReLU)
        self.layer6 = self.batch_linear(768,input_size)

    def swap_noise(self,x):
        """Return a copy of ``x`` with randomly chosen columns row-shuffled.

        Each column is selected with probability ``noise_level``; the selected
        columns take their values from rows sampled with replacement from the
        same batch. The input tensor itself is left untouched.
        """
        batch_size = x.shape[0]
        num_columns = x.shape[1]

        # Index tensors are created on the input's device.
        random_rows = torch.randint(low=0,high=batch_size,size=(batch_size,),device=x.device)
        random_swap = torch.rand(num_columns,device=x.device) < self.noise_level

        # Work on a clone: assigning into ``x`` directly would silently
        # overwrite the caller's batch.
        swapped = x.clone()
        swapped[:,random_swap] = x[random_rows][:,random_swap]
        return swapped

    def batch_linear(self,inp,out,activation=None):
        """Build ``BatchNorm1d(inp) -> Linear(inp, out) [-> activation()]``."""
        layers = [nn.BatchNorm1d(inp),nn.Linear(inp,out)]
        if activation is not None:
            layers.append(activation())
        return nn.Sequential(*layers)

    def forward(self,x):
        """Reconstruct ``x`` after passing it through the noise module."""
        x = self.gaussian_noise(x)
        # Swap noise is currently disabled:
        # x = self.swap_noise(x)
        x = self.get_encoder(x)

        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)

        return x

    def get_encoder(self,x):
        """Return the 128-dimensional encoding of ``x`` (no input noise applied)."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        return x

In [None]:
class Model(nn.Module):
    """Densely connected MLP that maps the raw features to ``output_size`` logits.

    The network is a stem (``layer1``-``layer2``), five blocks whose inputs are
    concatenations of earlier activations, and a three-layer head.
    ``input_size2`` and the ``input2`` argument of ``forward`` are accepted but
    not used by this architecture. Attribute names and the module order inside
    each ``nn.Sequential`` determine the state_dict keys.

    Args:
        input_size1: Number of raw input features.
        input_size2: Size of the encoded input (unused here).
        output_size: Number of output logits.
    """

    def __init__(self,input_size1,input_size2,output_size):
        super().__init__()
        with_drop = self.batch_linear_drop
        plain = self.batch_linear

        # stem: raw -> 256 -> 128
        self.layer1 = with_drop(input_size1, 256, 0.3, activation=nn.ELU)
        self.layer2 = plain(256, 128, activation=nn.ELU)

        # block 1: [raw, stem] -> 256
        self.layer3 = with_drop(input_size1 + 128, 256, 0.1, nn.ReLU)

        # block 2: [raw, stem, block1] -> 256 -> 128
        self.layer4 = with_drop(input_size1 + 384, 256, 0.1, nn.ELU)
        self.layer5 = plain(256, 128, nn.ReLU)

        # block 3: [block1, block2] -> 256 -> 128
        self.layer6 = with_drop(384, 256, 0.1, nn.ELU)
        self.layer7 = plain(256, 128, nn.ReLU)

        # block 4: [block1, block2, block3] -> 256 -> 128
        self.layer8 = with_drop(512, 256, 0.1, nn.ELU)
        self.layer9 = plain(256, 128, nn.ReLU)

        # block 5: [block2, block3, block4] -> 256 -> 128
        self.layer10 = with_drop(384, 256, 0.1, nn.ELU)
        self.layer11 = plain(256, 128, nn.ReLU)

        # head: [stem, block1, block2, block4, block5] -> 256 -> 128 -> logits
        self.layer12 = plain(768, 256, nn.SELU)
        self.layer13 = plain(256, 128, nn.SELU)
        self.layer14 = plain(128, output_size)

    def batch_linear_drop(self,inp,out,drop,activation=None):
        """Build ``BatchNorm1d -> Dropout(drop) -> Linear [-> activation()]``."""
        stack = [nn.BatchNorm1d(inp), nn.Dropout(drop), nn.Linear(inp, out)]
        if activation:
            stack.append(activation())
        return nn.Sequential(*stack)

    def batch_linear(self,inp,out,activation=None):
        """Build ``BatchNorm1d -> Linear [-> activation()]``."""
        stack = [nn.BatchNorm1d(inp), nn.Linear(inp, out)]
        if activation:
            stack.append(activation())
        return nn.Sequential(*stack)

    def forward(self,input1,input2):
        """Return the logits for ``input1``; ``input2`` is ignored."""
        stem = self.layer2(self.layer1(input1))

        block1 = self.layer3(torch.cat((input1, stem), dim=1))

        block2 = self.layer4(torch.cat((input1, stem, block1), dim=1))
        block2 = self.layer5(block2)

        block3 = self.layer6(torch.cat((block1, block2), dim=1))
        block3 = self.layer7(block3)

        block4 = self.layer8(torch.cat((block1, block2, block3), dim=1))
        block4 = self.layer9(block4)

        block5 = self.layer10(torch.cat((block2, block3, block4), dim=1))
        block5 = self.layer11(block5)

        # block3 is intentionally absent from the head input (768 = 128+256+128+128+128).
        head = torch.cat((stem, block1, block2, block4, block5), dim=1)
        head = self.layer12(head)
        head = self.layer13(head)
        return self.layer14(head)

# class Model(nn.Module):
#     def __init__(self,input_size1,input_size2,output_size):
#         super(Model,self).__init__()
#         total_input_size = input_size1+input_size2
#         hidden_size = 256
        
#         self.layer1 = self.batch_linear_drop(total_input_size,hidden_size,0.2,nn.LeakyReLU)
#         self.layer2 = self.batch_linear_drop(total_input_size+hidden_size,hidden_size,0.2,nn.LeakyReLU)
#         self.layer3 = self.batch_linear_drop(2*hidden_size,hidden_size,0.2,nn.LeakyReLU)
#         self.layer4 = self.batch_linear_drop(2*hidden_size,hidden_size,0.2,nn.LeakyReLU)
#         self.layer5 = self.batch_linear_drop(2*hidden_size,hidden_size,0.2,nn.LeakyReLU)
#         self.layer6 = self.batch_linear_drop(2*hidden_size,hidden_size,0.2,nn.LeakyReLU)
#         self.layer7 = self.batch_linear_drop(2*hidden_size,hidden_size,0.2,nn.LeakyReLU)
#         self.layer8 = self.batch_linear(2*hidden_size,output_size)
        
#     def batch_linear_drop(self,inp,out,drop,activation=None):
#         if activation:
#             return nn.Sequential(nn.BatchNorm1d(inp),nn.Dropout(drop),nn.Linear(inp,out),activation())
#         else:
#             return nn.Sequential(nn.BatchNorm1d(inp),nn.Dropout(drop),nn.Linear(inp,out))
            
#     def batch_linear(self,inp,out,activation=None):
#         if activation:
#             return nn.Sequential(nn.BatchNorm1d(inp),nn.Linear(inp,out),activation())
#         else:
#             return nn.Sequential(nn.BatchNorm1d(inp),nn.Linear(inp,out))
    
#     def forward(self,input1,input2):
#         x1 = torch.cat([input1,input2],1)
#         x1 = self.layer1(x1)
        
#         x2 = torch.cat([x1,input1,input2],1)
#         x2 = self.layer2(x2)
        
#         x3 = torch.cat([x2,x1],1)
#         x3 = self.layer3(x3)
        
#         x4 = torch.cat([x3,x2],1)
#         x4 = self.layer4(x4)
        
#         x5 = torch.cat([x4,x3],1)
#         x5 = self.layer5(x5)
        
#         x6 = torch.cat([x5,x4],1)
#         x6 = self.layer6(x6)
        
#         x7 = torch.cat([x6,x5],1)
#         x7 = self.layer7(x7)
        
#         x8 = torch.cat([x7,x6],1)
#         x8 = self.layer8(x8)
        
#         return x8

In [None]:
# Load the per-fold classifiers and the autoencoder from the training dataset.
data_path = '../input/jsmp-pytorch-bottelneck-model-train'
# scaler = pickle.load(open(f'{data_path}/scaler.pkl','rb'))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# One classifier per fold, each restored from its own checkpoint and put in
# eval mode.
models = []
for fold_idx in range(config['nfolds']):
    model = Model(config['input_size1'],config['input_size2'],config['output_size'])
    fold_weights = torch.load(f"{data_path}/model{fold_idx}.bin",map_location=device)
    model.load_state_dict(fold_weights)
    model.to(device)
    model.eval()
    models.append(model)

# The autoencoder needs a noise module at construction time.
gaussian_noise = GaussianNoise(device)
encoder = Autoencoder(config['input_size1'],gaussian_noise)
encoder_weights = torch.load(f'{data_path}/encoder.bin',map_location=device)
encoder.load_state_dict(encoder_weights)
# .to() and .eval() both return the module itself.
encoder = encoder.to(device).eval()

In [None]:
def inference(test):
    """Average the sigmoid outputs of all fold models for a batch of rows.

    Relies on the module-level ``models``, ``encoder``, ``device`` and
    ``config`` objects.

    Args:
        test: 2-D NumPy array of feature values, one row per sample.

    Returns:
        NumPy array of shape ``(test.shape[0], config['output_size'])`` holding
        the mean per-output probability across the models.
    """
    all_prediction = np.zeros((test.shape[0],config['output_size']))
    # Convert and move the batch to the target device once, not once per model.
    inputs = torch.tensor(test,dtype=torch.float).to(device)

    # Gradients are never needed here; no_grad avoids building an autograd
    # graph for every forward pass.
    with torch.no_grad():
        # The encoder is shared by all folds, so its output is computed once
        # outside the loop.
        encoder_inp = encoder.get_encoder(inputs)
        for model in models:
            outputs = model(inputs,encoder_inp)
            all_prediction += outputs.sigmoid().cpu().numpy()

    return all_prediction/len(models)

In [None]:
# Sanity check on the example test file: predict every row and look at the
# distribution of the per-row mean probability.
example_frame = pd.read_csv(folder_path + 'example_test.csv').fillna(0)
# NOTE(review): missing values are zero-filled here while train_data_mean is
# loaded but unused -- confirm this matches the preprocessing used in training.
test_data = example_frame[features].to_numpy()
# test_data = scaler.transform(test_data[features].to_numpy())
predictions = inference(test_data).mean(axis=1)
sns.distplot(predictions);

In [None]:
# Imported here, right before use, rather than in the import cell at the top.
import janestreet
# Create the environment object and the iterator consumed by the prediction
# loop in the next cell.
env = janestreet.make_env()
iter_test = env.iter_test()

In [None]:
%%time
# Mean probability of every row that was actually scored (kept for plotting).
all_predictions = []
for test_df, sample_prediction_df in iter_test:
    if test_df['weight'].item() == 0:
        # Rows with zero weight are not scored by the models; action is set to 0.
        sample_prediction_df.action = 0
    else:
        test_df.fillna(0,inplace=True)
        predictions = inference(test_df[features].to_numpy())
        # Collapse all model outputs for this row into one probability.
        prediction = np.mean(predictions)
        all_predictions.append(prediction)
        action = np.where(prediction >= 0.5, 1, 0).astype(int)
        sample_prediction_df.action = action

    # Submit the decision for this row, whichever branch produced it.
    env.predict(sample_prediction_df)

In [None]:
# env.predict(sample)

In [None]:
# Read the submission file from the working directory and preview its first
# rows. NOTE(review): presumably written by the janestreet environment during
# the prediction loop -- confirm.
submission = pd.read_csv('./submission.csv')
submission.head()

In [None]:
# Side-by-side summary: distribution of the scored probabilities (left) and
# the number of submitted actions per class (right).
plt.figure(figsize=(15,7))

left_axes = plt.subplot(1, 2, 1)
sns.distplot(all_predictions, ax=left_axes)
left_axes.set_title("Distplot of all the prediction")

right_axes = plt.subplot(1, 2, 2)
sns.countplot(submission.action, ax=right_axes)
right_axes.set_title("Count plot of action in prediction")

plt.show()