In [1]:
# lib
import copy
import datatable as dt
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import random
import seaborn as sns
import time

import torch
import torch.nn as nn
from glob import glob

def seed_everything(seed=7777):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy's
            legacy global generator, and PyTorch (CPU and current CUDA
            device). It is also exported as ``PYTHONHASHSEED``.

    Side effects:
        Sets ``torch.backends.cudnn.deterministic`` to ``True``.
    """
    # Export for hash seeding; only interpreters started afterwards read it.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Pure-Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


# Run-wide configuration: fix the RNG seeds, let pandas display up to 200
# columns, and pick the torch device (CUDA when available, otherwise CPU).
SEED = 7777
seed_everything(seed=SEED)
pd.set_option('display.max_columns', 200)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
# Relative path of the competition input directory.
# NOTE(review): not referenced by any other cell visible in this notebook.
data_path = '../input/jane-street-market-prediction/'


def save_pickle(dic, save_path):
    """Pickle ``dic`` and write it to the file at ``save_path``.

    Args:
        dic: Any picklable object (despite the name, not restricted to dicts).
        save_path: Destination file path; an existing file is overwritten.
    """
    with open(save_path, mode='wb') as sink:
        pickle.dump(dic, sink)

def load_pickle(load_path):
    """Read the file at ``load_path`` and return the unpickled object.

    Args:
        load_path: Path of a file previously written with ``pickle``.

    Returns:
        The deserialized Python object.

    Note:
        Unpickling can execute arbitrary code, so only load trusted files.
    """
    with open(load_path, mode='rb') as source:
        return pickle.load(source)

In [3]:
# Names of the 130 input columns: feature_0 ... feature_129.
features = [f'feature_{i}' for i in range(130)]

# Values used to impute missing inputs in the inference loop, kept as a NumPy
# array restricted to (and ordered like) `features`.
# NOTE(review): presumably per-feature training medians, judging by the file
# name - confirm against the notebook that produced the CSV.
train_df_median = pd.read_csv('../input/js-lgbm-cls/train_df_median.csv')[features].values

In [4]:
class Mish(nn.Module):
    """Mish activation, ``x * tanh(softplus(x))``, with a hand-written backward.

    Args:
        inplace: Stored on the module for interface compatibility only; the
            computation is never performed in place.
    """

    class MishAutoFn(torch.autograd.Function):
        """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
        Experimental memory-efficient variant
        """
        @staticmethod
        def forward(ctx, x):
            # Only the input is saved; intermediates are recomputed in backward.
            ctx.save_for_backward(x)
            # `nn.functional` is used because the notebook never imports `F`.
            y = x.mul(torch.tanh(nn.functional.softplus(x)))  # x * tanh(ln(1 + exp(x)))
            return y

        @staticmethod
        def backward(ctx, grad_output):
            x = ctx.saved_tensors[0]
            x_sigmoid = torch.sigmoid(x)
            x_tanh_sp = nn.functional.softplus(x).tanh()
            # d/dx [x * tanh(sp(x))] = tanh(sp(x)) + x * sigmoid(x) * (1 - tanh(sp(x))^2)
            return grad_output.mul(x_tanh_sp + x * x_sigmoid *
                                   (1 - x_tanh_sp * x_tanh_sp))

    def __init__(self, inplace: bool = False):
        super(Mish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        """Apply Mish element-wise to ``x`` and return a new tensor."""
        return self.MishAutoFn.apply(x)


class Block(nn.Module):
    """Dense layer followed by LeakyReLU, batch normalization and dropout.

    Args:
        in_dim: Width of the incoming feature vector.
        out_dim: Width produced by the linear layer.
        dropout_rate: Drop probability of the trailing dropout layer.
    """

    def __init__(self, in_dim, out_dim, dropout_rate=0.2):
        super().__init__()
        self.dense = nn.Linear(in_features=in_dim, out_features=out_dim)
        self.batch_norm = nn.BatchNorm1d(num_features=out_dim)
        self.dropout = nn.Dropout(p=dropout_rate)
        # Despite the attribute name, the activation is LeakyReLU, not Mish.
        self.mish = nn.LeakyReLU(negative_slope=0.01, inplace=True)

    def forward(self, x):
        """Return ``(x, h)``: the untouched input and the block's activation.

        The input is passed through unchanged so the caller can build skip
        connections by concatenation.
        """
        activated = self.mish(self.dense(x))
        hidden = self.dropout(self.batch_norm(activated))
        return x, hidden


class Model(nn.Module):
    """Four stacked ``Block`` layers with concatenation skips and a linear head.

    Each block after the first receives the concatenation of the two most
    recent representations (the normalized input counts as the first one).

    Args:
        in_dim: Number of input features.
        out_dim: Number of outputs of the final linear layer.
        hidden_size: Width of every block's output.
        dropout_rate: Drop probability of the input dropout only; the blocks
            are built with ``Block``'s own default rate.
    """

    def __init__(self, in_dim, out_dim, hidden_size=256, dropout_rate=0.2):
        super().__init__()
        # Input normalization and dropout.
        self.batch_norm0 = nn.BatchNorm1d(in_dim)
        self.dropout0 = nn.Dropout(dropout_rate)

        # Input widths follow the concatenations performed in `forward`.
        self.block1 = Block(in_dim, hidden_size)
        self.block2 = Block(in_dim + hidden_size, hidden_size)
        self.block3 = Block(2 * hidden_size, hidden_size)
        self.block4 = Block(2 * hidden_size, hidden_size)

        # Output head over the last two block activations.
        self.dense5 = nn.Linear(2 * hidden_size, out_dim)

    def forward(self, x):
        """Map a ``(batch, in_dim)`` tensor to ``(batch, out_dim)`` raw outputs."""
        h0 = self.dropout0(self.batch_norm0(x))

        _, h1 = self.block1(h0)
        _, h2 = self.block2(torch.cat([h0, h1], 1))
        _, h3 = self.block3(torch.cat([h1, h2], 1))
        _, h4 = self.block4(torch.cat([h2, h3], 1))

        return self.dense5(torch.cat([h3, h4], 1))

In [5]:
# Load every fold checkpoint and build exactly one model per checkpoint.
# Paths are sorted because glob() returns them in arbitrary order.
checkpoint_paths = sorted(glob('../input/js-pytorch/fold*.pth'))
if not checkpoint_paths:
    # Without weights the ensemble would consist of randomly initialized models.
    raise FileNotFoundError(
        "No checkpoints matching '../input/js-pytorch/fold*.pth' were found")

checkpoints = [torch.load(path, map_location=torch.device('cpu'))
               for path in checkpoint_paths]

# 132 inputs = the 130 raw features plus the two engineered columns appended
# in the inference loop; of the 5 outputs, that loop reads only index 4.
model = Model(132, 5)
model.eval()
model.to(device)

# The ensemble size follows the number of checkpoints found, so no untrained
# copy can slip in and no checkpoint is indexed out of range.
models = [copy.deepcopy(model) for _ in checkpoints]
for fold_model, checkpoint in zip(models, checkpoints):
    fold_model.load_state_dict(checkpoint['model_state_dict'])

In [6]:
import janestreet

th = 0.500   # act when the ensembled probability is at least this value
esp = 1e-8   # added to numerator and denominator of the ratio feature

env = janestreet.make_env()

# For each (test_df, pred_df) pair: rows with non-positive weight get action 0
# without running the models; otherwise the inputs are imputed, extended with
# two engineered columns, scored by every model, and the median sigmoid of
# output index 4 is thresholded at `th`.
for (test_df, pred_df) in env.iter_test():
    if test_df['weight'].item() > 0:
        x_test = test_df.loc[:, features].values

        # fill na
        # The trigger is unchanged: a NaN sum over columns 1.. (column 0 is
        # not part of the check).
        if np.isnan(x_test[:, 1:].sum()):
            # np.where takes the fill value only where the input is NaN. The
            # previous `isnan(x) * median` form turned valid cells into NaN
            # whenever the median itself was non-finite (0 * nan == nan).
            x_test = np.where(np.isnan(x_test), train_df_median, np.nan_to_num(x_test))

        # Engineered inputs: sum of features 41-43 and the ratio of feature 1
        # to feature 2, each shifted by `esp`.
        feature_41_42_43 = x_test[:, 41] + x_test[:, 42] + x_test[:, 43]
        feature_1_2 = (x_test[:, 1] + esp) / (x_test[:, 2] + esp)

        x_test = np.concatenate((
            x_test,
            feature_41_42_43[:, None],
            feature_1_2[:, None],
        ), axis=1)

        x_test = torch.tensor(x_test, device=device, dtype=torch.float)
        with torch.no_grad():
            fold_probs = [fold_model(x_test)[:, 4].sigmoid().cpu().numpy()
                          for fold_model in models]
        pred = np.median(np.stack(fold_probs), axis=0)
        pred_df.action = np.where(pred >= th, 1, 0).astype(int)
    else:
        pred_df.action = 0

    env.predict(pred_df)