In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        input_file = os.path.join(dirname, filename)
        print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## PyTorch

In [None]:
import os
import time
import pickle
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import namedtuple
from sklearn.metrics import log_loss, roc_auc_score

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

import warnings
# Silence all warnings for the rest of the notebook.
warnings.filterwarnings("ignore")

# Let pandas display up to 100 rows / 100 columns before truncating.
for _display_opt in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_opt, 100)

# Input locations (relative to the notebook working directory).
DATA_PATH = '../input/jane-street-market-prediction/'
CACHE_PATH = '../input/mlp012003weights'  # f_mean_online.npy and online_model{fold}.pth are read from here

# Number of PyTorch fold checkpoints that get loaded and averaged.
NFOLDS = 5

# NOTE(review): TRAIN is not read anywhere in the visible cells.
TRAIN = False

def save_pickle(dic, save_path):
    """Pickle ``dic`` and write it to ``save_path``, overwriting any existing file.

    Args:
        dic: Any picklable object (despite the name, it need not be a dict).
        save_path: Destination file path, opened in binary write mode.
    """
    with open(save_path, 'wb') as sink:
        pickle.dump(dic, sink)

def load_pickle(load_path):
    """Read ``load_path`` and return the object that was pickled into it.

    Args:
        load_path: Path of a pickle file, opened in binary read mode.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only load files you trust.
    """
    with open(load_path, 'rb') as source:
        return pickle.load(source)

# Raw input columns: feature_0 ... feature_129.
feat_cols = [f'feature_{i}' for i in range(130)]

# One output per target; the network emits len(target_cols) values.
target_cols = ['action', 'action_1', 'action_2', 'action_3', 'action_4']

# Values used to replace NaN features at inference time
# (presumably per-feature means, going by the file name -- TODO confirm).
f_mean = np.load(f'{CACHE_PATH}/f_mean_online.npy')


##### Making features
# Inputs of the PyTorch model: the raw columns plus two engineered crosses.
all_feat_cols = feat_cols + ['cross_41_42_43', 'cross_1_2']

##### Model&Data fnc
class Model(nn.Module):
    """Feed-forward network with concatenation skips between consecutive blocks.

    The input has ``len(all_feat_cols)`` columns and the output has one raw
    value per entry of ``target_cols`` (no sigmoid is applied inside the
    module). Each hidden block is Linear -> BatchNorm1d -> LeakyReLU -> Dropout
    with 256 units; every block after the first consumes the concatenation of
    the two preceding activations.
    """

    def __init__(self):
        super().__init__()
        n_inputs = len(all_feat_cols)
        n_outputs = len(target_cols)
        width = 256
        drop_p = 0.2

        # Input normalisation + dropout.
        self.batch_norm0 = nn.BatchNorm1d(n_inputs)
        self.dropout0 = nn.Dropout(0.2)

        # Block 1 sees the normalised input only.
        self.dense1 = nn.Linear(n_inputs, width)
        self.batch_norm1 = nn.BatchNorm1d(width)
        self.dropout1 = nn.Dropout(drop_p)

        # Block 2 sees [input, block 1].
        self.dense2 = nn.Linear(width + n_inputs, width)
        self.batch_norm2 = nn.BatchNorm1d(width)
        self.dropout2 = nn.Dropout(drop_p)

        # Blocks 3 and 4 see the two previous block outputs.
        self.dense3 = nn.Linear(width + width, width)
        self.batch_norm3 = nn.BatchNorm1d(width)
        self.dropout3 = nn.Dropout(drop_p)

        self.dense4 = nn.Linear(width + width, width)
        self.batch_norm4 = nn.BatchNorm1d(width)
        self.dropout4 = nn.Dropout(drop_p)

        # Output head on [block 3, block 4].
        self.dense5 = nn.Linear(width + width, n_outputs)

        # Only LeakyReLU is used in forward(). The other activations are kept
        # so the module's attributes and state_dict keys stay unchanged
        # (nn.PReLU owns a learnable weight).
        self.Relu = nn.ReLU(inplace=True)
        self.PReLU = nn.PReLU()
        self.LeakyReLU = nn.LeakyReLU(negative_slope=0.01, inplace=True)
        self.RReLU = nn.RReLU()

    def _block(self, dense, norm, drop, inputs):
        """Apply one Linear -> BatchNorm -> LeakyReLU -> Dropout stage."""
        return drop(self.LeakyReLU(norm(dense(inputs))))

    def forward(self, x):
        """Return the raw outputs for a 2-D batch ``x`` of input features."""
        x = self.dropout0(self.batch_norm0(x))

        h1 = self._block(self.dense1, self.batch_norm1, self.dropout1, x)
        h2 = self._block(self.dense2, self.batch_norm2, self.dropout2,
                         torch.cat([x, h1], 1))
        h3 = self._block(self.dense3, self.batch_norm3, self.dropout3,
                         torch.cat([h1, h2], 1))
        h4 = self._block(self.dense4, self.batch_norm4, self.dropout4,
                         torch.cat([h2, h3], 1))

        return self.dense5(torch.cat([h3, h4], 1))

# Load the NFOLDS pretrained PyTorch fold models for inference.
# NOTE: this cell has to run before the TensorFlow import cell further down,
# whose `from tensorflow.keras.models import Model` rebinds the name `Model`.

# Use the GPU when one is present, otherwise fall back to CPU instead of crashing.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_list = []
tmp = np.zeros(len(feat_cols))  # NOTE(review): not read anywhere in the visible cells
for _fold in range(NFOLDS):
    if device.type == "cuda":
        torch.cuda.empty_cache()
    model = Model()
    model.to(device)
    model_weights = f"{CACHE_PATH}/online_model{_fold}.pth"

    # The checkpoint is deserialised on CPU; load_state_dict then copies the
    # values into the parameters that already live on `device`.
    model.load_state_dict(torch.load(model_weights, map_location=torch.device('cpu')))
    model.eval()  # inference mode: dropout off, batch-norm uses running stats
    model_list.append(model)

## TensorFlow MLP

In [None]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import numpy as np
import pandas as pd
from tqdm import tqdm
from random import choices

SEED = 1111

# Seed NumPy and TensorFlow with the same value.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Cached arrays are loaded here; the train.csv read is disabled
# (presumably these .npy files replace it -- TODO confirm).
X_train = np.load('../input/lstm-final/X_train.npy')
y_train = np.load('../input/lstm-final/y_train.npy')

In [None]:
# train = train.query('date > 85').reset_index(drop = True)
# train = train[train['weight'] != 0]

# train['action'] = ((train['resp'].values) > 0).astype(int)
# features = [c for c in train.columns if "feature" in c]
# weights = train['weight']

# train.fillna(0, inplace=True)
# f_mean = np.mean(train[features[1:]].values,axis=1)

# resp_cols = ['resp', 'resp_1', 'resp_2', 'resp_3', 'resp_4']

# X_train = train.loc[:, train.columns.str.contains('feature')]
# y_train = np.stack([(train[c] > 0).astype('int') for c in resp_cols]).T

In [None]:
def create_mlp(
    num_columns, num_labels, hidden_units, dropout_rates, label_smoothing, learning_rate
):
    """Build and compile a plain MLP classifier with sigmoid outputs.

    Architecture: Input -> BatchNorm -> Dropout, then for every entry of
    ``hidden_units`` a Dense -> BatchNorm -> swish -> Dropout stage, and
    finally Dense(num_labels) -> sigmoid.

    Args:
        num_columns: Number of input features.
        num_labels: Number of sigmoid outputs.
        hidden_units: Width of each hidden Dense layer, in order.
        dropout_rates: ``dropout_rates[0]`` is applied to the normalised
            input and ``dropout_rates[i + 1]`` after hidden layer ``i``, so at
            least ``len(hidden_units) + 1`` values are required.
        label_smoothing: Label smoothing passed to the binary cross-entropy loss.
        learning_rate: Adam learning rate.

    Returns:
        A compiled ``tf.keras`` model. The tracked metric is an AUC that is
        reported under the name ``"acc"``.

    Raises:
        ValueError: If ``dropout_rates`` has fewer entries than layers.
    """
    # Fail early with a clear message instead of an IndexError mid-build.
    if len(dropout_rates) < len(hidden_units) + 1:
        raise ValueError(
            f"dropout_rates needs at least {len(hidden_units) + 1} entries "
            f"(input + one per hidden layer), got {len(dropout_rates)}"
        )

    inp = tf.keras.layers.Input(shape=(num_columns,))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dropout(dropout_rates[0])(x)
    for units, rate in zip(hidden_units, dropout_rates[1:]):
        x = tf.keras.layers.Dense(units)(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation(tf.keras.activations.swish)(x)
        x = tf.keras.layers.Dropout(rate)(x)

    x = tf.keras.layers.Dense(num_labels)(x)
    out = tf.keras.layers.Activation("sigmoid")(x)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        # `metrics` is documented as a list; newer Keras rejects a bare metric.
        metrics=[tf.keras.metrics.AUC(name="acc")],
    )

    return model

In [None]:
# MLP hyper-parameters (epochs / batch_size only matter when fitting).
epochs = 200
batch_size = 4096
hidden_units = [160] * 3
dropout_rates = [0.2] * 4  # input dropout + one per hidden layer
label_smoothing = 1e-2
learning_rate = 1e-3

# Build the network and restore pretrained weights; no fitting happens here.
mlp_model = create_mlp(
    num_columns=130,
    num_labels=5,
    hidden_units=hidden_units,
    dropout_rates=dropout_rates,
    label_smoothing=label_smoothing,
    learning_rate=learning_rate,
)
mlp_model.load_weights('../input/jane-street-market-prediction-clfmlp/mlp.h5')

# Keras models whose outputs are averaged in the prediction loop.
tf_models = [mlp_model]

### TensorFlow LSTM

In [None]:
def create_lstm(lookback, num_columns, num_labels, head_hidden_units,lstm_units,tail_hidden_units, dropout_rates, label_smoothing, learning_rate):
    """Build and compile a Dense -> LSTM -> Dense classifier with sigmoid outputs.

    Architecture: Input(lookback, num_columns) -> BatchNorm -> Dropout, then
    one Dense/BatchNorm/swish/Dropout stage per ``head_hidden_units`` entry,
    one LSTM/BatchNorm/Dropout stage per ``lstm_units`` entry, one
    Dense/BatchNorm/swish/Dropout stage per ``tail_hidden_units`` entry, and
    finally Dense(num_labels) -> sigmoid.

    Args:
        lookback: Number of time steps in each input window.
        num_columns: Number of features per time step.
        num_labels: Number of sigmoid outputs.
        head_hidden_units: Widths of the Dense layers placed before the LSTMs.
        lstm_units: Units of each stacked LSTM layer.
        tail_hidden_units: Widths of the Dense layers placed after the LSTMs.
        dropout_rates: One rate for the input followed by one rate per head,
            LSTM and tail layer, in that order.
        label_smoothing: Label smoothing passed to the binary cross-entropy loss.
        learning_rate: Adam learning rate.

    Returns:
        A compiled ``tf.keras`` model. The tracked metric is an AUC that is
        reported under the name ``'acc'``.

    Raises:
        AssertionError: If ``len(dropout_rates)`` does not equal the number of
            layers plus one.
    """
    # Raised explicitly (rather than via `assert`) so the check survives `python -O`.
    n_expected = 1 + len(head_hidden_units) + len(lstm_units) + len(tail_hidden_units)
    if len(dropout_rates) != n_expected:
        raise AssertionError("number of dropout_rates is not equal to number of layers!")

    # Rates are consumed strictly in layer order: input, head, LSTM, tail.
    rates = iter(dropout_rates)

    inp = tf.keras.layers.Input(shape=(lookback, num_columns))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dropout(next(rates))(x)

    # Hidden layers before the LSTMs.
    for units in head_hidden_units:
        x = tf.keras.layers.Dense(units)(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation(tf.keras.activations.swish)(x)
        x = tf.keras.layers.Dropout(next(rates))(x)

    # Stacked LSTMs: every layer except the last must return the full
    # sequence, otherwise the next LSTM receives a 2-D tensor and fails.
    last_lstm = len(lstm_units) - 1
    for i, units in enumerate(lstm_units):
        x = tf.keras.layers.LSTM(units, return_sequences=(i < last_lstm))(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Dropout(next(rates))(x)

    # Hidden layers after the LSTMs.
    for units in tail_hidden_units:
        x = tf.keras.layers.Dense(units)(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation(tf.keras.activations.swish)(x)
        x = tf.keras.layers.Dropout(next(rates))(x)

    x = tf.keras.layers.Dense(num_labels)(x)
    out = tf.keras.layers.Activation('sigmoid')(x)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        # `metrics` is documented as a list; newer Keras rejects a bare metric.
        metrics=[tf.keras.metrics.AUC(name='acc')],
    )

    return model

In [None]:
# LSTM hyper-parameters. Note that batch_size, dropout_rates, label_smoothing
# and learning_rate rebind the names used for the MLP in the earlier cell.
lookback = 10            # time steps per input window
batch_size = 4096
head_hidden_units = [256]
lstm_units = [5]
tail_hidden_units = [512, 394]
dropout_rates = [
    0.10143786981358652,  # input
    0.19720339053599725,  # head Dense
    0.1123435323,         # LSTM
    0.23148340929571917,  # tail Dense 1
    0.2157768967777311,   # tail Dense 2
]

label_smoothing = 1e-2
learning_rate = 1e-3

In [None]:
# Build an LSTM network from the hyper-parameters above.
# NOTE(review): the network bound to `model` here does not appear to be used by
# the later visible cells (inference goes through `clf`) -- confirm before removing.
model = create_lstm(
    lookback=lookback,
    num_columns=len(feat_cols),
    num_labels=5,
    head_hidden_units=head_hidden_units,
    lstm_units=lstm_units,
    tail_hidden_units=tail_hidden_units,
    dropout_rates=dropout_rates,
    label_smoothing=label_smoothing,
    learning_rate=learning_rate,
)

In [None]:
def prepare_dataset(X,y,window_length,batch_size, mode='train'):
    """Turn row-wise data into a batched ``tf.data`` pipeline of sliding windows.

    Every element is a window of ``window_length`` consecutive rows of ``X``
    (stride 1, incomplete trailing windows dropped).

    Args:
        X: Row-wise feature data accepted by ``Dataset.from_tensor_slices``.
        y: Row-wise labels aligned with ``X``; only read when ``mode='train'``.
        window_length: Number of consecutive rows per window.
        batch_size: Number of windows per batch.
        mode: ``'train'`` pairs each window with the label of its last row
            (``y[window_length - 1:]``) and shuffles with a 10000-element
            buffer; ``'predict'`` yields the windows only, in order.

    Returns:
        A batched dataset with a prefetch buffer of 30.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'predict'``.
    """
    # Validate first: an unknown mode used to surface as an UnboundLocalError
    # on `ds` only after the window pipeline had been built.
    if mode not in ('train', 'predict'):
        raise ValueError(f"mode must be 'train' or 'predict', got {mode!r}")

    x_ds = tf.data.Dataset.from_tensor_slices(X)
    x_ds = x_ds.window(window_length, shift=1, drop_remainder=True)
    # window() yields nested datasets; batch each one back into a single tensor.
    x_ds = x_ds.flat_map(lambda window: window.batch(window_length))

    if mode == 'train':
        y_ds = tf.data.Dataset.from_tensor_slices(y[window_length - 1:])
        ds = tf.data.Dataset.zip((x_ds, y_ds))
        ds = ds.shuffle(10000).batch(batch_size)
    else:
        ds = x_ds.batch(batch_size)

    return ds.prefetch(30)

In [None]:
# Windowed training pipeline; only needed if the fit call on `clf` is re-enabled.
dataset = prepare_dataset(
    X_train,
    y_train,
    window_length=lookback,
    batch_size=batch_size,
    mode='train',
)

In [None]:
# LSTM used at inference time: build the architecture, then restore the
# pretrained weights (no fitting happens here).
clf = create_lstm(
    lookback=lookback,
    num_columns=len(feat_cols),
    num_labels=5,
    head_hidden_units=head_hidden_units,
    lstm_units=lstm_units,
    tail_hidden_units=tail_hidden_units,
    dropout_rates=dropout_rates,
    label_smoothing=label_smoothing,
    learning_rate=learning_rate,
)
clf.load_weights('../input/jane-street-market-prediction-clfmlp/lstm.h5')

In [None]:
# Values used to fill NaN features in the prediction loop.
# NOTE(review): this is the same file the PyTorch cell already loads via
# f'{CACHE_PATH}/f_mean_online.npy', so the reload looks redundant -- confirm.
f_mean = np.load('../input/mlp012003weights/f_mean_online.npy')

In [None]:
import janestreet

# Create the competition environment and its test iterator. The prediction
# loop consumes `env_iter` as (test_df, pred_df) pairs and submits each
# filled-in pred_df through env.predict().
# NOTE(review): presumably the iterator can only be consumed once per kernel
# session -- confirm against the janestreet API before re-running cells.
env = janestreet.make_env()
env_iter = env.iter_test()

In [None]:
# Online inference: for every row served by the competition iterator, blend
# the PyTorch fold ensemble, the Keras MLP and the Keras LSTM into one action.

# Blend weights of the three model families and the decision threshold.
TORCH_BLEND_W, MLP_BLEND_W, LSTM_BLEND_W = 0.2945, 0.4204, 0.2851
ACTION_THRESHOLD = 0.5013

# Rolling buffer holding the last `lookback` scored feature rows for the LSTM.
# It must live outside the loop: resetting it on every iteration would make the
# LSTM see nothing but `lookback` copies of the current row.
lstm_window = None

for (test_df, pred_df) in tqdm(env_iter):
    if test_df['weight'].item() > 0:
        x_tt = test_df.loc[:, feat_cols].values

        # Replace NaN features with the cached f_mean values.
        if np.isnan(x_tt.sum()):
            x_tt = np.nan_to_num(x_tt) + np.isnan(x_tt) * f_mean

        # Two engineered inputs appended for the PyTorch model (see all_feat_cols).
        cross_41_42_43 = x_tt[:, 41] + x_tt[:, 42] + x_tt[:, 43]
        cross_1_2 = x_tt[:, 1] / (x_tt[:, 2] + 1e-5)

        feature_inp = np.concatenate((
            x_tt,
            np.array(cross_41_42_43).reshape(x_tt.shape[0], 1),
            np.array(cross_1_2).reshape(x_tt.shape[0], 1),
        ), axis=1)

        # torch_pred: average the fold sigmoids, then take the median over targets.
        # The input tensor is built once and no autograd graph is recorded.
        torch_inp = torch.tensor(feature_inp, dtype=torch.float).to(device)
        torch_pred = np.zeros((1, len(target_cols)))
        with torch.no_grad():
            for torch_model in model_list:
                torch_pred += torch_model(torch_inp).sigmoid().cpu().numpy() / NFOLDS
        torch_pred = np.median(torch_pred)

        # tf_pred: mean over the Keras models, then median over targets.
        tf_pred = np.median(np.mean(
            [tf_model(x_tt, training=False).numpy() for tf_model in tf_models],
            axis=0,
        ))

        # lstm_pred: seed the window with copies of the first scored row, then
        # slide it forward by one row on every scored row.
        if lstm_window is None:
            lstm_window = np.concatenate([x_tt for _ in range(lookback)], axis=0)
        lstm_window = np.concatenate([lstm_window[1:], x_tt], axis=0)
        lstm_pred = np.median(clf(lstm_window[np.newaxis, :], training=False).numpy())

        # Weighted blend -> binary action.
        pred_pr = torch_pred*TORCH_BLEND_W + tf_pred*MLP_BLEND_W + lstm_pred*LSTM_BLEND_W
        pred_df.action = np.where(pred_pr >= ACTION_THRESHOLD, 1, 0).astype(int)
    else:
        # Rows with non-positive weight are answered with action 0 without running any model.
        pred_df.action = 0
    env.predict(pred_df)