In [None]:
import os
import time
import pickle
import random
from tqdm import tqdm

import numpy as np
import pandas as pd
# Raise pandas' display limits so frames with up to 100 columns / 100 rows are shown in full.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [None]:
# Directory holding the cached artifacts this notebook loads: the f_mean array
# and the per-fold PyTorch checkpoints (online_model{fold}.pth).
CACHE_PATH = '../input/mlp012003weights'

def save_pickle(dic, save_path):
    """Serialize ``dic`` with pickle and write the bytes to ``save_path``.

    The target is opened in binary write mode, so an existing file at that
    path is overwritten. Nothing is returned.
    """
    with open(save_path, mode='wb') as out_file:
        out_file.write(pickle.dumps(dic))

def load_pickle(load_path):
    """Open ``load_path`` in binary mode and return the object unpickled from it.

    NOTE: unpickling can execute arbitrary code, so only call this on files
    that come from a trusted source.
    """
    with open(load_path, mode='rb') as in_file:
        return pickle.load(in_file)

# Array that the inference loop adds onto NaN feature positions after zero-filling them.
# Presumably the per-feature training means, judging by the name — confirm against the training notebook.
f_mean = np.load(f'{CACHE_PATH}/f_mean_online.npy')

In [None]:
# Raw input columns: feature_0 ... feature_129.
feat_cols = [f'feature_{idx}' for idx in range(130)]

# Full PyTorch model input: a fresh list holding the raw columns followed by
# the two engineered cross features.
all_feat_cols = [*feat_cols, 'cross_41_42_43', 'cross_1_2']

# Names of the five model outputs (the original note here reads "resp 1,2,3,4").
target_cols = [
    'action',
    'action_1',
    'action_2',
    'action_3',
    'action_4',
]

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

In [None]:

class Model(nn.Module):
    """Feed-forward network with concatenation skip connections.

    The input (``len(all_feat_cols)`` columns) is batch-normalised and passed
    through dropout, then through four hidden blocks of the form
    Linear -> BatchNorm1d -> LeakyReLU -> Dropout. Each block after the first
    consumes the concatenation of the two preceding tensors. The final linear
    layer emits ``len(target_cols)`` raw scores; no output activation is
    applied inside the module.
    """

    def __init__(self):
        super().__init__()

        n_inputs = len(all_feat_cols)
        n_outputs = len(target_cols)
        hidden_size = 256
        dropout_rate = 0.5 # 0.2

        # Input normalisation and input dropout.
        self.batch_norm0 = nn.BatchNorm1d(n_inputs)
        self.dropout0 = nn.Dropout(0.8) # 0.2

        # Block 1: raw input -> hidden.
        self.dense1 = nn.Linear(n_inputs, hidden_size)
        self.batch_norm1 = nn.BatchNorm1d(hidden_size)
        self.dropout1 = nn.Dropout(dropout_rate)

        # Block 2: [input, block 1] -> hidden.
        self.dense2 = nn.Linear(hidden_size + n_inputs, hidden_size)
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(dropout_rate)

        # Block 3: [block 1, block 2] -> hidden.
        self.dense3 = nn.Linear(2 * hidden_size, hidden_size)
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(dropout_rate)

        # Block 4: [block 2, block 3] -> hidden.
        self.dense4 = nn.Linear(2 * hidden_size, hidden_size)
        self.batch_norm4 = nn.BatchNorm1d(hidden_size)
        self.dropout4 = nn.Dropout(dropout_rate)

        # Output head: [block 3, block 4] -> one score per target column.
        self.dense5 = nn.Linear(2 * hidden_size, n_outputs)

        # Only LeakyReLU is used in forward(). The other activations are kept
        # as attributes: PReLU owns a learnable parameter, so removing it would
        # change the module's state_dict keys.
        self.Relu = nn.ReLU(inplace=True)
        self.PReLU = nn.PReLU()
        self.LeakyReLU = nn.LeakyReLU(negative_slope=0.01, inplace=True)

        self.RReLU = nn.RReLU()

    def forward(self, x):
        """Return raw scores of shape (batch, len(target_cols)) for a 2-D input batch."""
        activation = self.LeakyReLU

        x = self.dropout0(self.batch_norm0(x))

        x1 = self.dropout1(activation(self.batch_norm1(self.dense1(x))))
        x2 = self.dropout2(activation(self.batch_norm2(self.dense2(torch.cat([x, x1], 1)))))
        x3 = self.dropout3(activation(self.batch_norm3(self.dense3(torch.cat([x1, x2], 1)))))
        x4 = self.dropout4(activation(self.batch_norm4(self.dense4(torch.cat([x2, x3], 1)))))

        return self.dense5(torch.cat([x3, x4], 1))

In [None]:
# Select the compute device: the first CUDA GPU when one is available,
# otherwise the CPU. `device` is bound on BOTH paths; previously the CUDA
# branch built a torch.device without assigning it, leaving `device` undefined
# on GPU machines so the model-loading cell raised NameError.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'using device: {device.type}')

In [None]:
# Number of folds, i.e. how many saved PyTorch checkpoints are loaded below.
NFOLDS = 5

# NOTE(review): `tmp` is not referenced anywhere else in the visible notebook.
tmp = np.zeros(len(feat_cols))

# Build one network per fold, restore its weights onto `device`, and switch it
# to eval mode so dropout is disabled and batch norm uses its running statistics.
# NOTE: `Model` must still be the PyTorch class at this point. A later cell
# imports `Model` from tensorflow.keras.models and rebinds the name, so
# re-running this cell after that import no longer builds the PyTorch network.
model_list = []
for fold_idx in range(NFOLDS):
    torch.cuda.empty_cache()
    weights_path = f"{CACHE_PATH}/online_model{fold_idx}.pth"
    fold_model = Model().to(device)
    fold_model.load_state_dict(torch.load(weights_path, map_location=device))
    model_list.append(fold_model.eval())

In [None]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import tensorflow_addons as tfa

In [None]:
# Seed used for NumPy here and for TensorFlow (tf.random.set_seed) before the Keras model is built.
SEED = 1111

np.random.seed(SEED)

def create_mlp(
    num_columns, num_labels, hidden_units, dropout_rates, label_smoothing, learning_rate
):
    """Build and compile a multi-label Keras MLP.

    Architecture: Input -> BatchNormalization -> Dropout, then for every entry
    of ``hidden_units`` a Dense -> BatchNormalization -> swish -> Dropout
    block, and finally a Dense layer with ``num_labels`` units followed by a
    sigmoid.

    Args:
        num_columns: number of input features.
        num_labels: number of sigmoid outputs.
        hidden_units: sequence with the width of each hidden Dense layer.
        dropout_rates: sequence of dropout rates; index 0 is applied to the
            normalised input and index ``i + 1`` after hidden block ``i``.
        label_smoothing: smoothing factor passed to the binary cross-entropy loss.
        learning_rate: learning rate passed to the RectifiedAdam optimizer.

    Returns:
        The compiled ``tf.keras`` model (AUC is tracked as a metric).
    """
    layers = tf.keras.layers

    inputs = layers.Input(shape=(num_columns,))
    hidden = layers.BatchNormalization()(inputs)
    hidden = layers.Dropout(dropout_rates[0])(hidden)

    for block_idx, units in enumerate(hidden_units):
        hidden = layers.Dense(units)(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Activation(tf.keras.activations.swish)(hidden)
        hidden = layers.Dropout(dropout_rates[block_idx + 1])(hidden)

    scores = layers.Dense(num_labels)(hidden)
    probabilities = layers.Activation("sigmoid")(scores)

    mlp = tf.keras.models.Model(inputs=inputs, outputs=probabilities)
    mlp.compile(
        optimizer=tfa.optimizers.RectifiedAdam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        metrics=tf.keras.metrics.AUC(name="AUC"),
    )
    return mlp

# Training-only settings: they are referenced solely by the commented-out
# clf.fit(...) call; this notebook loads pre-trained weights instead.
epochs, batch_size = 300, 4096

# Architecture and optimisation settings handed to create_mlp.
hidden_units = [160] * 3
dropout_rates = [0.2] * 4
label_smoothing = 1e-2
learning_rate = 1e-3

# Start from a clean Keras state and a fixed TensorFlow seed before building the model.
tf.keras.backend.clear_session()
tf.random.set_seed(SEED)
clf = create_mlp(
    num_columns=len(feat_cols),
    num_labels=5,
    hidden_units=hidden_units,
    dropout_rates=dropout_rates,
    label_smoothing=label_smoothing,
    learning_rate=learning_rate,
)

In [None]:
# To train from scratch instead of loading weights, fit the model and save it with:
#clf.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2)
#clf.save(f'model.h5')

# Load the weights of the already-fitted model
# !ls ../input/jane-street-with-keras-nn-overfit/
clf.load_weights('../input/jane-street-with-keras-nn-overfit/model.h5')

# If you have several models, then you can store them in a list
#tf_models = [clf]

In [None]:
# Decision threshold applied to the blended prediction.
th = 0.5
import janestreet
# NOTE(review): presumably clears the API's "already called" guard so that
# make_env() can be invoked again in the same kernel — confirm.
janestreet.competition.make_env.__called__ = False

env = janestreet.make_env()
env_iter = env.iter_test()

# Score the test stream one (test_df, pred_df) pair at a time.
# Pairs whose weight is not positive always get action 0.
for (test_df, pred_df) in tqdm(env_iter):

    if test_df['weight'].item() > 0:
        x_tt = test_df.loc[:, feat_cols].values

        if np.isnan(x_tt.sum()):
            # nan_to_num zero-fills the NaNs; the NaN mask (taken from the
            # original x_tt) then adds f_mean at exactly those positions.
            x_tt = np.nan_to_num(x_tt) + np.isnan(x_tt) * f_mean

        # Engineered features, appended in the same order as in all_feat_cols.
        cross_41_42_43 = x_tt[:, 41] + x_tt[:, 42] + x_tt[:, 43]
        cross_1_2 = x_tt[:, 1] / (x_tt[:, 2] + 1e-5)
        feature_inp = np.concatenate(
            (x_tt, cross_41_42_43.reshape(-1, 1), cross_1_2.reshape(-1, 1)),
            axis=1,
        )

        # PyTorch ensemble: build the input tensor once and reuse it for every
        # fold. no_grad() skips autograd bookkeeping, which is pure overhead
        # at inference time. Fold probabilities are averaged, then reduced to
        # a single score with the median over the target columns.
        torch_inp = torch.tensor(feature_inp, dtype=torch.float).to(device)
        torch_pred = np.zeros((1, len(target_cols)))
        with torch.no_grad():
            for model in model_list:
                torch_pred += model(torch_inp).sigmoid().cpu().numpy() / NFOLDS
        torch_pred = np.median(torch_pred)

        # Keras model: explicit inference mode, median over its outputs.
        tf_pred = np.median(clf(x_tt, training=False))

        # Weighted blend of the PyTorch (0.4) and TensorFlow (0.6) scores.
        pred = torch_pred * 0.4 + tf_pred * 0.6
        pred_df.action = np.where(pred >= th, 1, 0).astype(int)

    else:
        pred_df.action = 0

    env.predict(pred_df)