In [None]:
import numpy as np
import pandas as pd

import janestreet

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input, Dense, Input, BatchNormalization, Dropout

import cudf

from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import GroupKFold
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
import gc

In [None]:
# Read the competition training CSV into a cuDF dataframe.
TRAIN_CSV = '/kaggle/input/jane-street-market-prediction/train.csv'
train = cudf.read_csv(TRAIN_CSV)

# Model inputs are all columns whose name contains the substring 'feature'.
features = list(filter(lambda name: 'feature' in name, train.columns))

In [None]:
# Every feature except the first one gets mean-imputed.
impute_cols = features[1:]

# Column means are taken over the full frame, i.e. before the weight filter below.
f_mean = train[impute_cols].mean()

# Keep only rows with a strictly positive weight and renumber them from 0.
train = train[train['weight'] > 0].reset_index(drop=True)
train[impute_cols] = train[impute_cols].fillna(f_mean)

# Binary target: 1 when weight * resp is strictly positive, else 0.
weighted_resp = train['weight'].values * train['resp'].values
train['action'] = (weighted_resp > 0).astype('int')

# Move the frame and the imputation means off cuDF for the Keras training below.
train = train.to_pandas()
f_mean = f_mean.values.get()

### Based on https://www.kaggle.com/gogo827jz/jane-street-neural-network-starter

In [None]:
def create_model(input_size, layers, dropout_rates, learning_rate):
    """Build and compile a feed-forward binary classifier.

    Architecture: BatchNormalization -> Dropout on the raw input, then one
    Dense(relu) -> BatchNormalization -> Dropout stage per entry of ``layers``,
    and finally a single sigmoid output unit.

    Args:
        input_size: Number of input features (length of one input row).
        layers: Sequence giving the width of each hidden Dense layer.
        dropout_rates: Sequence of dropout rates. Index 0 is applied to the
            normalised input and index ``i + 1`` after hidden layer ``i``, so
            at least ``len(layers) + 1`` entries are required.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras model using binary cross-entropy loss and an AUC
        metric named ``'AUC'``.
    """
    inp = Input(shape=(input_size,))
    x = BatchNormalization()(inp)
    x = Dropout(dropout_rates[0])(x)

    # Layer widths come from the `layers` argument. The previous version read
    # the notebook-level `hidden_units` global here, which ignored the argument
    # and failed whenever that global had not been defined yet.
    for i in range(len(layers)):
        x = Dense(layers[i], activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rates[i + 1])(x)

    out = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(),
        # `metrics` is documented as a list of metrics.
        metrics=[tf.keras.metrics.AUC(name='AUC')],
    )

    return model

In [None]:
# Seed TensorFlow and fix the training hyper-parameters.
tf.random.set_seed(666)
batch_size = 4000

hidden_units = [256, 256, 80]
dropout_rates = [0.05, 0.25, 0.25, 0.1]
learning_rate = 1e-3

# Out-of-fold predictions: one slot per training row, filled fold by fold.
oof = np.zeros(len(train['action']))

# 5-fold split grouped by 'date', so all rows of a date land in the same fold.
gkf = GroupKFold(n_splits=5)
fold_splits = gkf.split(
    train['action'].values,
    train['action'].values,
    train['date'].values,
)

for fold, (train_rows, valid_rows) in enumerate(fold_splits):
    X_tr = train.loc[train_rows, features].values
    X_val = train.loc[valid_rows, features].values
    y_tr = train.loc[train_rows, 'action'].values
    y_val = train.loc[valid_rows, 'action'].values

    ckp_path = f'model_{fold}.hdf5'
    model = create_model(X_tr.shape[1], hidden_units, dropout_rates, learning_rate)

    # All three callbacks watch validation AUC (higher is better).
    lr_schedule = ReduceLROnPlateau(
        monitor='val_AUC', factor=0.1, patience=3,
        verbose=0, min_delta=1e-4, mode='max',
    )
    checkpoint = ModelCheckpoint(
        ckp_path, monitor='val_AUC', verbose=0,
        save_best_only=True, save_weights_only=True, mode='max',
    )
    early_stop = EarlyStopping(
        monitor='val_AUC', min_delta=1e-4, patience=10, mode='max',
        baseline=None, restore_best_weights=True, verbose=0,
    )

    model.fit(
        X_tr, y_tr,
        validation_data=(X_val, y_val),
        epochs=1000,
        batch_size=batch_size,
        callbacks=[lr_schedule, checkpoint, early_stop],
        verbose=1,
    )

    # Record this fold's validation predictions and report its AUC.
    valid_pred = model.predict(X_val, batch_size=batch_size * 4).ravel()
    oof[valid_rows] += valid_pred
    score = roc_auc_score(y_val, oof[valid_rows])
    print(f'Fold {fold} ROC AUC:\t', score)

    # Release Keras/TF state and Python garbage before the next fold.
    K.clear_session()
    gc.collect()

In [None]:
# ROC AUC of the out-of-fold predictions against the 'action' labels.
score_oof = roc_auc_score(y_true=train['action'].values, y_score=oof)
print(score_oof)

In [None]:
# Create the `janestreet` competition environment and obtain its test iterator.
# NOTE(review): presumably the environment may only be created once per kernel
# session — confirm against the competition API docs before re-running this cell.
env = janestreet.make_env()
# `iter_test` is consumed by the inference loop, which unpacks each item as
# a (test_df, pred_df) pair and submits predictions via `env.predict`.
iter_test = env.iter_test()

In [None]:
# Rebuild one network per CV fold with the training architecture ...
models = [
    create_model(len(features), hidden_units, dropout_rates, learning_rate)
    for _ in range(5)
]
# ... then restore each fold's checkpointed weights from disk.
for i, clf in enumerate(models):
    clf.load_weights(f'./model_{i}.hdf5')

In [None]:
from tqdm.notebook import tqdm

In [None]:
# Decision threshold on the averaged ensemble probability.
opt_th = 0.5
n_models = len(models)

for (test_df, pred_df) in tqdm(iter_test):
    # Rows without a strictly positive weight are never acted on; skip the models.
    if not test_df['weight'].item() > 0:
        pred_df.action = 0
        env.predict(pred_df)
        continue

    x_tt = test_df.loc[:, features].values

    # Mean-impute NaNs in every feature except the first, using the training means.
    tail = x_tt[:, 1:]
    if np.isnan(tail.sum()):
        nan_mask = np.isnan(tail)
        x_tt[:, 1:] = np.nan_to_num(tail) + nan_mask * f_mean

    # Equal-weight average of the fold models' predicted probabilities.
    pred = models[0](x_tt, training=False).numpy().item() / n_models
    for clf in models[1:]:
        pred += clf(x_tt, training=False).numpy().item() / n_models

    pred_df.action = np.where(pred >= opt_th, 1, 0).astype(int)
    env.predict(pred_df)