In [None]:
import time
START_TIME = time.time()

from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Average, Add, Flatten, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy, KLDivergence
from tensorflow.keras.optimizers import Adam, Nadam, SGD
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupKFold

from sklearn.preprocessing import MinMaxScaler #StandardScaler, RobustScaler, Normalizer, power_transform, PowerTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss, accuracy_score, roc_auc_score, f1_score

from scikitplot.metrics import plot_confusion_matrix


from tqdm import tqdm
from random import choices

import kerastuner as kt

import matplotlib.pyplot as plt

from glob import glob

In [None]:
# Load the training set in one pass: float64 columns are downcast to float32,
# missing values are replaced by the per-column median, and only rows with
# weight > 0 are kept (index renumbered afterwards).
train = (
    pd.read_csv('../input/jane-street-market-prediction/train.csv')
    .pipe(lambda frame: frame.astype(
        dict.fromkeys(frame.select_dtypes(include='float64').columns, np.float32)))
    .pipe(lambda frame: frame.fillna(frame.median()))
    .query('weight > 0')
    .reset_index(drop=True)
)
# I want to use all the data
#train = train.query('date > 85').reset_index(drop = True) 

# Binary target: 1 when the main response is positive.
train['action'] = train['resp'].gt(0).astype('int')

# Every column whose name contains 'feature' is a model input.
features = [name for name in train.columns if 'feature' in name]

resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']

X = train.loc[:, features].to_numpy()
# One 0/1 column per entry of resp_cols (sign of each response).
y = (train.loc[:, resp_cols].to_numpy() > 0).astype('int')

# Per-feature medians, skipping the first feature column; the submission loop
# uses them to impute missing values in columns 1..end of each test row.
f_median = np.median(train.loc[:, features[1:]].to_numpy(), axis=0)
#pd.to_pickle(f_median, 'f_median.pkl')

In [None]:
# Raw responses (one column per entry of resp_cols) and their sign-based 0/1 labels.
y_resps = train.loc[:, resp_cols].to_numpy()
y_actions = (y_resps > 0).astype('int')

In [None]:
def plot_history(h, epochs_ignored=0.2, title=None):
    """Plot the most recent epochs of a Keras-style training history.

    One row of panels is drawn per metric. When the history contains
    validation entries (keys starting with ``val_``) each row has three
    panels: training curve, validation curve, and both overlaid. When there
    are no validation entries (e.g. ``fit`` was called without
    ``validation_data``) every recorded key gets a single training panel
    instead of failing.

    Parameters
    ----------
    h : object with a ``history`` attribute
        ``history`` maps metric names to per-epoch sequences of values.
    epochs_ignored : float, default 0.2
        Fraction of the earliest epochs to leave out of the plots. Only
        applied when more than two epochs were recorded; at least one epoch
        is always shown.
    title : str, optional
        Figure-level title.

    Raises
    ------
    ValueError
        If ``h.history`` contains no metrics at all.
    """
    history = h.history
    prefix = 'val_'
    val_keys = [key for key in history if key.startswith(prefix)]
    if val_keys:
        # Strip only the leading prefix: str.replace would also remove inner
        # occurrences (e.g. 'val_interval_loss' -> 'interloss').
        pairs = [(key[len(prefix):], key) for key in val_keys]
    else:
        pairs = [(key, None) for key in history]
    if not pairs:
        raise ValueError('history contains no metrics to plot')

    reference_key = val_keys[0] if val_keys else pairs[0][0]
    epochs = len(history[reference_key])
    if epochs > 2:
        last_epochs = int(epochs * (1 - epochs_ignored))
        # Clamp to [1, epochs]: a window of 0 would make `[-0:]` return the
        # whole series while the x axis is empty.
        last_epochs = min(max(last_epochs, 1), epochs)
    else:
        last_epochs = epochs
    epochs_x = list(range(epochs - last_epochs, epochs))

    def tail(values):
        """Return the last `last_epochs` entries of a metric series."""
        return list(values[-last_epochs:]) if last_epochs else []

    n_cols = 3 if val_keys else 1
    # squeeze=False keeps `axes` two-dimensional for any row/column count.
    fig, axes = plt.subplots(len(pairs), n_cols, figsize=(20, len(pairs) * 4), squeeze=False)
    for row, (loss, val) in zip(axes, pairs):
        row[0].plot(epochs_x, tail(history[loss]))
        row[0].set_title(loss)
        if val is None:
            continue
        row[1].plot(epochs_x, tail(history[val]))
        row[1].set_title(val)
        row[2].plot(epochs_x, tail(history[loss]))
        row[2].plot(epochs_x, tail(history[val]))
        row[2].legend([loss, val])
        row[2].set_title(f'{loss} vs {val}')

    if title is not None:
        fig.suptitle(title)
    # plt.show() rather than fig.show(): the latter warns on non-GUI backends.
    plt.show()

In [None]:
def _averaged_heads(inputs, n_branches, units, activation, level):
    """Build parallel MLP heads on `inputs` and merge them into one named output.

    Branch `m` (0-based) applies Dropout(0.2) and then `m + 1` hidden blocks of
    Dense(64) -> BatchNormalization -> swish -> Dropout(0.1), ending in a Dense
    layer with `units` outputs named ``level_{level}_output_{m}``.

    Returns the element-wise average of the branch outputs, produced by a
    layer named ``output_layer_{level}_average``.
    """
    heads = []
    for m in range(n_branches):
        x = Dropout(0.2)(inputs)
        for _ in range(m + 1):
            x = Dense(64)(x)
            x = BatchNormalization()(x)
            # Same swish function as before, applied through a standard
            # Activation layer rather than a Lambda wrapper.
            x = Activation(tf.keras.activations.swish)(x)
            x = Dropout(0.1)(x)
        heads.append(Dense(units, activation=activation, name=f'level_{level}_output_{m}')(x))

    merged_name = f'output_layer_{level}_average'
    if len(heads) == 1:
        # Merge layers may reject a single input depending on the Keras
        # version; an identity layer keeps the output name stable.
        return Activation('linear', name=merged_name)(heads[0])
    return Average(name=merged_name)(heads)


# target = [action_resp, resp, action_resps, resps]
# dims:         1          1         5         5

def create_model(input_dim, output_dims, add_models=0):
    """Build and compile the four-level stacked multi-output network.

    Each level is an ensemble of ``2 + add_models`` MLP branches of
    increasing depth whose outputs are averaged:

    * level 0 regresses the responses (``output_dims[3]`` linear units) from
      the batch-normalised input features;
    * level 1 predicts the response signs (``output_dims[2]`` sigmoid units)
      from the normalised features plus the normalised level-0 output;
    * level 2 regresses the main response (``output_dims[1]`` linear units)
      from the normalised level-0 and level-1 outputs;
    * level 3 predicts the main action (``output_dims[0]`` sigmoid units)
      from the normalised level-1 and level-2 outputs.

    Parameters
    ----------
    input_dim : int or sequence of int
        Number of input features, or a full per-sample input shape.
    output_dims : sequence of 4 ints
        Output sizes in the order (action, resp, actions, resps).
    add_models : int, default 0
        Extra branches per level on top of the base two; must be >= -1.

    Returns
    -------
    tensorflow.keras.Model
        Compiled with Adam. Outputs are ordered
        [output_layer_3_average, output_layer_2_average,
        output_layer_1_average, output_layer_0_average], i.e.
        (action, resp, actions, resps). Sigmoid outputs use binary
        cross-entropy with label smoothing 0.1 and an AUC metric; linear
        outputs use MSE. All four losses are weighted 0.25.

    Raises
    ------
    ValueError
        If ``add_models`` leaves no branch to build, or ``output_dims`` has
        fewer than four entries.
    """
    n_branches = 2 + add_models
    if n_branches < 1:
        raise ValueError(f'add_models={add_models} leaves no branch to build (need add_models >= -1)')
    dim_action, dim_resp, dim_actions, dim_resps = output_dims[:4]

    # Always hand Keras an explicit shape tuple; a bare int is not accepted
    # as a shape by every Keras version.
    input_shape = (int(input_dim),) if np.ndim(input_dim) == 0 else tuple(input_dim)
    input_layer_0 = Input(shape=input_shape)
    bn_0 = BatchNormalization()(input_layer_0)

    # Level 0: responses from the features.
    output_layer_0_average = _averaged_heads(bn_0, n_branches, dim_resps, 'linear', level=0)
    bn_1 = BatchNormalization()(output_layer_0_average)

    # Level 1: response signs from the features + level-0 predictions.
    input_layer_1 = Concatenate()([bn_0, bn_1])
    output_layer_1_average = _averaged_heads(input_layer_1, n_branches, dim_actions, 'sigmoid', level=1)
    bn_2 = BatchNormalization()(output_layer_1_average)

    # Level 2: main response from level-0 + level-1 predictions.
    input_layer_2 = Concatenate()([bn_1, bn_2])
    output_layer_2_average = _averaged_heads(input_layer_2, n_branches, dim_resp, 'linear', level=2)
    bn_3 = BatchNormalization()(output_layer_2_average)

    # Level 3: main action from level-1 + level-2 predictions. Sized with the
    # action dimension (output_dims[0]); the previous code indexed the resp
    # dimension, which only worked because both are 1 in the current call.
    input_layer_3 = Concatenate()([bn_2, bn_3])
    output_layer_3_average = _averaged_heads(input_layer_3, n_branches, dim_action, 'sigmoid', level=3)

    model = Model(
        inputs=input_layer_0,
        outputs=[output_layer_3_average, output_layer_2_average, output_layer_1_average, output_layer_0_average],
    )

    classification_outputs = ('output_layer_3_average', 'output_layer_1_average')  # action, actions
    regression_outputs = ('output_layer_2_average', 'output_layer_0_average')      # resp, resps

    loss = {name: BinaryCrossentropy(label_smoothing=0.1) for name in classification_outputs}
    loss.update({name: 'mse' for name in regression_outputs})

    loss_weights = {name: .25 for name in classification_outputs + regression_outputs}

    # A separate AUC instance per output; Keras prefixes each with the output name.
    metrics = {name: tf.keras.metrics.AUC(name='auc') for name in classification_outputs}
    metrics.update({name: 'mse' for name in regression_outputs})

    model.compile(optimizer=Adam(), loss=loss, metrics=metrics, loss_weights=loss_weights)

    return model
    

In [None]:
# Seed NumPy and TensorFlow so repeated runs start from the same random state
# (as far as the backend allows).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

epochs = 50
batch_size = 1024 * 4
verbose = True

# The callbacks monitor the *training* AUC of the main action output because
# fit() below runs without validation data ("overfit" mode). To monitor
# held-out data instead, use 'val_output_layer_3_average_auc' and the
# commented fit() call that passes validation_data.
objective = 'output_layer_3_average_auc' # overfit
direction = 'max'

# Date ranges are half-open: [start, end).
tr = (0, 400)
te = (420, 500)

train_indices = train[(train.date >= tr[0]) & (train.date < tr[1])].index
test_indices = train[(train.date >= te[0]) & (train.date < te[1])].index
assert len(train_indices) > 0 and len(test_indices) > 0, 'empty train/test date split'

# Size the input from the feature matrix rather than a hard-coded width.
model = create_model(input_dim=X.shape[1], output_dims=(1,1,5,5), add_models=3)

# Column of the main response inside resp_cols / y_actions / y_resps.
resp_idx = resp_cols.index('resp')

X_train, X_test = X[train_indices], X[test_indices]
# Targets follow the model's output order: action, resp, actions, resps.
y_train = (y_actions[train_indices][:, resp_idx], y_resps[train_indices][:, resp_idx], y_actions[train_indices], y_resps[train_indices])
y_test = (y_actions[test_indices][:, resp_idx], y_resps[test_indices][:, resp_idx], y_actions[test_indices], y_resps[test_indices])

rlr = ReduceLROnPlateau(monitor = objective, factor = 0.5, patience = 4, verbose = 1, min_delta = 1e-4, mode = direction)
es = EarlyStopping(objective, patience=21, restore_best_weights=True, mode=direction)

#h = model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs=epochs, batch_size=batch_size, verbose=verbose, callbacks = [es, rlr])
h = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose, callbacks = [es, rlr])

model.save_weights('./model.hdf5')

# Held-out evaluation on the later date range.
metrics = model.evaluate(X_test, y_test, batch_size=batch_size)
print(metrics)

In [None]:
# Loss-weight experiments; weights are listed in the order: action, resp, actions, resps
# same weight for all:     .5349, .5506
# weights .40 .40 .10 .10: .5341  .5477
# weights .45 .45 .05 .05: .5335  .5437  
# weights .40 .10 .40 .10: .5347  .5518 # decrease importance of mse
# weights .60 .10 .25 .05: .5353  .5484
# weights .75 .10 .13 .02: .5342  .5470

In [None]:
# Plot the curves recorded in `h`, the history returned by model.fit above.
plot_history(h)

In [None]:
# Print the layer-by-layer summary of the trained model.
model.summary()

In [None]:
# Render the model graph, with tensor shapes, to model.png.
tf.keras.utils.plot_model(model, show_shapes=True, to_file='model.png')

In [None]:
# Wrap the model's forward pass in a tf.function; the submission loop further
# down calls `model(...)` directly for each test batch.
# NOTE(review): experimental_relax_shapes is presumably meant to limit
# retracing when input shapes vary — confirm against the installed TF version.
model.call = tf.function(model.call, experimental_relax_shapes=True)

In [None]:
# Predict on the held-out split; `pred` is indexed per model output below
# (same order as the model's outputs: action, resp, actions, resps).
pred = model.predict(X_test, batch_size=batch_size, verbose=True)

In [None]:
# Sanity check: shape of the flattened first output next to the shape of its target.
pred[0].flatten().shape, y_test[0].shape

In [None]:
# Sanity check on the third output (per-response actions).
# NOTE(review): this takes column 1 of the prediction but column 3 of the
# target; only shapes are displayed, so it is harmless here — confirm which
# column was intended before comparing values.
pred[2][:, 1].shape, y_test[2][:, 3].shape

In [None]:
# Competition submission API.
# NOTE(review): `janestreet` is presumably only importable inside the Kaggle
# competition environment — confirm before running elsewhere.
import janestreet
env = janestreet.make_env()
iter_test = env.iter_test()  # consumed by the submission loop as (test_df, pred_df) pairs

In [None]:
# NOTE(review): the submission loop visible in this notebook calls `model`
# directly and does not read this list — confirm whether it is still needed.
selected_models = [model]

In [None]:
# Submission loop: one prediction per batch yielded by the competition API.
start = time.time()
th = 0.5          # decision threshold on the aggregated probability
j = 0             # number of processed test batches
n_failed = 0      # predictions that raised and fell back to action 0
first_error = None

for (test_df, pred_df) in tqdm(iter_test):

    action = 0  # default: do not trade (zero weight or failed prediction)

    if test_df['weight'].item() > 0:

        x_tt = test_df.loc[:, features].values
        # Impute missing values (all feature columns but the first) with the
        # training medians.
        if np.isnan(x_tt[:, 1:].sum()):
            x_tt[:, 1:] = np.nan_to_num(x_tt[:, 1:]) + np.isnan(x_tt[:, 1:]) * f_median

        try:
            # Output index 2 holds the sigmoid predictions for the signs of
            # all responses; their median is the score that is thresholded.
            probs = model(x_tt, training=False)[2].numpy().flatten()
            action = int(np.median(probs) >= th)
        except Exception as err:
            # Best effort: keep the loop alive, but count failures instead of
            # hiding them (and let KeyboardInterrupt/SystemExit propagate).
            n_failed += 1
            if first_error is None:
                first_error = repr(err)

    pred_df.action = action
    env.predict(pred_df)
    j +=1

total = time.time() - start
if j > 0:
    # Extrapolate the measured per-iteration time to one million rows.
    print(f'Expected time for 1M: {total / j * 1000000 / (60*60):.2f} hours')
    print(f'Iters per second: {j / max(total, 1e-9):.1f} iter/s')
else:
    print('No test batches were processed')
if n_failed:
    print(f'Predictions that failed and defaulted to action 0: {n_failed} (first error: {first_error})')
print(f'Global time: {(time.time() - START_TIME) / 60:.1f} minutes')