# Jane Street: Neural Network Starter

This notebook implements a simple TensorFlow Keras neural network. Training was run in Version 17 of the notebook.

**Caution:** The GroupCV method applied in this notebook may cause a time-leakage problem. Please use [Purged Time-Series CV][1] instead.

[1]: https://www.kaggle.com/marketneutral/purged-time-series-cv-xgboost-optuna

# Preprocessing

In [1]:
# When True, the cells below load train.csv and run the NaN preprocessing.
TRAINING = True
# NOTE(review): not referenced in any of the visible cells — presumably a leftover switch; confirm before removing.
USE_FINETUNE = False

In [2]:
import warnings
warnings.filterwarnings('ignore')

#if TRAINING:
#    import cudf
#    import cupy as cp

import os, gc
import pandas as pd
import numpy as np
import xgboost as xgb
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import GroupKFold
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm, trange
from joblib import dump, load

import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as layers
from tensorflow.keras.layers import multiply, add, Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.metrics import AUC

import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args


import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args
import kerastuner as kt

In [3]:
# Name of the target column.
TARGET = 'action'
# Names of the 130 anonymised feature columns: feature_0 ... feature_129.
FEATS = [f'feature_{idx}' for idx in range(130)]

In [4]:
if TRAINING:
    # TARGET and FEATS come from the configuration cell above; they are not
    # redefined here so there is a single place to edit them.
    print('Loading...')
    train = pd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv')

    print('Filling...')
    # Keep only rows with a strictly positive weight.
    train = train.query('weight > 0').reset_index(drop = True)

    # Multi-target labels: one binary column per response horizon,
    # 1 where that response is strictly positive.
    resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']
    y = np.stack([(train[c] > 0.00000).astype('int') for c in resp_cols]).T #Multitarget

    # Feature matrix, converted once (the original converted it twice).
    X = train[FEATS].to_numpy()

    # Per-row weighted return, and the weighted return earned when the 'resp'
    # label is positive. Both are plain ndarrays so they are indexed
    # positionally, exactly like X and y.
    wr = (train['weight'] * train['resp']).to_numpy()
    Y = wr * y[:, resp_cols.index('resp')]
    print('Finish.')

Loading...
Filling...
Finish.


In [5]:
if TRAINING:
    # Boolean flag per feature column: True when the column has any missing value.
    nan_feat = train[FEATS].isnull().any()
    NAN_FEAT = nan_feat[nan_feat].index
    nan_feat_bool = nan_feat.values

    # Per-feature fill values shipped with the attached dataset.
    f_mean = np.load('../input/js-cv-split2/f_mean.npy')

    # Locate the missing entries once and reuse the result for both the
    # missing-value indicator columns and the imputation.
    is_missing = np.isnan(X)
    mask2 = is_missing[:, nan_feat_bool].astype(int)
    # Missing entries become 0 via nan_to_num, then receive the column's fill value.
    X = np.nan_to_num(X) + is_missing.astype(int) * f_mean

    # The raw frame is no longer needed; release it before training.
    del train, is_missing
    _ = gc.collect()

    # Persist the column flags next to the notebook outputs.
    pd.to_pickle(nan_feat_bool, 'nfb.pkl')

# Training

In [6]:
def create_autoencoder(input_dim,output_dim,noise=0.1):
    """Build the encoder half of the autoencoder.

    The network is BatchNormalization -> GaussianNoise -> Dense(96, elu)
    -> Dense(64, linear) and maps a feature vector to a 64-dimensional code.

    Args:
        input_dim: Number of input features (width of the input vector).
        output_dim: Unused; kept so existing callers keep working.
        noise: Standard deviation passed to the GaussianNoise layer.

    Returns:
        A Keras ``Model`` taking ``(batch, input_dim)`` inputs and producing
        ``(batch, 64)`` encodings.
    """
    # Honour ``input_dim`` instead of the previously hard-coded 130. The
    # unused second ``mask`` input of the original has been dropped.
    features = Input(shape=(input_dim,))

    encoded = BatchNormalization()(features)
    encoded = GaussianNoise(noise)(encoded)

    encoded = Dense(96, activation = 'elu')(encoded)
    encoded = Dense(64,activation='linear')(encoded)
    encoder = Model(inputs=features,outputs=encoded)

    return encoder

In [7]:
# Rebuild the encoder architecture and restore the weights stored in the attached dataset.
encoder = create_autoencoder(130, 5, noise=0.1)
encoder.load_weights('../input/js-cv-split2/encoder.hdf5')
# Freeze the encoder; below it is only called to precompute the encoded features.
encoder.trainable = False

In [8]:
FOLDS = 5
SEED = 42

# SEED is embedded in the checkpoint file names, but no random number
# generator was ever seeded with it. Seed numpy and TensorFlow so weight
# initialisation and dropout start from a known state.
# NOTE(review): TPU execution may still not be bit-for-bit reproducible.
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [9]:
from tensorflow.keras.layers import Conv1D, AveragePooling1D, MaxPooling1D, Flatten, Multiply, Add

# Width of the model input: 130 raw features + 64 encoder outputs + 88 mask columns.
# NOTE(review): 88 is presumably the number of features with missing values
# (the width of mask2) — confirm against nan_feat_bool.sum().
input_dim = 130 + 64 + 88
# One sigmoid output per response column in resp_cols.
output_dim = 5
# Length of the pseudo-sequence the dense projection is reshaped into.
window_size = 16
# Channel widths of the convolution stack (narrow / wide).
cha1 = 128
cha2 = 256
# NOTE(review): the visible model-building cells pass literal dropout rates
# (0.15 ... 0.35) instead of this value — confirm whether it is still needed.
rate = 0.18


def conv_model(input_dim, output_dim, window_size, cha1, cha2, rate):
    """Build the two-headed 1-D convolutional classifier.

    The flat feature vector is projected by a bias-free Dense layer and
    reshaped into a ``(window_size, cha1)`` sequence. It then passes through
    three residual stages of causal width-3 convolutions
    (``cha1 -> cha2 -> cha1``), each followed by ``MaxPooling1D(2)``.

    Args:
        input_dim: Width of the flat feature input.
        output_dim: Number of sigmoid label outputs.
        window_size: Sequence length after the reshape.
        cha1: Channel width at the start and end of every residual stage.
        cha2: Channel width of the middle convolution of every stage.
        rate: Dropout rate used throughout.

    Returns:
        A Keras ``Model`` with inputs ``[features, wr]`` and outputs
        ``[label_out, return_out]``. ``return_out`` is the 0/1 decision
        (mean of the labels thresholded at 0.5) multiplied by ``wr``.
    """
    def causal_conv(filters):
        # Every convolution in the stack shares these settings.
        return Conv1D(filters, 3, activation = tf.keras.activations.swish, padding = 'causal')

    inputs = Input(input_dim)

    # Stem: normalise, project, and fold the vector into a sequence.
    x = BatchNormalization()(inputs)
    x = Dense(window_size * cha1, use_bias = False)(x)
    x = tf.reshape(x, (-1, window_size, cha1))
    x = BatchNormalization()(x)

    for _ in range(3):
        # Entry convolution; its output is the residual shortcut.
        x = Dropout(rate)(x)
        x = causal_conv(cha1)(x)
        shortcut = x

        # Widen to cha2, then narrow back to cha1.
        for filters in (cha2, cha1):
            x = BatchNormalization()(x)
            x = Dropout(rate)(x)
            x = causal_conv(filters)(x)

        x = Add()([shortcut, x])
        x = BatchNormalization()(x)
        x = MaxPooling1D(2)(x)

    # Classification head.
    x = Dense(64)(x)
    x = Flatten()(x)
    x = BatchNormalization()(x)
    x = Dropout(rate)(x)
    labels = Dense(output_dim, activation = 'sigmoid', name = 'label_out')(x)

    # Hard decision from the averaged label probabilities.
    # NOTE(review): the 0.5 threshold presumably blocks gradients from the
    # return_out loss — confirm this head is meant for monitoring only.
    action = tf.math.reduce_mean(labels, axis = -1)
    action = tf.where(action > 0.5, 1, 0)
    action = tf.cast(action, tf.float32)

    wr = Input(1)
    returns = Multiply(name = 'return_out')([action, wr])

    return Model(inputs = [inputs, wr], outputs = [labels, returns])

In [10]:
# Precomputed cross-validation splits: splits[f][0] and splits[f][1] are used
# below as the train and validation row indexers for fold f.
# NOTE(review): read_pickle unpickles arbitrary objects — only load this file from a trusted source.
splits = pd.read_pickle('../input/js-cv-split2/cross_validation.pkl')

In [11]:
f = 5

# Row indexers of the selected fold.
train_idx, test_idx = splits[f][0], splits[f][1]

# Slice every array with the same train / validation indexers.
y_train, y_test = y[train_idx], y[test_idx]
X_train, X_test = X[train_idx], X[test_idx]
Y_train, Y_test = Y[train_idx], Y[test_idx]
mask_train, mask_test = mask2[train_idx], mask2[test_idx]
wr_train, wr_test = wr[train_idx], wr[test_idx]

# Encoded features from the frozen encoder.
encoded_X_train = encoder(X_train).numpy()
encoded_X_test = encoder(X_test).numpy()

# Model input = raw features | encoded features | missing-value indicators.
X_train = np.concatenate([X_train, encoded_X_train, mask_train], axis = -1)
X_test = np.concatenate([X_test, encoded_X_test, mask_test], axis = -1)

# Drop the full-size arrays and intermediates to free memory before training.
del X, y, encoded_X_train, encoded_X_test, Y, wr, mask_train, mask_test
_ = gc.collect()

In [12]:
def custom_loss(y_true, y_pred):
    """Return the Keras mean squared error of the two tensors, scaled by 100."""
    mse = tf.keras.losses.MSE(y_true, y_pred)
    return 100 * mse

def metrics2(y_true, y_pred):
    """Return the sum of all elements of ``y_pred``; ``y_true`` is ignored.

    Uses the ``(y_true, y_pred)`` signature so it can be passed as a Keras metric.
    """
    total = K.sum(y_pred)
    return total

def metrics(y_true, y_pred):
    """Compute and print a clipped, annualised utility score.

    ``y_pred`` is summed per integer bin of ``y_true`` to give ``Pi``. Then
    ``t = sum(Pi) / sqrt(sum(Pi**2)) * sqrt(250 / len(Pi))`` and the score is
    ``clip(t, 0, 6) * sum(Pi)``.

    Args:
        y_true: 1-D array of non-negative integers (``np.bincount`` bins).
            NOTE(review): presumably group ids such as trading dates rather
            than labels, despite the name — verify against the caller.
        y_pred: 1-D array of per-row values accumulated into each bin.

    Returns:
        The score. Returns ``0.0`` when there are no bins or every bin sums
        to zero; the original raised ZeroDivisionError or returned NaN there.
    """
    Pi = np.bincount(y_true, y_pred)
    norm = np.sqrt(np.sum(Pi ** 2))
    if len(Pi) == 0 or norm == 0:
        # Nothing accumulated: the ratio is undefined, so score it as zero.
        u = 0.0
    else:
        t = np.sum(Pi) / norm * np.sqrt(250 / len(Pi))
        u = min(max(t, 0), 6) * np.sum(Pi)
    print('\n', round(u,5))
    return u

In [13]:
## detect and init the TPU
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)

## instantiate a distribution strategy
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [14]:
# Model 1: dropout rate 0.15. Built and compiled inside the TPU strategy scope.
with tpu_strategy.scope():
    model = conv_model(input_dim, output_dim, window_size, cha1, cha2, 0.15)
    model.compile(
        optimizer = Adam(learning_rate = 0.0005),
        # One loss per named output head.
        loss = {
            'label_out': 'binary_crossentropy',
            'return_out': custom_loss,
        },
        metrics = {
            'label_out': AUC(name = 'auc'),
            'return_out': metrics2,
        },
    )



In [15]:
# Stop once validation AUC of the label head has not improved for 10 epochs,
# and roll back to the best weights seen.
early_stopping = EarlyStopping(
    'val_label_out_auc',
    mode='max',
    patience=10,
    restore_best_weights=True,
)

model.fit(
    [X_train, wr_train],
    [y_train, Y_train],
    validation_data=([X_test, wr_test], [y_test, Y_test]),
    epochs=300,
    batch_size=4000,
    callbacks=[early_stopping],
)
model.save_weights(f'./model_{SEED}_{f}.hdf5')

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300


In [16]:
# Model 2: same architecture with dropout rate 0.2.
with tpu_strategy.scope():
    model = conv_model(input_dim, output_dim, window_size, cha1, cha2, 0.2)
    model.compile(
        optimizer = Adam(learning_rate = 0.0005),
        loss = {
            'label_out': 'binary_crossentropy',
            'return_out': custom_loss,
        },
        metrics = {
            'label_out': AUC(name = 'auc'),
            'return_out': metrics2,
        },
    )

# Early stopping on validation AUC of the label head (patience 10, best weights restored).
early_stopping = EarlyStopping(
    'val_label_out_auc',
    mode='max',
    patience=10,
    restore_best_weights=True,
)

model.fit(
    [X_train, wr_train],
    [y_train, Y_train],
    validation_data=([X_test, wr_test], [y_test, Y_test]),
    epochs=300,
    batch_size=4000,
    callbacks=[early_stopping],
)
model.save_weights(f'./model2_{SEED}_{f}.hdf5')

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300


In [17]:
# Model 3: same architecture with dropout rate 0.25.
with tpu_strategy.scope():
    model = conv_model(input_dim, output_dim, window_size, cha1, cha2, 0.25)
    model.compile(
        optimizer = Adam(learning_rate = 0.0005),
        loss = {
            'label_out': 'binary_crossentropy',
            'return_out': custom_loss,
        },
        metrics = {
            'label_out': AUC(name = 'auc'),
            'return_out': metrics2,
        },
    )

# Early stopping on validation AUC of the label head (patience 10, best weights restored).
early_stopping = EarlyStopping(
    'val_label_out_auc',
    mode='max',
    patience=10,
    restore_best_weights=True,
)

model.fit(
    [X_train, wr_train],
    [y_train, Y_train],
    validation_data=([X_test, wr_test], [y_test, Y_test]),
    epochs=300,
    batch_size=4000,
    callbacks=[early_stopping],
)
model.save_weights(f'./model3_{SEED}_{f}.hdf5')

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300


In [18]:
# Model 4: same architecture with dropout rate 0.3.
with tpu_strategy.scope():
    model = conv_model(input_dim, output_dim, window_size, cha1, cha2, 0.3)
    model.compile(
        optimizer = Adam(learning_rate = 0.0005),
        loss = {
            'label_out': 'binary_crossentropy',
            'return_out': custom_loss,
        },
        metrics = {
            'label_out': AUC(name = 'auc'),
            'return_out': metrics2,
        },
    )

# Early stopping on validation AUC of the label head (patience 10, best weights restored).
early_stopping = EarlyStopping(
    'val_label_out_auc',
    mode='max',
    patience=10,
    restore_best_weights=True,
)

model.fit(
    [X_train, wr_train],
    [y_train, Y_train],
    validation_data=([X_test, wr_test], [y_test, Y_test]),
    epochs=300,
    batch_size=4000,
    callbacks=[early_stopping],
)
model.save_weights(f'./model4_{SEED}_{f}.hdf5')

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300


In [19]:
# Model 5: same architecture with dropout rate 0.35.
with tpu_strategy.scope():
    model = conv_model(input_dim, output_dim, window_size, cha1, cha2, 0.35)
    model.compile(
        optimizer = Adam(learning_rate = 0.0005),
        loss = {
            'label_out': 'binary_crossentropy',
            'return_out': custom_loss,
        },
        metrics = {
            'label_out': AUC(name = 'auc'),
            'return_out': metrics2,
        },
    )

# Early stopping on validation AUC of the label head (patience 10, best weights restored).
early_stopping = EarlyStopping(
    'val_label_out_auc',
    mode='max',
    patience=10,
    restore_best_weights=True,
)

model.fit(
    [X_train, wr_train],
    [y_train, Y_train],
    validation_data=([X_test, wr_test], [y_test, Y_test]),
    epochs=300,
    batch_size=4000,
    callbacks=[early_stopping],
)
model.save_weights(f'./model5_{SEED}_{f}.hdf5')

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
