# Jane Street: Neural Network Starter

This notebook implements a simple TensorFlow Keras neural network. Training was run in Version 17.

**Caution:** The GroupCV method applied in this notebook may cause a time-leakage problem. Please use [Purged Time-Series CV][1] instead.

[1]: https://www.kaggle.com/marketneutral/purged-time-series-cv-xgboost-optuna

In [1]:
#import sys
#!cp ../input/rapids/rapids.0.17.0 /opt/conda/envs/rapids.tar.gz
#!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
#sys.path = ["/opt/conda/envs/rapids/lib/python3.7/site-packages"] + sys.path
#sys.path = ["/opt/conda/envs/rapids/lib/python3.7"] + sys.path
#sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path 
#!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/

# Preprocessing

In [2]:
# Master switch: the data-loading and NaN-imputation cells below only run their bodies when this is True.
TRAINING = True
# Not referenced in the visible part of this notebook; presumably toggles a fine-tuning stage elsewhere (TODO confirm).
USE_FINETUNE = False

In [3]:
import warnings
warnings.filterwarnings('ignore')

#if TRAINING:
#    import cudf
#    import cupy as cp

import os, gc
import pandas as pd
import numpy as np
import xgboost as xgb
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import GroupKFold
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm, trange
from joblib import dump, load

import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as layers
from tensorflow.keras.layers import Multiply, multiply, add, Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.metrics import AUC

import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args


import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args
import kerastuner as kt

In [4]:
# Target column name and the 130 feature column names: feature_0 .. feature_129.
TARGET = 'action'
FEATS = [f'feature_{idx}' for idx in range(130)]
#FIX_FEAT = ['feature_53', 'feature_45', 'feature_69', 'feature_101', 'feature_84', 'feature_121', 'feature_48', 'feature_89', 'feature_93', 'feature_106', 'feature_108', 'feature_99', 'feature_100', 'feature_87', 'feature_120', 'feature_98', 'feature_46', 'feature_128', 'feature_50', 'feature_116', 'feature_71', 'feature_111', 'feature_55', 'feature_49', 'feature_56', 'feature_1', 'feature_51', 'feature_57', 'feature_107', 'feature_41', 'feature_109', 'feature_113', 'feature_123', 'feature_103', 'feature_90', 'feature_91', 'feature_114', 'feature_42', 'feature_54', 'feature_47', 'feature_95', 'feature_96']
#FIX_FEAT = ['feature_101', 'feature_108', 'feature_127', 'feature_96', 'feature_86', 'feature_56', 'feature_106', 'feature_45', 'feature_84', 'feature_93', 'feature_88', 'feature_57', 'feature_41', 'feature_2', 'feature_50', 'feature_125', 'feature_54', 'feature_109', 'feature_111', 'feature_128', 'feature_55', 'feature_70', 'feature_115', 'feature_1', 'feature_98', 'feature_113', 'feature_120', 'feature_91', 'feature_105', 'feature_48', 'feature_110', 'feature_116', 'feature_89', 'feature_107', 'feature_69', 'feature_92', 'feature_123', 'feature_119', 'feature_58', 'feature_59', 'feature_97', 'feature_47', 'feature_90', 'feature_71', 'feature_49', 'feature_100', 'feature_87', 'feature_103', 'feature_95', 'feature_121', 'feature_51', 'feature_114', 'feature_126', 'feature_53', 'feature_46', 'feature_42', 'feature_99']
#FIX_FEAT = ['feature_53', 'feature_45', 'feature_69', 'feature_101', 'feature_84', 'feature_121', 'feature_48', 'feature_89', 'feature_93', 'feature_106', 'feature_108', 'feature_99', 'feature_100', 'feature_87', 'feature_120', 'feature_98', 'feature_46', 'feature_128', 'feature_50', 'feature_116', 'feature_71', 'feature_111', 'feature_55', 'feature_49', 'feature_56', 'feature_1', 'feature_51', 'feature_57', 'feature_107', 'feature_41', 'feature_109', 'feature_113', 'feature_123', 'feature_103', 'feature_90', 'feature_91', 'feature_114', 'feature_42', 'feature_54', 'feature_47', 'feature_95', 'feature_96']
#FIX_FEAT = ['feature_1','feature_2','feature_41','feature_42','feature_44','feature_45','feature_46','feature_47','feature_48','feature_49','feature_50','feature_51','feature_53','feature_54','feature_55','feature_56','feature_57','feature_58','feature_59','feature_69','feature_70','feature_71','feature_84','feature_85','feature_86','feature_87','feature_88','feature_89','feature_90','feature_91','feature_92','feature_93','feature_94','feature_95','feature_96','feature_97','feature_98','feature_99','feature_100','feature_101','feature_102','feature_103','feature_104','feature_105','feature_106','feature_107','feature_108','feature_109','feature_110','feature_111','feature_112','feature_113','feature_114','feature_115','feature_116','feature_117','feature_118','feature_119','feature_120','feature_121','feature_122','feature_123','feature_124','feature_125','feature_126','feature_127','feature_128','feature_129']

In [5]:
if TRAINING:
    print('Loading...')
    train = pd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv')

    print('Filling...')
    # Keep only rows with a strictly positive weight. The index is reset so the
    # Series built below (wr, Y) carry labels 0..n-1, which lets the integer
    # fold indices used later select rows from them.
    train = train.query('weight > 0').reset_index(drop = True)
    resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']
    # Multi-target labels: one 0/1 column per entry of resp_cols (1 where the response is > 0).
    y = np.stack([(train[c] > 0.00000).astype('int') for c in resp_cols]).T #Multitarget

    # Feature matrix, materialised exactly once (TARGET / FEATS come from the
    # configuration cell and are not redefined here).
    X = train[FEATS].to_numpy()
    # Per-row weight * resp. Multiplying a Series by an ndarray yields a pandas Series.
    wr = train.weight*train['resp'].to_numpy()
    # weight * resp where resp > 0, else 0 (column 3 of y corresponds to 'resp' in resp_cols).
    Y = wr*y[:,3]
    print('Finish.')

Loading...
Filling...
Finish.


In [6]:
if TRAINING:
    # Per-feature flag: True for every column of FEATS that contains at least one missing value.
    nan_feat = train[FEATS].isna().any()
    NAN_FEAT = nan_feat[nan_feat].index
    nan_feat_bool = nan_feat.to_numpy()

    # Per-feature fill values stored alongside the CV split
    # (presumably the training means -- TODO confirm how f_mean.npy was produced).
    with open('../input/js-cv-split2/f_mean.npy', 'rb') as f:
        f_mean = np.load(f)

    # 0/1 missingness indicators, restricted to the columns that have any NaN.
    # This must be computed BEFORE X is imputed on the next line.
    mask2 = np.isnan(X[:, nan_feat_bool]).astype(int)
    # NaN cells become 0 via nan_to_num and then receive f_mean through the 0/1 NaN mask.
    X = np.isnan(X).astype(int) * f_mean + np.nan_to_num(X)

    # The raw frame is no longer needed; release it.
    del train
    _ = gc.collect()

    # Persist the column mask to 'nfb.pkl'.
    pd.to_pickle(nan_feat_bool, 'nfb.pkl')

# Training

In [7]:

def create_autoencoder(input_dim,output_dim,noise=0.1):
    """Build the encoder network (only the encoder half is constructed here).

    Architecture: BatchNormalization -> GaussianNoise(noise) -> Dense(96, elu)
    -> Dense(64, linear).

    Args:
        input_dim: Number of input features; used as the shape of the input layer.
        output_dim: Unused. Kept so existing call sites keep working; the
            encoded width is fixed at 64 by the last Dense layer.
        noise: Standard deviation passed to the GaussianNoise layer.

    Returns:
        An uncompiled Keras ``Model`` mapping the input features to the
        64-dimensional encoding.
    """
    # Honour the caller-supplied width instead of a hard-coded 130.
    i = Input(input_dim)
    encoded = BatchNormalization()(i)
    encoded = GaussianNoise(noise)(encoded)

    encoded = Dense(96, activation = 'elu')(encoded)
    encoded = Dense(64,activation='linear')(encoded)
    encoder = Model(inputs=i,outputs=encoded)

    return encoder

In [8]:
def custom_loss(y_true, y_pred):
    """Return the Keras mean squared error of ``y_pred`` against ``y_true``, scaled by 100."""
    mse = tf.keras.losses.MSE(y_true, y_pred)
    return 100 * mse

def metrics2(y_true, y_pred):
    """Return the sum of all elements of ``y_pred``; ``y_true`` is ignored."""
    total = K.sum(y_pred)
    return total

def metrics(y_true, y_pred):
    """Compute a clipped utility score from bucketed sums, print it and return it.

    ``y_pred`` is summed into buckets indexed by ``y_true`` via ``np.bincount``
    giving ``Pi``; then

        t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / len(Pi))
        u = clip(t, 0, 6) * sum(Pi)

    Args:
        y_true: 1-D array of non-negative integers used as bucket indices.
            NOTE(review): despite the name this looks like a day/date index
            rather than labels -- confirm against the caller.
        y_pred: 1-D array of per-row values used as the bincount weights.

    Returns:
        The score ``u``. Returns 0.0 when there are no buckets or when every
        bucket sums to zero, instead of raising ZeroDivisionError or
        propagating NaN from a 0/0 division.
    """
    Pi = np.bincount(y_true, y_pred)
    sq_norm = np.sum(Pi ** 2)
    if len(Pi) == 0 or sq_norm == 0:
        # Degenerate input: the ratio below would be x/0 or 0/0.
        u = 0.0
    else:
        t = np.sum(Pi) / np.sqrt(sq_norm) * np.sqrt(250 / len(Pi))
        # Clip t to [0, 6] before scaling by the total.
        u = min(max(t, 0), 6) * np.sum(Pi)
    print('\n', round(u,5))
    return u

In [9]:
# Rebuild the encoder architecture, restore its saved weights, and freeze it
# so it is used purely as a fixed feature extractor.
encoder = create_autoencoder(input_dim=130, output_dim=5, noise=0.1)
encoder.load_weights('../input/js-cv-split2/encoder.hdf5')
encoder.trainable = False

In [10]:
def create_model(input_dim,output_dim):
    """Build and compile the two-input, two-output classifier.

    Inputs:
        * a feature vector of width ``input_dim``
        * a single scalar per row that is multiplied into the second output

    Outputs:
        * ``label_output``: ``output_dim`` sigmoid units
        * ``return_out``: 1.0 if the mean of the sigmoid outputs exceeds 0.5
          (else 0.0), multiplied by the scalar input

    NOTE(review): ``return_out`` goes through a hard threshold (``tf.where``),
    so its loss presumably contributes no gradient and acts as a monitoring
    head only -- confirm.

    Returns:
        A compiled Keras ``Model`` (Adam, learning rate 0.0005).
    """
    features = Input(input_dim)
    hidden = BatchNormalization()(features)

    # (units, dropout rate) of each Dense -> BatchNorm -> swish -> Dropout stage.
    stage_specs = ((512, 0.5), (512, 0.5), (300, 0.42), (64, 0))
    for units, drop_rate in stage_specs:
        hidden = Dense(units)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Lambda(tf.keras.activations.swish)(hidden)
        hidden = Dropout(drop_rate)(hidden)

    label_probs = Dense(output_dim, activation='sigmoid', name='label_output')(hidden)

    # Collapse the per-target probabilities into one 0/1 decision per row.
    mean_prob = tf.math.reduce_mean(label_probs, axis=-1)
    decision = tf.cast(tf.where(mean_prob > 0.5, 1, 0), tf.float32)
    scalar_in = Input(1)
    scaled_decision = Multiply(name='return_out')([decision, scalar_in])

    model = Model(inputs=[features, scalar_in], outputs=[label_probs, scaled_decision])
    model.compile(
        optimizer=Adam(0.0005),
        loss={
            'label_output': BinaryCrossentropy(label_smoothing=0.0845),
            'return_out': custom_loss,
        },
        metrics={
            'label_output': AUC(name='auc'),
            'return_out': metrics2,
        },
    )
    return model

In [11]:
# Number of CV folds. In the visible cells it is only referenced by the commented-out
# PurgedGroupTimeSeriesSplit call; the actual splits are loaded from a pickle.
FOLDS = 6

In [12]:
    #gkf = PurgedGroupTimeSeriesSplit(n_splits = FOLDS, group_gap=20)
    # Pre-computed (train_indices, test_indices) pairs; replaces list(gkf.split(y, groups=date)).
    # NOTE(review): read_pickle executes arbitrary pickle payloads -- only load trusted files.
    splits = pd.read_pickle('../input/js-cv-split2/cross_validation.pkl')

    for fold, (train_indices, test_indices) in enumerate(splits):
        # Only folds 3 and later are trained.
        if fold < 3:
            continue

        # Input width = 130 raw features + 64 encoder outputs + 88 mask columns
        # (88 is presumably the width of mask2 -- TODO confirm).
        model = create_model(130 + 64 + 88, 5)

        X_train = X[train_indices]
        X_test = X[test_indices]
        y_train = y[train_indices]
        y_test = y[test_indices]
        Y_train = Y[train_indices]
        Y_test = Y[test_indices]

        # Encode with the frozen encoder in inference mode.
        encoded_X_train = encoder(X_train, training = False)
        encoded_X_test = encoder(X_test, training = False)

        mask_train = mask2[train_indices]
        mask_test = mask2[test_indices]

        # Final model input: raw features | encoded features | missingness mask.
        X_train = tf.concat([X_train, encoded_X_train, mask_train], -1)
        X_test = tf.concat([X_test, encoded_X_test, mask_test], -1)
        del encoded_X_train, encoded_X_test, mask_train, mask_test
        _ = gc.collect()

        wr_train = wr[train_indices]
        wr_test = wr[test_indices]

        model.fit(
            [X_train, wr_train],
            [y_train, Y_train],
            validation_data=([X_test, wr_test], [y_test, Y_test]),
            epochs=300,
            batch_size=4000,
            callbacks=[
                EarlyStopping(
                    'val_label_output_auc',
                    mode='max',
                    patience=10,
                    restore_best_weights=True,
                )
            ],
        )
        model.save_weights(f'./model_{fold}.hdf5')
        del wr_train, wr_test
        _ = gc.collect()

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300


In [13]:
def create_model(input_dim,output_dim):
    """Build and compile the second (three-stage) variant of the classifier.

    This definition intentionally reuses the name ``create_model`` and so
    replaces the earlier one for every cell executed after it.

    Inputs:
        * a feature vector of width ``input_dim``
        * a single scalar per row that is multiplied into the second output

    Outputs:
        * ``label_output``: ``output_dim`` sigmoid units
        * ``return_out``: 1.0 if the mean of the sigmoid outputs exceeds 0.5
          (else 0.0), multiplied by the scalar input

    Returns:
        A compiled Keras ``Model`` (Adam, learning rate 0.0072342).
    """
    features = Input(input_dim)
    hidden = BatchNormalization()(features)

    # (units, dropout rate) of each Dense -> BatchNorm -> swish -> Dropout stage.
    stage_specs = ((438, 0.34554), (420, 0.47263), (64, 0.42533))
    for units, drop_rate in stage_specs:
        hidden = Dense(units)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Lambda(tf.keras.activations.swish)(hidden)
        hidden = Dropout(drop_rate)(hidden)

    label_probs = Dense(output_dim, activation='sigmoid', name='label_output')(hidden)

    # Collapse the per-target probabilities into one 0/1 decision per row.
    mean_prob = tf.math.reduce_mean(label_probs, axis=-1)
    decision = tf.cast(tf.where(mean_prob > 0.5, 1, 0), tf.float32)
    scalar_in = Input(1)
    scaled_decision = Multiply(name='return_out')([decision, scalar_in])

    model = Model(inputs=[features, scalar_in], outputs=[label_probs, scaled_decision])
    model.compile(
        optimizer=Adam(0.0072342),
        loss={
            'label_output': BinaryCrossentropy(label_smoothing=0.090004),
            'return_out': custom_loss,
        },
        metrics={
            'label_output': AUC(name='auc'),
            'return_out': metrics2,
        },
    )
    return model

In [14]:
    #gkf = PurgedGroupTimeSeriesSplit(n_splits = FOLDS, group_gap=20)
    #splits = list(gkf.split(y, groups=date))

    # `splits` is reused from the earlier training cell.
    for fold, (train_indices, test_indices) in enumerate(splits):
        # Only folds 3 and later are trained.
        if fold < 3:
            continue

        # Input width = 130 raw features + 64 encoder outputs + 88 mask columns
        # (88 is presumably the width of mask2 -- TODO confirm).
        model = create_model(130 + 64 + 88, 5)

        X_train = X[train_indices]
        X_test = X[test_indices]
        y_train = y[train_indices]
        y_test = y[test_indices]
        Y_train = Y[train_indices]
        Y_test = Y[test_indices]

        # Encode with the frozen encoder in inference mode.
        encoded_X_train = encoder(X_train, training = False)
        encoded_X_test = encoder(X_test, training = False)

        mask_train = mask2[train_indices]
        mask_test = mask2[test_indices]

        # Final model input: raw features | encoded features | missingness mask.
        X_train = tf.concat([X_train, encoded_X_train, mask_train], -1)
        X_test = tf.concat([X_test, encoded_X_test, mask_test], -1)
        del encoded_X_train, encoded_X_test, mask_train, mask_test
        _ = gc.collect()

        wr_train = wr[train_indices]
        wr_test = wr[test_indices]

        model.fit(
            [X_train, wr_train],
            [y_train, Y_train],
            validation_data=([X_test, wr_test], [y_test, Y_test]),
            epochs=300,
            batch_size=4000,
            callbacks=[
                EarlyStopping(
                    'val_label_output_auc',
                    mode='max',
                    patience=10,
                    restore_best_weights=True,
                )
            ],
        )
        model.save_weights(f'./model2_{fold}.hdf5')
        del wr_train, wr_test
        _ = gc.collect()

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
