In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, name) for name in names]
    if full_paths:
        # One path per line, exactly as a print() per file would produce.
        print(*full_paths, sep='\n')

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Import modules**

In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from tensorflow.keras.models import Model, Sequential
import tensorflow as tf
import tensorflow_addons as tfa
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

**Load the dataset and drop rows with zero weight**

In [None]:
# Read the competition training file and keep only the rows whose weight is non-zero.
jane = (
    pd.read_csv("../input/jane-street-market-prediction/train.csv")
    .loc[lambda frame: frame['weight'] != 0]
)

Impute missing values with the column median

In [None]:
# Replace every NaN with the median of its column. The imputer is fitted on the
# filtered training frame and then applied to that same frame.
imputer = SimpleImputer(strategy='median', missing_values=np.nan)

# fit() returns the fitted imputer itself, so the two steps can be chained;
# jane2 ends up holding the transformed data.
jane2 = imputer.fit(jane).transform(jane)

In [None]:
# The imputer output carries no column labels, so rebuild a DataFrame that reuses
# the column names of the frame it was computed from.
cols = jane.columns
jane2 = pd.DataFrame(jane2, columns=cols)

# Discard the auxiliary response columns and the row identifier.
jane2 = jane2.drop(columns=['resp_1', 'resp_2', 'resp_3', 'resp_4', 'ts_id'])

# Binary target: 1 where resp is strictly positive, 0 otherwise.
action = (jane2['resp'] > 0) * 1

# MODEL

Create the train and test sets by splitting on `date`

In [None]:
# Remove the target so that only model inputs remain in jane2.
# errors='ignore' keeps the cell re-runnable: once 'resp' is gone, a second run
# no longer raises KeyError.
jane2 = jane2.drop(columns=['resp'], errors='ignore')

# Rows dated after SPLIT_DATE are used for training; rows up to and including
# SPLIT_DATE are held out.
SPLIT_DATE = 347
train_rows = jane2['date'] > SPLIT_DATE
test_rows = jane2['date'] <= SPLIT_DATE

X_train = jane2[train_rows]
X_test = jane2[test_rows]

# `action` was derived from jane2['resp'] and therefore shares jane2's index.
# Selecting the labels with the same boolean masks as the features (instead of
# a hard-coded row offset) guarantees X and y stay aligned even if the number
# or the ordering of rows changes.
y_train = action[train_rows]
y_test = action[test_rows]

# Cheap sanity check on the split.
assert len(X_train) == len(y_train), "train features/labels are misaligned"
assert len(X_test) == len(y_test), "test features/labels are misaligned"

Keras

In [None]:
# Width of the input layer. It must equal the number of columns in the matrix
# passed to model.fit (presumably date, weight and the feature columns that
# remain after the drops earlier in the notebook; TODO confirm).
N_FEATURES = 132
DROPOUT_RATE = 0.2

# Hidden-layer sizes of the three parallel towers (one list entry per Dense layer).
first_units = [316, 316]
second_units = [158, 158, 158]
third_units = [76, 76, 76, 76]


def _dense_tower(inputs, layer_units, dropout_rate=DROPOUT_RATE):
    """Build one tower: BatchNorm, then (Dropout -> Dense -> ReLU) per entry.

    Args:
        inputs: Keras tensor the tower is attached to.
        layer_units: iterable with the number of units of each Dense layer.
        dropout_rate: dropout rate applied before every Dense layer.

    Returns:
        The output tensor of the last ReLU activation in the tower.
    """
    x = tf.keras.layers.BatchNormalization()(inputs)
    for units in layer_units:
        x = tf.keras.layers.Dropout(dropout_rate)(x)
        x = tf.keras.layers.Dense(units)(x)
        x = tf.keras.layers.Activation(tf.keras.activations.relu)(x)
    return x


inp = tf.keras.layers.Input(shape=(N_FEATURES, ))

# Three towers of different width/depth, all fed from the same input.
b1 = _dense_tower(inp, first_units)
b2 = _dense_tower(inp, second_units)
b3 = _dense_tower(inp, third_units)

# Concatenate the tower outputs and map them to a single sigmoid probability.
blocks = tf.keras.layers.concatenate([b1, b2, b3])
blocks = tf.keras.layers.Dense(1)(blocks)
out = tf.keras.layers.Activation('sigmoid')(blocks)

model = tf.keras.models.Model(inputs=inp, outputs=out)

**Plotting the model representation:**

In [None]:
# The model is built with tf.keras, so take plot_model from the same package
# rather than from the standalone `keras` distribution.
from tensorflow.keras.utils import plot_model

# Write the architecture diagram to model.png and show it inline.
display(
    plot_model(
        model,
        to_file="model.png",
        show_shapes=True,
        show_layer_names=True,
    )
)

In [None]:
# Learning rate starts at 1e-2 and decays by a factor of 0.9 per 10,000
# optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=10000,
    decay_rate=0.9)

# Plain SGD driven by the schedule above; binary cross-entropy matches the
# single sigmoid output of the model.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule),
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

# Stop training when the validation loss stops improving and roll back to the
# best weights seen. The callback comes from tf.keras, like the model itself.
# restore_best_weights must be the boolean True: a string is truthy whatever
# it says, so 'False' would also have enabled it.
# NOTE(review): min_delta=0.01 looks large compared with typical epoch-to-epoch
# changes in binary cross-entropy; confirm it is intended.
early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.01,
    patience=50,
    restore_best_weights=True,
)

In [None]:
# Train silently (verbose=0) for at most 200 epochs. The (X_test, y_test) split
# serves as validation data, and the `early` callback may end training sooner.
fit_options = dict(
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=4096,
    callbacks=[early],
    verbose=0,
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Put the metrics recorded by Keras during fit() into a table.
history_df = pd.DataFrame(history.history)

# First figure: training vs. validation loss.
history_df[['loss', 'val_loss']].plot()
# Second figure: training vs. validation accuracy.
history_df[['binary_accuracy', 'val_binary_accuracy']].plot()

In [None]:
import time

import janestreet

# Presumably resets the module's one-call guard so make_env() can be invoked
# again in the same session; left disabled. TODO confirm before enabling.
#janestreet.competition.make_env.__called__ = False
env = janestreet.make_env()  # initialize the environment
iter_test = env.iter_test()  # an iterator which loops over the test set

# The network must receive its inputs in exactly the column order it was
# trained on. Selecting by name avoids relying on the column order of the
# frames delivered by the API, which need not match the training matrix.
feature_cols = list(X_train.columns)

start_time = time.time()

# Iterate the generator created above (calling env.iter_test() a second time
# is unnecessary).
for (test_df, pred_df) in tqdm(iter_test):
    x_tt = test_df[feature_cols].to_numpy(dtype=np.float32)
    if np.isnan(x_tt).any():
        # The model cannot score rows with missing inputs: do not trade.
        pred_df.action = 0
    else:
        # Flatten the (n_rows, 1) model output to one probability per row.
        pred = model(x_tt, training=False).numpy().ravel()
        pred_df.action = (pred > 0.5).astype(int)
    env.predict(pred_df)

print(f"took: {time.time() - start_time} seconds")