# Import libraries

In [None]:
# import libraries
import pandas as pd
from sklearn.decomposition import PCA
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout
from tensorflow.keras.initializers import GlorotNormal
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Import data and data cleaning

In [None]:
# load the competition training set
df = pd.read_csv("../input/jane-street-market-prediction/train.csv")

# keep only the rows whose weight is non-zero
nonzero_weight = df["weight"].ne(0)
df = df.loc[nonzero_weight]

In [None]:
# build the model inputs (X) from columns 7..136 of df.
# fillna() is chained (not inplace) so the column slice is never mutated as a
# possible view of df, which avoids pandas' SettingWithCopyWarning.
x = df.iloc[:, 7:137].fillna(0)                             # fill NaN values with 0

# convert x to numpy array
x = x.to_numpy()

In [None]:
# create the binary target (y) from the response columns
resp = df.iloc[:, 2:7]                                      # resp values
pca = PCA(n_components = 1)                                 # project resp values onto their first principal component
resp_pca = pca.fit_transform(resp)

# The sign of a principal component is arbitrary. Orient the projection so that
# the component loads positively on the resp columns; otherwise "> 0" below could
# label the opposite side of the axis.
if pca.components_[0].sum() < 0:
    resp_pca = -resp_pca

y = (resp_pca > 0).astype("int")                            # classify actions
y = y.reshape((-1, 1))                                      # column vector, one label per row

In [None]:
# delete variables that are no longer needed before training.
# Only names defined earlier in this file are listed: `del` on an undefined name
# raises NameError and would abort the cell before the remaining deletes run.
del df
del nonzero_weight
del resp
del pca
del resp_pca

# Model Fitting

In [None]:
def _hidden_block(units, dropout_rate = None):
    """Return the layers of one hidden block: Dense -> ReLU -> BatchNorm [-> Dropout]."""
    block = [
        Dense(units = units, kernel_initializer = GlorotNormal()),
        Activation(tf.keras.activations.relu),
        BatchNormalization(),
    ]
    if dropout_rate is not None:
        block.append(Dropout(dropout_rate))
    return block


def fit_model(x_train, y_train, epochs = 200, batch_size = 1024):
    """Build, compile and train the binary classifier.

    The network has four hidden blocks (64, 64, 32, 32 units), each made of
    Dense -> ReLU -> BatchNormalization, with Dropout(0.2) after the second
    and fourth block, followed by a single sigmoid output unit.

    Args:
        x_train: 2-D array-like of features; the input width of the network
            is taken from ``x_train.shape[1]``.
        y_train: binary targets passed to ``model.fit`` as ``y``.
        epochs: maximum number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        The trained Keras ``Sequential`` model.
    """
    # derive the input width from the data instead of hard-coding it
    n_features = x_train.shape[1]

    model = Sequential([
        Input(shape = (n_features,)),
        *_hidden_block(64),                          # hidden layer 1
        *_hidden_block(64, dropout_rate = 0.2),      # hidden layer 2
        *_hidden_block(32),                          # hidden layer 3
        *_hidden_block(32, dropout_rate = 0.2),      # hidden layer 4
        Dense(units = 1, activation = "sigmoid"),    # output
    ])

    opt = Adam(learning_rate = 0.01)
    model.compile(
        loss = "binary_crossentropy",
        optimizer = opt,
        metrics = [tf.keras.metrics.AUC(name = "AUC"), "accuracy"])

    # No validation data is passed to fit(), so early stopping watches the
    # training accuracy; mode is explicit so "higher is better" is never guessed.
    early_stopping = EarlyStopping(
        monitor = "accuracy",
        mode = "max",
        patience = 10,
        restore_best_weights = True)

    model.fit(
        x = x_train,
        y = y_train,
        epochs = epochs,
        batch_size = batch_size,
        callbacks = [early_stopping],
        verbose = 2)

    return model

# train on the full feature matrix and targets
model = fit_model(x, y, epochs = 1000, batch_size = 8192)

# save model (plain string: the former f-string had no placeholders)
model.save('nn_model.h5')

In [None]:
# run model on the competition's streaming test environment
import janestreet
env = janestreet.make_env()

for (test_df, pred_df) in env.iter_test():
    if test_df["weight"].item() > 0:                            # only score rows with weight > 0
        # same column slice and NaN handling as before, as a float32 array
        x_test = test_df.iloc[:, 1:131].fillna(0).to_numpy(dtype = "float32")

        # Calling the model directly avoids the per-call overhead of
        # model.predict(), which is significant for one small batch per iteration.
        y_pred = model(x_test, training = False).numpy()
        pred_df.action = (y_pred > 0.5).astype("int").ravel()   # 1-D so it aligns with the column
    else:                                                       # predict action as False
        pred_df.action = 0

    env.predict(pred_df)