In [None]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation, GaussianDropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy, Huber
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import numpy as np
import pandas as pd
from tqdm import tqdm
from random import choices
from tensorflow.keras.constraints import max_norm, min_max_norm
from keras.callbacks import ReduceLROnPlateau

SEED = 1111

# Seed both TensorFlow and NumPy so repeated runs draw the same random numbers.
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Load the competition training set and keep only rows with date > 85.
train = (
    pd.read_csv('../input/jane-street-market-prediction/train.csv')
    .query('date > 85')
    .reset_index(drop=True)
)

# Drop rows whose weight is exactly zero (the index keeps gaps from here on).
train = train.loc[train['weight'] != 0]

# Replace every missing value with the mean of its column.
train = train.fillna(train.mean())

# Binary target derived from the sign of `resp`.
train['action'] = (train['resp'].values > 0).astype(int)

# Every column whose name contains "feature".
features = [col for col in train.columns if "feature" in col]

# Per-column means of all features except the first one; the inference cell
# uses them to fill missing values in the test rows.
f_mean = train[features[1:]].values.mean(axis=0)

# The five response horizons, each turned into its own binary label.
resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']

X_train = train[features]

# Multi-label target of shape (n_rows, len(resp_cols)): 1 where the response is positive.
y_train = (train[resp_cols].values > 0).astype('int')




In [None]:
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Activation, LeakyReLU

# Add the GELU function to Keras
#https://mlfromscratch.com/activation-functions-explained/#/
    
def gelu(x):
    """Tanh approximation of the Gaussian Error Linear Unit (GELU).

    Reference: https://github.com/hendrycks/GELUs

    The constant 0.7978845608 is sqrt(2 / pi) rounded to ten decimals. The
    exact GELU is 0.5 * x * (1 + erf(x / sqrt(2))); this function does not
    compute that form.
    """
    inner = x * 0.7978845608 * (1 + 0.044715 * x * x)
    return 0.5 * x * (1 + tf.tanh(inner))

# The commented-out variant below is the same tanh approximation with
# sqrt(2 / pi) spelled out; it is algebraically equal to the code above and is
# therefore not the exact (erf-based) GELU either.
#    return 0.5 * x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
# Register the custom activations under string names so that layers can refer
# to them as 'gelu' and 'leaky-relu'.
get_custom_objects().update({
    'gelu': Activation(gelu),
    'leaky-relu': Activation(LeakyReLU(alpha=0.2)),
})

# Activation names available for experiments.
act_func = [
    'sigmoid',
    'relu',
    'elu',
    'leaky-relu',
    'selu',
    'gelu',
]


In [None]:
def create_mlp(
    num_columns, num_labels, hidden_units, dropout_rates, label_smoothing, learning_rate
):
    """Build and compile a fully connected multi-label network.

    Architecture: input -> BatchNormalization -> Dropout -> GaussianDropout,
    then for every entry of `hidden_units` a Dense -> BatchNormalization ->
    'gelu' activation -> GaussianDropout stack, and finally a Dense layer with
    `num_labels` units followed by a sigmoid.

    Args:
        num_columns: number of input features.
        num_labels: number of sigmoid outputs.
        hidden_units: width of each hidden Dense layer, in order.
        dropout_rates: `dropout_rates[0]` is applied to the input (to both the
            Dropout and the GaussianDropout layer); `dropout_rates[i + 1]` is
            applied after hidden layer `i`, so at least
            `len(hidden_units) + 1` entries are required.
        label_smoothing: accepted but not used, because the active loss is
            Huber; it only mattered for the BinaryCrossentropy loss that was
            tried previously.
        learning_rate: learning rate passed to the SGD optimizer.

    Returns:
        A compiled `tf.keras.models.Model` that reports AUC, true negatives
        ("TN") and true positives ("TP").

    Other variants tried here and since disabled: Dense layers with a
    min_max_norm kernel constraint or TruncatedNormal/Orthogonal initializers,
    a swish activation, plain Dropout after each hidden layer, and the Adam
    and RMSprop optimizers.
    """
    layers = tf.keras.layers

    inp = layers.Input(shape=(num_columns,))

    # Input block: normalise, then apply both dropout flavours at the same rate.
    net = layers.BatchNormalization()(inp)
    net = layers.Dropout(dropout_rates[0])(net)
    net = layers.GaussianDropout(dropout_rates[0])(net)

    for idx, width in enumerate(hidden_units):
        net = layers.Dense(width)(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation('gelu')(net)
        net = layers.GaussianDropout(dropout_rates[idx + 1])(net)

    logits = layers.Dense(num_labels)(net)
    out = layers.Activation("sigmoid")(logits)

    model = tf.keras.models.Model(inputs=inp, outputs=out)

    # NOTE(review): the original author doubted that Huber is a suitable loss
    # on sigmoid outputs with 0/1 targets; it is kept as found.
    tracked_metrics = [
        tf.keras.metrics.AUC(name="AUC"),
        tf.keras.metrics.TrueNegatives(name="TN"),
        tf.keras.metrics.TruePositives(name="TP"),
    ]
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
        loss=tf.keras.losses.Huber(),
        metrics=tracked_metrics,
    )

    return model

In [None]:
# --- Hyperparameters -------------------------------------------------------
batch_size = 5000
# Six hidden layers of 130 units (3x130 and 6x260 were also tried).
hidden_units = [130] * 6
# One rate for the input block plus one per hidden layer.
dropout_rates = [0.2] * 7
label_smoothing = 1e-2
# Reduced learning rate (1e-2 was also tried).
learning_rate = 1e-3

# --- Build and fit ---------------------------------------------------------
# Five outputs: one per response column used to build y_train.
clf = create_mlp(
    num_columns=len(features),
    num_labels=5,
    hidden_units=hidden_units,
    dropout_rates=dropout_rates,
    label_smoothing=label_smoothing,
    learning_rate=learning_rate,
)

# Longer schedules (200, 800, 1000 and 2000 epochs) were tried; per the
# original notes the 2000-epoch run timed out.
clf.fit(X_train, y_train, epochs=50, batch_size=batch_size)

# Models used by the inference cell.
models = [clf]

# Decision threshold applied to the aggregated prediction.
th = 0.5000

In [None]:
# Run mode: True for a real Kaggle submission, False for a local dry run that
# also records predictions so a utility score can be estimated afterwards.
#submission = True
submission = False

if not submission:
    # Import here: this cell runs before the inference cell's own
    # `import janestreet`, so on a fresh kernel the attribute assignment below
    # would otherwise fail with NameError.
    import janestreet

    # Presumably resets the API's call-once guard so make_env() can be invoked
    # again in the same session -- TODO confirm against the janestreet module.
    janestreet.make_env.__called__ = False


In [None]:

import janestreet

# Reducer that collapses the per-label predictions of one row into one score.
f = np.median
# Ensemble over at most the three most recently appended models.
models = models[-3:]

env = janestreet.make_env()

# In a dry run keep every submitted frame, input frame and score for later scoring.
if not submission:
    store_data_1, store_data_2, store_data_3 = [], [], []

for (test_df, pred_df) in tqdm(env.iter_test()):
    if not (test_df['weight'].item() > 0):
        # Rows without positive weight are never traded; record a zero score.
        pred_df.action = 0
        pred = 0
    else:
        x_tt = test_df.loc[:, features].values
        tail = x_tt[:, 1:]  # view on every feature column except the first
        if np.isnan(tail.sum()):
            # Fill missing values with the training means; entries that were
            # not NaN are multiplied by a zero mask and stay unchanged.
            x_tt[:, 1:] = np.nan_to_num(tail) + np.isnan(tail) * f_mean
        per_model = [m(x_tt, training = False).numpy() for m in models]
        # Average across models, then reduce across labels with `f`.
        pred = f(np.mean(per_model, axis=0))
        pred_df.action = np.where(pred >= th, 1, 0).astype(int)

    # Every row must be answered, whether or not it was scored by the models.
    env.predict(pred_df)

    if not submission:
        store_data_1.append(pred_df)
        store_data_2.append(test_df)
        store_data_3.append(pred)

In [None]:
if not submission:
    # Stack the per-row frames captured during the dry run.
    df_data_1 = pd.concat(store_data_1)
    df_data_2 = pd.concat(store_data_2)

    # Attach the submitted action (aligned on index) and the model score.
    # The score is stored under the name 'resp'; rows that were skipped for
    # non-positive weight carry a placeholder 0 there and are filtered out by
    # the weight filter in the next cell.
    df_data_2 = df_data_2.assign(action=df_data_1['action'], resp=store_data_3)

    # From here on `train` refers to the dry-run frame, not the training data.
    train = df_data_2

In [None]:
if not submission:
    # Build the flat arrays consumed by the utility-score functions.
    # Note that this rebinds the notebook-level `features` and `train`.
    features = [c for c in train.columns if 'feature' in c]
    print('Forward-Filling...')
    train = train.query('weight > 0').reset_index(drop = True)
    # `.ffill()` replaces the deprecated `fillna(method='ffill')`; values that
    # are still missing after forward filling (leading NaNs) become 0.
    train[features] = train[features].ffill().fillna(0)

    # At this point train['resp'] holds the scores stored by the inference
    # cell (it was overwritten with the predictions there), so the utility
    # computed from these arrays is an estimate, not the official metric.
    date = train['date'].values.astype(np.int64)
    weight = train['weight'].values.astype(np.float64)
    resp = train['resp'].values.astype(np.float64)
    action = train['action'].values.astype(np.int64)


In [None]:
if not submission:
    def _utility_from_daily_pnl(Pi, count_i):
        """Turn per-date sums `Pi` and a day count into the utility score.

        t = sum(Pi) / sqrt(sum(Pi^2)) * sqrt(250 / count_i), clipped to
        [0, 6], and the result is that clipped value times sum(Pi).
        """
        total = np.sum(Pi)
        t = total / np.sqrt(np.sum(Pi ** 2)) * np.sqrt(250 / count_i)
        return np.clip(t, 0, 6) * total

    def utility_score_pd(date, weight, resp, action):
        """Utility score with the day count taken as the number of distinct dates."""
        Pi = np.bincount(date, weight * resp * action)
        return _utility_from_daily_pnl(Pi, len(pd.unique(date)))

    def utility_score_max(date, weight, resp, action):
        """Utility score with the day count taken as the largest date plus one."""
        Pi = np.bincount(date, weight * resp * action)
        return _utility_from_daily_pnl(Pi, date.max() + 1)

    def utility_score_last(date, weight, resp, action):
        """Utility score with the day count taken as the last row's date plus one."""
        Pi = np.bincount(date, weight * resp * action)
        return _utility_from_daily_pnl(Pi, date[-1] + 1)

    from numba import njit

    @njit(fastmath = True)
    def utility_score_numba(date, weight, resp, action):
        """Compiled utility score; the day count is the length of the per-date sums.

        `np.bincount` returns one bin per integer from 0 up to the largest
        date, so the count used here equals the largest date plus one.
        """
        daily = np.bincount(date, weight * resp * action)
        t = np.sum(daily) / np.sqrt(np.sum(daily ** 2)) * np.sqrt(250 / len(daily))
        return min(max(t, 0), 6) * np.sum(daily)

In [None]:
if not submission:
    # The other variants (utility_score_pd / _max / _last) take the same
    # arguments and can be swapped in here for comparison.
    somevalue = utility_score_numba(date, weight, resp, action)
    print(somevalue)

    # Empirical divisor: this local figure differs from the official
    # calculation, so it is scaled down to approximate what the notebook is
    # expected to score.
    assumed_number = 400
    scaled_value = somevalue / assumed_number
    print(scaled_value)
    
    
# 200,000 equates to about 4,000 or 5,000 on PB Leaderboard
#200000/4000