One model parameter is common to almost all models in this competition, yet no one really seems to be talking about it.  For example, take this neural network model (thanks to Yirun Zhang: https://www.kaggle.com/gogo827jz/jane-street-neural-network-starter):

In [None]:
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import tensorflow as tf
import dask.dataframe as dd
import matplotlib.pyplot as plt                                                                      
%matplotlib inline   

# Fix TensorFlow's global random seed.
tf.random.set_seed(42)

# Read the CSV through dask, materialise it as a pandas frame, then order by ts_id.
train_ddf = dd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv')
train = train_ddf.compute().sort_values('ts_id')

# Model inputs are all columns whose name contains "feature".
features = [col for col in train.columns if "feature" in col]


def create_mlp(
    num_columns, num_labels, hidden_units, dropout_rates, label_smoothing, learning_rate
):
    """Build and compile a feed-forward sigmoid classifier.

    Architecture: Input -> BatchNormalization -> Dropout, then for every entry
    of ``hidden_units`` a Dense -> BatchNormalization -> swish -> Dropout
    stack, and finally Dense(num_labels) followed by a sigmoid activation.

    Args:
        num_columns: Number of input features (size of the input vector).
        num_labels: Number of output units.
        hidden_units: Sequence with the width of each hidden Dense layer.
        dropout_rates: Sequence of dropout rates. Index 0 is applied to the
            normalised input and index ``i + 1`` after hidden layer ``i``, so
            at least ``len(hidden_units) + 1`` values are needed.
        label_smoothing: Label smoothing passed to the binary cross-entropy loss.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled ``tf.keras`` model, tracking an AUC metric named "AUC".

    Raises:
        IndexError: If ``dropout_rates`` has fewer entries than required.
    """
    inp = tf.keras.layers.Input(shape=(num_columns,))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dropout(dropout_rates[0])(x)
    for i, units in enumerate(hidden_units):
        x = tf.keras.layers.Dense(units)(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation(tf.keras.activations.swish)(x)
        # dropout_rates[0] was consumed by the input dropout above.
        x = tf.keras.layers.Dropout(dropout_rates[i + 1])(x)

    x = tf.keras.layers.Dense(num_labels)(x)
    out = tf.keras.layers.Activation("sigmoid")(x)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        # `metrics` is documented as a list; newer Keras versions reject a
        # bare Metric instance, so wrap it.
        metrics=[tf.keras.metrics.AUC(name="AUC")],
    )

    return model


# Network settings used to rebuild the models before loading their weights.
# NOTE(review): presumably these must match the values the saved weights were
# trained with -- confirm before changing any of them.
hidden_units = [384, 896, 896, 394]
dropout_rates = [
    0.10143786981358652,  # applied to the normalised input
    0.19720339053599725,
    0.2703017847244654,
    0.23148340929571917,
    0.2357768967777311,
]
label_smoothing = 1e-2
learning_rate = 1e-3
batch_size = 4096  # not referenced by the code visible in this notebook

num_models = 2

# Rebuild every network and restore its weights from the attached dataset.
models = []
for i in range(num_models):
    clf = create_mlp(
        num_columns=len(features),
        num_labels=1,
        hidden_units=hidden_units,
        dropout_rates=dropout_rates,
        label_smoothing=label_smoothing,
        learning_rate=learning_rate,
    )
    clf.load_weights(f"../input/js-nn-models/JSModel_{i}.hdf5")
    models.append(clf)

# Array added in place of missing feature values in the inference loop
# (presumably per-feature means, judging by the file name).
f_mean = np.load('../input/js-nn-models/f_mean.npy')



We get different scores by changing `opt_th` to different values:

In [None]:
# Decision threshold: the inference loop emits action 1 when the averaged
# model output is >= opt_th, and 0 otherwise.
opt_th = 0.5025

This parameter controls the ratio of ones and zeros in the prediction.  If the market is going up, then you will want to predict more ones than zeros.  

However, it's a little more complicated than this, as you need to take the weights into consideration and not just the direction of the whole market:


In [None]:
# Running sum of the raw response versus the weight-scaled response.
weighted_resp = train['weight'] * train['resp']
train['trend'] = train['resp'].cumsum()
train['weighted_trend'] = weighted_resp.cumsum()

# Draw both cumulative curves against ts_id on one chart.
train.plot(x='ts_id', y=['trend', 'weighted_trend'])
plt.show()

In the training data, the overall market is going up, but the weighted returns are trending downwards, so you would want to predict fewer ones for this time period.

In [None]:
import janestreet

env = janestreet.make_env()
env_iter = env.iter_test()

for test_df, pred_df in env_iter:
    if not test_df["weight"].item() > 0:
        # Rows without a positive weight get action 0 without running the models.
        pred_df.action = 0
    else:
        x_tt = test_df.loc[:, features].values
        tail = x_tt[:, 1:]  # every feature column except the first
        if np.isnan(tail.sum()):
            # np.nan_to_num puts 0 where a value was NaN; f_mean is then added
            # only at the positions flagged by the mask.
            nan_mask = np.isnan(tail)
            x_tt[:, 1:] = np.nan_to_num(tail) + nan_mask * f_mean
        # Ensemble average: each model contributes its output / num_models.
        pred = 0.0
        for clf in models:
            pred += clf(x_tt, training=False).numpy().item() / num_models
        # Threshold the averaged output into a 0/1 action.
        pred_df.action = np.where(pred >= opt_th, 1, 0).astype(int)
    env.predict(pred_df)


Values I've tested and their scores are:
* 0.49  6704                                                                                          
* 0.496 6871                                                                                          
* 0.498 6782                                                                                          
* 0.5   6876                                                                                          
* 0.505 6781                                                                                          
* 0.503 6920                                                                                          
* 0.508 6370  

Obviously `0.503` does well on the public test set, but there is no guarantee it will do well on the private test set.