# Jane Street: Super Fast Utility Score Function

In this notebook, I compare the running time of different utility score function implementations from the discussion thread [Super Fast Utility Score Function Implementation][1].

[1]: https://www.kaggle.com/c/jane-street-market-prediction/discussion/201257

In [None]:
import warnings
warnings.filterwarnings('ignore')

import torch 
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

import os, gc, random
# `device` is a torch.device object rather than a string, so test its `.type`
# attribute; comparing the object itself against 'cuda' would not select this
# branch on a GPU machine.
# NOTE(review): this makes the RAPIDS imports run on GPU hosts -- confirm that
# cudf and cupy are installed there.
if device.type == 'cuda':
    import cudf
    import cupy as cp
import datatable as dtable
import pandas as pd
import numpy as np
import janestreet
from numba import njit
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from joblib import dump, load

import tensorflow as tf
tf.random.set_seed(42)
import tensorflow.keras.backend as K
import tensorflow.keras.layers as layers
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

QUICK_TEST = False

In [None]:
def seed_everything(seed_value):
    """Seed the Python, NumPy and PyTorch random number generators.

    The seed is also exported as the ``PYTHONHASHSEED`` environment variable.
    When CUDA is available, the CUDA generators are seeded as well, cuDNN's
    ``deterministic`` flag is switched on and its ``benchmark`` flag is
    switched off.

    Args:
        seed_value: Seed handed to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed_value)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)

# Preprocessing

In [None]:
# Load the training data: a 10,000-row sample in quick-test mode, otherwise the
# whole file through datatable.
print('Loading...')
if QUICK_TEST:
    train = pd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv', nrows = 10000)
else:
    train = dtable.fread('../input/jane-street-market-prediction/train.csv').to_pandas()
# Every column whose name contains 'feature' is used as a model input.
features = [c for c in train.columns if 'feature' in c]

print('Forward-Filling...')
# Keep only rows with a positive weight, then renumber the index.
train = train.query('weight > 0').reset_index(drop = True)
# Forward-fill missing feature values and zero-fill whatever is still missing
# at the top of the frame. `.ffill()` replaces the deprecated
# `fillna(method = 'ffill')` spelling.
train[features] = train[features].ffill().fillna(0)
# Binary target: 1 when the response is positive, else 0.
train['action'] = (train['resp'] > 0).astype('int')

print('Finish.')

# Utility Score Functions

The for-loop version is very slow. We had better replace it with a magic numpy function called `numpy.bincount()`.

In [None]:
def utility_score_loop(date, weight, resp, action):
    """Compute the utility score with an explicit loop over the distinct dates.

    For every distinct value in ``date`` the products
    ``weight * resp * action`` of that day's rows are summed into ``Pi``.
    The score is then ``clip(t, 0, 6) * sum(Pi)`` with
    ``t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / number_of_days)``.

    Args:
        date: NumPy array with the day identifier of each row.
        weight: NumPy array of row weights.
        resp: NumPy array of row responses.
        action: NumPy array of row actions.

    Returns:
        The utility score. It is NaN when every daily sum is zero, because
        the normalisation then divides zero by zero.
    """
    # Find the distinct days once instead of once for the count and again for
    # the loop.
    days = np.unique(date)
    count_i = len(days)
    Pi = np.zeros(count_i)
    for i, day in enumerate(days):
        # Build the row selector once per day and reuse it for all three arrays.
        mask = date == day
        Pi[i] = np.sum(weight[mask] * resp[mask] * action[mask])
    total = np.sum(Pi)
    t = total / np.sqrt(np.sum(Pi ** 2)) * np.sqrt(250 / count_i)
    u = np.clip(t, 0, 6) * total
    return u

def utility_score_bincount(date, weight, resp, action):
    """Compute the utility score, aggregating per day with ``np.bincount``.

    ``np.bincount`` sums ``weight * resp * action`` into one slot per integer
    date, so ``date`` must hold non-negative integers. The number of days
    used for the annualisation factor is the count of distinct dates.

    Args:
        date: NumPy array of non-negative integer day identifiers.
        weight: NumPy array of row weights.
        resp: NumPy array of row responses.
        action: NumPy array of row actions.

    Returns:
        ``clip(t, 0, 6) * sum(Pi)`` where
        ``t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / number_of_days)``.
    """
    daily_pnl = np.bincount(date, weights=weight * resp * action)
    n_days = np.unique(date).size
    pnl_total = daily_pnl.sum()
    pnl_norm = np.sqrt(np.square(daily_pnl).sum())
    t = pnl_total / pnl_norm * np.sqrt(250 / n_days)
    return np.clip(t, 0, 6) * pnl_total

A further improvement is to replace `numpy.unique()` with `pandas.unique()`, because the latter does not sort the values. However, if your date values are consecutive and chronological, using `date.max() + 1` or `date[-1] + 1` is the optimal solution.

In [None]:
def utility_score_pd(date, weight, resp, action):
    """Compute the utility score, counting days with ``pandas.unique``.

    Daily sums of ``weight * resp * action`` come from ``np.bincount``, so
    ``date`` must hold non-negative integers. The number of days is the
    count of distinct dates as reported by ``pd.unique``.

    Args:
        date: NumPy array of non-negative integer day identifiers.
        weight: NumPy array of row weights.
        resp: NumPy array of row responses.
        action: NumPy array of row actions.

    Returns:
        ``clip(t, 0, 6) * sum(Pi)`` where
        ``t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / number_of_days)``.
    """
    n_days = len(pd.unique(date))
    annualisation = np.sqrt(250 / n_days)

    daily_pnl = np.bincount(date, weight * resp * action)
    pnl_total = np.sum(daily_pnl)
    pnl_norm = np.sqrt(np.sum(daily_pnl ** 2))

    t = pnl_total / pnl_norm * annualisation
    return np.clip(t, 0, 6) * pnl_total

def utility_score_max(date, weight, resp, action):
    """Compute the utility score, taking the day count from ``date.max() + 1``.

    Daily sums of ``weight * resp * action`` come from ``np.bincount``. The
    number of days is ``date.max() + 1``, which matches the number of
    distinct dates only when the dates are the consecutive integers
    starting at 0.

    Args:
        date: NumPy array of non-negative integer day identifiers.
        weight: NumPy array of row weights.
        resp: NumPy array of row responses.
        action: NumPy array of row actions.

    Returns:
        ``clip(t, 0, 6) * sum(Pi)`` where
        ``t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / (date.max() + 1))``.
    """
    per_day = np.bincount(date, weight * resp * action)
    day_count = date.max() + 1
    summed = per_day.sum()
    t = summed / np.sqrt((per_day ** 2).sum()) * np.sqrt(250 / day_count)
    score = np.clip(t, 0, 6) * summed
    return score

def utility_score_last(date, weight, resp, action):
    """Compute the utility score, taking the day count from ``date[-1] + 1``.

    Daily sums of ``weight * resp * action`` come from ``np.bincount``. The
    number of days is read from the last element of ``date``, so the result
    matches the other variants only when the dates are chronological,
    consecutive integers starting at 0.

    Args:
        date: NumPy array of non-negative integer day identifiers.
        weight: NumPy array of row weights.
        resp: NumPy array of row responses.
        action: NumPy array of row actions.

    Returns:
        ``clip(t, 0, 6) * sum(Pi)`` where
        ``t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / (date[-1] + 1))``.
    """
    day_count = date[-1] + 1
    contributions = weight * resp * action
    per_day = np.bincount(date, contributions)
    summed = np.sum(per_day)
    scale = np.sqrt(250 / day_count)
    t = summed / np.sqrt(np.sum(per_day ** 2)) * scale
    return np.clip(t, 0, 6) * summed

Calibrator has suggested using `len(Pi)` and `@njit(fastmath = True)` for acceleration. Let's check how it performs!

In [None]:
@njit(fastmath = True)
def utility_score_numba(date, weight, resp, action):
    """Numba-compiled utility score.

    Daily sums of ``weight * resp * action`` come from ``np.bincount``. The
    number of days is the length of that result, i.e. ``date.max() + 1``, so
    it equals the number of distinct dates only when the dates are the
    consecutive integers starting at 0.

    Args:
        date: NumPy array of non-negative integer day identifiers.
        weight: NumPy array of row weights.
        resp: NumPy array of row responses.
        action: NumPy array of row actions.

    Returns:
        ``min(max(t, 0), 6) * sum(Pi)`` where
        ``t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / len(Pi))``.
    """
    daily_pnl = np.bincount(date, weight * resp * action)
    pnl_total = np.sum(daily_pnl)
    t = pnl_total / np.sqrt(np.sum(daily_pnl ** 2)) * np.sqrt(250 / len(daily_pnl))
    return min(max(t, 0), 6) * pnl_total

We also compare some of the functions in pandas from the discussion forum.

In [None]:
# LDMTWO's
def utility_score_LDMTWO(df, labels=('action', '.r0', '.weight', '.date')):
    """Calculate utility score of a dataframe according to formulas defined at
    https://www.kaggle.com/c/jane-street-market-prediction/overview/evaluation

    Args:
        df: DataFrame holding the four columns named in ``labels``. It is
            not modified.
        labels: Column names in the order (action, resp, weight, date). The
            default is an immutable tuple, so it cannot be altered between
            calls.

    Returns:
        ``clip(t, 0, 6) * sum(p_i)`` where ``p_i`` is the per-date sum of
        ``weight * resp * action`` and
        ``t = sum(p_i) / sqrt(sum(p_i ** 2)) * sqrt(250 / number_of_dates)``.
    """
    action, resp, weight, date = labels
    p = df[weight] * df[resp] * df[action]
    # Group by the date column directly; re-indexing the whole frame only to
    # group by the index name would copy every column for no benefit.
    p_i = p.groupby(df[date]).sum()
    t = (p_i.sum() / np.sqrt((p_i ** 2).sum())) * (np.sqrt(250 / p_i.index.size))
    return np.clip(t, 0, 6) * p_i.sum()

# Jorijn Jacko Smit's
def utility_score_Jorijn(df):
    """Calculate utility score of a dataframe according to formulas defined at
    https://www.kaggle.com/c/jane-street-market-prediction/overview/evaluation

    Args:
        df: DataFrame with ``weight``, ``resp``, ``action`` and ``date``
            columns. It is not modified.

    Returns:
        ``min(max(t, 0), 6) * sum(p_i)`` where ``p_i`` is the per-date sum of
        ``weight * resp * action`` and
        ``t = sum(p_i) / sqrt(sum(p_i ** 2)) * sqrt(250 / number_of_dates)``.
    """
    # Keep the per-row product in a local Series instead of writing it back
    # as a 'p' column, so the caller's frame is left untouched.
    p = df['weight'] * df['resp'] * df['action']
    p_i = p.groupby(df['date']).sum()
    t = (p_i.sum() / np.sqrt((p_i ** 2).sum())) * (np.sqrt(250 / p_i.index.size))
    return min(max(t, 0), 6) * p_i.sum()

# Time-Consumption Comparison

In [None]:
# Pull the four columns used by the array-based score functions out of the
# frame as plain NumPy arrays.
date, weight, resp, action = (
    train[column].values for column in ('date', 'weight', 'resp', 'action')
)

In [None]:
# Time every implementation on the arrays / frame built from `train` using
# IPython's %timeit magic; each timing is preceded by a label and followed by
# a 70-character separator line.
print('numpy for-loop:')
%timeit utility_score_loop(date, weight, resp, action)
print('-' * 70)
print('numpy.bincount():')
%timeit utility_score_bincount(date, weight, resp, action)
print('-' * 70)
print('numpy.bincount() + pandas.unique():')
%timeit utility_score_pd(date, weight, resp, action)
print('-' * 70)
print('numpy.bincount() + date.max() + 1:')
%timeit utility_score_max(date, weight, resp, action)
print('-' * 70)
print('numpy.bincount() + date[-1] + 1:')
%timeit utility_score_last(date, weight, resp, action)
print('-' * 70)
print('numba:')
%timeit utility_score_numba(date, weight, resp, action)
print('-' * 70)
# The two pandas-based variants take the DataFrame itself rather than arrays.
print('LDMTWO\'s:')
%timeit utility_score_LDMTWO(train, labels = 'action,resp,weight,date'.split(','))
print('-' * 70)
print('Jorijn\'s:')
%timeit utility_score_Jorijn(train)

# Perfect Prediction on Train

Wow, we can get a utility score of **224162** if we perfectly predict every action in the train set.

In [None]:
# `action` is the training label (1 where resp > 0), so this is the score of a
# predictor that gets every row of the train set right.
print(utility_score_numba(date, weight, resp, action))

# Optimisation Based On Validation

In [None]:
# Chronological split on `date`:
#   (80, 370]  -> training set
#   (370, 400] -> second training slice (X_tr2 / y_tr2)
#   (400, ...) -> validation set
# Each row selector is built once and reused for the features and the labels.
day = train['date']
in_tr = (day > 80) & (day <= 370)
in_tr2 = (day > 370) & (day <= 400)
in_val = day > 400

X_tr = train.loc[in_tr, features]
y_tr = train.loc[in_tr, 'action']

X_tr2 = train.loc[in_tr2, features]
y_tr2 = train.loc[in_tr2, 'action']

X_val = train.loc[in_val, features]
y_val = train.loc[in_val, 'action']

# utility_score_numba uses len(np.bincount(date)) as the number of days, which
# is date.max() + 1. Passing the raw validation dates (all > 400) would count
# the days before the validation window as well and shrink `t`. Re-label the
# validation dates as 0..k-1 so that the count equals the number of distinct
# validation days.
_, date = np.unique(train.loc[in_val, 'date'].values, return_inverse = True)
weight = train.loc[in_val, 'weight'].values
resp = train.loc[in_val, 'resp'].values

# Drop the temporary selectors before collecting garbage.
del day, in_tr, in_tr2, in_val
rubbish = gc.collect()

In [None]:
def create_mlp(num_columns, num_labels, hidden_units, dropout_rates, learning_rate):
    """Build and compile a fully connected binary classifier.

    The network is: batch-norm and dropout on the input, then for every entry
    of ``hidden_units`` a Dense -> BatchNormalization -> swish -> Dropout
    stack, and finally a Dense layer with a sigmoid activation.

    Args:
        num_columns: Number of input features.
        num_labels: Number of output units.
        hidden_units: Width of each hidden Dense layer, in order.
        dropout_rates: Dropout rates; element 0 is applied to the input and
            element ``i + 1`` after hidden layer ``i``, so at least
            ``len(hidden_units) + 1`` values are needed.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled Keras model (binary cross-entropy loss, AUC metric
        reported under the name ``'AUC'``).
    """
    inp = tf.keras.layers.Input(shape = (num_columns, ))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dropout(dropout_rates[0])(x)
    for i, units in enumerate(hidden_units):
        x = tf.keras.layers.Dense(units)(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation(tf.keras.activations.swish)(x)
        x = tf.keras.layers.Dropout(dropout_rates[i + 1])(x)

    x = tf.keras.layers.Dense(num_labels)(x)
    out = tf.keras.layers.Activation('sigmoid')(x)

    model = tf.keras.models.Model(inputs = inp, outputs = out)
    # `metrics` is passed as a list: Keras documents it as a list of metrics,
    # and newer releases reject a bare metric object.
    model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate),
                  loss = tf.keras.losses.BinaryCrossentropy(),
                  metrics = [tf.keras.metrics.AUC(name = 'AUC')],
                 )

    return model

In [None]:
# Hidden-layer widths, and dropout rates (input dropout first, then one rate
# per hidden layer).
hidden_units = [384, 896, 384]
dropout_rates = [0.10143786981358652, 0.19720339053599725, 0.2703017847244654, 0.2357768967777311]

ckp_path = 'JSModel.hdf5'
model = create_mlp(
    num_columns = X_tr.shape[1],
    num_labels = 1,
    hidden_units = hidden_units,
    dropout_rates = dropout_rates,
    learning_rate = 1e-3,
)

# All three callbacks watch the validation AUC, which is to be maximised.
rlr = ReduceLROnPlateau(
    monitor = 'val_AUC',
    factor = 0.1,
    patience = 3,
    verbose = 1,
    min_delta = 1e-4,
    mode = 'max',
)
ckp = ModelCheckpoint(
    ckp_path,
    monitor = 'val_AUC',
    verbose = 0,
    save_best_only = True,
    save_weights_only = True,
    mode = 'max',
)
es = EarlyStopping(
    monitor = 'val_AUC',
    min_delta = 1e-4,
    patience = 5,
    mode = 'max',
    baseline = None,
    restore_best_weights = True,
    verbose = 0,
)

history = model.fit(
    X_tr.values,
    y_tr.values,
    validation_data = (X_val.values, y_val.values),
    epochs = 100,
    batch_size = 4096,
    callbacks = [rlr, ckp, es],
    verbose = 1,
)
hist = pd.DataFrame(history.history)
# Best validation AUC reached during training.
print(hist['val_AUC'].max())

# Release the model and the Keras session; the weights live on in `ckp_path`.
del model
K.clear_session()
rubbish = gc.collect()

In [None]:
@njit(fastmath = True)
def decision_threshold_optimisation(preds, date, weight, resp, low = 0, high = 1, bins = 100, eps = 1):
    """Grid-search the decision threshold that maximises the utility score.

    Candidate thresholds run from ``low`` (inclusive) to ``high`` (exclusive)
    in steps of ``(high - low) / bins``. A candidate replaces the current
    best only when it improves the utility score by more than ``eps``. The
    best threshold and score are printed before being returned.

    Args:
        preds: Array of predicted scores, one per row.
        date: Array of day identifiers passed to ``utility_score_numba``.
        weight: Array of row weights.
        resp: Array of row responses.
        low: First threshold tried, and the starting best threshold.
        high: Upper end of the search range (not tried itself).
        bins: Number of steps between ``low`` and ``high``.
        eps: Minimum improvement in utility needed to accept a candidate.

    Returns:
        Tuple ``(best threshold, utility score at that threshold)``.
    """
    step = (high - low) / bins

    # Baseline: act on every row whose prediction reaches `low`.
    best_threshold = low
    best_utility = utility_score_numba(date, weight, resp, np.where(preds >= best_threshold, 1, 0))

    for candidate in np.arange(low, high, step):
        candidate_action = np.where(preds >= candidate, 1, 0)
        candidate_utility = utility_score_numba(date, weight, resp, candidate_action)
        if candidate_utility - best_utility > eps:
            best_threshold = candidate
            best_utility = candidate_utility

    print('Optimal Decision Threshold:', best_threshold)
    print('Optimal Utility Score:', best_utility)
    return best_threshold, best_utility

The optimised threshold is very high. It seems the market trend over the last 100 days is decreasing dramatically, so the model needs to take fewer actions. Using a cross-validation score instead of a single hold-out validation score may give a better threshold optimisation result.

In [None]:
# Optimise Decision Threshold on the Validation Set
model = create_mlp(X_tr.shape[1], 1, hidden_units, dropout_rates, 1e-5)
model.load_weights(ckp_path)
preds = model.predict(X_val, batch_size = 4096, verbose = 1).ravel()
opt_threshold, opt_utility = decision_threshold_optimisation(preds, date, weight, resp, preds.min(), preds.max(), 1000, 1)

rubbish = gc.collect()

In [None]:
# # Finetune 3 epochs
# model.fit(np.concatenate((X_tr2.values, X_val.values)), np.concatenate((y_tr2.values, y_val.values)), 
#           epochs = 3, batch_size = 4096, verbose = 1)
# model.save_weights(ckp_path)

Try training on the entire train set (all rows after day 80).

In [None]:
# Refit on every row after day 80, with no validation split, and overwrite
# the checkpoint with the resulting weights.
X_tr, y_tr = (train.loc[train['date'] > 80, cols] for cols in (features, 'action'))
model = create_mlp(
    num_columns = X_tr.shape[1],
    num_labels = 1,
    hidden_units = hidden_units,
    dropout_rates = dropout_rates,
    learning_rate = 1e-4,
)
model.fit(
    X_tr.values,
    y_tr.values,
    batch_size = 4096,
    epochs = 10,
    verbose = 1,
)
model.save_weights(ckp_path)

In [None]:
# Sanity-check the model on the example test file, preprocessed the same way
# as the training data (positive weights only, forward-fill, then zero-fill).
example_test = pd.read_csv('../input/jane-street-market-prediction/example_test.csv')
example_test = example_test.query('weight > 0').reset_index(drop = True)
# `.ffill()` replaces the deprecated `fillna(method = 'ffill')` spelling.
example_test[features] = example_test[features].ffill().fillna(0)
test_preds = model.predict(example_test[features].values, batch_size = 4096, verbose = 1).ravel()
# Summary statistics and histogram of the predicted scores.
print(test_preds.min())
print(test_preds.max())
print(test_preds.mean())
print(test_preds.std())
plt.hist(test_preds, bins = 100)
plt.show()

In [None]:
@njit
def fast_fillna(array, values):
    """Replace the NaN entries of ``array`` with the matching ``values``.

    Args:
        array: Numeric array that may contain NaNs.
        values: Replacement value(s) used wherever ``array`` is NaN.

    Returns:
        ``array`` itself when its sum is not NaN; otherwise a new array in
        which each NaN position takes its entry from ``values``.
    """
    # A NaN anywhere makes the sum NaN, so one reduction is enough to decide
    # whether any filling is needed.
    if not np.isnan(array.sum()):
        return array
    return np.where(np.isnan(array), values, array)

In [None]:
# Set up the competition environment. `env_iter` is consumed by the inference
# loop as (test_df, pred_df) pairs, and predictions are handed back through
# `env.predict`.
env = janestreet.make_env()
env_iter = env.iter_test()

In [None]:
# Try 0.5 threshold
opt_threshold = 0.5
tmp = np.zeros(len(features))
for (test_df, pred_df) in tqdm(env_iter):
    if test_df['weight'].item() > 0:
        x_tt = test_df.loc[:, features].values
        x_tt[0, :] = fast_fillna(x_tt[0, :], tmp)
        tmp = x_tt[0, :]
        pred = model(x_tt, training = False).numpy().item()
        pred_df.action = np.where(pred >= opt_threshold, 1, 0).astype(int)
    else:
        pred_df.action = 0
    env.predict(pred_df)

# Conclusion

So far, the fastest version is the numba version! Tribute to [Calibrator][1]!

[1]: https://www.kaggle.com/calibrator

In [None]:
import numpy as np
from numba import njit

@njit(fastmath = True)
def utility_score_numba(date, weight, resp, action):
    """Numba-compiled utility score.

    ``np.bincount`` sums ``weight * resp * action`` per integer date. The
    number of days is the length of that result (``date.max() + 1``), so
    dates are expected to be consecutive integers starting at 0.

    Args:
        date: NumPy array of non-negative integer day identifiers.
        weight: NumPy array of row weights.
        resp: NumPy array of row responses.
        action: NumPy array of row actions.

    Returns:
        ``min(max(t, 0), 6) * sum(Pi)`` where
        ``t = sum(Pi) / sqrt(sum(Pi ** 2)) * sqrt(250 / len(Pi))``.
    """
    per_day = np.bincount(date, weight * resp * action)
    summed = np.sum(per_day)
    t = summed / np.sqrt(np.sum(per_day ** 2)) * np.sqrt(250 / len(per_day))
    return min(max(t, 0), 6) * summed