# Jane Street - Tuned Submission
This notebook is the first of two actually submitted to the competition. It loads the tuned ensemble from the Tuner notebook and makes the prediction using Jane Street's API.

In [1]:
import joblib
import json
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras

from xgboost import XGBClassifier

import janestreet

Load the models, the ensemble weights, and the optimized decision threshold.

In [2]:
# load NN model
model_folder = os.path.join(os.pardir, "input", "jane-street-nn")

# The architecture is read from JSON; the trained weights come from a separate HDF5 file.
with open(os.path.join(model_folder, "model.json"), "r") as f:
    nn = keras.models.model_from_json(f.read())

nn.load_weights(os.path.join(model_folder, "model.h5"))
# Wrap the forward pass in a tf.function so that per-sample calls run as a graph.
# NOTE(review): newer TensorFlow releases rename `experimental_relax_shapes` to
# `reduce_retracing` -- confirm against the TensorFlow version installed here.
nn.call = tf.function(nn.call, experimental_relax_shapes=True)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code.
# Only load preprocessor.pkl files produced by trusted notebooks.
nn_pp = joblib.load(os.path.join(model_folder, "preprocessor.pkl"))


# load EDA XGB model
model_folder = os.path.join(os.pardir, "input", "jane-street-eda-xgb")

edaxgb = XGBClassifier()
edaxgb.load_model(os.path.join(model_folder, "model.xgb"))

edaxgb_pp = joblib.load(os.path.join(model_folder, "preprocessor.pkl"))


# load PLS XGB model
model_folder = os.path.join(os.pardir, "input", "jane-street-pls-xgb")

plsxgb = XGBClassifier()
plsxgb.load_model(os.path.join(model_folder, "model.xgb"))

plsxgb_pp = joblib.load(os.path.join(model_folder, "preprocessor.pkl"))


# get the optimal weights and threshold
tuner_folder = os.path.join(os.pardir, "input", "jane-street-tuner")

with open(os.path.join(tuner_folder, "weights.json")) as f:
    weights = json.load(f)

# Fail fast: predict() indexes `weights` with exactly these keys, so a missing
# entry should surface here rather than in the middle of the submission loop.
missing_weights = {"nn", "pls-xgb", "eda-xgb"} - set(weights)
if missing_weights:
    raise KeyError(
        f"weights.json is missing ensemble weights for: {sorted(missing_weights)}"
    )

with open(os.path.join(tuner_folder, "threshold.json")) as f:
    threshold_json = json.load(f)

# The threshold is stored under the "threshold" key; coerce it to a plain float.
threshold = float(threshold_json["threshold"])

Define a function that takes a new sample and returns the ensemble's prediction.

In [3]:
def predict(sample):
    """Return the ensemble's binary action (1 or 0) for one new sample.

    Each model scores its own preprocessed view of the sample; the scores are
    multiplied by the matching entries of the module-level ``weights`` dict and
    summed, and the sum is compared against the module-level ``threshold``.

    Parameters
    ----------
    sample : object exposing ``to_numpy()``
        Feature values for the sample. Exactly one row is assumed: the NN
        output is collapsed with ``.item()`` and only row 0 of each XGBoost
        probability matrix is read.

    Returns
    -------
    int
        1 if the weighted score is strictly greater than ``threshold``, else 0.
    """
    row = sample.to_numpy()

    # Neural network: single output value, run in inference mode.
    nn_output = nn(nn_pp.transform(row), training=False)
    nn_score = weights["nn"] * nn_output.numpy().item()

    # PLS XGBoost: column 1 of predict_proba for the first (only) row.
    pls_proba = plsxgb.predict_proba(plsxgb_pp.transform(row))
    pls_score = weights["pls-xgb"] * pls_proba[0, 1]

    # EDA XGBoost: same read-out as above.
    eda_proba = edaxgb.predict_proba(edaxgb_pp.transform(row))
    eda_score = weights["eda-xgb"] * eda_proba[0, 1]

    blended = nn_score + eda_score + pls_score
    if blended > threshold:
        return 1
    return 0

Make the predictions for the new samples using Jane Street's API.

In [4]:
env = janestreet.make_env()
# Names of the model input columns: feature_0 ... feature_129.
features = [f"feature_{idx}" for idx in range(130)]

for (sample, pred) in env.iter_test():
    if sample["weight"].item() > 0.0:
        # Only positively weighted samples are scored by the ensemble.
        pred.action = predict(sample[features])
    else:
        # Skip the models entirely and pass on the opportunity.
        # NOTE(review): presumably zero-weight samples do not affect the score -- confirm.
        pred.action = 0
    env.predict(pred)