In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the training set and keep only rows with a positive weight and a
# date greater than 85, renumbering the index once at the end.
train = (
    pd.read_csv('../input/jane-street-market-prediction/train.csv')
    .query('weight>0')
    .query('date>85')
    .reset_index(drop=True)
)

# Every column whose name contains "feature".
features = [col for col in train.columns if 'feature' in col]

# Per-column medians (NaNs ignored) of all features except the first one.
# Computed before the NaNs are filled; reused later to impute test rows.
# Despite the name, this holds medians, not means.
f_mean = np.nanmedian(train[features[1:]].to_numpy(), axis=0)

# Replace every remaining NaN with the median of its column.
train = train.fillna(train.median())

# Number of rows left after filtering; used to scale the KL term of the model.
NUM_TRAIN_EXAMPLES = train.shape[0]

In [None]:
import tensorflow as tf
import tensorflow_probability as tfp 
tfd = tfp.distributions
import tensorflow_addons as tfa

In [None]:
# When True the notebook fits the model and saves its weights; when False it
# loads previously saved weights from PATH and runs the inference loop instead.
TRAINING = False
# Directory that contains the saved `model.tf` weights loaded in inference mode.
PATH = '../input/tfpstarterjs'

In [None]:
# Target columns: every column whose name contains "resp".
resp_cols = [col for col in train.columns if 'resp' in col]

# Feature matrix, and binary targets (1 where the response is strictly positive).
X = train[features].to_numpy()
y = (train[resp_cols].to_numpy() > 0).astype(int)

In [None]:
def create_model():
    """Build and compile the Bayesian neural network classifier.

    Adapted from https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/bayesian_neural_network.py

    The network is a Sequential stack of
    Dropout(0.2) -> BatchNormalization -> DenseFlipout(64, swish)
    -> Dropout(0.2) -> BatchNormalization -> DenseFlipout(y.shape[-1], sigmoid).

    Reads the module-level ``y`` (for the output width) and
    ``NUM_TRAIN_EXAMPLES`` (to scale the kernel KL divergence).

    Returns:
        The compiled ``tf.keras`` model (RectifiedAdam optimizer, binary
        cross-entropy with label smoothing 0.01, precision metric).
    """

    def scaled_kl(q, p, _):
        # Kernel KL divergence divided by the number of training examples.
        n_examples = tf.cast(NUM_TRAIN_EXAMPLES, dtype=tf.float32)
        return tfd.kl_divergence(q, p) / n_examples

    layers = [
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.BatchNormalization(),
        tfp.layers.DenseFlipout(
            64,
            kernel_divergence_fn=scaled_kl,
            activation=tf.keras.activations.swish,
        ),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.BatchNormalization(),
        tfp.layers.DenseFlipout(
            y.shape[-1],
            kernel_divergence_fn=scaled_kl,
            activation='sigmoid',
        ),
    ]
    model = tf.keras.models.Sequential(layers)

    model.compile(
        tfa.optimizers.RectifiedAdam(),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.01),
        metrics=[tf.keras.metrics.Precision(name='precision')],
        experimental_run_tf_function=False,
    )
    return model


In [None]:
import random
import os
def set_all_seeds(seed):
    """Seed every random number generator the notebook relies on.

    Seeds NumPy's global generator, Python's ``random`` module and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to all generators.
    """
    np.random.seed(seed)
    random.seed(seed)
    # Only influences hash randomisation of Python processes started afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)
    # The model is built and trained with TensorFlow, so its global seed has to
    # be fixed too; otherwise training is not reproducible.
    tf.random.set_seed(seed)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
if not TRAINING:
    # Inference mode: rebuild the architecture and restore the saved weights.
    model = create_model()
    model.load_weights(f'{PATH}/model.tf')
    # Wrap the forward pass in a tf.function for the per-row inference calls.
    model.call = tf.function(model.call, experimental_relax_shapes=True)
else:
    # Training mode. Original author's note: "To boost the notebook votes,
    # overfit." The model is fitted on all of X / y with no validation data.
    set_all_seeds(42)

    model = create_model()
    model.fit(
        X,
        y,
        epochs=200,
        batch_size=8192,
    )
    model.save_weights(f'model.tf')


In [None]:
# Collects the `pred` value of every test row scored by the inference loop
# (plotted as a histogram at the end of the notebook).
p = []

In [None]:
from tqdm import tqdm
if not TRAINING:
    # Inference only: iterate over the competition test set and submit an
    # action for every (test_df, pred_df) pair the environment yields.
    N = 11          # number of forward passes drawn per row
    f = np.median   # collapses the outputs of one forward pass to a scalar
    th = 0.5        # aggregated prediction must exceed this to trade
    import janestreet
    # NOTE(review): presumably resets the "already called" guard so make_env()
    # can be invoked again in the same kernel. Confirm against the janestreet module.
    janestreet.competition.make_env.__called__ = False
    env = janestreet.make_env()
    for (test_df, pred_df) in tqdm(env.iter_test()):
        if test_df['weight'].item() > 0:
            x_tt = test_df.loc[:, features].values
            # Impute NaNs in every feature but the first with the training
            # medians stored in f_mean.
            if np.isnan(x_tt[:, 1:].sum()):
                x_tt[:, 1:] = np.nan_to_num(x_tt[:, 1:]) + np.isnan(x_tt[:, 1:]) * f_mean

            # The same input is scored N times; the Flipout layers are expected
            # to give a different sample on each call.
            predictions = np.array(
                [f(model(x_tt, training=False).numpy()) for _ in range(N)]
            )
            mean = predictions.mean()
            std = predictions.std()

            # Trimmed mean: average only the predictions within 0.75 std of the
            # mean. The previous code averaged the 0/1 mask itself, so the
            # thresholded value was the share of samples near the mean rather
            # than a predicted probability.
            close = np.abs(predictions - mean) <= 0.75 * std
            # The mask can be empty (e.g. samples split into two clusters);
            # fall back to the plain mean in that case.
            pred = predictions[close].mean() if close.any() else mean

            p.append(pred)
            pred_df.action = np.where(pred > th, 1, 0).astype(int)
        else:
            # Rows whose weight is not positive are never traded.
            pred_df.action = 0
        env.predict(pred_df)

In [None]:
import matplotlib.pyplot as plt
# Histogram of the `pred` values collected in `p`, one per scored test row.
fig, ax = plt.subplots()
ax.hist(p, bins=25)
ax.set(title="Distribution of pred values", xlabel="pred", ylabel="number of rows");