# XGBoost + Risk-based Weighted Predictions

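**Trick:** Predictions are adjusted based on classifier certainty: an action is taken when the predicted probability of a positive `resp` exceeds 0.49, slightly below the usual 0.5 cut-off.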

In [1]:
# Load modules
import numpy as np
import pandas as pd
import xgboost as xgb
import janestreet

In [2]:
import gc # garbage collection
import os

import joblib
from tqdm import tqdm

In [3]:
# Report the installed library versions first ...
for library in (np, pd, xgb):
    print(library.__version__)

# ... then fail fast unless they match the exact releases this notebook was run with
for library, pinned in ((np, '1.18.5'), (pd, '1.1.4'), (xgb, '1.2.1')):
    assert library.__version__ == pinned

1.18.5
1.1.4
1.2.1


## Load dataset

In [4]:
# Read the competition training set into memory and report its dimensions
train = pd.read_csv(
    filepath_or_buffer='../input/jane-street-market-prediction/train.csv',
)
print(f'Done loading data. Train shape is {train.shape}')

Done loading data. Train shape is (2390491, 138)


In [5]:
# Preview the first five rows of the raw training frame
train.head(n=5)

Unnamed: 0,date,weight,resp_1,resp_2,resp_3,resp_4,resp,feature_0,feature_1,feature_2,...,feature_121,feature_122,feature_123,feature_124,feature_125,feature_126,feature_127,feature_128,feature_129,ts_id
0,0,0.0,0.009916,0.014079,0.008773,0.00139,0.00627,1,-1.872746,-2.191242,...,,1.168391,8.313583,1.782433,14.018213,2.653056,12.600292,2.301488,11.445807,0
1,0,16.673515,-0.002828,-0.003226,-0.007319,-0.011114,-0.009792,-1,-1.349537,-1.704709,...,,-1.17885,1.777472,-0.915458,2.831612,-1.41701,2.297459,-1.304614,1.898684,1
2,0,0.0,0.025134,0.027607,0.033406,0.03438,0.02397,-1,0.81278,-0.256156,...,,6.115747,9.667908,5.542871,11.671595,7.281757,10.060014,6.638248,9.427299,2
3,0,0.0,-0.00473,-0.003273,-0.000461,-0.000476,-0.0032,-1,1.174378,0.34464,...,,2.838853,0.499251,3.033732,1.513488,4.397532,1.266037,3.856384,1.013469,3
4,0,0.138531,0.001252,0.002165,-0.001215,-0.006219,-0.002604,1,-3.172026,-3.093182,...,,0.34485,4.101145,0.614252,6.623456,0.800129,5.233243,0.362636,3.926633,4


In [6]:
# Keep only the rows whose weight is different from zero; zero-weight rows are not used for training
has_weight = train['weight'].ne(0)
train = train.loc[has_weight]

## Define experiment settings

In [7]:
# Experiment settings
TARGET = 'resp'        # column that is turned into the binary label
NAN_VALUE = -999       # placeholder written in place of missing feature values
# Every column whose name contains the substring 'feature', in frame order
FEATURES = [column for column in train.columns if 'feature' in column]
# Largest value found in the `weight` column of `train`
MAX_WEIGHT = train['weight'].max()

## Preprocessing 

The same preprocessing steps must also be applied to the test dataset.

In [8]:
# Feature matrix: the FEATURES columns with every missing value replaced by NAN_VALUE
X = train[FEATURES].fillna(value=NAN_VALUE)

In [9]:
# Binary label: 1 where the TARGET column is strictly positive, 0 otherwise
y = train[TARGET].gt(0).astype(int)

In [10]:
# Clear memory
# X and y have already been derived from `train`, so the name is dropped here
# and a garbage-collection pass is requested.
del train
# gc.collect() returns the number of unreachable objects it found; as the last
# expression of the cell that number is what gets displayed.
gc.collect()

100

## Train model

In [11]:
# Parameters from: https://www.kaggle.com/hamditarek/market-prediction-xgboost-with-gpu-fit-in-1min
model = xgb.XGBClassifier(
    n_estimators=500,
    max_depth=11,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.7,
    missing=NAN_VALUE,     # the fillna placeholder is declared as the missing-value marker
    random_state=2020,
    tree_method='hist',
    n_jobs=10,
    verbosity=2,           # info-level logging: emits the per-tree log lines shown below
)
# Fit on the full (non-zero-weight) training data; no validation split is held out here
model.fit(X, y)
print('Finished training model')

[19:55:41] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:55:41] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:55:41] INFO: ../src/common/hist_util.cc:134: Building quantile cut on a dense dataset or distributed environment.
[19:55:45] INFO: ../src/common/hist_util.cc:138: Total number of hist bins: 32883
[19:55:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2728 extra nodes, 0 pruned nodes, max_depth=11
[19:55:49] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:55:50] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2500 extra nodes, 0 pruned nodes, max_depth=11
[19:55:50] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:55:51] INFO: ../src/tree/upd

[19:56:14] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2130 extra nodes, 0 pruned nodes, max_depth=11
[19:56:14] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:56:14] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1826 extra nodes, 0 pruned nodes, max_depth=11
[19:56:14] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:56:15] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2424 extra nodes, 0 pruned nodes, max_depth=11
[19:56:15] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:56:16] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2060 extra nodes, 0 pruned nodes, max_depth=11
[19:56:16] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:56:38] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2118 extra nodes, 0 pruned nodes, max_depth=11
[19:56:38] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:56:38] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1198 extra nodes, 0 pruned nodes, max_depth=11
[19:56:39] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:56:39] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1918 extra nodes, 0 pruned nodes, max_depth=11
[19:56:39] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:56:40] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1566 extra nodes, 0 pruned nodes, max_depth=11
[19:56:40] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:57:00] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1610 extra nodes, 0 pruned nodes, max_depth=11
[19:57:00] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:00] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2328 extra nodes, 0 pruned nodes, max_depth=11
[19:57:01] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:01] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1748 extra nodes, 0 pruned nodes, max_depth=11
[19:57:01] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:02] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2246 extra nodes, 0 pruned nodes, max_depth=11
[19:57:02] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:57:22] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1320 extra nodes, 0 pruned nodes, max_depth=11
[19:57:22] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:22] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1424 extra nodes, 0 pruned nodes, max_depth=11
[19:57:23] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:23] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1932 extra nodes, 0 pruned nodes, max_depth=11
[19:57:23] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:24] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2074 extra nodes, 0 pruned nodes, max_depth=11
[19:57:24] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:57:43] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2396 extra nodes, 0 pruned nodes, max_depth=11
[19:57:44] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:44] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1566 extra nodes, 0 pruned nodes, max_depth=11
[19:57:44] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:45] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1866 extra nodes, 0 pruned nodes, max_depth=11
[19:57:45] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:57:45] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1848 extra nodes, 0 pruned nodes, max_depth=11
[19:57:45] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:58:05] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2258 extra nodes, 0 pruned nodes, max_depth=11
[19:58:05] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:06] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2340 extra nodes, 0 pruned nodes, max_depth=11
[19:58:06] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:06] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2312 extra nodes, 0 pruned nodes, max_depth=11
[19:58:07] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:07] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1926 extra nodes, 0 pruned nodes, max_depth=11
[19:58:07] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:58:27] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1708 extra nodes, 0 pruned nodes, max_depth=11
[19:58:27] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:27] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1824 extra nodes, 0 pruned nodes, max_depth=11
[19:58:27] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:28] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2544 extra nodes, 0 pruned nodes, max_depth=11
[19:58:28] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:29] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2154 extra nodes, 0 pruned nodes, max_depth=11
[19:58:29] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:58:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2316 extra nodes, 0 pruned nodes, max_depth=11
[19:58:49] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:49] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1944 extra nodes, 0 pruned nodes, max_depth=11
[19:58:50] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:50] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2428 extra nodes, 0 pruned nodes, max_depth=11
[19:58:50] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:58:51] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2350 extra nodes, 0 pruned nodes, max_depth=11
[19:58:51] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:59:10] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2240 extra nodes, 0 pruned nodes, max_depth=11
[19:59:10] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:11] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2172 extra nodes, 0 pruned nodes, max_depth=11
[19:59:11] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:12] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2090 extra nodes, 0 pruned nodes, max_depth=11
[19:59:12] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:12] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2122 extra nodes, 0 pruned nodes, max_depth=11
[19:59:12] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:59:31] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1756 extra nodes, 0 pruned nodes, max_depth=11
[19:59:32] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:32] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2058 extra nodes, 0 pruned nodes, max_depth=11
[19:59:32] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:33] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2420 extra nodes, 0 pruned nodes, max_depth=11
[19:59:33] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:33] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2538 extra nodes, 0 pruned nodes, max_depth=11
[19:59:33] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[19:59:53] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1414 extra nodes, 0 pruned nodes, max_depth=11
[19:59:53] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:54] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2552 extra nodes, 0 pruned nodes, max_depth=11
[19:59:54] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2364 extra nodes, 0 pruned nodes, max_depth=11
[19:59:55] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[19:59:55] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2192 extra nodes, 0 pruned nodes, max_depth=11
[19:59:55] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[20:00:15] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1558 extra nodes, 0 pruned nodes, max_depth=11
[20:00:15] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:00:15] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2072 extra nodes, 0 pruned nodes, max_depth=11
[20:00:15] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:00:16] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1416 extra nodes, 0 pruned nodes, max_depth=11
[20:00:16] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:00:16] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1384 extra nodes, 0 pruned nodes, max_depth=11
[20:00:16] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[20:00:36] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1788 extra nodes, 0 pruned nodes, max_depth=11
[20:00:36] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:00:37] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1740 extra nodes, 0 pruned nodes, max_depth=11
[20:00:37] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:00:37] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1922 extra nodes, 0 pruned nodes, max_depth=11
[20:00:37] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:00:38] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2076 extra nodes, 0 pruned nodes, max_depth=11
[20:00:38] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[20:00:58] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2880 extra nodes, 0 pruned nodes, max_depth=11
[20:00:58] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:00:59] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1520 extra nodes, 0 pruned nodes, max_depth=11
[20:00:59] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:00:59] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1616 extra nodes, 0 pruned nodes, max_depth=11
[20:00:59] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:01:00] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1778 extra nodes, 0 pruned nodes, max_depth=11
[20:01:00] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


[20:01:20] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2492 extra nodes, 0 pruned nodes, max_depth=11
[20:01:20] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:01:21] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 1902 extra nodes, 0 pruned nodes, max_depth=11
[20:01:21] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:01:21] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2062 extra nodes, 0 pruned nodes, max_depth=11
[20:01:21] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[20:01:22] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2054 extra nodes, 0 pruned nodes, max_depth=11
[20:01:22] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


In [12]:
# save model to file
# joblib.dump does not create missing parent directories, so make sure the
# target folder exists first; otherwise a fresh checkout fails with
# FileNotFoundError only after the long training cell has finished.
model_path = "../models/xgboost_v0.joblib.dat"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
# joblib.dump returns the list of files it wrote, which the cell displays.
joblib.dump(model, model_path)

[20:01:24] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


['../models/xgboost_v0.joblib.dat']

In [13]:
# Clear memory
# The training matrix and labels are dropped once the model has been fitted
# and written to disk, and a garbage-collection pass is requested.
del X, y
# gc.collect() returns the number of unreachable objects it found; as the last
# expression of the cell that number is what gets displayed.
gc.collect()

56

## Predict


In [14]:
# Restore the trained classifier from the file written by the save cell.
# NOTE: joblib.load unpickles its input, so only load files you created or trust.
model = joblib.load(filename="../models/xgboost_v0.joblib.dat")

In [15]:
# Create submission using time-series API (from janestreet module)

# Decision cut-off on the predicted probability of class 1 (resp > 0).
# Named here instead of being buried as a magic number inside the loop.
ACTION_THRESHOLD = 0.49

env = janestreet.make_env()
iter_test = env.iter_test()

for (test_df, sample_prediction_df) in tqdm(iter_test):
    # Read the weight straight from its column; `test_df.iloc[0].weight` would
    # first build a Series out of the entire row just to pick one value.
    test_weight = test_df['weight'].iloc[0]
    if test_weight > 0:
        # Same preprocessing as for training: feature columns, NaN -> NAN_VALUE
        features = test_df.loc[:, FEATURES].fillna(NAN_VALUE)
        # Only the first row's class-1 probability is used
        # (assumes each batch holds a single row -- TODO confirm against the API)
        proba = model.predict_proba(features)[0, 1]
        sample_prediction_df['action'] = 1 if proba > ACTION_THRESHOLD else 0
    else:
        # Weight is not positive: skip the model call and submit action 0
        sample_prediction_df['action'] = 0
    env.predict(sample_prediction_df)

23it [00:00,  6.16it/s]

[20:01:25] INFO: ../src/gbm/gbtree.cc:169: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.


15219it [03:18, 76.76it/s] 
