# In [None]:
import gc
from time import time
import multiprocessing

import numpy as np
import pandas as pd

import xgboost as xgb
import janestreet

# Settings
NAN_VALUE = -9999  # sentinel substituted for missing feature values
target = 'resp'

# Load the training set, keeping only rows whose weight is non-zero
data = pd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv')
data = data[data['weight'] != 0]

# Every column whose name contains 'feature' is used as a model input
features = [name for name in data.columns if 'feature' in name]

# Split into features X (NaNs replaced by the sentinel) and binary target Y
# (1 where the target column is strictly positive, 0 otherwise)
X = data[features].fillna(NAN_VALUE)
Y = data[target].gt(0).astype(int)

# Drop the raw frame now that X and Y have been extracted
del data
gc.collect()

# Train model
# Gradient-boosted tree classifier fitted on the binary target Y.
# NAN_VALUE is passed as `missing` so that the sentinel written into X by
# fillna() is handled by XGBoost as a missing value, not as a real number.
model = xgb.XGBClassifier(
    n_estimators=500,
    max_depth=11,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.7,
    missing=NAN_VALUE,
    random_state=2020,
    tree_method='gpu_hist',  # GPU histogram tree builder
    # `n_jobs` is the scikit-learn wrapper's thread-count parameter; it
    # replaces the deprecated `nthread` alias used previously.
    n_jobs=multiprocessing.cpu_count(),
)
model.fit(X, Y)
print('Finished training model')

# Clear memory: the training matrices are not needed once the model is fitted
del X, Y
gc.collect()

# Create submission
# Each iteration yields a test frame and a prediction frame; the prediction
# frame's `action` column is filled in and handed back via env.predict().
env = janestreet.make_env()
iter_test = env.iter_test()
for test_df, sample_prediction_df in iter_test:
    # Apply the same feature selection and NaN sentinel used for training
    X_test = test_df[features].fillna(NAN_VALUE)
    # Assign by key rather than by attribute: attribute assignment only
    # updates a column that already exists and would otherwise just set a
    # plain Python attribute on the frame without creating the column.
    sample_prediction_df['action'] = model.predict(X_test)
    env.predict(sample_prediction_df)