In [None]:
# Switch for GPU training; the XGBoost tree-construction method follows it.
gpu = False
if gpu:
    tree_method = 'gpu_hist'
else:
    tree_method = 'hist'

import gc
import numpy as np
import pandas as pd
import xgboost as xgb
import janestreet
import time

# Load the training set and keep only rows with a non-zero weight.
train = pd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv')
train = train.loc[train.weight != 0]

# Sentinel substituted for NaNs, the response column, and the feature columns
# (every column whose name contains 'feature').
missing = -999
target = 'resp'
fcols = [name for name in train.columns if 'feature' in name]

# Feature matrix with NaNs replaced by the sentinel, and binary labels
# (1 where the response is strictly positive, else 0).
X = train[fcols].fillna(missing)
y = train[target].gt(0).astype(int)

# Drop the raw frame now that X and y have been extracted from it.
del train

# Train model
# Gradient-boosted classifier fitted on the binary labels in `y`. `missing`
# tells XGBoost which value marks an absent feature; it is the same sentinel
# that was used to fill NaNs when X was built.
model = xgb.XGBClassifier(
    n_estimators=50,
    max_depth=3,
    learning_rate=0.01,
    subsample=0.9,
    colsample_bytree=0.5,
    missing=missing,
    random_state=2020,
    # Use the method selected by the `gpu` switch at the top of the file.
    # This was previously hard-coded to 'hist', which made the switch a no-op.
    tree_method=tree_method,
)

# Time the fit; `fittime` ends up holding the elapsed seconds.
fit_start = time.time()
model.fit(X, y)
fittime = time.time() - fit_start
print('Finished training model, taking %g min'%(fittime/60))

# Clear memory: the training matrices are not needed for inference.
del X, y

# Create submission
# Stream the test data through the competition environment, submit one
# prediction frame per iteration, and time both the whole loop and the
# per-iteration prediction work.

start = time.time()
env = janestreet.make_env()
iter_test = env.iter_test()
predict_time = []
count = 0
failed = 0
for (test_df, sample_prediction_df) in iter_test:
    innerstart = time.time()
    try:
        p = model.predict(test_df.loc[:, fcols].fillna(missing))
        # np.rint returns floats; cast so the action has the same integer
        # type as the fallback value below.
        sample_prediction_df.action = np.rint(p).astype(int)
    except Exception as err:
        # Best-effort fallback: still submit something for this iteration.
        # `except Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt/SystemExit propagate; failures are counted so
        # they no longer go unnoticed.
        if failed == 0:
            print('prediction failed, falling back to action=0:', repr(err))
        failed += 1
        sample_prediction_df.action = 0

    env.predict(sample_prediction_df)

    innerend = time.time()
    predict_time.append(innerend - innerstart)
    # NOTE(review): this counts iterations, while the messages below call them
    # rows -- presumably each iteration carries a single row; verify.
    count += 1

if failed:
    print('prediction failed for %d of %d iterations' % (failed, count))

# Convert elapsed seconds to hours.
total = (time.time() - start) / 3600
totalpredict = sum(predict_time) / 3600
# An empty test iterator would otherwise raise ZeroDivisionError here.
million = totalpredict / count * 1e6 if count else float('nan')
print('took', total, 'hours for looping through %d rows'%count)
print('took', totalpredict, 'hours for predicting %d rows'%count)
print('will take', million, 'hours for predicting 1e6 rows')
 