In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pickle
import numpy as np
import pandas as pd

# load data

In [None]:
train_pickle_file = '../input/pickling/train.csv.pandas.pickle'
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
# The context manager guarantees the file handle is closed once the frame is loaded.
with open(train_pickle_file, 'rb') as pickle_fh:
    train = pickle.load(pickle_fh)

# Every column whose name contains "feature" is treated as a model input.
features = [c for c in train.columns if "feature" in c]

# Keep only rows with non-zero weight and date > 85, then renumber the index.
train = train[train['weight'] != 0]
train = train.query('date > 85').reset_index(drop=True)
# Replace remaining NaNs with the per-column mean of the filtered data.
train.fillna(train.mean(), inplace=True)

# Per-column means of every feature except the first one in `features`;
# the inference loop uses this vector to fill NaNs in incoming test rows.
f_mean = np.mean(train[features[1:]].values, axis=0)
resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']
groups = train['date'].values

# Multi-target label matrix of shape (n_rows, len(resp_cols)): 1 where the response is > 0, else 0.
y = np.stack([(train[c] > 0).astype('int') for c in resp_cols]).T #Multitarget

# Train / validation split

In [None]:
# Date-based hold-out: dates in [450, 500) form the validation set and dates
# before 450 form the training set. Rows with date >= 500 end up in neither.
# NOTE: `train` is overwritten here, so this cell is not safe to re-run on its own.
valid_mask = (train['date'] >= 450) & (train['date'] < 500)
train_mask = train['date'] < 450
valid = train[valid_mask].reset_index(drop=True)
train = train[train_mask].reset_index(drop=True)


def _features_and_targets(frame):
    """Return (feature matrix, binary target matrix) for `frame`.

    Features are all columns whose name contains 'feature'; targets are one
    0/1 column per entry of `resp_cols`, set to 1 where the response is > 0.
    """
    feature_matrix = frame.filter(like='feature').to_numpy()
    target_matrix = np.stack(
        [(frame[name] > 0).astype('int') for name in resp_cols], axis=1
    )
    return feature_matrix, target_matrix


X_train, y_train = _features_and_targets(train)

X_valid, y_valid = _features_and_targets(valid)

# Train Model

In [None]:
import xgboost as xgb
from tqdm import tqdm


# Hyper-parameters passed to xgb.XGBClassifier for the single-target model.
params_1 = dict(
    n_estimators=494,
    max_depth=8,
    min_child_weight=6,
    learning_rate=0.009624384025871735,
    subsample=0.8328412036014541,
    gamma=0,
    colsample_bytree=0.715303237773365,
    objective='binary:logistic',
    eval_metric='auc',
    tree_method='hist',
    random_state=42,
)


In [None]:
# Set to True to fit a new model and save it; False loads a previously saved model.
TRAINING= False

if TRAINING:
    model = xgb.XGBClassifier(**params_1,n_jobs=-1)
    # Column 3 of the target matrix corresponds to 'resp' (see resp_cols ordering).
    # Validation AUC is monitored for early stopping; save_best asks the callback
    # to hand back the best model rather than the last one.
    model.fit(X_train, y_train[:,3], eval_set=[(X_valid, y_valid[:,3])], eval_metric='auc',verbose=100, callbacks = [xgb.callback.EarlyStopping(rounds=300,save_best=True)])
    # The file must be opened for writing ("wb"); opening it with "rb" made the save fail.
    with open("./simple-xgb.dat", "wb") as model_fh:
        pickle.dump(model, model_fh)
else:
    # NOTE: unpickling can execute arbitrary code; only load model files from a trusted source.
    with open("../input/jsxgb/simple-xgb.dat", "rb") as model_fh:
        model = pickle.load(model_fh)

# prediction

In [None]:
from tqdm import tqdm
import janestreet
from numba import njit
# Create the competition environment and the iterator that yields the test batches
# consumed by the prediction loop below.
# NOTE(review): presumably make_env()/iter_test() may only be called once per kernel
# session — confirm against the janestreet API before re-running this cell.
env = janestreet.make_env()
env_iter = env.iter_test()

In [None]:
@njit
def fillna_npwhere_njit(array, values):
    """Return `array` with NaN entries replaced by the matching entries of `values`.

    The sum of `array` is used as a cheap NaN probe: when it is not NaN the
    input is returned untouched; otherwise a new array is built with np.where.
    """
    if not np.isnan(array.sum()):
        return array
    return np.where(np.isnan(array), values, array)
# Hard-coded column layout assumed for each test frame: weight, feature_0..feature_129, date.
test_df_columns = ['weight', *(f'feature_{i}' for i in range(130)), 'date']
# Positions within that layout of the columns the model was trained on.
index_features = [
    position
    for position, column_name in enumerate(test_df_columns)
    if column_name in features
]

In [None]:
# Inference loop: each iteration yields one test frame and the matching prediction frame.
for (test_df, pred_df) in tqdm(env_iter):
    if test_df['weight'].values[0] > 0:
        # Pick the model's feature columns from the first row, shaped as a (1, n_features) batch.
        x_tt = test_df.values[0][index_features].reshape(1, -1)
        # Fill NaNs in every feature except the first with the training means;
        # f_mean was computed over features[1:], so the slices line up.
        x_tt[:, 1:] = fillna_npwhere_njit(x_tt[:, 1:][0], f_mean)
        y_pred = model.predict(x_tt)
        # predict() returns a 1-element array; index it explicitly because implicit
        # array-to-scalar conversion via int(array) is deprecated in recent NumPy.
        pred_df.action = int(y_pred[0])
    else:
        # Rows without positive weight are never acted on.
        pred_df.action = 0
    env.predict(pred_df)