In [1]:
# lib
import datatable as dt
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import random
import seaborn as sns
import time

import lightgbm as lgbm

def seed_everything(seed=7777):
    """Seed the random number generators used by this notebook.

    Exports ``PYTHONHASHSEED`` to the process environment and seeds both
    Python's ``random`` module and NumPy's global generator.

    Args:
        seed: Integer seed applied everywhere (default 7777).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # torch / tensorflow seeding is deliberately left out; add it here if
    # those frameworks are ever imported.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


# Notebook-wide configuration: show up to 200 DataFrame columns and fix the
# random seed before anything else runs.
pd.set_option('display.max_columns', 200)
SEED = 7777
seed_everything(seed=SEED)

In [2]:
# Directory holding the competition input files (presumably the Kaggle dataset mount).
# NOTE(review): not referenced by the other visible cells, which hard-code their own paths.
data_path = '../input/jane-street-market-prediction/'


def save_pickle(dic, save_path):
    """Pickle ``dic`` and write it to ``save_path``.

    Args:
        dic: Any picklable object to serialize.
        save_path: Destination file path. It is opened in binary write mode,
            so an existing file at that path is overwritten.
    """
    # Written uncompressed with the default pickle protocol (the gzip variant
    # is not used).
    with open(save_path, mode='wb') as sink:
        pickle.Pickler(sink).dump(dic)

def load_pickle(load_path):
    """Read and unpickle the object stored at ``load_path``.

    Args:
        load_path: Path of an uncompressed pickle file, opened in binary
            read mode.

    Returns:
        The object reconstructed by ``pickle.load``.

    Note:
        Unpickling can execute arbitrary code; only use this on files from a
        trusted source.
    """
    with open(load_path, mode='rb') as source:
        return pickle.load(source)

In [3]:
# Column names feature_0 ... feature_129, in the order used to build the model input.
features = [f'feature_{i}' for i in range(130)]
# Values read from train_df_median.csv, restricted to the feature columns and
# kept as a NumPy array (used by the inference loop to fill missing values).
train_df_median = (
    pd.read_csv('../input/js-lgbm-cls/train_df_median.csv')
    .loc[:, features]
    .values
)

In [4]:
# The pickle holds a dict with one entry per fold; element 4 of each entry is
# the object whose .predict() is called in the inference loop.
# NOTE(review): unpickling executes arbitrary code; load only trusted model files.
models = [
    fold_entry[4]
    for fold_entry in load_pickle('../input/js-lgbm-cls/lgb_model_cls.bin').values()
]

In [5]:
# Decision threshold: action is 1 when the ensemble prediction is >= th.
th = 0.500
# Small constant added to numerator and denominator of the ratio feature.
esp = 1e-8

import janestreet
env = janestreet.make_env()

for (test_df, pred_df) in env.iter_test():
    # Rows whose weight is not positive get action 0 without running the models.
    if test_df['weight'].item() > 0:
        x_test = test_df.loc[:, features].values

        # Fill missing values from train_df_median. The NaN check skips
        # column 0, which is also dropped from the model input below.
        if np.isnan(x_test[:, 1:].sum()):
            # np.where substitutes the median only where a value is missing.
            # The previous mask-times-median form evaluated 0 * median for
            # observed cells, which turns them into NaN whenever a stored
            # median is NaN or inf. For finite medians the result is the same.
            x_test = np.where(np.isnan(x_test), train_df_median, np.nan_to_num(x_test))

        # Engineered features, computed on the full array so that column
        # indices match the positions in `features`.
        feature_41_42_43 = x_test[:, 41] + x_test[:, 42] + x_test[:, 43]
        feature_1_2 = (x_test[:, 1] + esp) / (x_test[:, 2] + esp)

        # Model input: every column except the first, followed by the two
        # engineered columns.
        x_test = np.concatenate((
            x_test[:, 1:],
            feature_41_42_43.reshape(-1, 1),
            feature_1_2.reshape(-1, 1),
        ), axis=1)

        # Ensemble by taking the element-wise median of the per-model predictions.
        pred = np.median(np.stack([model.predict(x_test) for model in models]), axis=0)
        pred_df.action = np.where(pred >= th, 1, 0).astype(int)
    else:
        pred_df.action = 0
    env.predict(pred_df)