### Please note: Internet access is not allowed for this competition; this notebook is purely for educational purposes.

In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
input_root = '/kaggle/input'
for folder, _, names in os.walk(input_root):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Install exact, pinned versions of the RL stack used below.
# NOTE(review): stable-baselines 2.x presumably requires a TensorFlow 1.x
# release, hence the old TensorFlow pin -- confirm before upgrading either one.
!pip install 'tensorflow==1.15.0'
!pip install 'stable-baselines[mpi]==2.10.0'

In [None]:
import json
import cudf
import random
import datetime
import janestreet
import numpy as np
import pandas as pd
from tqdm import tqdm

import gym
from gym import spaces
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy, MlpLnLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv

In [None]:
# Competition API handle; predictions are later submitted through js_env.predict().
js_env = janestreet.make_env() # initialize the environment
# Iterator over the test set; the evaluation loop below unpacks each item as
# a (test_df, sample_prediction_df) pair.
iter_test = js_env.iter_test() # an iterator which loops over the test set

## Load data

In [None]:
%%time
train_cudf  = cudf.read_csv('/kaggle/input/jane-street-market-prediction/train.csv')
train = train_cudf.to_pandas()
del train_cudf
# For non-gpu: train = pd.read_csv('/kaggle/input/jane-street-market-prediction/train.csv', nrows=(round(2390491 * 0.3)))
features = pd.read_csv('../input/jane-street-market-prediction/features.csv')
test = pd.read_csv('../input/jane-street-market-prediction/example_test.csv')
sample_prediction_df = pd.read_csv('../input/jane-street-market-prediction/example_sample_submission.csv')
print ("Data is loaded!")

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Unique model name built from the current Unix time in whole seconds.
# `datetime.timestamp()` is used instead of `strftime('%s')` because `%s` is a
# platform-specific extension that is not supported everywhere.
timestamp = datetime.datetime.now()
MODEL_ID = f"jane_street_ppo_{int(timestamp.timestamp())}"

## Utilities

In [None]:
def fillNaN(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with missing values replaced by column means.

    Only numeric columns contribute a mean, so non-numeric columns are left
    untouched instead of making the mean computation fail. The input frame is
    not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that may contain NaN values.

    Returns
    -------
    pandas.DataFrame
        New frame in which each numeric column's NaNs are filled with that
        column's mean.
    """
    column_means = df.mean(numeric_only=True)
    return df.fillna(column_means)

In [None]:
# Replace missing values with column means. Each frame is filled from its own
# means (the test frame is not filled with the training means).
train = fillNaN(train)
test = fillNaN(test)

## Gym env

In [None]:
class JaneStreetEnv(gym.Env):
    """Gym environment that replays a data frame one row at a time.

    An observation is a window of ``WINDOW`` consecutive rows of the feature
    columns, laid out as ``(n_features, WINDOW)``. Acting on the current row
    yields ``weight * resp * action`` as reward, so action 0 always earns 0
    and action 1 earns the row's weighted response.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``weight`` and ``resp`` columns; every column whose name
        contains ``"feature"`` is used as an observation feature. Rows are
        addressed by position, so the index does not need to be 0..n-1.
    """

    # Number of consecutive rows stacked into one observation.
    WINDOW = 6
    # Bounds advertised by the observation space.
    # NOTE(review): presumably the min/max feature values of the training
    # data -- verify if the data set changes.
    OBS_LOW = -8.215050
    OBS_HIGH = 5.872849e+01

    def __init__(self, df):
        super(JaneStreetEnv, self).__init__()
        self.df = df
        self.n_samples = df.shape[0]
        self.weight = df['weight'].values
        self.resp = df['resp'].values
        self.features = [col for col in df.columns if 'feature' in col]
        # Cache the features as one (n_samples, n_features) array so that an
        # observation is a cheap positional slice instead of a per-feature
        # DataFrame lookup on every step.
        self.states = df[self.features].values
        self.idx = 0

        # Two actions: 0 = pass, 1 = take the trade (the reward is multiplied
        # by the action value).
        self.action_space = spaces.Discrete(2)

        # One row per feature, one column per row of the window.
        self.observation_space = spaces.Box(
            low=self.OBS_LOW,
            high=self.OBS_HIGH,
            shape=(len(self.features), self.WINDOW),
            dtype=np.float32,
        )

    def _next_observation(self):
        """Return the feature window starting at ``self.idx``.

        The result has shape ``(n_features, WINDOW)`` and is a copy, so the
        caller cannot modify the cached feature array through it.
        """
        window = self.states[self.idx:self.idx + self.WINDOW]
        return window.T.copy()

    def step(self, action):
        """Apply ``action`` to the current row and advance by one row.

        Returns
        -------
        tuple
            ``(obs, reward, done, info)`` where ``obs`` is the window that
            starts at the row *after* the one just acted on, ``reward`` is
            ``weight * resp * action`` of the row acted on, ``done`` is True
            once no full window remains, and ``info`` is an empty dict.
        """
        reward = self.weight[self.idx] * self.resp[self.idx] * action
        self.idx += 1
        # The episode ends when a full window no longer fits after idx.
        done = self.idx >= self.n_samples - (self.WINDOW - 1)
        if done:
            self.idx = 0
        # Build the observation only after advancing, so the agent is shown
        # the next state rather than the one it has just acted on.
        obs = self._next_observation()
        return obs, reward, done, {}

    def reset(self):
        """Rewind to the first row and return its observation window."""
        self.idx = 0
        return self._next_observation()

    def render(self, mode='human'):
        """Print the current row position; ``mode`` is accepted but unused."""
        print(f'Step: {self.idx}')
        
# Wrap a single JaneStreetEnv built from the training frame in a DummyVecEnv;
# the lambda defers construction of the environment to the wrapper.
env = DummyVecEnv([lambda: JaneStreetEnv(train)])

## Training and evaluation

In [None]:
def learn(timesteps=20000):
    """Train a PPO2 agent on ``env`` and save it under ``MODEL_ID``.

    Parameters
    ----------
    timesteps : int
        Value passed to the agent's ``learn`` call as ``total_timesteps``.
    """
    agent = PPO2(policy=MlpLnLstmPolicy, env=env, verbose=1, nminibatches=1)
    agent.learn(total_timesteps=timesteps)
    agent.save(MODEL_ID)
    print(f"Saved model: {MODEL_ID}")


learn()

In [None]:
def evaluate(model_name):
    """Load a saved PPO2 model and submit one action per test batch.

    For every ``(test_df, sample_prediction_df)`` pair produced by
    ``iter_test`` the model picks an action, the action is written into
    ``sample_prediction_df`` and the frame is handed to ``js_env.predict``.

    Parameters
    ----------
    model_name : str
        Path/name previously passed to ``model.save``.

    Returns
    -------
    pandas.DataFrame
        All submitted prediction frames concatenated in submission order
        (an empty frame when ``iter_test`` yields nothing).
    """
    model = PPO2.load(model_name)
    obs = env.reset()
    # The trained policy is recurrent, so its hidden state has to be fed back
    # into every predict call; None stands for the initial state.
    lstm_state = None
    done = [False] * env.num_envs
    predictions = []
    for (test_df, sample_prediction_df) in tqdm(iter_test):
        # NOTE(review): observations come from the training environment
        # `env`; `test_df` is never shown to the model. Confirm whether the
        # prediction should be driven by the test features instead.
        action, lstm_state = model.predict(obs, state=lstm_state, mask=done)
        obs, rewards, done, info = env.step(action)
        sample_prediction_df.action = action #make your 0/1 prediction here
        # Collect frames and concatenate once at the end; growing a frame
        # inside the loop copies all earlier rows on every iteration.
        predictions.append(sample_prediction_df.copy())
        js_env.predict(sample_prediction_df)
    if not predictions:
        return pd.DataFrame()
    return pd.concat(predictions)

submission_df = evaluate(MODEL_ID)

In [None]:
# Clear the name of the column axis, label the row index 'ts_id', and write
# the frame (index included) to submission.csv.
submission_df = (
    submission_df
    .rename_axis(None, axis=1)
    .rename_axis('ts_id', axis=0)
)
submission_df.to_csv('submission.csv')