In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from ray.rllib.agents import ppo
from ray.rllib.utils.framework import try_import_tf

from src.rllib_gym_trade_environment import CryptoEnv
from src.rllib_gym_trade_environment import prepare_dict

# Apply seaborn's "darkgrid" style to every figure drawn in this notebook.
sns.set_style(style="darkgrid")

In [None]:
# Import TensorFlow through RLlib's helper; the call returns three handles.
tf1, tf, tfv = try_import_tf()

# NOTE(review): the frame is named `df_test` (and a later plot is titled
# "Test set") but it is read from the *train* parquet file — confirm which
# split this evaluation is meant to use.
df_test = pd.read_parquet(path='data/df_train.parquet')

# Convert the frame into the dict consumed by the environment config
# (the 'price_array' and 'observations' entries are read from it below).
test_dict = prepare_dict(df_test)

In [None]:
# Evaluation configuration: the "env_config" entry is handed to CryptoEnv,
# the remaining entries are merged into the PPO trainer configuration.
config = {
    "env": CryptoEnv,
    "env_config": {
        # Arrays produced by prepare_dict() from the loaded frame.
        "price_array": test_dict['price_array'],
        "observations": test_dict['observations'],
        "initial_capital": 10_000.0,
        "gamma": 0.99,
        # Also used as the upper bound of the rollout loop.
        "max_steps": 10_000,
    },
    "model": {"vf_share_layers": False},
    "framework": "tf",
    "num_workers": 0,
}

In [None]:
# Build a standalone environment instance for the manual rollout, and a PPO
# trainer whose configuration is the PPO defaults overridden by `config`.
env = CryptoEnv(config["env_config"])
ppo_config = ppo.DEFAULT_CONFIG.copy()
ppo_config.update(config)
trainer = ppo.PPOTrainer(config=ppo_config, env=CryptoEnv)

# Resolve the checkpoint under the current user's home directory instead of a
# hard-coded "/home/lmsh" prefix, so the notebook is not tied to one account.
# For the original user this yields exactly the same path as before.
path = str(
    Path.home()
    / "ray_results"
    / "PPOTrainer_CryptoEnv_2022-04-22_18-46-303pnq7kyz"
    / "checkpoint_000050"
    / "checkpoint-50"
)

# Load the trained weights saved at checkpoint 50 into the trainer.
trainer.restore(path)

In [None]:
# Roll the restored policy through ONE episode of the environment, recording
# the action taken, the reward received and the total asset value per step.
actions = []
total_assets = []
rewards = []
observation = env.reset()
for _ in range(config["env_config"]["max_steps"]):
    action = trainer.compute_action(observation)
    observation, reward, done, info = env.step(action)
    rewards.append(reward)
    total_assets.append(info["Final total asset"])
    actions.append(action)
    if done:
        # Stop at the episode boundary. Resetting and continuing would mix
        # several episodes into the recorded lists while `info` (used below
        # for "Initial step" / "Final step") describes only the last one, so
        # the time axis of the plots would no longer line up with the data.
        break

In [None]:
# Episode boundaries reported by the environment in the last `info` dict;
# they are used below to slice `df_test` for the x-axis of the plots.
init_step = info["Initial step"]
final_step = info["Final step"]

In [None]:
# Price over the rows of `df_test` covered by the evaluated episode.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 5))
ax.plot(
    df_test.index[init_step:final_step],
    df_test['price'].iloc[init_step:final_step],
)
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.set_title("Test set")

In [None]:
# Action chosen by the policy at each step, against the episode's time index.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 5))
ax.scatter(df_test.index[init_step:final_step], np.asarray(actions))
ax.set_xlabel("Time")
ax.set_ylabel("actions")

In [None]:
# Reward received at each step, against the episode's time index.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 5))
ax.scatter(df_test.index[init_step:final_step], np.asarray(rewards))
ax.set_xlabel("Time")
ax.set_ylabel("Rewards")

In [None]:
# Total asset value reported by the environment at each step.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 5))
ax.scatter(df_test.index[init_step:final_step], np.asarray(total_assets))
ax.set_xlabel("Time")
ax.set_ylabel("Total asset")