In [4]:
import copy
import gc
import os

import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ray
import torch
from ray.rllib.agents.ppo.ddppo import DEFAULT_CONFIG
from ray.rllib.agents.ppo.ddppo import DDPPOTrainer
from ray.tune.registry import register_env

from Environments.BaseMarket import TestEnv
from Environments.LimitOrderMarket import LimitMarket
from Environments.wrappers.reward_wrapper import CuriosityWrapper
# Run a full garbage-collection pass up front; the number shown as the cell
# output is gc.collect()'s return value (unreachable objects found).
gc.collect()

212

In [5]:
# (Re)start the local Ray runtime with 1 GPU and 4 CPUs.
# Only the "already initialised" case (e.g. this cell being re-run) warrants a
# shutdown-and-retry, so check for it explicitly instead of catching every
# exception: a genuine startup failure (such as an unreachable Redis) should
# surface once, with its original traceback, rather than be retried blindly.
if ray.is_initialized():
    ray.shutdown()
ray.init(num_gpus=1, num_cpus=4)

RuntimeError: Unable to connect to Redis at 127.0.0.1:6379 after 12 retries. Check that 127.0.0.1:6379 is reachable from this machine. If it is not, your firewall may be blocking this port. If the problem is a flaky connection, try setting the environment variable `RAY_START_REDIS_WAIT_RETRIES` to increase the number of attempts to ping the Redis server.

In [None]:
# Start from a private deep copy of the DDPPO defaults. A shallow `.copy()`
# would share the nested 'model' dict with DEFAULT_CONFIG, so the
# trainer_config['model'][...] overrides below would silently rewrite the
# library-wide defaults for anything else built from them in this kernel.
trainer_config = copy.deepcopy(DEFAULT_CONFIG)

In [None]:
# Display the default model sub-config so the keys overridden in the next cell can be checked.
trainer_config['model']

In [None]:
# Network settings: plain fully-connected net, no LSTM and no frame stacking.
# lstm_cell_size is still set; the rollout cells build hidden states of the
# same size (512).
trainer_config['model'].update({
    'use_lstm': False,
    'lstm_cell_size': 512,
    'framestack': False,
    'fcnet_hiddens': [1024, 1024],
})

# Settings handed to the market environment through 'env_config'.
conf = dict([
    ('data', 'Data/indicator_dataset/'),
    ('starting_money', 1000),
    ('starting_stocks', 0),
    ('episode_length', 1000),
    ('commission', 0.0025),
    ('state_orders_num', 10),
    ('max_horizon', 100),
    ('curiosity_reward', 0),
])

# Trainer-level settings: resources, rollout sizing and loss coefficients.
# NOTE(review): gamma = 0 means future rewards are not discounted in at all -
# confirm this is intended.
trainer_config.update({
    'num_gpus': 0,
    'num_gpus_per_worker': 1,
    'num_envs_per_worker': 1,
    'gamma': 0,
    'entropy_coeff': 0,
    'framework': 'torch',
    'num_workers': 1,
    'horizon': 1000,
    'rollout_fragment_length': 1000,
    'env_config': conf,
    'entropy_coeff_schedule': 1000,
})

In [None]:
def curiosity_env_create(env_config):
    """Build a LimitMarket from ``env_config`` and return it wrapped in CuriosityWrapper.

    Args:
        env_config: Configuration object passed straight through to
            ``LimitMarket``.

    Returns:
        The ``CuriosityWrapper`` instance wrapping the new market.
    """
    market = LimitMarket(env_config)
    return CuriosityWrapper(market)

In [None]:
# Register the creator under a string name; the trainer is later built with env = 'CuriosityLimitMarket'.
register_env('CuriosityLimitMarket', curiosity_env_create)

In [None]:
# Build the DDPPO trainer from the customised config on the registered curiosity-wrapped market.
trainer = DDPPOTrainer(trainer_config, env = 'CuriosityLimitMarket')

In [None]:
# Checkpoint the untrained trainer once so the training loop always has a
# previous checkpoint to replace.
# NOTE(review): the bookkeeping below assumes this initial save is index 0 - confirm.
trainer.save()

# Best-so-far tracking: any real reward beats -inf on the first comparison.
best_reward = float('-inf')
last_checkpoint = 0
hall_of_fame = [last_checkpoint]

In [None]:
%%time
for i in range(100):
    print("Training iteration {}...".format(i))
    results = trainer.train()
    this_reward = results['episode_reward_max']
    if this_reward > best_reward:
        best_reward = this_reward
        trainer.save()
        path = trainer.logdir + 'checkpoint_{0}/checkpoint-{0}'.format(last_checkpoint)
        os.remove(path)
        last_checkpoint = i + 1
        hall_of_fame.append(i+1)
        print('New best reward')
        print(best_reward)
    if i % 10 == 0:
        print('Best Reward So Far')
        print(best_reward)      

In [None]:
# Checkpoint file path for the most recent hall_of_fame entry, built with the same layout the training loop uses.
path = trainer.logdir + 'checkpoint_{0}/checkpoint-{0}'.format(hall_of_fame[-1])

In [None]:
# Load the per-iteration metrics from the trainer's log directory and plot
# the mean episode reward.
progress_csv = trainer.logdir + 'progress.csv'
training = pd.read_csv(progress_csv)
plt.plot(training['episode_reward_mean'])

In [None]:
# Roll the trained policy through one episode of a fresh, unwrapped
# LimitMarket, recording per-step actions, rewards, prices and asset values.
env = LimitMarket(conf)
obs = env.reset()

done = False
cumulative_reward = 0
prices = []
assets = []
actions = []
states = [obs]
rewards = []
# Initial recurrent state handed to compute_action; the size matches the
# configured lstm_cell_size (512).
# NOTE(review): the model is configured with use_lstm = False, so this state
# is presumably just passed through - confirm.
hidden = [torch.zeros(512),torch.zeros(512)]
infos = []
# Dedicated step counter for the progress message. The previous version
# tested `i`, which is left over from the training loop and never changes
# inside this loop, and printed a hard-coded total of 200.
step = 0
while not done:
    action, hidden, info = trainer.compute_action(obs, hidden)
    obs, reward, done, results = env.step(action)
    cumulative_reward += reward
    rewards.append(reward)
    actions.append(action)
    assets.append(results['assets'])
    prices.append(results['current_price'])
    states.append(obs)
    infos.append(info)
    step += 1
    if step % 100 == 0:
        # Assumes the environment ends an episode after conf['episode_length'] steps - TODO confirm.
        print('Step: {}/{}'.format(step, conf['episode_length']))
print("Cumulative reward you've received is: {}. Congratulations!".format(cumulative_reward))
print("Asset_Gain {}".format(assets[-1] -assets[0]))

In [None]:
# Keep only the first component of every recorded action and rebind
# `actions` to that flattened list for the plotting cells.
# NOTE: this rebinding means the cell should run exactly once per rollout.
pure_actions = [recorded[0] for recorded in actions]
actions = pure_actions

In [None]:
# One masked price series per action code (0-2 buy, 3-5 sell, 6 hold): each
# series keeps the price only at the steps where that action was taken.
action_ids = np.array(actions)
buy10, buy20, buy50, sell10, sell20, sell50, hold = (
    np.ma.masked_where(action_ids != code, prices) for code in range(7)
)

# Scatter of every 10th step (at most 500 points), coloured by action code:
# codes 0-2 -> 'r', codes 3-5 -> 'g', code 6 -> 'b'.
graph_prices = prices[::10][:500]
graph_actions = actions[::10][:500]
colors = ['r', 'r', 'r', 'g', 'g', 'g', 'b']
point_colors = np.array(colors)[graph_actions]
fig = plt.figure(figsize = (10, 6))
plt.scatter(range(len(graph_prices)), graph_prices, s=1, color = point_colors)

In [None]:
# Asset value recorded at each step of the rollout above.
plt.plot(assets)

In [None]:
# Price recorded at each step of the rollout above.
plt.plot(prices)

In [None]:
# Same rollout as before, but starting from env.test() instead of
# env.reset(), and additionally recording the 'market_beater' value that the
# environment reports at every step.
obs = env.test()

# Per-step logs for this rollout.
prices, assets, actions = [], [], []
rewards, infos, market_beaters = [], [], []
states = [obs]
cumulative_reward = 0
hidden = [torch.zeros(512),torch.zeros(512)]

done = False
while not done:
    action, hidden, info = trainer.compute_action(obs, hidden)
    obs, reward, done, results = env.step(action)
    cumulative_reward += reward
    # What the agent did and what it received for it.
    actions.append(action)
    rewards.append(reward)
    infos.append(info)
    # What the environment reported back.
    states.append(obs)
    prices.append(results['current_price'])
    assets.append(results['assets'])
    market_beaters.append(results['market_beater'])
print("Cumulative reward you've received is: {}. Congratulations!".format(cumulative_reward))
print("Asset_Gain {}".format(assets[-1] -assets[0]))

In [None]:
# Per-step 'market_beater' values collected during the test rollout.
plt.plot(market_beaters)

In [None]:
# Clear the scratch list and dump every raw action from the test rollout,
# one per line. Unlike the earlier rollout, `actions` is not unwrapped here.
pure_actions = list()
for recorded_action in actions:
    print(recorded_action)

In [None]:
# Reduce each recorded action to its first component. The test rollout's
# `actions` list is never unwrapped the way the first rollout's was, so do it
# here; values that are already scalar are kept as they are.
action_ids = np.array([
    a[0] if isinstance(a, (tuple, list)) or np.ndim(a) > 0 else a
    for a in actions
])

# np.ma.masked_where HIDES the entries where the condition is True, so each
# series must mask the steps where the action is NOT the one it is named
# after (the previous `==` showed everything except that action).
# Action codes follow the first rollout's plotting cell: 0-2 buy, 6 hold.
buy10 = np.ma.masked_where(action_ids != 0, prices)
buy20 = np.ma.masked_where(action_ids != 1, prices)
buy50 = np.ma.masked_where(action_ids != 2, prices)
hold = np.ma.masked_where(action_ids != 6, prices)

plt.plot(buy10, c = 'turquoise')
plt.plot(buy20, c = 'lime')
plt.plot(buy50, c = 'green')
plt.plot(hold, c = 'blue')

In [None]:
# Asset value recorded at each step of the test rollout.
plt.plot(assets)

In [None]:
# Price recorded at each step of the test rollout.
plt.plot(prices)

In [None]:
# 'market_beater' value reported at the last step of the test rollout.
market_beaters[-1]

In [None]:
from sklearn import preprocessing

In [None]:
# L2-normalise each series as a whole so prices and assets share a scale.
# The inputs are column vectors of shape (n, 1). normalize() defaults to
# axis=1, which scales every single-element row to unit norm and therefore
# turns every value into +/-1; axis=0 normalises down the column instead.
prices_norm = preprocessing.normalize(np.array(prices).reshape(-1,1), axis=0)
assets_norm = preprocessing.normalize(np.array(assets).reshape(-1,1), axis=0)

In [None]:
# Element-wise difference between the normalised asset and price series.
assets_norm-prices_norm