#!pip install tensorflow-gpu==1.15.0 tensorflow==1.15.0 stable-baselines gym-anytrading gym

In [1]:
# !pip install tensorflow-gpu==1.15.0 gym-anytrading 

# 1_ Import dependencies

In [2]:
# Gym stuff
import gym
import gym_anytrading

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os

# import RL model libraries
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv





# 1_ Bring in Marketwatch GME Data

https://www.marketwatch.com/investing/stock/gme/download-data?startDate=11/1/2019&endDate=03/12/2021


In [3]:
data_df = pd.read_csv('data/data_gme_0.csv')

In [4]:
data_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,09/21/2021,199.36,199.36,186.0,189.95,2633827
1,09/20/2021,200.0,202.85,184.55,192.2,3941802
2,09/17/2021,208.02,212.49,200.78,204.97,3950643
3,09/16/2021,202.33,216.55,201.15,206.37,3058217
4,09/15/2021,197.0,204.87,193.75,204.52,2310407


In [5]:
data_df['Date'] = pd.to_datetime(data_df['Date'])
data_df.dtypes

Date      datetime64[ns]
Open             float64
High             float64
Low              float64
Close            float64
Volume            object
dtype: object

In [6]:
data_df.set_index('Date', inplace=True)
data_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-09-21,199.36,199.36,186.0,189.95,2633827
2021-09-20,200.0,202.85,184.55,192.2,3941802
2021-09-17,208.02,212.49,200.78,204.97,3950643
2021-09-16,202.33,216.55,201.15,206.37,3058217
2021-09-15,197.0,204.87,193.75,204.52,2310407


In [7]:
env = gym.make('stocks-v0', df=data_df, frame_bound=(5,100), window_size=5)

In [8]:
env.signal_features

array([[ 1.8995e+02,  0.0000e+00],
       [ 1.9220e+02,  2.2500e+00],
       [ 2.0497e+02,  1.2770e+01],
       [ 2.0637e+02,  1.4000e+00],
       [ 2.0452e+02, -1.8500e+00],
       [ 1.9924e+02, -5.2800e+00],
       [ 2.0340e+02,  4.1600e+00],
       [ 1.9041e+02, -1.2990e+01],
       [ 1.9918e+02,  8.7700e+00],
       [ 1.9880e+02, -3.8000e-01],
       [ 1.9900e+02,  2.0000e-01],
       [ 2.0275e+02,  3.7500e+00],
       [ 2.1352e+02,  1.0770e+01],
       [ 2.1297e+02, -5.5000e-01],
       [ 2.1824e+02,  5.2700e+00],
       [ 2.0920e+02, -9.0400e+00],
       [ 2.0495e+02, -4.2500e+00],
       [ 2.0522e+02,  2.7000e-01],
       [ 1.9965e+02, -5.5700e+00],
       [ 2.1029e+02,  1.0640e+01],
       [ 1.6489e+02, -4.5400e+01]])

# 3_ Build an env

In [9]:
env.action_space.sample()

0

In [10]:
env.observation_space.sample()

array([[-1.2111175 ,  0.5759055 ],
       [ 0.23513591, -0.00318142],
       [ 0.27112505,  1.6192615 ],
       [-0.66271406, -0.9245122 ],
       [-0.54029983,  0.881068  ]], dtype=float32)

In [11]:
state = env.reset()
while True: 
    action = env.action_space.sample()
    n_state, reward, done, info = env.step(action)
    if done: 
        print("info", info)
        break


info {'total_reward': 16.930000000000007, 'total_profit': 1.024986353591251, 'position': 1}


plt.figure(figsize=(15,6))
plt.cla()
env.render_all()
plt.show()

# 5_ Train model

In [12]:
env_maker = lambda: gym.make('stocks-v0', df=data_df, frame_bound=(5,100), window_size=5)
env = DummyVecEnv([env_maker])

In [13]:
model = A2C('MlpPolicy', env, verbose=1) 
model.learn(total_timesteps=10000)

Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 661      |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.459   |
|    explained_variance | -0.0145  |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -1.58    |
|    value_loss         | 6.65     |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 689      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.191   |
|    explained_variance | -6.47    |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 0.000356 |
|    value_loss      

-------------------------------------
| time/                 |           |
|    fps                | 728       |
|    iterations         | 1700      |
|    time_elapsed       | 11        |
|    total_timesteps    | 8500      |
| train/                |           |
|    entropy_loss       | -0.0358   |
|    explained_variance | -0.000175 |
|    learning_rate      | 0.0007    |
|    n_updates          | 1699      |
|    policy_loss        | -1.21e-06 |
|    value_loss         | 5.25e-08  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 723       |
|    iterations         | 1800      |
|    time_elapsed       | 12        |
|    total_timesteps    | 9000      |
| train/                |           |
|    entropy_loss       | -0.0178   |
|    explained_variance | 0.0415    |
|    learning_rate      | 0.0007    |
|    n_updates          | 1799      |
|    policy_loss        | -5.68e-08 |
|    value_l

<stable_baselines3.a2c.a2c.A2C at 0x1cd3744c70>

In [15]:
env = gym.make('stocks-v0', df=data_df, frame_bound=(90,110), window_size=5)
obs = env.reset()
while True: 
    obs = obs[np.newaxis, ...]
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        print("info", info)
        break

IndexError: index 85 is out of bounds for axis 0 with size 21

plt.figure(figsize=(15,6))
plt.cla()
env.render_all()
plt.show()