In [2]:
import gym
import json
import datetime as dt
import talib as ta
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2, A2C

from env.StockTradingEnv import StockTradingEnv
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport

In [3]:
def add_indicators(df, warmup=50):
    """Return a copy of ``df`` with 18 max-normalised TA-Lib indicator columns.

    The indicators are computed from the ``High``, ``Low`` and ``Close``
    columns, and each new column is then divided by its own maximum.

    Note: the maximum is taken over the whole column, so the normalised value
    of a row depends on rows that come after it.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data containing at least ``High``, ``Low`` and ``Close`` columns.
        The frame passed in is not modified.
    warmup : int, optional
        Number of leading rows dropped from the result (default 50, the value
        that was previously hard-coded). Presumably this discards rows where
        the indicators have not yet produced values - TODO confirm against the
        longest look-back actually needed.

    Returns
    -------
    pandas.DataFrame
        The original columns plus the 18 indicator columns, without the first
        ``warmup`` rows. The original index labels are kept (no reset).
    """
    # Work on a copy so the caller's frame does not silently gain 18 columns.
    df = df.copy()
    high, low, close = df['High'], df['Low'], df['Close']

    # Raw indicator values; column order matches the original implementation.
    df['adx'] = ta.ADX(high, low, close, timeperiod=14)
    df['macd'], df['macdsignal'], df['macdhist'] = ta.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9
    )
    df['minus_di'] = ta.MINUS_DI(high, low, close, timeperiod=14)
    df['minus_dm'] = ta.MINUS_DM(high, low, timeperiod=14)
    df['plus_di'] = ta.PLUS_DI(high, low, close, timeperiod=14)
    df['plus_dm'] = ta.PLUS_DM(high, low, timeperiod=14)
    df['rocp'] = ta.ROCP(close, timeperiod=10)
    df['rsi'] = ta.RSI(close, timeperiod=14)
    df['ema'] = ta.EMA(close, timeperiod=23)
    df['kama'] = ta.KAMA(close, timeperiod=20)
    df['atr'] = ta.ATR(high, low, close, timeperiod=14)
    df['beta'] = ta.BETA(high, low, timeperiod=5)
    df['correl'] = ta.CORREL(high, low, timeperiod=30)
    df['stddev'] = ta.STDDEV(close, timeperiod=5, nbdev=1)
    df['var'] = ta.VAR(close, timeperiod=5, nbdev=1)
    df['slope'] = ta.LINEARREG_ANGLE(close, timeperiod=14)

    indicator_columns = [
        'adx', 'macd', 'macdsignal', 'macdhist',
        'minus_di', 'minus_dm', 'plus_di', 'plus_dm',
        'rocp', 'rsi', 'ema', 'kama', 'atr',
        'beta', 'correl', 'stddev', 'var', 'slope',
    ]
    # Scale every indicator by its own maximum. 'var' was previously the only
    # column left unscaled, which made it inconsistent with the other 17.
    for column in indicator_columns:
        df[column] = df[column] / df[column].max()

    return df.iloc[warmup:]

In [4]:
# Load the Apple (AAPL) price history, order it by date, and append the
# 18 technical-indicator features in one pipeline.
df = add_indicators(
    pd.read_csv('./data/AAPL.csv').sort_values('Date')
)

In [8]:
# Build the environment for the Apple data set, then create the Reinforcement Learning agent on it.
# DummyVecEnv receives a list of zero-argument factories; the lambda looks up the global `df` when it is called.
env = DummyVecEnv([lambda: StockTradingEnv(df)])
model = PPO2(MlpPolicy, env, verbose=1)

Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [9]:
# Train the agent for 1000 timesteps on the Apple environment.
# (Originally described as "randomized 1000 days of Apple price action";
# whether one timestep equals one trading day depends on StockTradingEnv - TODO confirm.)
model.learn(total_timesteps=1000)

  self.cost_basis = (prev_cost + additional_cost) / (self.shares_held + shares_bought)


--------------------------------------
| approxkl           | 0.0008568985  |
| clipfrac           | 0.0           |
| explained_variance | -0.595        |
| fps                | 69            |
| n_updates          | 1             |
| policy_entropy     | 2.8364315     |
| policy_loss        | -0.0060669137 |
| serial_timesteps   | 128           |
| time_elapsed       | 0             |
| total_timesteps    | 128           |
| value_loss         | 0.001540688   |
--------------------------------------
--------------------------------------
| approxkl           | 8.457717e-05  |
| clipfrac           | 0.0           |
| explained_variance | -8.47         |
| fps                | 95            |
| n_updates          | 2             |
| policy_entropy     | 2.8339393     |
| policy_loss        | -0.0004092008 |
| serial_timesteps   | 256           |
| time_elapsed       | 1.84          |
| total_timesteps    | 256           |
| value_loss         | 0.0017982727  |
-------------------------

<stable_baselines.ppo2.ppo2.PPO2 at 0x2373bcf67f0>

In [21]:
# Test the agent on Apple's environment: 200 steps, rendering the account state after every step.
# Author's recorded result: 97% return on the base investment on a randomly selected part of the
# environment over 200 trading days.
# NOTE(review): `done` is never checked here; DummyVecEnv is expected to reset the wrapped env
# automatically when an episode ends - confirm.
# NOTE(review): predict() is called without deterministic=True, so actions are presumably sampled
# and results can differ between runs - confirm against the stable-baselines version in use.
obs = env.reset()
for i in range(200):
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()

Step: 399
Balance: 6261.647604371587
Shares held: 66 (Total sold: 0)
Avg cost for held shares: 56.641702964066866 (Total sales value: 0)
Net worth: 10000.0 (Max net worth: 10000)
Profit: 0.0
Step: 400
Balance: 2573.6898153441034
Shares held: 133 (Total sold: 0)
Avg cost for held shares: 55.83691868162328 (Total sales value: 0)
Net worth: 9894.561247294183 (Max net worth: 10000)
Profit: -105.43875270581702
Step: 401
Balance: 2573.6898153441034
Shares held: 133 (Total sold: 0)
Avg cost for held shares: 55.83691868162328 (Total sales value: 0)
Net worth: 9837.17242906584 (Max net worth: 10000)
Profit: -162.82757093415967
Step: 402
Balance: 2141.095822048106
Shares held: 141 (Total sold: 0)
Avg cost for held shares: 55.73690906348861 (Total sales value: 0)
Net worth: 9765.564953890054 (Max net worth: 10000)
Profit: -234.4350461099457
Step: 403
Balance: 2141.095822048106
Shares held: 141 (Total sold: 0)
Avg cost for held shares: 55.73690906348861 (Total sales value: 0)
Net worth: 9809.56004

  self.cost_basis = (prev_cost + additional_cost) / (self.shares_held + shares_bought)


Step: 439
Balance: 4729.264050342529
Shares held: 113 (Total sold: 186)
Avg cost for held shares: 59.60024348369819 (Total sales value: 11985.677593744535)
Net worth: 11457.674576659922 (Max net worth: 13744.11000341239)
Profit: 1457.6745766599215
Step: 440
Balance: 4729.264050342529
Shares held: 113 (Total sold: 186)
Avg cost for held shares: 59.60024348369819 (Total sales value: 11985.677593744535)
Net worth: 11501.345448995453 (Max net worth: 13744.11000341239)
Profit: 1501.345448995453
Step: 441
Balance: 4729.264050342529
Shares held: 113 (Total sold: 186)
Avg cost for held shares: 59.60024348369819 (Total sales value: 11985.677593744535)
Net worth: 11720.028990817938 (Max net worth: 13744.11000341239)
Profit: 1720.0289908179384
Step: 442
Balance: 4729.264050342529
Shares held: 113 (Total sold: 186)
Avg cost for held shares: 59.60024348369819 (Total sales value: 11985.677593744535)
Net worth: 11843.387846097874 (Max net worth: 13744.11000341239)
Profit: 1843.387846097874
Step: 443


Step: 482
Balance: 35.37559409596906
Shares held: 182 (Total sold: 186)
Avg cost for held shares: 62.79514269178273 (Total sales value: 11985.677593744535)
Net worth: 17888.58185642813 (Max net worth: 17888.58185642813)
Profit: 7888.581856428129
Step: 483
Balance: 35.37559409596906
Shares held: 182 (Total sold: 186)
Avg cost for held shares: 62.79514269178273 (Total sales value: 11985.677593744535)
Net worth: 18457.692240965873 (Max net worth: 18457.692240965873)
Profit: 8457.692240965873
Step: 484
Balance: 35.37559409596906
Shares held: 182 (Total sold: 186)
Avg cost for held shares: 62.79514269178273 (Total sales value: 11985.677593744535)
Net worth: 19044.06082141278 (Max net worth: 19044.06082141278)
Profit: 9044.060821412779
Step: 485
Balance: 35.37559409596906
Shares held: 182 (Total sold: 186)
Avg cost for held shares: 62.79514269178273 (Total sales value: 11985.677593744535)
Net worth: 20756.47765234294 (Max net worth: 20756.47765234294)
Profit: 10756.47765234294
Step: 486
Bala

Step: 535
Balance: 267.28662724201155
Shares held: 166 (Total sold: 424)
Avg cost for held shares: 101.29608206882719 (Total sales value: 35436.79324774556)
Net worth: 19506.09347176723 (Max net worth: 21428.26761087693)
Profit: 9506.093471767232
Step: 536
Balance: 267.28662724201155
Shares held: 166 (Total sold: 424)
Avg cost for held shares: 101.29608206882719 (Total sales value: 35436.79324774556)
Net worth: 19477.031818384603 (Max net worth: 21428.26761087693)
Profit: 9477.031818384603
Step: 537
Balance: 37.09040716856768
Shares held: 168 (Total sold: 424)
Avg cost for held shares: 101.46039192558786 (Total sales value: 35436.79324774556)
Net worth: 19373.572893337852 (Max net worth: 21428.26761087693)
Profit: 9373.572893337852
Step: 538
Balance: 37.09040716856768
Shares held: 168 (Total sold: 424)
Avg cost for held shares: 101.46039192558786 (Total sales value: 35436.79324774556)
Net worth: 19225.223044146325 (Max net worth: 21428.26761087693)
Profit: 9225.223044146325
Step: 539
B

Step: 579
Balance: 4700.938126608529
Shares held: 150 (Total sold: 592)
Avg cost for held shares: 126.19631002040066 (Total sales value: 59030.08747024562)
Net worth: 23677.032185405507 (Max net worth: 23825.276056624563)
Profit: 13677.032185405507
Step: 580
Balance: 4700.938126608529
Shares held: 150 (Total sold: 592)
Avg cost for held shares: 126.19631002040066 (Total sales value: 59030.08747024562)
Net worth: 23040.20385403938 (Max net worth: 23825.276056624563)
Profit: 13040.20385403938
Step: 581
Balance: 6.550655402872508
Shares held: 188 (Total sold: 592)
Avg cost for held shares: 125.65869135247742 (Total sales value: 59030.08747024562)
Net worth: 23231.414986630858 (Max net worth: 23825.276056624563)
Profit: 13231.414986630858
Step: 582
Balance: 6.550655402872508
Shares held: 188 (Total sold: 592)
Avg cost for held shares: 125.65869135247742 (Total sales value: 59030.08747024562)
Net worth: 22260.995326029217 (Max net worth: 23825.276056624563)
Profit: 12260.995326029217
Step: 

In [22]:
# Save the agent that learnt on the Apple environment to "model/base_model".
# NOTE(review): assumes the "model/" directory already exists - confirm before a fresh run.
model.save("model/base_model")

In [23]:
##############################################Fun Part Starts here#########################################

In [25]:
# Prepare the Google (GOOG) data that the Apple-trained agent will be tested on:
# load the CSV, order it by date, and append the same technical indicators.
test_df = add_indicators(
    pd.read_csv('./data/GOOG.csv').sort_values('Date')
)

In [27]:
# Create the test environment for the Google data.
# The lambda looks up the global `test_df` when DummyVecEnv calls it.
test_env = DummyVecEnv([lambda: StockTradingEnv(test_df)])

In [28]:
# Load the model/agent that was trained on the Apple data environment and attach it to the Google test environment.
del model #delete any older model that is in memory (raises NameError if `model` is not defined in this kernel)
model = PPO2.load("model/base_model", env=test_env)

In [30]:
#transfer learning - un-comment the line below if you want your agent to also learn from Google's price action.
#leave it commented out if you want to see how the agent performs on Google using only what it has learnt from Apple.
# model.learn(total_timesteps=1000)

In [31]:
# Test the agent on the Google environment: 200 steps, rendering the account state after every step.
# Author's recorded result: the agent trained on Apple's data performed well on Google,
# with a 10.27% return over 200 trading days.
# NOTE(review): `done` is never checked here; DummyVecEnv is expected to reset the wrapped env
# automatically when an episode ends - confirm.
# NOTE(review): predict() is called without deterministic=True, so actions are presumably sampled
# and results can differ between runs - confirm against the stable-baselines version in use.
obs = test_env.reset()
for i in range(200):
    action, _states = model.predict(obs)
    obs, rewards, done, info = test_env.step(action)
    test_env.render()

Step: 1377
Balance: 2875.7330508567766
Shares held: 27 (Total sold: 0)
Avg cost for held shares: 263.8617388571564 (Total sales value: 0)
Net worth: 10000.0 (Max net worth: 10000)
Profit: 0.0
Step: 1378
Balance: 2875.7330508567766
Shares held: 27 (Total sold: 0)
Avg cost for held shares: 263.8617388571564 (Total sales value: 0)
Net worth: 10039.292887473122 (Max net worth: 10039.292887473122)
Profit: 39.29288747312239
Step: 1379
Balance: 2875.7330508567766
Shares held: 27 (Total sold: 0)
Avg cost for held shares: 263.8617388571564 (Total sales value: 0)
Net worth: 10112.42869211335 (Max net worth: 10112.42869211335)
Profit: 112.42869211334983
Step: 1380
Balance: 2875.7330508567766
Shares held: 27 (Total sold: 0)
Avg cost for held shares: 263.8617388571564 (Total sales value: 0)
Net worth: 10058.91765352727 (Max net worth: 10112.42869211335)
Profit: 58.917653527270886
Step: 1381
Balance: 2875.7330508567766
Shares held: 27 (Total sold: 0)
Avg cost for held shares: 263.8617388571564 (Tota

Step: 1417
Balance: 156.18757122219176
Shares held: 37 (Total sold: 16)
Avg cost for held shares: 267.8185134402578 (Total sales value: 4309.290742149865)
Net worth: 10635.54949599697 (Max net worth: 10937.356467640642)
Profit: 635.5494959969692
Step: 1418
Balance: 156.18757122219176
Shares held: 37 (Total sold: 16)
Avg cost for held shares: 267.8185134402578 (Total sales value: 4309.290742149865)
Net worth: 10564.14694373782 (Max net worth: 10937.356467640642)
Profit: 564.14694373782
Step: 1419
Balance: 156.18757122219176
Shares held: 37 (Total sold: 16)
Avg cost for held shares: 267.8185134402578 (Total sales value: 4309.290742149865)
Net worth: 10545.039313580504 (Max net worth: 10937.356467640642)
Profit: 545.0393135805043
Step: 1420
Balance: 156.18757122219176
Shares held: 37 (Total sold: 16)
Avg cost for held shares: 267.8185134402578 (Total sales value: 4309.290742149865)
Net worth: 10615.176195719223 (Max net worth: 10937.356467640642)
Profit: 615.176195719223
Step: 1421
Balanc

  self.cost_basis = (prev_cost + additional_cost) / (self.shares_held + shares_bought)


Step: 1438
Balance: 9907.564950255448
Shares held: 0 (Total sold: 53)
Avg cost for held shares: 0 (Total sales value: 14060.66812118312)
Net worth: 9907.564950255448 (Max net worth: 11080.141122784064)
Profit: -92.43504974455209
Step: 1439
Balance: 103.24076361098923
Shares held: 39 (Total sold: 53)
Avg cost for held shares: 251.39292786267842 (Total sales value: 14060.66812118312)
Net worth: 9907.564950255448 (Max net worth: 11080.141122784064)
Profit: -92.43504974455209
Step: 1440
Balance: 9690.45672747733
Shares held: 0 (Total sold: 92)
Avg cost for held shares: 0 (Total sales value: 23647.88408504946)
Net worth: 9690.45672747733 (Max net worth: 11080.141122784064)
Profit: -309.5432725226692
Step: 1441
Balance: 9690.45672747733
Shares held: 0 (Total sold: 92)
Avg cost for held shares: 0 (Total sales value: 23647.88408504946)
Net worth: 9690.45672747733 (Max net worth: 11080.141122784064)
Profit: -309.5432725226692
Step: 1442
Balance: 3603.488914494227
Shares held: 24 (Total sold: 92

Step: 1480
Balance: 184.50725562249045
Shares held: 38 (Total sold: 128)
Avg cost for held shares: 248.5156282889114 (Total sales value: 32717.57989406405)
Net worth: 8569.410109599765 (Max net worth: 11080.141122784064)
Profit: -1430.5898904002352
Step: 1481
Balance: 184.50725562249045
Shares held: 38 (Total sold: 128)
Avg cost for held shares: 248.5156282889114 (Total sales value: 32717.57989406405)
Net worth: 8639.883647988638 (Max net worth: 11080.141122784064)
Profit: -1360.1163520113623
Step: 1482
Balance: 184.50725562249045
Shares held: 38 (Total sold: 128)
Avg cost for held shares: 248.5156282889114 (Total sales value: 32717.57989406405)
Net worth: 8781.91775505168 (Max net worth: 11080.141122784064)
Profit: -1218.0822449483203
Step: 1483
Balance: 184.50725562249045
Shares held: 38 (Total sold: 128)
Avg cost for held shares: 248.5156282889114 (Total sales value: 32717.57989406405)
Net worth: 9058.953290196296 (Max net worth: 11080.141122784064)
Profit: -941.0467098037043
Step: 

Step: 1523
Balance: 17.059402669945825
Shares held: 38 (Total sold: 224)
Avg cost for held shares: 244.89097204153907 (Total sales value: 56047.56819757664)
Net worth: 8919.482433329555 (Max net worth: 11080.141122784064)
Profit: -1080.5175666704454
Step: 1524
Balance: 17.059402669945825
Shares held: 38 (Total sold: 224)
Avg cost for held shares: 244.89097204153907 (Total sales value: 56047.56819757664)
Net worth: 8809.088635419059 (Max net worth: 11080.141122784064)
Profit: -1190.9113645809412
Step: 1525
Balance: 17.059402669945825
Shares held: 38 (Total sold: 224)
Avg cost for held shares: 244.89097204153907 (Total sales value: 56047.56819757664)
Net worth: 8825.232367576378 (Max net worth: 11080.141122784064)
Profit: -1174.7676324236218
Step: 1526
Balance: 17.059402669945825
Shares held: 38 (Total sold: 224)
Avg cost for held shares: 244.89097204153907 (Total sales value: 56047.56819757664)
Net worth: 9049.45318524206 (Max net worth: 11080.141122784064)
Profit: -950.5468147579395
St

Step: 1569
Balance: 101.11126423858514
Shares held: 37 (Total sold: 267)
Avg cost for held shares: 257.1204722234759 (Total sales value: 66859.69646592243)
Net worth: 11697.448968381721 (Max net worth: 11697.448968381721)
Profit: 1697.448968381721
Step: 1570
Balance: 101.11126423858514
Shares held: 37 (Total sold: 267)
Avg cost for held shares: 257.1204722234759 (Total sales value: 66859.69646592243)
Net worth: 11574.06681415353 (Max net worth: 11697.448968381721)
Profit: 1574.0668141535298
Step: 1571
Balance: 101.11126423858514
Shares held: 37 (Total sold: 267)
Avg cost for held shares: 257.1204722234759 (Total sales value: 66859.69646592243)
Net worth: 11485.738841186847 (Max net worth: 11697.448968381721)
Profit: 1485.738841186847
Step: 1572
Balance: 101.11126423858514
Shares held: 37 (Total sold: 267)
Avg cost for held shares: 257.1204722234759 (Total sales value: 66859.69646592243)
Net worth: 11403.021018766398 (Max net worth: 11697.448968381721)
Profit: 1403.021018766398
Step: 15