In [1]:
import pandas as pd
import numpy as np
import random
from collections import deque
import json
import talib
import gym
from gym import spaces
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler()
import warnings
warnings.filterwarnings('ignore')

import plotly.offline as pyo
import plotly.graph_objects as go
from plotly.subplots import make_subplots

pyo.init_notebook_mode(connected=True)

import tensorflow as tf
tf.get_logger().setLevel('ERROR')

class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['cli', 'gui']}

    def preprocess_data(self, df):
        self.df_orig=df.copy(deep=True)
        logScale = pd.DataFrame(np.log(df.Close))
        df["CloseLogDiff"] = logScale - logScale.shift()
        df['Return'] = df.Close.pct_change()
        df['Return_5'] = df.Close.pct_change(5)
        df['Return_10'] = df.Close.pct_change(10)
        df['Return_15'] = df.Close.pct_change(15)
        df['RSI'] = talib.STOCHRSI(df.Close)[1]
        df['MACD'] = talib.MACD(df.Close)[1]
        df['ATR'] = talib.ATR(df.High, df.Low, df.Close)
        slowk, slowd = talib.STOCH(df.High, df.Low, df.Close)
        df['STOCH'] = slowd - slowk
        df['ULTOSC'] = talib.ULTOSC(df.High, df.Low, df.Close)
        df.fillna(method='bfill', inplace=True)
        return df
    
    def __init__(self, df, initial_balance=10000, lookback_window_size=30):
        # Define action space and state size and other custom parameters
        self.df = self.preprocess_data(df)
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df)-1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size

        # Action space from 0 to 3, 0 is hold, 1 is buy, 2 is sell
        self.action_space = spaces.Box(low=np.array([0,0]), high=np.array([3,1]), dtype=np.float16)

        # Orders history contains the balance, net_worth, stock_bought, stock_sold, stock_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)
        
        # Market history contains the OHCL values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        # State size contains Market+Orders history for the last lookback_window_size steps
        self.state_size = (self.lookback_window_size, 10)

        self.observation_space = spaces.Box(low=0, high=1, shape=(lookback_window_size,15), dtype=np.float16)
        
    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size = 201):
        self.display_reward = 0
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.stock_held = 0
        self.stock_sold = 0
        self.stock_bought = 0
        
        if env_steps_size > 0: # used for training dataset
            self.start_step = random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size - 1)
            self.end_step = self.start_step + env_steps_size
        
        else: # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps
            
        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([self.balance, self.net_worth, self.stock_bought, self.stock_sold, self.stock_held])
            self.market_history.append([self.df.loc[current_step, 'CloseLogDiff'],
                                        self.df.loc[current_step, 'Return'],
                                        self.df.loc[current_step, 'Return_5'],
                                        self.df.loc[current_step, 'Return_10'],
                                        self.df.loc[current_step, 'Return_15'],
                                        self.df.loc[current_step, 'RSI'],
                                        self.df.loc[current_step, 'MACD'],
                                        self.df.loc[current_step, 'ATR'],
                                        self.df.loc[current_step, 'STOCH'],
                                        self.df.loc[current_step, 'ULTOSC']
                                        ])

        state = np.concatenate((self.market_history, self.orders_history), axis=1)
        self.trades = []
        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([self.df.loc[self.current_step, 'CloseLogDiff'],
                                    self.df.loc[self.current_step, 'Return'],
                                    self.df.loc[self.current_step, 'Return_5'],
                                    self.df.loc[self.current_step, 'Return_10'],
                                    self.df.loc[self.current_step, 'Return_15'],
                                    self.df.loc[self.current_step, 'RSI'],
                                    self.df.loc[self.current_step, 'MACD'],
                                    self.df.loc[self.current_step, 'ATR'],
                                    self.df.loc[self.current_step, 'STOCH'],
                                    self.df.loc[self.current_step, 'ULTOSC'],
                                    ])
        obs = np.concatenate((self.market_history, self.orders_history), axis=1)
        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.stock_bought = 0
        self.stock_sold = 0
        self.current_step += 1

        action_type = action[0]
        action_percent = action[1]
        
        # Set the current price to a random price between open and close
        current_price = random.uniform(
            self.df.loc[self.current_step, 'Open'],
            self.df.loc[self.current_step, 'Close'])
        
        if action_type < 1: # Hold

            self.trades.append({'Date': str(self.df_orig.index[self.current_step]).rstrip("00:00:00"),
                                'Open': self.df_orig.Open[self.current_step],
                                'High': self.df_orig.High[self.current_step],
                                'Low': self.df_orig.Low[self.current_step],
                                'Close': self.df_orig.Close[self.current_step],
                                'Adj Close': self.df_orig['Adj Close'][self.current_step],
                                'Volume': self.df_orig.Volume[self.current_step],
                                'Net Worth': self.balance + self.stock_held * current_price,
                                'Current Price': current_price,
                                'Shares': self.stock_held,
                                'Type': "Hold"})

        elif action_type < 2 and int((self.balance * action_percent) / current_price) >= 1:
            # Buy with % of current balance
            self.stock_bought = int((self.balance * action_percent) / current_price)
            self.balance -= self.stock_bought * current_price
            self.stock_held += self.stock_bought
                        
            self.trades.append({'Date': str(self.df_orig.index[self.current_step]).rstrip("00:00:00"),
                                'Open': self.df_orig.Open[self.current_step],
                                'High': self.df_orig.High[self.current_step],
                                'Low': self.df_orig.Low[self.current_step],
                                'Close': self.df_orig.Close[self.current_step],
                                'Adj Close': self.df_orig['Adj Close'][self.current_step],
                                'Volume': self.df_orig.Volume[self.current_step],
                                'Net Worth': self.balance + self.stock_held * current_price,
                                'Current Price': current_price,
                                'Shares': self.stock_bought,
                                'Type': "Buy"})

        elif action_type < 3 and int(self.stock_held * action_percent) >= 1:
            # Sell % of current stock held
            self.stock_sold = int(self.stock_held * action_percent)
            self.balance += self.stock_sold * current_price
            self.stock_held -= self.stock_sold

            self.trades.append({'Date': str(self.df_orig.index[self.current_step]).rstrip("00:00:00"),
                                'Open': self.df_orig.Open[self.current_step],
                                'High': self.df_orig.High[self.current_step],
                                'Low': self.df_orig.Low[self.current_step],
                                'Close': self.df_orig.Close[self.current_step],
                                'Adj Close': self.df_orig['Adj Close'][self.current_step],
                                'Volume': self.df_orig.Volume[self.current_step],
                                'Net Worth': self.balance + self.stock_held * current_price,
                                'Current Price': current_price,
                                'Shares': self.stock_sold,
                                'Type': "Sell"})

            
        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.stock_held * current_price

        self.orders_history.append([self.balance, self.net_worth, self.stock_bought, self.stock_sold, self.stock_held])

        # Calculate reward
        reward = int((self.net_worth - self.initial_balance))
        self.display_reward = reward
        
        if self.net_worth <= self.initial_balance/2:
            done = True
        elif self.current_step > self.end_step:
            done = True
        else:
            done = False

        obs = self._next_observation()

        info = {'Step': self.current_step, 'Trades' : self.trades}
    
        return obs, reward, done, info

    # render environment
    def render(self, mode='cli'):
        if mode == 'cli':
            self.profit = self.net_worth - self.initial_balance
            print('Step:%5d, Date: %10s, Reward:%4d, Stock [Held: %3d, Bought: %3d, Sold :%3d], NetWorth: %5.2f, Profit: %5.2f' % 
                      (self.current_step, str(self.df_orig.index[self.current_step]).rstrip("00:00:00"),
                       self.display_reward, self.stock_held, self.stock_bought, self.stock_sold, self.net_worth, self.profit))
        if mode == 'gui':
            pass


In [2]:
# Change the working directory to the currect path (of .ipynb and csv)
import os
import sys
import math 
os.chdir(sys.path[0])  
df = pd.read_csv('./KAJARIACER.csv')
df = df.sort_values('Date')

In [3]:
df['Date'] = pd.to_datetime(df['Date']) # Convert "Date" to datetime object.
df.sort_values('Date', ascending=True, inplace=True) # Sort the "Date" Column by the values.
df.set_index('Date', inplace=True) # Set the DataFrame index using "Date" Column.

In [4]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,26.049999,28.200001,26.049999,27.0,23.964306,877408
2010-01-05,27.125,28.200001,26.875,27.125,24.075254,508398
2010-01-06,27.5,28.4,26.625,26.825001,23.808989,402216
2010-01-07,27.25,27.549999,26.85,27.1,24.053064,478980
2010-01-08,27.25,28.025,26.525,26.700001,23.69804,366642


In [5]:
def Random_games(env):
    env.reset()
    for _ in range(10):
        env.render()
        action=env.action_space.sample()
#         print(action)
        env.step(action) # take a random action
    env.close()

In [6]:
import talib
import pandas as pd


diff = lambda x, y: x - y
abs_diff = lambda x, y: abs(x - y)


indicators = [
    ('RSI', talib.STOCHRSI, ['Close'])    
]


def add_indicators(df) -> pd.DataFrame:
    for name, f, arg_names in indicators:
        wrapper = lambda func, args: func(*args)
        args = [df[arg_name] for arg_name in arg_names]
        df[name] = wrapper(f, args)
    df.fillna(method='bfill', inplace=True)
    return df

In [7]:
import gym
import json
import datetime as dt
from stable_baselines.common.policies import MlpLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
# from StockTradingEnv import StockTradingEnv

In [8]:
# Train data = 70%, Validation data = 20%, Test data = 10%  
train_range = math.floor(len(df.index)*.7)
val_range = math.floor(len(df.index)*.2)
test_range = math.floor(len(df.index)*.1)

# Train DataFrame
train_df = df[:train_range].sort_index()

# Validation DataFrame
val_df = df[train_range:train_range + val_range].sort_index()

# Test DataFrame
test_df = df[(train_range + val_range):(train_range + val_range + test_range)].sort_index()

In [9]:
import optuna
from optuna import Trial, visualization

from optuna.samplers import TPESampler

In [10]:
def optimize_ppo2_lstm(trial):
    return {
        'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)),
        'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.),
        'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1),
        'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4),
        'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)),
        'lam': trial.suggest_uniform('lam', 0.8, 1.)
    }

In [11]:
def objective_ppo2_lstm(trial):
    agent_params = optimize_ppo2_lstm(trial)
    
    train_env = DummyVecEnv([lambda: StockTradingEnv(train_df)])
    model = PPO2(MlpLstmPolicy, train_env, nminibatches=1, **agent_params,  tensorboard_log="./tensorboard/PPO2_LSTM")
    
    model.learn(total_timesteps=len(train_df))
    
    rewards, done = [], False

    val_env = DummyVecEnv([lambda: StockTradingEnv(val_df)])
    obs = val_env.reset()
    
    for i in range(len(val_df)):
        action, _states = model.predict(obs, deterministic=False)
        obs, reward, done, info = val_env.step(action)
        rewards.append(reward)
        
    return -np.mean(rewards)

In [12]:
optuna.delete_study(study_name='PPO2_LSTM_optimize_profit', storage='sqlite:///params.db')
study_ppo2_lstm = optuna.create_study(study_name='PPO2_LSTM_optimize_profit', storage='sqlite:///params.db', 
                            load_if_exists=True, sampler=TPESampler())
study_ppo2_lstm.optimize(objective_ppo2_lstm, n_trials=40, n_jobs=4, show_progress_bar = True)

[32m[I 2022-04-10 14:18:45,217][0m A new study created in RDB with name: PPO2_LSTM_optimize_profit[0m
[32m[I 2022-04-10 14:19:06,184][0m Trial 2 finished with value: -0.0 and parameters: {'n_steps': 49.14833078198954, 'gamma': 0.9311993662930048, 'learning_rate': 0.29047220081481995, 'ent_coef': 0.008321034492867743, 'cliprange': 0.1527129039097868, 'noptepochs': 3.551938924623743, 'lam': 0.9064882768879099}. Best is trial 2 with value: 0.0.[0m
[32m[I 2022-04-10 14:19:11,836][0m Trial 1 finished with value: 275.64227294921875 and parameters: {'n_steps': 26.45964920429414, 'gamma': 0.9266622398623874, 'learning_rate': 0.0030813257144308866, 'ent_coef': 0.00058395361123539, 'cliprange': 0.17019513533685288, 'noptepochs': 7.997326512272801, 'lam': 0.9805056192794488}. Best is trial 2 with value: 0.0.[0m
[32m[I 2022-04-10 14:19:21,764][0m Trial 0 finished with value: -0.0 and parameters: {'n_steps': 105.53339662629706, 'gamma': 0.9793825196002992, 'learning_rate': 0.311224926591

[32m[I 2022-04-10 14:30:29,529][0m Trial 21 finished with value: 1436.554931640625 and parameters: {'n_steps': 2029.6461808522065, 'gamma': 0.9636572264816279, 'learning_rate': 0.00012631553559658106, 'ent_coef': 1.6677922299340595e-06, 'cliprange': 0.3218861170875468, 'noptepochs': 2.168086130271857, 'lam': 0.9491466484299607}. Best is trial 16 with value: -497.1666564941406.[0m
[32m[I 2022-04-10 14:30:32,635][0m Trial 20 finished with value: 338.7520446777344 and parameters: {'n_steps': 1747.1329132231456, 'gamma': 0.9590177100290107, 'learning_rate': 0.0001096893335924526, 'ent_coef': 3.323661338030736e-07, 'cliprange': 0.3012402786936245, 'noptepochs': 1.0095682385016438, 'lam': 0.9467548627401816}. Best is trial 16 with value: -497.1666564941406.[0m
[32m[I 2022-04-10 14:33:03,103][0m Trial 24 finished with value: 1118.8028564453125 and parameters: {'n_steps': 1106.5120492847723, 'gamma': 0.9973880160486096, 'learning_rate': 1.084662973169164e-05, 'ent_coef': 1.927617664098

In [13]:
study_ppo2_lstm.best_params

{'cliprange': 0.24124006250924698,
 'ent_coef': 8.093090661092379e-07,
 'gamma': 0.9933352367035166,
 'lam': 0.9361961631390628,
 'learning_rate': 2.0429228294860593e-05,
 'n_steps': 1500.3809700861657,
 'noptepochs': 8.41902312546633}

In [14]:
optuna.visualization.plot_optimization_history(study_ppo2_lstm)

In [15]:
optuna.visualization.plot_slice(study_ppo2_lstm)

In [16]:
study = optuna.load_study(study_name='PPO2_LSTM_optimize_profit', storage='sqlite:///params.db')
params = study.best_trial.params

train_env = DummyVecEnv([lambda: StockTradingEnv(train_df)])

model_params = {
    'n_steps': int(params['n_steps']),
    'gamma': params['gamma'],
    'learning_rate': params['learning_rate'],
    'ent_coef': params['ent_coef'],
    'cliprange': params['cliprange'],
    'noptepochs': int(params['noptepochs']),
    'lam': params['lam']
}

model = PPO2(MlpLstmPolicy, train_env, nminibatches=1, **model_params)

In [17]:
model.learn(total_timesteps=100000)

<stable_baselines.ppo2.ppo2.PPO2 at 0x28dd3ea48c8>

In [18]:
test_env = DummyVecEnv([lambda: StockTradingEnv(test_df)])

In [19]:
obs = test_env.reset()

for i in range(200):
    action, _states = model.predict(obs, deterministic=False)
    obs, reward, done, info = test_env.step(action)
    test_env.render(mode='cli')
    if done:
        obs = test_env.reset()
        break

Step:   33, Date: 2019-02-08 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   34, Date: 2019-02-11 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   35, Date: 2019-02-12 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   36, Date: 2019-02-14 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   37, Date: 2019-02-15 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   38, Date: 2019-02-18 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   39, Date: 2019-02-19 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   40, Date: 2019-02-20 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   41, Date: 2019-02-21 , R

Step:  118, Date: 2019-06-20 , Reward: 398, Stock [Held:   5, Bought:   0, Sold :  0], NetWorth: 10398.53, Profit: 398.53
Step:  119, Date: 2019-06-21 , Reward: 389, Stock [Held:   5, Bought:   0, Sold :  0], NetWorth: 10389.57, Profit: 389.57
Step:  120, Date: 2019-06-24 , Reward: 311, Stock [Held:   5, Bought:   0, Sold :  0], NetWorth: 10311.42, Profit: 311.42
Step:  121, Date: 2019-06-25 , Reward: 318, Stock [Held:   5, Bought:   0, Sold :  0], NetWorth: 10318.68, Profit: 318.68
Step:  122, Date: 2019-06-26 , Reward: 328, Stock [Held:   5, Bought:   0, Sold :  0], NetWorth: 10328.14, Profit: 328.14
Step:  123, Date: 2019-06-27 , Reward: 366, Stock [Held:   5, Bought:   0, Sold :  0], NetWorth: 10366.23, Profit: 366.23
Step:  124, Date: 2019-06-28 , Reward: 355, Stock [Held:   5, Bought:   0, Sold :  0], NetWorth: 10355.37, Profit: 355.37
Step:  125, Date: 2019-07-01 , Reward: 355, Stock [Held:   5, Bought:   0, Sold :  0], NetWorth: 10356.00, Profit: 356.00
Step:  126, Date: 2019-0

Step:  212, Date: 2019-11-11 , Reward:-225, Stock [Held:  15, Bought:   0, Sold :  0], NetWorth: 9774.91, Profit: -225.09
Step:  213, Date: 2019-11-13 , Reward:-355, Stock [Held:  15, Bought:   0, Sold :  0], NetWorth: 9644.69, Profit: -355.31
Step:  214, Date: 2019-11-14 , Reward:-380, Stock [Held:  15, Bought:   0, Sold :  0], NetWorth: 9619.04, Profit: -380.96
Step:  215, Date: 2019-11-15 , Reward:-306, Stock [Held:  15, Bought:   0, Sold :  0], NetWorth: 9693.05, Profit: -306.95
Step:  216, Date: 2019-11-18 , Reward:-426, Stock [Held:  15, Bought:   0, Sold :  0], NetWorth: 9573.70, Profit: -426.30
Step:  217, Date: 2019-11-19 , Reward:-372, Stock [Held:  15, Bought:   0, Sold :  0], NetWorth: 9627.32, Profit: -372.68
Step:  218, Date: 2019-11-20 , Reward:-349, Stock [Held:   0, Bought:   0, Sold : 15], NetWorth: 9650.74, Profit: -349.26
Step:  219, Date: 2019-11-21 , Reward:-349, Stock [Held:  13, Bought:  13, Sold :  0], NetWorth: 9650.74, Profit: -349.26
Step:  220, Date: 2019-1

In [20]:
trades = pd.DataFrame(info[0].get('Trades'))
trades['Date'] = pd.to_datetime(trades['Date']) # Convert "Date" to datetime object.
df.sort_values('Date', ascending=True, inplace=True) # Sort the "Date" Column by the values.

In [21]:
def display_trades(df):
    
    # Convert the column names from mixcase to lower case
    df.columns = map(str.lower, df.columns)
    
    # Fix date
    if type(df.index) != pd.core.indexes.datetimes.DatetimeIndex:
        df['date'] = pd.to_datetime(df['date'])
        df.set_index('date',inplace=True) # Set the Date column as the index
        df = df.sort_values(['date'])
    
     # Copy the dataframe
    tempdf=df.copy(deep=True)
    
    # Calculate profit
    
    profit = round(tempdf.iloc[-1]['net worth'] - tempdf.iloc[1]['net worth'], 2)
    profit_prcnt = round(((tempdf.iloc[-1]['net worth'] - tempdf.iloc[1]['net worth']) / tempdf.iloc[1]['net worth']) * 100 , 2)
    
    trade_buy = tempdf['type'][tempdf['type']=='Buy'].count()
    trade_sell = tempdf['type'][tempdf['type']=='Sell'].count()
    trade_hold = tempdf['type'][tempdf['type']=='Hold'].count()
    
    title_1 = 'RL Algorithm: PPO2_LSTM' + ' - ' + 'Trade Count ' + '[' + 'Buy: ' + str(trade_buy) + ' | ' + 'Sell: ' + str(trade_sell) + ' | ' + 'Hold: ' + str(trade_hold) + ']'
    title_2 = 'Initial Investment (INR): ' + str(round(tempdf.iloc[1]['net worth'],2)) + ' | ' +  'Net Worth (INR): ' + str(round(tempdf.iloc[-1]['net worth'],2)) + ' | ' + 'Profit (INR): ' + str(profit) + ' | ' + 'Profit (%): ' + str(profit_prcnt)+ '%'
    
    
    # Buy sell marker
    d=10
    tempdf[['marker', 'symbol', 'color']] = ''
    for i in range(0, len(tempdf)):
        if tempdf['type'].iloc[i] == 'Buy':
            tempdf['marker'].iloc[i] = tempdf["low"].iloc[i]-d
            tempdf['symbol'].iloc[i] = 'triangle-up'
            tempdf['color'].iloc[i] = 'green'
            
        elif tempdf['type'].iloc[i] == 'Sell':
            tempdf['marker'].iloc[i] = tempdf["high"].iloc[i]+d
            tempdf['symbol'].iloc[i] = 'triangle-down'
            tempdf['color'].iloc[i] = 'red'
        
        else:
            tempdf['marker'].iloc[i] = tempdf["low"].iloc[i]-d
            tempdf['symbol'].iloc[i] = 'cross-thin'
            tempdf['color'].iloc[i] = 'blue' 

    
    # Draw plot
    fig = make_subplots(rows=2, cols=1, 
                    shared_xaxes=True, 
                    row_width=[.25,.75], 
                    vertical_spacing=0.05,
                   subplot_titles=(title_1, title_2))

   
    fig.add_trace(go.Scatter(x=tempdf.index,
                             y=tempdf.marker,
                             mode='markers',
                             name='Buy/Sell/Hold',
                             marker=dict(size=15,symbol=tempdf["symbol"],color=tempdf["color"])),
                   row=1, col=1)
                  
    fig.add_trace(go.Candlestick(x=tempdf.index, 
                                 open=tempdf['open'], 
                                 high=tempdf['high'], 
                                 low=tempdf['low'], 
                                 close=tempdf['close'],
                                 name='OHLC'),
                  row=1, col=1)
   
                  
    fig.add_trace(go.Scatter(x=tempdf.index,
                             y=tempdf['net worth'],
                             mode='lines',
                             name='Net Worth (INR)'),
                  row=2, col=1)
    
    fig.add_hline(y=10000,
                  name='Initial Balance',
                  line=dict(color='firebrick', width=1, dash='dot'),
                  row=2, col=1)

    fig.update_layout(##title='Historical stock price chart',
                      xaxis_tickfont_size=12,
                      yaxis=dict(title='Price (INR/share)',
                                 titlefont_size=14,
                                 tickfont_size=10),
                      xaxis_rangeslider_visible=False,
                      width=1000,height=600, margin=dict(l=50, r=50, b=100, t=100, pad=4),
                      template='none')

    fig.show()

In [22]:
display_trades(trades)

In [23]:
# # %load_ext tensorboard
# %reload_ext tensorboard
# %tensorboard \
#     --host localhost \
#     --port 8080\
#     --logdir ".\\tensorboard\\PPO2_LSTM"

To view: http://localhost:8080/ <br>
To kill: `taskkill /IM "tensorboard.exe" /F`