In [1]:
import pandas as pd
import numpy as np
import random
from collections import deque
import json
import talib
import gym
from gym import spaces
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler()
import warnings
warnings.filterwarnings('ignore')

import plotly.offline as pyo
import plotly.graph_objects as go
from plotly.subplots import make_subplots

pyo.init_notebook_mode(connected=True)

import tensorflow as tf
tf.get_logger().setLevel('ERROR')

class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym.

    The agent trades a single instrument described by a price frame with
    ``Open``, ``High``, ``Low``, ``Close``, ``Adj Close`` and ``Volume``
    columns.

    Each observation is a ``(lookback_window_size, 10)`` array: the five
    market features (``CloseLogDiff``, ``Return_5``, ``MACD``, ``ATR``,
    ``STOCH``) next to the five account values (balance, net worth, shares
    bought, shares sold, shares held) for the last ``lookback_window_size``
    steps.

    An action is a pair ``(action_type, action_percent)``:
    ``action_type < 1`` holds, ``1 <= action_type < 2`` buys and
    ``2 <= action_type < 3`` sells. ``action_percent`` is the fraction of the
    cash balance to spend (buy) or of the held shares to sell.
    """
    metadata = {'render.modes': ['cli', 'gui']}

    # Raw price columns copied verbatim into every trade record.
    _PRICE_COLUMNS = ('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume')
    # Engineered columns that make up the market half of an observation.
    _FEATURE_COLUMNS = ('CloseLogDiff', 'Return_5', 'MACD', 'ATR', 'STOCH')
    # Number of account values stored per step in ``orders_history``.
    _ORDER_FIELDS = 5

    def preprocess_data(self, df):
        """Build the feature frame used for observations.

        A deep copy of the raw input is stored in ``self.df_orig``. The
        features are computed on a second copy, so the caller's frame is
        never modified.

        Args:
            df: price frame with at least ``Close``, ``High`` and ``Low``.

        Returns:
            A new DataFrame holding the input columns plus ``CloseLogDiff``,
            ``Return_5``, ``MACD``, ``ATR`` and ``STOCH``, back-filled.
        """
        self.df_orig = df.copy(deep=True)
        features = df.copy(deep=True)

        log_close = np.log(features.Close)
        features["CloseLogDiff"] = log_close - log_close.shift()
        features['Return_5'] = features.Close.pct_change(5)
        # talib.MACD returns (macd, signal, histogram); index 1 is the signal line.
        features['MACD'] = talib.MACD(features.Close)[1]
        features['ATR'] = talib.ATR(features.High, features.Low, features.Close)
        slowk, slowd = talib.STOCH(features.High, features.Low, features.Close)
        features['STOCH'] = slowd - slowk
        # The first rows have no lagged values yet; fill them from the next
        # valid row. (bfill() replaces the deprecated fillna(method='bfill').)
        return features.bfill()

    def __init__(self, df, initial_balance=10000, lookback_window_size=30):
        """Create the environment.

        Args:
            df: raw price frame (see class docstring for required columns).
            initial_balance: cash available at the start of every episode.
            lookback_window_size: number of past steps in one observation.
        """
        processed = self.preprocess_data(df)

        # Drop incomplete rows from BOTH frames with the same mask so that
        # position ``i`` in ``self.df`` and ``self.df_orig`` is the same bar.
        complete_rows = processed.notna().all(axis=1).values
        self.df = processed[complete_rows].reset_index()
        self.df_orig = self.df_orig[complete_rows]

        # Index of the last usable row.
        self.df_total_steps = len(self.df) - 1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size

        # action[0] in [0, 3]: <1 hold, <2 buy, <3 sell; action[1] in [0, 1]: fraction to trade.
        self.action_space = spaces.Box(low=np.array([0,0]), high=np.array([3,1]), dtype=np.float16)

        # Orders history contains the balance, net_worth, stock_bought, stock_sold, stock_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the engineered feature values for the last lookback_window_size steps
        self.market_history = deque(maxlen=self.lookback_window_size)

        # State size covers Market + Orders history, i.e. 5 + 5 columns per step.
        self.state_size = (self.lookback_window_size,
                           len(self._FEATURE_COLUMNS) + self._ORDER_FIELDS)

        # NOTE(review): the declared [0, 1] bounds do not match the
        # unnormalised values actually returned (e.g. balance) - confirm
        # whether observations are meant to be scaled.
        self.observation_space = spaces.Box(low=0, high=1, shape=self.state_size, dtype=np.float16)

    def _date_label(self, step):
        """Return the index label of row ``step`` as text, without a midnight time part."""
        label = str(self.df_orig.index[step])
        midnight = " 00:00:00"
        # rstrip("00:00:00") would strip any trailing '0'/':' characters and
        # mangle intraday timestamps; remove the exact suffix instead.
        if label.endswith(midnight):
            label = label[:-len(midnight)]
        return label

    def _market_row(self, step):
        """Return the feature values of row ``step`` in observation order."""
        return [self.df.loc[step, column] for column in self._FEATURE_COLUMNS]

    def _account_row(self):
        """Return the current account values in observation order."""
        return [self.balance, self.net_worth, self.stock_bought, self.stock_sold, self.stock_held]

    def _record_trade(self, price, shares, trade_type):
        """Append one trade record for the current step to ``self.trades``."""
        step = self.current_step
        record = {'Date': self._date_label(step)}
        for column in self._PRICE_COLUMNS:
            # .iloc: positional lookup, independent of the index type.
            record[column] = self.df_orig[column].iloc[step]
        record['Net Worth'] = self.balance + self.stock_held * price
        record['Current Price'] = price
        record['Shares'] = shares
        record['Type'] = trade_type
        self.trades.append(record)

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size = 201):
        """Start a new episode and return the first observation.

        Args:
            env_steps_size: if positive, the episode covers a randomly placed
                window of this many steps (training); otherwise it runs from
                the first usable row to the end of the data (testing).

        Returns:
            Array of shape ``(lookback_window_size, 10)``.

        Raises:
            ValueError: if the data is too short for the requested episode.
        """
        self.display_reward = 0
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.stock_held = 0
        self.stock_sold = 0
        self.stock_bought = 0

        if env_steps_size > 0: # used for training dataset
            latest_start = self.df_total_steps - env_steps_size - 1
            if latest_start < self.lookback_window_size:
                raise ValueError(
                    'Not enough rows (%d) for lookback_window_size=%d and env_steps_size=%d'
                    % (len(self.df), self.lookback_window_size, env_steps_size))
            self.start_step = random.randint(self.lookback_window_size, latest_start)
            self.end_step = self.start_step + env_steps_size

        else: # used for testing dataset
            if self.lookback_window_size >= self.df_total_steps:
                raise ValueError(
                    'Not enough rows (%d) for lookback_window_size=%d'
                    % (len(self.df), self.lookback_window_size))
            self.start_step = self.lookback_window_size
            # step() finishes once current_step exceeds end_step, so the last
            # row it reads is end_step + 1; keep that inside the data.
            self.end_step = self.df_total_steps - 1

        self.current_step = self.start_step

        # Pre-fill both histories with the window that ends at the start step.
        for i in reversed(range(self.lookback_window_size)):
            self.orders_history.append(self._account_row())
            self.market_history.append(self._market_row(self.current_step - i))

        state = np.concatenate((self.market_history, self.orders_history), axis=1)
        self.trades = []
        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        """Push the current step's features and return the stacked observation."""
        self.market_history.append(self._market_row(self.current_step))
        obs = np.concatenate((self.market_history, self.orders_history), axis=1)
        return obs

    # Execute one time step within the environment
    def step(self, action):
        """Advance one bar and apply ``action``.

        Args:
            action: ``(action_type, action_percent)`` as described on the class.

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is the integer
            profit relative to the initial balance and ``info`` holds the
            current step and the list of trade records of this episode.
        """
        self.stock_bought = 0
        self.stock_sold = 0
        self.current_step += 1

        action_type = action[0]
        action_percent = action[1]

        # Set the current price to a random price between open and close
        current_price = random.uniform(
            self.df.loc[self.current_step, 'Open'],
            self.df.loc[self.current_step, 'Close'])

        if action_type < 1: # Hold
            self._record_trade(current_price, self.stock_held, "Hold")

        elif action_type < 2: # Buy with % of current balance
            shares = int((self.balance * action_percent) / current_price)
            # The feasibility check is nested so that an unaffordable buy is
            # a no-op and cannot fall through into the sell branch.
            if shares >= 1:
                self.stock_bought = shares
                self.balance -= shares * current_price
                self.stock_held += shares
                self._record_trade(current_price, shares, "Buy")

        elif action_type < 3: # Sell % of current stock held
            shares = int(self.stock_held * action_percent)
            if shares >= 1:
                self.stock_sold = shares
                self.balance += shares * current_price
                self.stock_held -= shares
                self._record_trade(current_price, shares, "Sell")

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.stock_held * current_price

        self.orders_history.append(self._account_row())

        # Calculate reward
        reward = int((self.net_worth - self.initial_balance))
        self.display_reward = reward

        # Episode ends when half of the capital is lost or the window is used up.
        done = bool(self.net_worth <= self.initial_balance / 2
                    or self.current_step > self.end_step)

        obs = self._next_observation()

        info = {'Step': self.current_step, 'Trades' : self.trades}

        return obs, reward, done, info

    # render environment
    def render(self, mode='cli'):
        """Print a one-line summary of the current step (``mode='cli'``).

        ``mode='gui'`` is accepted but does nothing.
        """
        if mode == 'cli':
            self.profit = self.net_worth - self.initial_balance
            print('Step:%5d, Date: %10s, Reward:%4d, Stock [Held: %3d, Bought: %3d, Sold :%3d], NetWorth: %5.2f, Profit: %5.2f' %
                      (self.current_step, self._date_label(self.current_step),
                       self.display_reward, self.stock_held, self.stock_bought, self.stock_sold, self.net_worth, self.profit))
        if mode == 'gui':
            pass


In [2]:
# Change the working directory to the current path (of the .ipynb and the csv)
import os
import sys
import math
os.chdir(sys.path[0])

# Load the raw price history, ordered by its 'Date' column.
df = pd.read_csv('./KAJARIACER.csv').sort_values('Date')

In [3]:
# Parse "Date" into datetimes, order the rows chronologically and use the
# dates as the DataFrame index.
df = (
    df.assign(Date=pd.to_datetime(df['Date']))
      .sort_values('Date', ascending=True)
      .set_index('Date')
)

In [4]:
# Preview the first rows of the date-indexed price frame.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,26.049999,28.200001,26.049999,27.0,23.964306,877408
2010-01-05,27.125,28.200001,26.875,27.125,24.075254,508398
2010-01-06,27.5,28.4,26.625,26.825001,23.808989,402216
2010-01-07,27.25,27.549999,26.85,27.1,24.053064,478980
2010-01-08,27.25,28.025,26.525,26.700001,23.69804,366642


In [5]:
def Random_games(env, n_steps=10):
    """Drive ``env`` with randomly sampled actions as a smoke test.

    The environment is reset, then rendered and stepped ``n_steps`` times
    with actions drawn from ``env.action_space.sample()``. When a step
    reports the episode as finished the environment is reset before the next
    step, so a finished episode is never stepped again. ``env.close()`` is
    called at the end.

    Args:
        env: object exposing ``reset``, ``render``, ``step``, ``close`` and
            an ``action_space`` with ``sample()``.
        n_steps: number of random actions to take (default 10).
    """
    env.reset()
    for _ in range(n_steps):
        env.render()
        action = env.action_space.sample()
        # step() returns (obs, reward, done, info); only ``done`` is needed here.
        done = env.step(action)[2]
        if done:
            env.reset()
    env.close()

In [6]:
import talib
import pandas as pd


def diff(x, y):
    """Return ``x - y``."""
    return x - y


def abs_diff(x, y):
    """Return ``abs(x - y)``."""
    return abs(x - y)


# Indicator specs as (output column name, TA-Lib function, input column names).
indicators = [('RSI', talib.STOCHRSI, ['Close'])]


def add_indicators(df, indicator_specs=None) -> pd.DataFrame:
    """Add technical-indicator columns to ``df`` in place and return it.

    Args:
        df: frame containing every input column named in the specs.
        indicator_specs: iterable of ``(name, func, arg_names)``; ``func`` is
            called with ``df[arg_name]`` for each entry of ``arg_names``.
            Defaults to the module-level ``indicators`` list.

    Returns:
        The same ``df`` object, with the new columns added and missing
        values back-filled.

    A function that returns a single output is stored in column ``name``.
    A function that returns several outputs as a tuple or list (as
    ``talib.STOCHRSI`` does) cannot be assigned to one column, so each output
    is stored as ``name_0``, ``name_1``, ...
    """
    if indicator_specs is None:
        indicator_specs = indicators

    for name, f, arg_names in indicator_specs:
        args = [df[arg_name] for arg_name in arg_names]
        result = f(*args)
        if isinstance(result, (tuple, list)):
            for position, output in enumerate(result):
                df['%s_%d' % (name, position)] = output
        else:
            df[name] = result

    # bfill() replaces the deprecated fillna(method='bfill').
    df.bfill(inplace=True)
    return df

In [7]:
import gym
import json
import datetime as dt
from stable_baselines.common.policies import MlpLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
# from StockTradingEnv import StockTradingEnv

In [8]:
# Chronological split: train = 70%, validation = 20%, test = the remaining ~10%.
n_rows = len(df.index)
train_range = math.floor(n_rows*.7)
val_range = math.floor(n_rows*.2)
# The test split takes whatever is left, so rows lost to flooring the three
# fractions independently are not silently dropped from the end of the data.
test_range = n_rows - train_range - val_range

# Train DataFrame
train_df = df[:train_range].sort_index()

# Validation DataFrame
val_df = df[train_range:train_range + val_range].sort_index()

# Test DataFrame
test_df = df[(train_range + val_range):].sort_index()

In [9]:
import optuna
from optuna import Trial, visualization

from optuna.samplers import TPESampler

In [10]:
def optimize_ppo2_lstm(trial):
    """Sample one set of PPO2 hyper-parameters from ``trial``.

    ``n_steps`` and ``noptepochs`` are drawn log-uniformly and truncated to
    ``int``; ``cliprange`` and ``lam`` are drawn uniformly; the remaining
    values are drawn log-uniformly.

    Returns:
        dict of keyword arguments keyed by parameter name.
    """
    log_uniform = trial.suggest_loguniform
    uniform = trial.suggest_uniform

    sampled = {}
    sampled['n_steps'] = int(log_uniform('n_steps', 16, 2048))
    sampled['gamma'] = log_uniform('gamma', 0.9, 0.9999)
    sampled['learning_rate'] = log_uniform('learning_rate', 1e-5, 1.)
    sampled['ent_coef'] = log_uniform('ent_coef', 1e-8, 1e-1)
    sampled['cliprange'] = uniform('cliprange', 0.1, 0.4)
    sampled['noptepochs'] = int(log_uniform('noptepochs', 1, 48))
    sampled['lam'] = uniform('lam', 0.8, 1.)
    return sampled

In [11]:
def objective_ppo2_lstm(trial):
    """Optuna objective: train PPO2 (LSTM policy) and score it on validation data.

    A model is trained on ``train_df`` for ``len(train_df)`` timesteps with
    the hyper-parameters sampled from ``trial``, then rolled out on
    ``val_df`` for ``len(val_df)`` steps.

    Returns:
        The negated mean validation reward, so that a minimising study
        favours higher rewards.
    """
    agent_params = optimize_ppo2_lstm(trial)

    train_env = DummyVecEnv([lambda: StockTradingEnv(train_df)])
    model = PPO2(MlpLstmPolicy, train_env, nminibatches=1, **agent_params,  tensorboard_log="./tensorboard/PPO2_LSTM_Test")

    model.learn(total_timesteps=len(train_df))

    rewards = []

    val_env = DummyVecEnv([lambda: StockTradingEnv(val_df)])
    obs = val_env.reset()

    # A recurrent policy must be given its previous LSTM state and the
    # per-env "episode just ended" mask on every call; otherwise predict()
    # starts from the initial state each time and the memory is never used.
    state = None
    done = [False for _ in range(val_env.num_envs)]

    for _ in range(len(val_df)):
        action, state = model.predict(obs, state=state, mask=done, deterministic=False)
        obs, reward, done, info = val_env.step(action)
        rewards.append(reward)

    return -np.mean(rewards)

In [12]:
# Start from a clean study. delete_study raises KeyError when no study with
# this name exists yet (e.g. the first run against a fresh params.db), which
# is fine here because a new study is created right below.
try:
    optuna.delete_study(study_name='PPO2_LSTM_optimize_profit_test', storage='sqlite:///params.db')
except KeyError:
    pass

study_ppo2_lstm_test = optuna.create_study(study_name='PPO2_LSTM_optimize_profit_test', storage='sqlite:///params.db', 
                            load_if_exists=True, sampler=TPESampler())
# The objective returns the negated mean reward and no direction is passed
# to create_study, so the study relies on Optuna's default direction.
study_ppo2_lstm_test.optimize(objective_ppo2_lstm, n_trials=40, n_jobs=4, show_progress_bar = True)

[32m[I 2022-04-11 15:01:02,493][0m A new study created in RDB with name: PPO2_LSTM_optimize_profit_test[0m
[32m[I 2022-04-11 15:02:14,369][0m Trial 1 finished with value: -0.0 and parameters: {'n_steps': 245.00627523248428, 'gamma': 0.977723564331734, 'learning_rate': 0.5870666566018553, 'ent_coef': 5.9211562517822755e-06, 'cliprange': 0.2129550451480523, 'noptepochs': 45.9591868218829, 'lam': 0.9039011648262867}. Best is trial 1 with value: 0.0.[0m
[32m[I 2022-04-11 15:02:24,196][0m Trial 3 finished with value: -0.0 and parameters: {'n_steps': 377.0133262328042, 'gamma': 0.9512199733158527, 'learning_rate': 0.14414757055354727, 'ent_coef': 5.254451708412536e-06, 'cliprange': 0.15926094325548973, 'noptepochs': 17.336860492309423, 'lam': 0.9720644304495372}. Best is trial 1 with value: 0.0.[0m
[32m[I 2022-04-11 15:03:00,130][0m Trial 4 finished with value: 115.61991882324219 and parameters: {'n_steps': 44.78042958605413, 'gamma': 0.9434190444588576, 'learning_rate': 0.0361809

[32m[I 2022-04-11 15:11:25,395][0m Trial 24 finished with value: 729.5426635742188 and parameters: {'n_steps': 260.42728551329105, 'gamma': 0.9123670802691259, 'learning_rate': 0.0008414079322725764, 'ent_coef': 1.0353206192437349e-07, 'cliprange': 0.36424336124021783, 'noptepochs': 1.1320966982701481, 'lam': 0.8227256237447651}. Best is trial 2 with value: -729.6260375976562.[0m
[32m[I 2022-04-11 15:11:25,792][0m Trial 25 finished with value: 590.150390625 and parameters: {'n_steps': 269.2945983901756, 'gamma': 0.9137202361315576, 'learning_rate': 0.0006888620552109986, 'ent_coef': 8.430948368196741e-08, 'cliprange': 0.355728714161879, 'noptepochs': 1.028689420891211, 'lam': 0.8221678072450758}. Best is trial 2 with value: -729.6260375976562.[0m
[32m[I 2022-04-11 15:11:26,777][0m Trial 23 finished with value: 1253.3740234375 and parameters: {'n_steps': 272.58723520424655, 'gamma': 0.9864000687700071, 'learning_rate': 0.0008415479102642459, 'ent_coef': 2.2691048443243334e-05, '

In [13]:
# Show the hyper-parameters of the best trial in the study.
study_ppo2_lstm_test.best_params

{'cliprange': 0.17030840956549298,
 'ent_coef': 0.00034110977107535095,
 'gamma': 0.9617908455983807,
 'lam': 0.8440857892672741,
 'learning_rate': 8.982793251604273e-05,
 'n_steps': 1281.6623202521903,
 'noptepochs': 1.3835522178858635}

In [14]:
# Plot the objective value of each trial over the course of the study.
optuna.visualization.plot_optimization_history(study_ppo2_lstm_test)

In [15]:
# Plot the objective value against each sampled hyper-parameter.
optuna.visualization.plot_slice(study_ppo2_lstm_test)

In [16]:
# Reload the persisted study and take the parameters of its best trial.
study_PPO2_LSTM_test = optuna.load_study(study_name='PPO2_LSTM_optimize_profit_test', storage='sqlite:///params.db')
params = study_PPO2_LSTM_test.best_trial.params

train_env = DummyVecEnv([lambda: StockTradingEnv(train_df)])

# The study stores every sampled value as a float; 'n_steps' and
# 'noptepochs' are truncated back to int before being passed to PPO2.
model_params = {
    key: int(params[key]) if key in ('n_steps', 'noptepochs') else params[key]
    for key in ('n_steps', 'gamma', 'learning_rate', 'ent_coef',
                'cliprange', 'noptepochs', 'lam')
}

model = PPO2(MlpLstmPolicy, train_env, nminibatches=1, **model_params)

In [17]:
# Train the final model for 100,000 environment timesteps.
model.learn(total_timesteps=100000)

<stable_baselines.ppo2.ppo2.PPO2 at 0x214d9f6e788>

In [18]:
# Wrap a trading environment built on the held-out test frame in a one-env vector env.
test_env = DummyVecEnv([lambda: StockTradingEnv(test_df)])

In [19]:
obs = test_env.reset()

# A recurrent policy must be given its previous LSTM state and the per-env
# "episode just ended" mask on every call; otherwise predict() starts from
# the initial state each time and the memory is never used.
_states = None
done = [False for _ in range(test_env.num_envs)]

for i in range(200):
    action, _states = model.predict(obs, state=_states, mask=done, deterministic=False)
    obs, reward, done, info = test_env.step(action)
    test_env.render(mode='cli')
    if done:
        # `info` from this last step is kept for the trade analysis below.
        obs = test_env.reset()
        break

Step:   39, Date: 2019-02-19 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   40, Date: 2019-02-20 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   41, Date: 2019-02-21 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   42, Date: 2019-02-22 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   43, Date: 2019-02-25 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   44, Date: 2019-02-26 , Reward:   0, Stock [Held:   0, Bought:   0, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   45, Date: 2019-02-27 , Reward:   0, Stock [Held:   6, Bought:   6, Sold :  0], NetWorth: 10000.00, Profit:  0.00
Step:   46, Date: 2019-02-28 , Reward:  18, Stock [Held:   6, Bought:   0, Sold :  0], NetWorth: 10018.13, Profit: 18.13
Step:   47, Date: 2019-03-01 , R

Step:  126, Date: 2019-07-02 , Reward:  66, Stock [Held:  16, Bought:   0, Sold :  0], NetWorth: 10066.51, Profit: 66.51
Step:  127, Date: 2019-07-03 , Reward: -98, Stock [Held:  16, Bought:   0, Sold :  0], NetWorth: 9901.31, Profit: -98.69
Step:  128, Date: 2019-07-04 , Reward: -43, Stock [Held:  16, Bought:   0, Sold :  0], NetWorth: 9956.73, Profit: -43.27
Step:  129, Date: 2019-07-05 , Reward: -63, Stock [Held:  16, Bought:   0, Sold :  0], NetWorth: 9936.14, Profit: -63.86
Step:  130, Date: 2019-07-08 , Reward:-361, Stock [Held:   9, Bought:   0, Sold :  7], NetWorth: 9638.13, Profit: -361.87
Step:  131, Date: 2019-07-09 , Reward:-328, Stock [Held:   9, Bought:   0, Sold :  0], NetWorth: 9671.41, Profit: -328.59
Step:  132, Date: 2019-07-10 , Reward:-339, Stock [Held:   9, Bought:   0, Sold :  0], NetWorth: 9660.30, Profit: -339.70
Step:  133, Date: 2019-07-11 , Reward:-385, Stock [Held:   9, Bought:   0, Sold :  0], NetWorth: 9614.75, Profit: -385.25
Step:  134, Date: 2019-07-12

Step:  221, Date: 2019-11-25 , Reward:-535, Stock [Held:  14, Bought:   0, Sold :  0], NetWorth: 9464.38, Profit: -535.62
Step:  222, Date: 2019-11-26 , Reward:-643, Stock [Held:  14, Bought:   0, Sold :  0], NetWorth: 9356.15, Profit: -643.85
Step:  223, Date: 2019-11-27 , Reward:-645, Stock [Held:  14, Bought:   0, Sold :  0], NetWorth: 9354.52, Profit: -645.48
Step:  224, Date: 2019-11-28 , Reward:-606, Stock [Held:  14, Bought:   0, Sold :  0], NetWorth: 9393.21, Profit: -606.79
Step:  225, Date: 2019-11-29 , Reward:-574, Stock [Held:  14, Bought:   0, Sold :  0], NetWorth: 9425.32, Profit: -574.68
Step:  226, Date: 2019-12-02 , Reward:-534, Stock [Held:  14, Bought:   0, Sold :  0], NetWorth: 9465.81, Profit: -534.19
Step:  227, Date: 2019-12-03 , Reward:-637, Stock [Held:  14, Bought:   0, Sold :  0], NetWorth: 9362.30, Profit: -637.70
Step:  228, Date: 2019-12-04 , Reward:-700, Stock [Held:  14, Bought:   0, Sold :  0], NetWorth: 9299.02, Profit: -700.98
Step:  229, Date: 2019-1

In [20]:
# Build a frame from the trade records of the last rollout step's info dict.
trades = pd.DataFrame(info[0].get('Trades'))
trades['Date'] = pd.to_datetime(trades['Date']) # Convert "Date" to datetime object.
# Sort the trades themselves; the price frame `df` is not the frame being prepared here.
trades = trades.sort_values('Date', ascending=True)

In [21]:
def display_trades(df, initial_balance=10000, marker_offset=10):
    """Plot trade records as a candlestick chart with a net-worth panel.

    Args:
        df: trade records with (case-insensitive) columns ``Date``, ``Open``,
            ``High``, ``Low``, ``Close``, ``Net Worth`` and ``Type``
            (``'Buy'``, ``'Sell'`` or anything else, treated as hold). If the
            index is already a ``DatetimeIndex`` the ``Date`` column is not
            needed. The frame is not modified.
        initial_balance: y-position of the dotted reference line in the
            net-worth panel.
        marker_offset: vertical distance, in price units, between a bar's
            low/high and its buy/sell/hold marker.

    Returns:
        None. The figure is shown with ``fig.show()``.

    Raises:
        ValueError: if ``df`` is empty.
    """
    if df is None or len(df) == 0:
        raise ValueError('display_trades() needs at least one trade record')

    # Work on a private copy so the caller's columns and index stay untouched.
    tempdf = df.copy(deep=True)

    # Convert the column names from mixed case to lower case
    tempdf.columns = [str(column).lower() for column in tempdf.columns]

    # Fix date: use the dates as a sorted index
    if not isinstance(tempdf.index, pd.DatetimeIndex):
        tempdf['date'] = pd.to_datetime(tempdf['date'])
        tempdf = tempdf.set_index('date').sort_index(kind='mergesort')

    # Calculate profit relative to the FIRST record (position 0)
    first_worth = tempdf['net worth'].iloc[0]
    last_worth = tempdf['net worth'].iloc[-1]
    profit = round(last_worth - first_worth, 2)
    profit_prcnt = round(((last_worth - first_worth) / first_worth) * 100 , 2)

    trade_counts = tempdf['type'].value_counts()
    trade_buy = int(trade_counts.get('Buy', 0))
    trade_sell = int(trade_counts.get('Sell', 0))
    trade_hold = int(trade_counts.get('Hold', 0))

    title_1 = 'RL Algorithm: PPO2_LSTM' + ' - ' + 'Trade Count ' + '[' + 'Buy: ' + str(trade_buy) + ' | ' + 'Sell: ' + str(trade_sell) + ' | ' + 'Hold: ' + str(trade_hold) + ']'
    title_2 = 'Initial Investment (INR): ' + str(round(first_worth,2)) + ' | ' +  'Net Worth (INR): ' + str(round(last_worth,2)) + ' | ' + 'Profit (INR): ' + str(profit) + ' | ' + 'Profit (%): ' + str(profit_prcnt)+ '%'

    # Buy/sell/hold markers, computed column-wise (no chained .iloc
    # assignment, which is unreliable and breaks under copy-on-write).
    # Sell markers sit above the bar; buy and hold markers sit below it.
    is_sell = tempdf['type'] == 'Sell'
    tempdf['marker'] = (tempdf['low'] - marker_offset).where(~is_sell, tempdf['high'] + marker_offset)
    tempdf['symbol'] = tempdf['type'].map({'Buy': 'triangle-up', 'Sell': 'triangle-down'}).fillna('cross-thin')
    tempdf['color'] = tempdf['type'].map({'Buy': 'green', 'Sell': 'red'}).fillna('blue')

    # Draw plot
    fig = make_subplots(rows=2, cols=1,
                        shared_xaxes=True,
                        row_width=[.25,.75],
                        vertical_spacing=0.05,
                        subplot_titles=(title_1, title_2))

    fig.add_trace(go.Scatter(x=tempdf.index,
                             y=tempdf['marker'],
                             mode='markers',
                             name='Buy/Sell/Hold',
                             marker=dict(size=15,symbol=tempdf["symbol"],color=tempdf["color"])),
                  row=1, col=1)

    fig.add_trace(go.Candlestick(x=tempdf.index,
                                 open=tempdf['open'],
                                 high=tempdf['high'],
                                 low=tempdf['low'],
                                 close=tempdf['close'],
                                 name='OHLC'),
                  row=1, col=1)

    fig.add_trace(go.Scatter(x=tempdf.index,
                             y=tempdf['net worth'],
                             mode='lines',
                             name='Net Worth (INR)'),
                  row=2, col=1)

    fig.add_hline(y=initial_balance,
                  name='Initial Balance',
                  line=dict(color='firebrick', width=1, dash='dot'),
                  row=2, col=1)

    fig.update_layout(xaxis_tickfont_size=12,
                      # Nested title.font form; the flat `titlefont` shorthand is deprecated.
                      yaxis=dict(title=dict(text='Price (INR/share)',
                                            font=dict(size=14)),
                                 tickfont_size=10),
                      xaxis_rangeslider_visible=False,
                      width=1000,height=600, margin=dict(l=50, r=50, b=100, t=100, pad=4),
                      template='none')

    fig.show()

In [22]:
# Plot the recorded trades and the net-worth curve.
display_trades(trades)

In [23]:
# # %load_ext tensorboard
# %reload_ext tensorboard
# %tensorboard \
#     --host localhost \
#     --port 8080\
#     --logdir ".\\tensorboard\\PPO2_LSTM_Test"

To view TensorBoard, open http://localhost:8080/ <br>
To stop it (Windows), run: `taskkill /IM "tensorboard.exe" /F`