<br>

<h1 style="text-align:center;">Trading Simulation</h1>

<br>


Content:
- Step 1: [Data Gathering](#step1)
- Step 2: [Stock Selection](#step2)
- Step 3: [Stock Trading](#step3)
    - Step 3.1: [Data Cleaning & Feature Extraction (with Indicators)](#step31)
    - Step 3.2: [Simulation (or Environment)](#step32)
    - Step 3.3: [Reinforcement Learning Agent](#step33)

<img src="assets/diagram.png"/>

<br>

# 0. Import all Packages

---

In [1]:
import pandas as pd
import requests
import time
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import io 
import numpy as np
from PIL import Image
%matplotlib inline

<br>

<a id="step1"></a>

# 1. Data Gathering

---

__TODO Tasks:__

0️⃣ Set a starting date and an ending date as parameters.

0️⃣ Convert the parameter dates into epochs (in milliseconds).

0️⃣ Call the function with 1min, 5min, and daily frequency.

0️⃣ <del>Visualize the data.</del>

0️⃣ Wrap the visualization code inside a function.

0️⃣ Add the data into a SQL database.

In [2]:
# Function for fetching the minute level dataset
def fetch_minute_dataset(ticker_name = "TSLA", frequency_type="minute", frequency="1", client_id=None, timeout=30):
    """Download price candles for one ticker from the TD Ameritrade price-history API.

    Parameters
    ----------
    ticker_name : str
        Symbol inserted into the endpoint URL.
    frequency_type : str
        Sent as the ``frequencyType`` query parameter (e.g. "minute").
    frequency : str
        Sent as the ``frequency`` query parameter (e.g. "1").
    client_id : str or None
        API key. When None, the ``TDA_CLIENT_ID`` environment variable is used,
        falling back to the key that was historically embedded in this notebook.
    timeout : float
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per candle with the API's columns plus the derived, local-time
        string columns "year", "month", "day", "week day" and "hour".
        An empty frame with those columns is returned when the API sends no candles.

    Raises
    ------
    RuntimeError
        If the server answers with an HTTP error status.
    ValueError
        If the response body has no "candles" entry.
    """
    # Local import keeps this cell self-contained
    import os

    # Client ID
    if client_id is None:
        # NOTE(security): the literal below is a credential committed to the notebook.
        # It is kept only so existing calls keep working; rotate it and supply the
        # key through TDA_CLIENT_ID (or the client_id argument) instead.
        client_id = os.environ.get("TDA_CLIENT_ID", "E5AXP16J02E3JTODQW9LW7L1AH726L9X")

    # Get the current time (epoch in milliseconds, as a string)
    current_time_epoch = str(int(time.time()) * 1000)

    # Endpoint
    endpoint = r"https://api.tdameritrade.com/v1/marketdata/{}/pricehistory".format(ticker_name)

    # Define the payload
    payload = {"apikey": client_id,
               #"periodType": "day",
               #"period": "2",
               "frequencyType": frequency_type,
               "frequency": frequency,
               "endDate": current_time_epoch,        # Use "www.epochconverter.com" for getting the time
               #"startDate": "1546300801000",
               "needExtendedHoursData": "true"}

    # Make a request (with a timeout so a stalled connection cannot hang the kernel)
    response = requests.get(url = endpoint, params = payload, timeout = timeout)

    # Report HTTP failures without echoing the request URL, which carries the API key
    if not response.ok:
        raise RuntimeError("Price-history request for {} failed with HTTP {}".format(ticker_name, response.status_code))

    # Convert data into a json dictionary
    response_json = response.json()
    if "candles" not in response_json:
        raise ValueError("No 'candles' in price-history response for {}: {}".format(ticker_name, response_json))

    # Get the dataset in dataframe form
    candles = pd.DataFrame(response_json["candles"])

    # Readable date columns (name -> strftime format), all derived from the epoch-ms "datetime"
    date_formats = {"year": "%Y",
                    "month": "%m",
                    "day": "%d",
                    "week day": "%a",
                    "hour": "%H:%M:%S"}

    # No candles: return an empty frame with the usual columns instead of failing on "datetime"
    if candles.empty:
        return pd.DataFrame(columns = ["open", "high", "low", "close", "volume", "datetime"] + list(date_formats))

    # Add the readable date columns (local time of the machine running the notebook)
    for column, fmt in date_formats.items():
        candles[column] = candles["datetime"].map(lambda ms, fmt=fmt: time.strftime(fmt, time.localtime(ms / 1000)))

    return candles

In [3]:
# Fetch the AAPL candles with the default frequency settings, report the frame's shape and display it
dataset = fetch_minute_dataset(ticker_name = "AAPL")
print("Dataset Shape: ", dataset.shape)
dataset

Dataset Shape:  (8254, 11)


Unnamed: 0,open,high,low,close,volume,datetime,year,month,day,week day,hour
0,121.27,121.35,121.15,121.15,17517,1616151600000,2021,03,19,Fri,14:30:00
1,121.10,121.37,121.03,121.28,11082,1616151660000,2021,03,19,Fri,14:31:00
2,121.29,121.30,121.24,121.24,1572,1616151720000,2021,03,19,Fri,14:32:00
3,121.24,121.24,121.23,121.23,1421,1616151780000,2021,03,19,Fri,14:33:00
4,121.23,121.24,121.21,121.24,2773,1616151840000,2021,03,19,Fri,14:34:00
...,...,...,...,...,...,...,...,...,...,...,...
8249,123.06,123.06,123.06,123.06,641,1617321300000,2021,04,02,Fri,04:25:00
8250,123.06,123.09,123.05,123.06,2211,1617321360000,2021,04,02,Fri,04:26:00
8251,123.08,123.08,123.05,123.05,1272,1617321420000,2021,04,02,Fri,04:27:00
8252,123.06,123.06,123.05,123.06,1025,1617321480000,2021,04,02,Fri,04:28:00


In [4]:
# Keep only the last 150 rows; this slice feeds the chart in the next cell
dataset_temp = dataset.iloc[-150:, :]

In [5]:
# Two stacked panels: price candles on top (80% of the height), volume bars below
fig = make_subplots(rows=2, 
                    cols=1, 
                    row_heights=[0.8, 0.2],
                    vertical_spacing=0.02)

# Price candles, added exactly once (the trace used to be added twice,
# via the deprecated append_trace and again via add_trace)
fig.add_trace(go.Candlestick(x=dataset_temp['hour'],
                             open=dataset_temp['open'],
                             high=dataset_temp['high'],
                             low=dataset_temp['low'],
                             close=dataset_temp['close']),
              row=1, col=1)

# Volume bars
fig.add_trace(go.Bar(x = dataset_temp['hour'], y = dataset_temp['volume'], marker_color="gray"),
              row=2, 
              col=1)

# Hide the grid on both panels and the range slider under the candlestick panel
fig.layout.xaxis1.showgrid = False
fig.layout.xaxis2.showgrid = False
fig.layout.yaxis1.showgrid = False
fig.layout.yaxis2.showgrid = False
fig.layout.xaxis.rangeslider.visible = False

fig.update_layout(width=1600, 
                  height=600,
                  showlegend=False,
                  font=dict(size=8))

# Show x tick labels only on the bottom (volume) panel; keep y tick labels on both
fig.update_xaxes(showticklabels=False)
fig.layout.xaxis2.showticklabels = True
fig.update_yaxes(showticklabels=True)

fig.show()

In [10]:
def plotly_fig2array(fig):
    """Render a Plotly figure to PNG and return the decoded pixels as a NumPy array."""
    # Rasterise to PNG bytes and decode them with PIL straight from memory
    png_stream = io.BytesIO(fig.to_image(format="png"))
    return np.asarray(Image.open(png_stream))

In [11]:
# Convert the chart built above into its pixel array and display it
plotly_fig2array(fig)

array([[[255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        ...,
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255]],

       [[255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        ...,
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255]],

       [[255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        ...,
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255]],

       ...,

       [[255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        ...,
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255]],

       [[255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        ...,
        [255, 255, 255, 255],
        [255, 255, 255, 255],
        [255, 255, 255, 255]],

       [[255

In [6]:
# Gather news data
# Placeholder only: builds an empty frame with the planned columns; nothing is fetched here
pd.DataFrame(columns=["news title", "news article", "date", "time", "source", ])

Unnamed: 0,news title,news article,date,time,source


<br>

<a id="step2"></a>

# 2. Stock Selection (with Sentiment Analysis & Filtering)

---

<br>

<a id="step3"></a>

# 3. Stock Trading

---

<br>

<a id="step31"></a>

## 3.1. Data Cleaning & Feature Extraction (with Indicators)

---

__TODO Tasks:__

0️⃣ <del>Choose the timestep (i.e. 1min, 5min, daily)</del>

0️⃣ Choose the features

0️⃣ Get the table for level 2 data and other useful information for the chart.

0️⃣ Research which indicators to add to the chart.

In [None]:
# Import the libraries
import psycopg2
from psycopg2 import sql
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT 
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import numpy as np
import pandas as pd
import time


# Function for adding more features to the dataset
def add_features(df, window_size = 500, get_last_n_rows = None):
    """Add the spread and per-side technical indicators to a quote table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain numeric "Sell" and "Buy" columns. A "Ticker" column, if
        present, is dropped from the result. The input frame is not modified.
    window_size : int
        Look-back used by every rolling / exponentially weighted indicator.
        Note that the EMA column uses ``com = window_size`` while the RSI
        smoothing uses ``span = window_size``.
    get_last_n_rows : int or None
        When a non-zero int, only the last ``abs(get_last_n_rows)`` rows of the
        result are returned. ``None`` (or 0) returns every row.

    Returns
    -------
    pandas.DataFrame
        A new frame with a "Spread" column and, for each of "Sell" and "Buy":
        change, SMA, EMA, Bollinger bands, STD, Sharpe ratio, RSI (EWMA and SMA
        variants) and Fibonacci retracement levels. The first ``window_size``
        warm-up rows are removed and the index is renumbered from 0 before the
        optional tail selection.
    """
    # Work on a copy so the caller's frame does not silently gain columns
    df = df.copy()

    # Spread (independent of the side, so it is computed once)
    df["Spread"] = df["Buy"] - df["Sell"]

    # Loop over "Sell" and "Buy" string
    for i_type in ["Sell", "Buy"]:

        price = df[i_type]
        rolling_price = price.rolling(window = window_size)

        # Price change
        change = price.pct_change()
        df["Change - {}".format(i_type)] = change

        # Simple Moving Average (SMA)
        sma = rolling_price.mean()
        df["Simple Moving Average (SMA) - {}".format(i_type)] = sma

        # Exponential Moving Average (EMA)
        df["Exponential Moving Average (EMA) - {}".format(i_type)] = price.ewm(com = window_size).mean()

        # Rolling standard deviation, shared by the Bollinger bands and the STD column
        std = rolling_price.std()

        # Bollinger Bands (Upper / Lower): SMA plus / minus two standard deviations
        df["Bollinger Bands (Upper) - {}".format(i_type)] = sma + (std * 2)
        df["Bollinger Bands (Lower) - {}".format(i_type)] = sma - (std * 2)

        # Standard Deviation (STD)
        df["Standard Deviation (STD) - {}".format(i_type)] = std

        # Sharpe Ratio: rolling mean of the change over its rolling standard deviation
        rolling_change = change.rolling(window = window_size)
        df["Sharpe Ratio - {}".format(i_type)] = rolling_change.mean() / rolling_change.std()

        ### Relative Strength Index (RSI)

        # Get the difference in price from previous step
        delta = price.diff()

        # Gains keep only the positive moves, losses the magnitude of the negative ones
        gains = delta.clip(lower = 0)
        losses = delta.clip(upper = 0).abs()

        # RSI based on EWMA
        rs_ewma = gains.ewm(span = window_size).mean() / losses.ewm(span = window_size).mean()
        df["Relative Strength Index (RSI) via EWMA - {}".format(i_type)] = 100.0 - (100.0 / (1.0 + rs_ewma))

        # RSI based on SMA
        rs_sma = gains.rolling(window_size).mean() / losses.rolling(window_size).mean()
        df["Relative Strength Index (RSI) via SMA - {}".format(i_type)] = 100.0 - (100.0 / (1.0 + rs_sma))

        ### Fibonacci Retracement

        # Maximum / minimum price per window size
        price_max = rolling_price.max()
        price_min = rolling_price.min()

        # Fibonacci Levels considering original trend as upward move
        diff = price_max - price_min

        # Add to dataframe
        df["Fibonacci Retracement (Price Min) - {}".format(i_type)] = price_min
        df["Fibonacci Retracement (Level 1) - {}".format(i_type)] = price_max - 0.236 * diff
        df["Fibonacci Retracement (Level 2) - {}".format(i_type)] = price_max - 0.382 * diff
        df["Fibonacci Retracement (Level 3) - {}".format(i_type)] = price_max - 0.618 * diff
        df["Fibonacci Retracement (Price Max) - {}".format(i_type)] = price_max

    # Remove the first window_size rows (indicator warm-up) and renumber the index
    df = df.iloc[window_size:, :].reset_index(drop = True)

    # If get_last_n_rows is defined (bool is excluded: it is an int subclass but not a row count)
    if isinstance(get_last_n_rows, int) and not isinstance(get_last_n_rows, bool) and get_last_n_rows != 0:

        # Get only last N rows (a negative slice start; a positive one would drop the first N instead)
        df = df.iloc[-abs(get_last_n_rows):, :]

    # Remove the "ticker" column when it exists
    df = df.drop(labels = ["Ticker"], axis = 1, errors = "ignore")

    return df

<br>

<a id="step32"></a>

## 3.2. Simulation

---

__Game Rules:__

1️⃣ The agent sees the stock price, plus its indicators, at different timesteps (i.e. 1min, 5min, daily).

2️⃣ The agent can take the following actions:

    ⭕️ Holding on without doing any action.
    ⭕️ Buying or selling the stock. Note that the agent can only buy and sell once.
    ⭕️ Bailing out if the agent thinks there won't be any uptrend.

3️⃣ After selling the held stock or bailing out, the episode finishes. At the end of each episode, the agent is rewarded based on the percentage return it achieved.

<br>

__TODO Tasks:__

0️⃣ Implement the render function

In [190]:
# Import the libraries
import enum
import gym
from gym import spaces
import matplotlib.pyplot as plt
from IPython.display import clear_output

In [191]:
# Action space
class Actions(enum.Enum):
    """Discrete actions available to the trading agent, keyed by their integer code."""

    HOLD = 0          # take no action this step
    BUY_OR_SELL = 1   # one code for both sides of the trade: buy first, sell afterwards
    BAILOUT = 2       # give up on the episode

In [28]:
# Worked example of the return-rate formula, (exit - entry) / entry:
# buying at 100 and selling at 95 is a 5% loss, i.e. -0.05
entry_price = 100
exit_price = 95
return_rate = (exit_price-entry_price)/entry_price
print("Return Rate: ", return_rate)

Return Rate:  -0.05


In [34]:
# ============= #
#  Environment  #
# ============= #
class EnvironmentTrading(gym.Env):
    """Single-trade trading environment (work in progress).

    In one episode the agent may buy once and sell once. The episode ends when
    the agent sells, bails out, or holds an open position whose return rate
    has fallen below ``stop_loss_rate``.

    Two hooks are not implemented yet and raise ``NotImplementedError`` until
    they are filled in here or overridden in a subclass:
    ``_get_current_price`` and ``_generate_observation``.

    Parameters
    ----------
    height, width, n_channels : int
        Shape of the image observation. The defaults are the shape of the PNG
        array of a 1600x600 figure with 4 channels -- TODO confirm once the
        observation is implemented.
    rendering : bool
        When True, ``step`` calls ``render`` after every action.
    """

    metadata = {'render.modes': ['human']}

    # Reward assigned when the stop loss ends the episode
    STOP_LOSS_PENALTY = -100


    # ============= #
    #  Constructor  #
    # ============= #
    def __init__(self, height=600, width=1600, n_channels=4, rendering=False):

        # Inherit the parent's constructor
        super(EnvironmentTrading, self).__init__()

        # Action space: one discrete action per member of `Actions`
        self.action_space = spaces.Discrete(len(Actions))

        # TODO: Observation space
        self.observation_space = spaces.Box(low=0, high=255, shape=(height, width, n_channels), dtype=np.uint8)

        # Initialization
        self.stop_loss_rate = -0.05    # Rate for stop loss (i.e. game is over if we lose more than 5%)
        self.episode = 1               # Episode number
        self.mode = "virtually"        # there are two different modes: virtually and online
        self.rendering = rendering     # Whether step() renders after each action
        self.reward_history = []       # Reward recorded at every render call (for the plot)

        # Per-episode state (position flag, prices, reward, timestep)
        self._reset_episode_state()


    # ============= #
    #    Helpers    #
    # ============= #
    def _reset_episode_state(self):
        """Put every per-episode attribute back to its starting value."""
        self.bought_stock = False      # A flag indicating if we already bought the stock
        self.reward = 0                # Reward value
        self.entry_price = None        # Price paid when buying
        self.exit_price = None         # Price received when selling
        self.timestep = 0              # Number of steps taken in the current episode
        self.last_action_type = None   # Human-readable label of the last action (for render)

    def _get_current_price(self):
        """Return the current stock price. TODO: connect to the market data feed."""
        raise NotImplementedError("EnvironmentTrading._get_current_price is not implemented yet")

    def _generate_observation(self):
        """Return the next observation. TODO: build it from the price chart / indicators."""
        raise NotImplementedError("EnvironmentTrading._generate_observation is not implemented yet")


    # ============= #
    #     Step      #
    # ============= #
    def step(self, action):
        """Apply one action and return ``(observations, reward, done, info)``.

        ``action`` may be an ``Actions`` member or its integer code. Any other
        value leaves the position untouched.
        """
        # Accept both enum members and raw integer codes
        action_code = action.value if isinstance(action, Actions) else action

        # Initialize done
        done = False
        self.timestep += 1

        # If holding
        if action_code == Actions.HOLD.value:
            self.last_action_type = "🟡 HOLD"

            # If we are holding after buying
            if self.bought_stock:

                # Return rate of the open position, measured against this episode's entry price
                current_price = self._get_current_price()
                return_rate = (current_price - self.entry_price) / self.entry_price

                # If return rate is less than the stop loss rate
                if self.stop_loss_rate > return_rate:

                    # Finish the game and punish the agent
                    done = True
                    self.reward = self.STOP_LOSS_PENALTY

        # If buying or selling
        elif action_code == Actions.BUY_OR_SELL.value:

            # If buying
            if not self.bought_stock:
                self.last_action_type = "🟢 BUY"

                # TODO: Actually buy the stock
                if (self.mode=="online"): pass

                # Record the price first so a failing price lookup cannot leave a
                # flagged position without an entry price
                self.entry_price = self._get_current_price()
                self.bought_stock = True

            # If selling (only on a later step: buying must not fall through into selling)
            else:
                self.last_action_type = "🔴 SELL"

                # TODO: Actually sell the stock
                if (self.mode=="online"): pass

                # Get the exit price
                self.exit_price = self._get_current_price()

                # The reward is the return rate of the round trip
                self.reward += (self.exit_price - self.entry_price) / self.entry_price

                # The game is done
                done = True

        # If bailing out
        elif action_code == Actions.BAILOUT.value:
            self.last_action_type = "⚪ BAILOUT"

            # TODO: Set the reward
            self.reward = 0

            # The game is done
            done = True

        # TODO: Get the next observations
        observations = self._generate_observation()

        # Render
        if self.rendering:
            self.render()

        # Increment the episode number if in terminal state
        if done:
            self.episode += 1

        return observations, self.reward, done, {}


    # ============= #
    #     Reset     #
    # ============= #
    def reset(self):
        """Start a new episode and return its first observation."""
        self._reset_episode_state()
        return self._generate_observation()


    # ============= #
    #     Render    #
    # ============= #
    def render(self, mode='human', close=False):
        """Print a status report and plot the rewards recorded so far.

        ``mode`` and ``close`` are accepted but not used.
        """
        # Clear out the kernel
        clear_output(wait = True)

        # Values that may not exist yet are shown as "N/A"
        action_type = self.last_action_type if self.last_action_type is not None else "N/A"
        entry_price = "N/A" if self.entry_price is None else "$ {:.9f}".format(self.entry_price)
        exit_price = "N/A" if self.exit_price is None else "$ {:.9f}".format(self.exit_price)
        holding = "Yes" if (self.bought_stock and self.exit_price is None) else "No"

        # Report
        print("\t\t\t      ===================================================")
        print("\t\t\t      \t               EPISODE {}".format(self.episode))
        print("\t\t\t      \t              TIMESTEP {}".format(self.timestep))
        print("\t\t\t      ===================================================")
        print("\t\t\t           Action:                       {}".format(action_type))
        print("\t\t\t           Reward:                       {:.9f}".format(self.reward))
        print("\t\t\t        .............................................")
        print("\t\t\t           Holding Stock:                {}".format(holding))
        print("\t\t\t           Entry Price:                  {}".format(entry_price))
        print("\t\t\t           Exit Price:                   {}".format(exit_price))
        print("\t\t\t      ===================================================")

        # Save current reward
        self.reward_history.append(self.reward)

        # Visualization - Reward
        plt.figure(figsize = (16, 4))
        plt.plot(self.reward_history, "red")
        plt.xlabel("Timestep", fontsize = 15)
        plt.ylabel("Reward", fontsize = 15)
        plt.show()

<br>

<a id="step33"></a>

## 3.3. Reinforcement Learning Agent

---

In [41]:
# Import the libraries
import gym, time, requests, json

In [None]:
# Import the RL libraries
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv 
from stable_baselines import PPO2, ACKTR, DQN, TD3, ACER , A2C, HER, SAC

In [None]:
# Initialize the environment
# (the environment class defined in section 3.2 is `EnvironmentTrading`; it takes no dataset argument)
env = EnvironmentTrading()

In [None]:
# Initialize the vectorized environment
# DummyVecEnv receives a list of factories; the single lambda returns the `env` created above
environment = DummyVecEnv([lambda: env])

In [None]:
# Initialize the PPO2
# NOTE(review): the environment declares an image-shaped observation space, so confirm
# that MlpPolicy (rather than a CNN policy) is intended.
# noptepochs is presumably the number of optimisation epochs per update -- verify
# against the installed stable-baselines version.
model = PPO2(policy = MlpPolicy, 
             env = environment, 
             noptepochs = 25,
             verbose = 1)

In [None]:
# Train the model for one million timesteps
model.learn(total_timesteps = 1000000)

# The End!