### PPO AGENT:

#### Load the dataset and preprocess the dataframe in the required format.

Technical indicators created for the environment include:

- RSI
- MACD
- Stoch_k
- OBV
- Upper_BB
- ATR_1
- ATR_2
- ATR_5
- ATR_10
- ATR_20

In [1]:
!apt-get update
!apt-get install -y build-essential wget

!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzf ta-lib-0.4.0-src.tar.gz

%cd ta-lib
!./configure --prefix=/usr
!make
!make install

!pip install ta-lib

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
0% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.ubuntu.com (185.125.10% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.ubuntu.com (185.125.1                                                                                                    Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
0% [Waiting for headers] [Waiting for headers] [Connecting to ppa.launchpadcontent.net (185.125.190.                                                                                                    Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:6 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Hit:7

In [2]:
import pandas as pd
import numpy as np
import talib as ta

class TechnicalIndicators:
    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        self.data['RSI'] = ta.RSI(self.data['Close'], timeperiod=14)
        self.data['MACD'], self.data['MACD_signal'], self.data['MACD_hist'] = ta.MACD(self.data['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        self.data['Stoch_k'], self.data['Stoch_d'] = ta.STOCH(self.data['High'], self.data['Low'], self.data['Close'],
                                                              fastk_period=14, slowk_period=3, slowd_period=3)

    def add_volume_indicators(self):
        self.data['OBV'] = ta.OBV(self.data['Close'], self.data['Volume'])

    def add_volatility_indicators(self):
        self.data['Upper_BB'], self.data['Middle_BB'], self.data['Lower_BB'] = ta.BBANDS(self.data['Close'], timeperiod=20)
        self.data['ATR_1'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=1)
        self.data['ATR_2'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=2)
        self.data['ATR_5'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)
        self.data['ATR_10'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=10)
        self.data['ATR_20'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=20)

    def add_trend_indicators(self):
        self.data['ADX'] = ta.ADX(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['+DI'] = ta.PLUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['-DI'] = ta.MINUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['CCI'] = ta.CCI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)

    def add_other_indicators(self):
        self.data['DLR'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data['TWAP'] = self.data['Close'].expanding().mean()
        self.data['VWAP'] = (self.data['Volume'] * (self.data['High'] + self.data['Low']) / 2).cumsum() / self.data['Volume'].cumsum()

    def add_all_indicators(self):
        self.add_momentum_indicators()
        self.add_volume_indicators()
        self.add_volatility_indicators()
        self.add_trend_indicators()
        self.add_other_indicators()
        return self.data

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
data = pd.read_csv('/content/drive/MyDrive/xnas-itch-20230703.tbbo.csv')

# Preprocessing to create necessary columns
data['price']=data['price']/1e9
data['bid_px_00']=data['bid_px_00']/1e9
data['ask_px_00']=data['ask_px_00']/1e9

data['Close'] = data['price']
data['Volume'] = data['size']
data['High'] = data[['bid_px_00', 'ask_px_00']].max(axis=1)
data['Low'] = data[['bid_px_00', 'ask_px_00']].min(axis=1)
data['Open'] = data['Close'].shift(1).fillna(data['Close'])


# Adding five new features
data['TI'] = data['size'] / (data['ts_recv'].diff().abs() + 1)

data['LI'] = (data['bid_sz_00'] - data['ask_sz_00']) / (data['bid_sz_00'] + data['ask_sz_00'])

window_size = 5
data['price_mom'] = data['price'] - data['price'].shift(window_size)

data['bid_vola'] = data['bid_px_00'].rolling(window=window_size).std()
data['ask_vola'] = data['ask_px_00'].rolling(window=window_size).std()
data['vol_skew'] = data['bid_vola'] - data['ask_vola']

data['avg_vol'] = data['size'].rolling(window=window_size).mean()
data['relative_vol'] = data['size'] / data['avg_vol']



ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()
market_features_df = df_with_indicators[35:]

Checking the dataset:

In [5]:
# Show all columns in pandas
pd.set_option('display.max_columns', None)

market_features_df.head(35)

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,TI,LI,price_mom,bid_vola,ask_vola,vol_skew,avg_vol,relative_vol,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP
35,1688371214386057385,1688371214385893078,1,2,32,T,N,0,194.05,50,130,164307,326232,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,2.517682e-08,0.987821,0.0,0.0,0.111803,-0.111803,88.4,0.565611,54.544543,0.006271,-0.00313,0.009401,52.52525,61.952862,-266.0,194.065621,194.017,193.968379,0.3,0.175078,0.098615,0.075141,0.072403,97.257397,30.435801,0.196362,166.666667,0.0,194.02,194.021894
36,1688371214386063777,1688371214385899379,1,2,32,T,N,0,194.05,50,130,164398,326233,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,0.007821054,0.987821,0.0,0.0,0.136931,-0.136931,76.0,0.657895,54.544543,0.007108,-0.001082,0.00819,38.38384,52.525253,-266.0,194.06899,194.02,193.97101,0.3,0.237539,0.138892,0.097627,0.083783,97.361721,22.989295,0.14832,83.333333,0.0,194.020811,194.025188
37,1688371215804852019,1688371215804687301,1,2,32,T,B,0,194.21,10,130,164718,328131,194.0,194.21,3101,29,4,1,AAPL,194.21,10,194.21,194.0,194.05,7.048268e-09,0.98147,0.16,0.0,0.125976,-0.125976,55.6,0.179856,85.890753,0.020446,0.003223,0.017223,40.40404,43.771044,-256.0,194.125889,194.0305,193.935111,0.21,0.22377,0.153114,0.108864,0.090094,97.458593,19.409454,0.125224,79.268293,0.000824,194.025789,194.025596
38,1688371219671476629,1688371219671312224,1,2,32,T,N,0,194.14,10,130,164405,331406,194.0,194.16,3101,400,4,1,AAPL,194.14,10,194.16,194.0,194.21,2.586235e-09,0.771494,0.09,0.0,0.105024,-0.105024,35.2,0.284091,64.827662,0.025079,0.007594,0.017484,49.49495,42.760943,-266.0,194.142928,194.0375,193.932072,0.21,0.216885,0.164491,0.118978,0.096089,97.548546,16.622008,0.10724,-3.205128,-0.00036,194.028718,194.025873
39,1688371223368835585,1688371223368671235,1,2,32,T,B,0,194.13,10,130,164350,334235,194.0,194.13,3101,400,4,1,AAPL,194.13,10,194.13,194.0,194.14,2.704633e-09,0.771494,0.08,0.0,0.078422,-0.078422,26.0,0.384615,62.470772,0.027625,0.011601,0.016025,57.57576,49.158249,-276.0,194.155247,194.044,193.932753,0.14,0.178442,0.159593,0.12108,0.098285,97.632074,15.068361,0.097216,-113.095238,-5.2e-05,194.03125,194.026071
40,1688371229849940201,1688371229849775570,1,2,32,T,B,0,194.13,100,130,164631,339055,194.01,194.13,101,390,2,1,AAPL,194.13,100,194.13,194.01,194.13,1.542947e-08,-0.588595,0.08,0.004472,0.071624,-0.067152,36.0,2.777778,62.470772,0.029305,0.015141,0.014164,49.49495,52.188552,-276.0,194.165821,194.0505,193.935179,0.12,0.149221,0.151674,0.120972,0.09937,97.709636,13.871534,0.089495,-64.102564,0.0,194.033659,194.028188
41,1688371230451172473,1688371230451005195,1,2,32,T,N,0,194.02,10,0,167278,339547,194.01,194.16,101,400,2,1,AAPL,194.02,10,194.16,194.01,194.13,1.663251e-08,-0.596806,-0.03,0.005477,0.032711,-0.027234,28.0,0.357143,42.676806,0.021513,0.016416,0.005097,34.54545,47.205387,-286.0,194.16539,194.0515,193.93761,0.15,0.149611,0.151339,0.123875,0.101902,97.79382,14.463498,0.08085,-113.425926,-0.000567,194.033333,194.02846
42,1688371230451172473,1688371230451005195,1,2,32,T,A,0,194.01,1,0,167278,339548,194.01,194.16,101,400,2,1,AAPL,194.01,1,194.16,194.01,194.02,1.0,-0.596806,-0.2,0.005477,0.016432,-0.010954,26.2,0.038168,41.392784,0.014365,0.016006,-0.001641,19.49495,34.511785,-287.0,194.165066,194.052,193.938934,0.15,0.149805,0.151071,0.126487,0.104307,97.871992,13.100725,0.073232,-90.909091,-5.2e-05,194.032791,194.028488
43,1688371230451172473,1688371230451005195,1,2,32,T,A,0,194.01,100,130,167278,339549,194.01,194.16,100,400,1,1,AAPL,194.01,100,194.16,194.01,194.01,100.0,-0.6,-0.13,0.004472,0.016432,-0.01196,44.2,2.262443,41.392784,0.008601,0.014525,-0.005924,4.444444,19.494949,-287.0,194.164727,194.0525,193.940273,0.15,0.149903,0.150857,0.128839,0.106591,97.94458,11.893862,0.066486,-61.22449,0.0,194.032273,194.031073
44,1688371230451995982,1688371230451829005,1,2,32,T,A,0,194.0,3075,0,166977,339553,194.0,194.16,3119,400,5,1,AAPL,194.0,3075,194.16,194.0,194.01,0.003734017,0.772663,-0.13,0.004472,0.013416,-0.008944,657.2,4.678941,39.997126,0.003189,0.012257,-0.009069,2.222222,8.720539,-3362.0,194.164727,194.0525,193.940273,0.16,0.154951,0.152686,0.131955,0.109262,97.267563,10.755675,0.65822,-81.081081,-5.2e-05,194.031556,194.05967


In [6]:
df_with_indicators.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59271 entries, 0 to 59270
Data columns (total 55 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ts_recv        59271 non-null  int64  
 1   ts_event       59271 non-null  int64  
 2   rtype          59271 non-null  int64  
 3   publisher_id   59271 non-null  int64  
 4   instrument_id  59271 non-null  int64  
 5   action         59271 non-null  object 
 6   side           59271 non-null  object 
 7   depth          59271 non-null  int64  
 8   price          59271 non-null  float64
 9   size           59271 non-null  int64  
 10  flags          59271 non-null  int64  
 11  ts_in_delta    59271 non-null  int64  
 12  sequence       59271 non-null  int64  
 13  bid_px_00      59271 non-null  float64
 14  ask_px_00      59271 non-null  float64
 15  bid_sz_00      59271 non-null  int64  
 16  ask_sz_00      59271 non-null  int64  
 17  bid_ct_00      59271 non-null  int64  
 18  ask_ct

#### Create the Trading Environment class for the PPO Agent

In [7]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class TradingEnvironment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit):
        super(TradingEnvironment, self).__init__()
        self.data = data
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI',
                              'TI', 'LI', 'price_mom','vol_skew', 'relative_vol']

        # Initialize balance, shares held, and total shares traded
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        return self._next_observation()

    def _next_observation(self):
        return self.data[self.state_columns].iloc[self.current_step].values

    def step(self, action):
        expected_price = self.data.iloc[self.current_step]['ask_px_00']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0

        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        if action != 0:
            transaction_cost= self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], 0.3, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100*transaction_time/1e9
        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()
        info = {
        'step': self.current_step,
        'action': action,
        'price': actual_price,
        'shares': self.trades[-1]['shares'] if self.trades else 0
    }
        self.current_step += 1

        return obs, reward, done, info

    def _take_action(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        current_time = pd.to_datetime(self.data.iloc[self.current_step]['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1: # and self.total_shares_traded < self.daily_trading_limit:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if(shares_bought>0):
                self.trades.append(trade_info)
        elif action == 2: # and self.total_shares_traded < self.daily_trading_limit:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if(shares_sold>0):
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        slippage = expected_price - actual_price
        time_penalty = 100*transaction_time/1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        # download all trades in a pandas dataframe using .csv
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_ppo.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

#### Train the PPO Agent with the environment and for different tickers.

In [8]:
# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

env = TradingEnvironment(ticker_data, daily_trading_limit)  # Adjust window_size if needed

In [9]:
!pip install stable-baselines3

Collecting stable-baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/182.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.3/182.3 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3)
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/953.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m40.3 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_

In [48]:
# !pip install 'gym==0.9.4'

In [47]:
# !pip install --upgrade 'shimmy>=0.2.1'

In [12]:
import pandas as pd
from stable_baselines3 import PPO


# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Create the trading environment
env = TradingEnvironment(ticker_data, daily_trading_limit)  # Adjust window_size if needed

# Define the best hyperparameters
best_hyperparameters = {'learning_rate': 0.0009931989008886031,'n_steps': 512,'batch_size': 128, 'gamma': 0.9916829193042708,'clip_range': 0.21127653449387027,'n_epochs': 6} # type: ignore

# Create the RL model with the best hyperparameters
model = PPO('MlpPolicy', env, verbose=1, **best_hyperparameters)

# Train the model
model.learn(total_timesteps=10000)

# Save the model
# model.save("trading_agent")

# Evaluate the model
obs = env.reset()
for _ in range(len(ticker_data)):
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        break

# Render the final state
env.render()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 51461, Timestamp: 2023-07-03 16:41:27.969522306, Action: 2, Price: 192.25, Shares: 237.0, Reward: -0.28055858204300665, Transaction Cost: 0.25398218204301576, Slippage: 0.009999999999990905, Time Penalty: 0.0165764
Step: 51464, Timestamp: 2023-07-03 16:41:28.046992051, Action: 2, Price: 192.25, Shares: 10.0, Reward: -0.1535403910215272, Transaction Cost: 0.12699109102150788, Slippage: 0.010000000000019327, Time Penalty: 0.0165493
Step: 51473, Timestamp: 2023-07-03 16:41:30.846556897, Action: 1, Price: 192.26, Shares: 160.0, Reward: -0.2805314820430351, Transaction Cost: 0.25398218204301576, Slippage: 0.010000000000019327, Time Penalty: 0.0165493
Step: 51474, Timestamp: 2023-07-03 16:41:32.108693424, Action: 2, Price: 192.26, Shares: 54.0, Reward: -0.05269840464463453, Transaction Cost: 0.03591850464463453, Slippage: 0.0, Time Penalty: 0.0167799
Step: 51475, Timestamp: 2023-07-03 16:41:32.426126719, Action: 2, Price:

### TRADING BLOTTER:

#### Preprocess the data for the trading blotter:

In [13]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

INITIAL_CASH = 10_000_000  # $10 million

def preprocess_data(df):
    df['liquidity'] = df['bid_sz_00'] * df['bid_px_00'] + df['ask_sz_00'] * df['ask_px_00']
    return df

def calculate_rsi(data, window=14):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_vol_and_liquidity(price_df, volume_df, window_size):
    # Calculate rolling statistics
    rolling_mean_vol = price_df.pct_change().rolling(window=window_size).mean()
    rolling_std_vol = price_df.pct_change().rolling(window=window_size).std()
    rolling_mean_liq = volume_df.rolling(window=window_size).mean()
    rolling_std_liq = volume_df.rolling(window=window_size).std()

    return rolling_mean_vol, rolling_std_vol, rolling_mean_liq, rolling_std_liq

def get_percentile(current_value, mean, std):
    if std > 0:
        z_score = (current_value - mean) / std
        percentile = norm.cdf(z_score)
    else:
        percentile = 0.5  # No variation
    return percentile

def get_trade_price(base_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction):
    vol_percentile = get_percentile(current_vol, mean_vol, std_vol)
    liq_percentile = get_percentile(current_liq, mean_liq, std_liq)

    # Define price adjustment scenarios based on market conditions
    if vol_percentile >= 0.9 and liq_percentile < 0.1:
        price_adjustment_percent = np.random.uniform(-0.25, -0.15)
    elif vol_percentile <= 0.1 and liq_percentile < 0.1:
        price_adjustment_percent = np.random.uniform(-0.10, -0.05)
    elif vol_percentile >= 0.9 and liq_percentile >= 0.9:
        price_adjustment_percent = np.random.uniform(-0.05, +0.10)
    else:
        price_adjustment_percent = np.random.uniform(-0.05, +0.05)  # Default for normal conditions

    # Adjust price based on trade direction
    if trade_direction == 'BUY':
        adjusted_price = base_price * (1 - price_adjustment_percent)
    else:  # SELL
        adjusted_price = base_price * (1 + price_adjustment_percent)

    return adjusted_price


#### Create trading environment for the blotter

In [14]:
class TradingEnvironmentwithBlotter:
    def __init__(self, data, daily_trading_limit, window_size):
        self.data = preprocess_data(data)
        self.daily_trading_limit = daily_trading_limit
        self.window_size = window_size
        self.state_columns = ['price', 'liquidity', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI',
                              'TI', 'LI', 'price_mom','vol_skew', 'relative_vol']
        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = INITIAL_CASH
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        self.portfolio = {'cash': self.balance, 'holdings': {ticker: 0 for ticker in self.data['symbol'].unique()}}
        self.data['RSI'] = calculate_rsi(self.data['price'])
        self.data['pct_change'] = self.data['price'].pct_change()
        self.data['rolling_mean_vol'], self.data['rolling_std_vol'], self.data['rolling_mean_liq'], self.data['rolling_std_liq'] = calculate_vol_and_liquidity(self.data['price'], self.data['liquidity'], self.window_size)

    def step(self):
        row = self.data.iloc[self.current_step]
        current_price = row['price']
        current_time = pd.to_datetime(row['ts_event'])
        current_rsi = row['RSI']
        current_vol = row['pct_change']
        current_liq = row['liquidity']
        mean_vol = row['rolling_mean_vol']
        std_vol = row['rolling_std_vol']
        mean_liq = row['rolling_mean_liq']
        std_liq = row['rolling_std_liq']

        if current_rsi < 30:  # Entry signal based on RSI
            trade_direction = 'BUY'
            trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
            trade_size = (self.portfolio['cash'] * np.random.uniform(0.001, 0.005)) / trade_price
            if self.portfolio['cash'] >= trade_size * trade_price:
                self.portfolio['cash'] -= trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] += trade_size
                trade_status = 'filled'
            else:
                trade_status = 'cancelled'
        elif current_rsi > 70:  # Exit signal based on RSI
            trade_direction = 'SELL'
            if self.portfolio['holdings'][row['symbol']] > 0:
                trade_size = min(self.portfolio['holdings'][row['symbol']], self.portfolio['cash']*np.random.uniform(0.001, 0.005) / current_price)
                trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
                self.portfolio['cash'] += trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] -= trade_size
                trade_status = 'filled'
            else:
                trade_size = 0
                trade_status = 'cancelled'
        else:
            trade_direction = 'HOLD'
            trade_size = 0
            trade_price = current_price
            trade_status = 'skipped'

        if trade_size > 0:
            expected_price = row['ask_px_00']
            actual_price = row['price']
            transaction_time = row['ts_in_delta']
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self.data['Volume'].mean())
            slippage = expected_price - actual_price
            time_penalty = 1000 * transaction_time / 1e9
            reward = - (slippage + time_penalty + transaction_cost)

            self.cumulative_reward += reward
            self.trades.append({
                'step': self.current_step,
                'timestamp': current_time,
                'action': trade_direction,
                'price': trade_price,
                'shares': trade_size,
                'symbol': row['symbol'],
                'reward': reward,
                'transaction_cost': transaction_cost,
                'slippage': slippage,
                'time_penalty': time_penalty
            })



        self.current_step += 1
        if self.current_step >= len(self.data) - 1:
            done=True
            self.current_step = 0

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self):
        print(f'Cumulative reward: {self.cumulative_reward}')
        row = self.data.iloc[self.current_step]
        print(f'Total portfolio value: {self.portfolio["cash"] + self.portfolio["holdings"][row["symbol"]]*row["Close"]}')
        # get trades in a pandas dataframe
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_blotter.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

#### Run the trading blotter

In [15]:
# Filter data for the specified ticker
ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

window_size = 60
daily_trading_limit = 1000
# Create the trading environment
env = TradingEnvironmentwithBlotter(ticker_data, daily_trading_limit=1000, window_size=window_size)  # Daily trading limit of 1000 shares

# Run the environment
cumulative_reward, trades = env.run()

# Render the results
env.render()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 49815, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 187.00432220262982, Shares: 0.3088043244621489, Symbol: AAPL, Reward: -0.25111421803554473, Transaction Cost: 0.08031621803554471, Slippage: 0.0, Time Penalty: 0.170798
Step: 49816, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 197.27874154222047, Shares: 0.22006440662708832, Symbol: AAPL, Reward: -0.19619621820430158, Transaction Cost: 0.025398218204301576, Slippage: 0.0, Time Penalty: 0.170798
Step: 49817, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 193.04670190508767, Shares: 0.21730645998214818, Symbol: AAPL, Reward: -0.42478018204301576, Transaction Cost: 0.25398218204301576, Slippage: 0.0, Time Penalty: 0.170798
Step: 49818, Timestamp: 2023-07-03 16:29:54.474421200, Action: SELL, Price: 195.36290373586374, Shares: 0.27080605847846834, Symbol: AAPL, Reward: -0.4312691820430067, Transaction Cost: 0.253982182

In [16]:
df=market_features_df.copy()

  and should_run_async(code)


In [17]:
df['timestamp']=pd.to_datetime(df['ts_recv'])

In [18]:
df.head()

  and should_run_async(code)


Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,TI,LI,price_mom,bid_vola,ask_vola,vol_skew,avg_vol,relative_vol,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP,timestamp
35,1688371214386057385,1688371214385893078,1,2,32,T,N,0,194.05,50,130,164307,326232,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,2.517682e-08,0.987821,0.0,0.0,0.111803,-0.111803,88.4,0.565611,54.544543,0.006271,-0.00313,0.009401,52.525253,61.952862,-266.0,194.065621,194.017,193.968379,0.3,0.175078,0.098615,0.075141,0.072403,97.257397,30.435801,0.196362,166.666667,0.0,194.02,194.021894,2023-07-03 08:00:14.386057385
36,1688371214386063777,1688371214385899379,1,2,32,T,N,0,194.05,50,130,164398,326233,194.0,194.3,3101,19,4,10,AAPL,194.05,50,194.3,194.0,194.05,0.007821054,0.987821,0.0,0.0,0.136931,-0.136931,76.0,0.657895,54.544543,0.007108,-0.001082,0.00819,38.383838,52.525253,-266.0,194.06899,194.02,193.97101,0.3,0.237539,0.138892,0.097627,0.083783,97.361721,22.989295,0.14832,83.333333,0.0,194.020811,194.025188,2023-07-03 08:00:14.386063777
37,1688371215804852019,1688371215804687301,1,2,32,T,B,0,194.21,10,130,164718,328131,194.0,194.21,3101,29,4,1,AAPL,194.21,10,194.21,194.0,194.05,7.048268e-09,0.98147,0.16,0.0,0.125976,-0.125976,55.6,0.179856,85.890753,0.020446,0.003223,0.017223,40.40404,43.771044,-256.0,194.125889,194.0305,193.935111,0.21,0.22377,0.153114,0.108864,0.090094,97.458593,19.409454,0.125224,79.268293,0.000824,194.025789,194.025596,2023-07-03 08:00:15.804852019
38,1688371219671476629,1688371219671312224,1,2,32,T,N,0,194.14,10,130,164405,331406,194.0,194.16,3101,400,4,1,AAPL,194.14,10,194.16,194.0,194.21,2.586235e-09,0.771494,0.09,0.0,0.105024,-0.105024,35.2,0.284091,64.827662,0.025079,0.007594,0.017484,49.494949,42.760943,-266.0,194.142928,194.0375,193.932072,0.21,0.216885,0.164491,0.118978,0.096089,97.548546,16.622008,0.10724,-3.205128,-0.00036,194.028718,194.025873,2023-07-03 08:00:19.671476629
39,1688371223368835585,1688371223368671235,1,2,32,T,B,0,194.13,10,130,164350,334235,194.0,194.13,3101,400,4,1,AAPL,194.13,10,194.13,194.0,194.14,2.704633e-09,0.771494,0.08,0.0,0.078422,-0.078422,26.0,0.384615,62.470772,0.027625,0.011601,0.016025,57.575758,49.158249,-276.0,194.155247,194.044,193.932753,0.14,0.178442,0.159593,0.12108,0.098285,97.632074,15.068361,0.097216,-113.095238,-5.2e-05,194.03125,194.026071,2023-07-03 08:00:23.368835585


# Slippage Calculations #1

## Volatility Forecasting with LSTM Model

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

data['volatility'] = data['price'].std()
data = data.dropna()

X = data[['volatility', 'price']]
y = data['volatility']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_reshaped = np.reshape(X_train.values, (X_train.shape[0], X_train.shape[1], 1))
X_test_reshaped = np.reshape(X_test.values, (X_test.shape[0], X_test.shape[1], 1))

model_LSTM = Sequential()
model_LSTM.add(LSTM(50, return_sequences=True, input_shape=(X_train_reshaped.shape[1], 1)))
model_LSTM.add(LSTM(50))
model_LSTM.add(Dense(1))

model_LSTM.compile(optimizer='adam', loss='mean_squared_error')
model_LSTM.fit(X_train_reshaped, y_train, epochs=10, batch_size=1, verbose=2)

predicted_volatility = model_LSTM.predict(X_test_reshaped)

Epoch 1/10
47390/47390 - 109s - loss: 3.4787e-05 - 109s/epoch - 2ms/step
Epoch 2/10
47390/47390 - 103s - loss: 1.8296e-07 - 103s/epoch - 2ms/step
Epoch 3/10
47390/47390 - 101s - loss: 1.2144e-07 - 101s/epoch - 2ms/step
Epoch 4/10
47390/47390 - 104s - loss: 8.9459e-08 - 104s/epoch - 2ms/step
Epoch 5/10
47390/47390 - 102s - loss: 6.8562e-08 - 102s/epoch - 2ms/step
Epoch 6/10
47390/47390 - 100s - loss: 5.4720e-08 - 100s/epoch - 2ms/step
Epoch 7/10
47390/47390 - 98s - loss: 4.3838e-08 - 98s/epoch - 2ms/step
Epoch 8/10
47390/47390 - 98s - loss: 3.7260e-08 - 98s/epoch - 2ms/step
Epoch 9/10
47390/47390 - 99s - loss: 3.2250e-08 - 99s/epoch - 2ms/step
Epoch 10/10
47390/47390 - 100s - loss: 2.8647e-08 - 100s/epoch - 2ms/step


In [20]:
predictions_df = pd.DataFrame(predicted_volatility, index=X_test.index, columns=['predicted_volatility'])
data.loc[predictions_df.index, 'predicted_volatility'] = predictions_df['predicted_volatility']
data['predicted_volatility'].fillna(0, inplace=True)

  and should_run_async(code)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[predictions_df.index, 'predicted_volatility'] = predictions_df['predicted_volatility']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['predicted_volatility'].fillna(0, inplace=True)


In [21]:
ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()
market_features_df = df_with_indicators[35:]

  and should_run_async(code)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['RSI'] = ta.RSI(self.data['Close'], timeperiod=14)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['MACD'], self.data['MACD_signal'], self.data['MACD_hist'] = ta.MACD(self.data['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retu

In [22]:
class TradingEnvironment_SC1(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit):
        super(TradingEnvironment_SC1, self).__init__()
        self.data = data
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI',
                              'TI', 'LI', 'price_mom','vol_skew', 'relative_vol']

        # Initialize balance, shares held, and total shares traded
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        return self._next_observation()

    def _next_observation(self):
        return self.data[self.state_columns].iloc[self.current_step].values

    def step(self, action):
        expected_price = self.data.iloc[self.current_step]['ask_px_00']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0

        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        if action != 0:
            if self.data.iloc[self.current_step]['predicted_volatility'] == 0:
                volatility = 0.3
            else:
                volatility = self.data.iloc[self.current_step]['predicted_volatility']
            transaction_cost= self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], volatility, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100*transaction_time/1e9
        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()
        info = {
        'step': self.current_step,
        'action': action,
        'price': actual_price,
        'shares': self.trades[-1]['shares'] if self.trades else 0
    }
        self.current_step += 1

        return obs, reward, done, info

    def _take_action(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        current_time = pd.to_datetime(self.data.iloc[self.current_step]['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1: # and self.total_shares_traded < self.daily_trading_limit:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if(shares_bought>0):
                self.trades.append(trade_info)
        elif action == 2: # and self.total_shares_traded < self.daily_trading_limit:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if(shares_sold>0):
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        slippage = expected_price - actual_price
        time_penalty = 100*transaction_time/1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        # download all trades in a pandas dataframe using .csv
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_ppo.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

In [23]:
# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Create the trading environment
env_SC1 = TradingEnvironment_SC1(ticker_data, daily_trading_limit)  # Adjust window_size if needed

# Define the best hyperparameters
best_hyperparameters = {'learning_rate': 0.0009931989008886031,'n_steps': 512,'batch_size': 128, 'gamma': 0.9916829193042708,'clip_range': 0.21127653449387027,'n_epochs': 6} # type: ignore

# Create the RL model with the best hyperparameters
model_SC1 = PPO('MlpPolicy', env_SC1, verbose=1, **best_hyperparameters)

# Train the model
model_SC1.learn(total_timesteps=10000)

# Save the model
# model.save("trading_agent")

# Evaluate the model
obs = env_SC1.reset()
for _ in range(len(ticker_data)):
    action, _states = model_SC1.predict(obs)
    obs, rewards, done, info = env_SC1.step(action)
    if done:
        break

# Render the final state
env_SC1.render()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 50212, Timestamp: 2023-07-03 16:33:25.655343681, Action: 2, Price: 192.06, Shares: 46.0, Reward: -0.1984459056208947, Transaction Cost: 0.1795832056208947, Slippage: 0.0, Time Penalty: 0.0188627
Step: 50215, Timestamp: 2023-07-03 16:33:25.661566457, Action: 1, Price: 192.06, Shares: 179.0, Reward: -0.1987304056208947, Transaction Cost: 0.1795832056208947, Slippage: 0.0, Time Penalty: 0.0191472
Step: 50216, Timestamp: 2023-07-03 16:33:25.661566903, Action: 2, Price: 192.06, Shares: 179.0, Reward: -0.2730716049635056, Transaction Cost: 0.25396900496350555, Slippage: 0.0, Time Penalty: 0.0191026
Step: 50220, Timestamp: 2023-07-03 16:33:25.665696098, Action: 1, Price: 192.06, Shares: 132.0, Reward: -0.3772054112417894, Transaction Cost: 0.3591664112417894, Slippage: 0.0, Time Penalty: 0.018039
Step: 50221, Timestamp: 2023-07-03 16:33:25.683144276, Action: 2, Price: 192.07, Shares: 132.0, Reward: -0.27054710496350554, Tr

In [24]:
class TradingEnvironmentwithBlotter_SC1:
    def __init__(self, data, daily_trading_limit, window_size):
        self.data = preprocess_data(data)
        self.daily_trading_limit = daily_trading_limit
        self.window_size = window_size
        self.state_columns = ['price', 'liquidity', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI',
                              'TI', 'LI', 'price_mom','vol_skew', 'relative_vol']
        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = INITIAL_CASH
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        self.portfolio = {'cash': self.balance, 'holdings': {ticker: 0 for ticker in self.data['symbol'].unique()}}
        self.data['RSI'] = calculate_rsi(self.data['price'])
        self.data['pct_change'] = self.data['price'].pct_change()
        self.data['rolling_mean_vol'], self.data['rolling_std_vol'], self.data['rolling_mean_liq'], self.data['rolling_std_liq'] = calculate_vol_and_liquidity(self.data['price'], self.data['liquidity'], self.window_size)

    def step(self):
        row = self.data.iloc[self.current_step]
        current_price = row['price']
        current_time = pd.to_datetime(row['ts_event'])
        current_rsi = row['RSI']
        current_vol = row['pct_change']
        current_liq = row['liquidity']
        mean_vol = row['rolling_mean_vol']
        std_vol = row['rolling_std_vol']
        mean_liq = row['rolling_mean_liq']
        std_liq = row['rolling_std_liq']

        if current_rsi < 30:  # Entry signal based on RSI
            trade_direction = 'BUY'
            trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
            trade_size = (self.portfolio['cash'] * np.random.uniform(0.001, 0.005)) / trade_price
            if self.portfolio['cash'] >= trade_size * trade_price:
                self.portfolio['cash'] -= trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] += trade_size
                trade_status = 'filled'
            else:
                trade_status = 'cancelled'
        elif current_rsi > 70:  # Exit signal based on RSI
            trade_direction = 'SELL'
            if self.portfolio['holdings'][row['symbol']] > 0:
                trade_size = min(self.portfolio['holdings'][row['symbol']], self.portfolio['cash']*np.random.uniform(0.001, 0.005) / current_price)
                trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
                self.portfolio['cash'] += trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] -= trade_size
                trade_status = 'filled'
            else:
                trade_size = 0
                trade_status = 'cancelled'
        else:
            trade_direction = 'HOLD'
            trade_size = 0
            trade_price = current_price
            trade_status = 'skipped'

        if trade_size > 0:
            expected_price = row['ask_px_00']
            actual_price = row['price']
            transaction_time = row['ts_in_delta']
            if self.data.iloc[self.current_step]['predicted_volatility'] == 0:
                volatility = 0.3
            else:
                volatility = self.data.iloc[self.current_step]['predicted_volatility']
            transaction_cost = self._calculate_transaction_cost(row['Volume'], volatility, self.data['Volume'].mean())
            slippage = expected_price - actual_price
            time_penalty = 1000 * transaction_time / 1e9
            reward = - (slippage + time_penalty + transaction_cost)

            self.cumulative_reward += reward
            self.trades.append({
                'step': self.current_step,
                'timestamp': current_time,
                'action': trade_direction,
                'price': trade_price,
                'shares': trade_size,
                'symbol': row['symbol'],
                'reward': reward,
                'transaction_cost': transaction_cost,
                'slippage': slippage,
                'time_penalty': time_penalty
            })



        self.current_step += 1
        if self.current_step >= len(self.data) - 1:
            done=True
            self.current_step = 0

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self):
        print(f'Cumulative reward: {self.cumulative_reward}')
        row = self.data.iloc[self.current_step]
        print(f'Total portfolio value: {self.portfolio["cash"] + self.portfolio["holdings"][row["symbol"]]*row["Close"]}')
        # get trades in a pandas dataframe
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_blotter.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

  and should_run_async(code)


In [25]:
# Filter data for the specified ticker
ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

window_size = 60
daily_trading_limit = 1000
# Create the trading environment
env_SC1 = TradingEnvironmentwithBlotter_SC1(ticker_data, daily_trading_limit=1000, window_size=window_size)  # Daily trading limit of 1000 shares

# Run the environment
cumulative_reward, trades = env_SC1.run()

# Render the results
env_SC1.render()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 49782, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 190.66323776115803, Shares: 0.5061835868081105, Symbol: AAPL, Reward: -0.2511100510771286, Transaction Cost: 0.08031205107712859, Slippage: 0.0, Time Penalty: 0.170798
Step: 49783, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 195.39494054918265, Shares: 0.4190249401946541, Symbol: AAPL, Reward: -0.19619490049635055, Transaction Cost: 0.025396900496350554, Slippage: 0.0, Time Penalty: 0.170798
Step: 49784, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 187.8840147829766, Shares: 0.37698665111122404, Symbol: AAPL, Reward: -0.42476700496350556, Transaction Cost: 0.25396900496350555, Slippage: 0.0, Time Penalty: 0.170798
Step: 49785, Timestamp: 2023-07-03 16:29:54.474421200, Action: SELL, Price: 190.4201037480945, Shares: 0.3108221797573575, Symbol: AAPL, Reward: -0.4312560049634965, Transaction Cost: 0.25396900496350

In [26]:
df_SC1=market_features_df.copy()

  and should_run_async(code)


In [27]:
df_SC1['timestamp']=pd.to_datetime(df_SC1['ts_recv'])

In [28]:
df_SC1.head()

  and should_run_async(code)


Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,TI,LI,price_mom,bid_vola,ask_vola,vol_skew,avg_vol,relative_vol,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP,volatility,predicted_volatility,timestamp
68,1688371272250210772,1688371272250045693,1,2,32,T,B,0,194.26,50,130,165079,380451,194.15,194.26,99,50,1,1,AAPL,194.26,50,194.26,194.15,194.26,50.0,0.328859,0.08,0.013416,0.040988,-0.027571,13.4,3.731343,76.268706,0.043789,0.032184,0.011605,100.0,100.0,-2903.0,194.244265,194.1485,194.052735,0.11,0.094064,0.066819,0.067874,0.09256,95.281211,16.935724,0.183994,83.333333,0.0,194.108611,194.083518,0.562756,0.563167,2023-07-03 08:01:12.250210772
69,1688371272377490201,1688371272377325167,1,2,32,T,N,0,194.28,1,130,165034,380534,194.15,194.29,99,400,1,1,AAPL,194.28,1,194.29,194.15,194.26,7.856729e-09,-0.603206,0.1,0.0,0.046043,-0.046043,13.4,0.074627,78.176224,0.048246,0.035396,0.012849,98.148148,99.382716,-2902.0,194.267003,194.1565,194.045997,0.14,0.117032,0.081455,0.075086,0.094932,95.487853,17.493132,0.161167,85.106383,0.000103,194.113243,194.083553,0.562756,0.0,2023-07-03 08:01:12.377490201
70,1688371272692670751,1688371272692506346,1,2,32,T,A,0,194.15,99,130,164405,380698,194.15,194.29,99,400,1,1,AAPL,194.15,99,194.29,194.15,194.28,3.141057e-07,-0.603206,-0.03,0.0,0.036742,-0.036742,32.0,3.09375,50.027687,0.040817,0.036481,0.004337,72.222222,90.123457,-3001.0,194.267003,194.1565,194.045997,0.14,0.128516,0.093164,0.081578,0.097185,95.679734,15.431361,0.142171,-59.52381,-0.000669,194.114211,194.086876,0.562756,0.0,2023-07-03 08:01:12.692670751
71,1688371273530115653,1688371273529950930,1,2,32,T,N,0,194.22,1,130,164723,381395,194.12,194.29,10,400,1,1,AAPL,194.22,1,194.29,194.12,194.15,1.194108e-09,-0.95122,0.02,0.013416,0.016432,-0.003015,32.0,0.03125,58.659405,0.040116,0.037208,0.002909,59.259259,76.54321,-3000.0,194.273112,194.1625,194.051888,0.17,0.149258,0.108531,0.09042,0.100826,93.753283,13.370593,2.479847,-47.101449,0.00036,194.116923,194.086905,0.562756,0.0,2023-07-03 08:01:13.530115653
72,1688371273720868350,1688371273720703771,1,2,32,T,N,0,194.22,8,130,164579,381539,194.12,194.29,10,400,1,1,AAPL,194.22,8,194.29,194.12,194.22,4.193912e-08,-0.95122,-0.04,0.016432,0.013416,0.003015,31.8,0.251572,58.659405,0.03911,0.037588,0.001522,48.148148,59.876543,-3000.0,194.277912,194.1685,194.059088,0.17,0.159629,0.120825,0.098378,0.104285,91.964437,11.689454,2.168045,-31.914894,0.0,194.1195,194.087137,0.562756,0.0,2023-07-03 08:01:13.720868350


# Slippage Calculations #2

## Slippage Calculations Using Monte Carlo Simulations

In [30]:
def monte_carlo_ask_price_simulation(ask_prices, volatilities, num_simulations=1000):
    num_points = len(ask_prices)
    simulated_ask_prices = np.zeros((num_simulations, num_points))

    for i in range(num_simulations):
        simulated_prices = np.zeros(num_points)
        simulated_prices[0] = ask_prices[0]

        for t in range(1, num_points):
            simulated_prices[t] = simulated_prices[t-1] * np.exp(
                -0.5 * volatilities[t]**2 + volatilities[t] * np.random.normal()
            )

        simulated_ask_prices[i, :] = simulated_prices

    # Return the average of all simulations to smooth out the randomness
    return np.mean(simulated_ask_prices, axis=0)

ask_prices = data['ask_px_00'].values
volatilities = data['volatility'].values

data['simulated_ask_prices'] = monte_carlo_ask_price_simulation(ask_prices, volatilities)

In [39]:
ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()
market_features_df = df_with_indicators[35:]

In [40]:
class TradingEnvironment_SC2(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit):
        super(TradingEnvironment_SC2, self).__init__()
        self.data = data
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI',
                              'TI', 'LI', 'price_mom','vol_skew', 'relative_vol']

        # Initialize balance, shares held, and total shares traded
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        return self._next_observation()

    def _next_observation(self):
        return self.data[self.state_columns].iloc[self.current_step].values

    def step(self, action):
        expected_price = self.data.iloc[self.current_step]['simulated_ask_prices']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0

        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        if action != 0:
            transaction_cost= self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], 0.3, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100*transaction_time/1e9
        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()
        info = {
        'step': self.current_step,
        'action': action,
        'price': actual_price,
        'shares': self.trades[-1]['shares'] if self.trades else 0
    }
        self.current_step += 1

        return obs, reward, done, info

    def _take_action(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        current_time = pd.to_datetime(self.data.iloc[self.current_step]['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1: # and self.total_shares_traded < self.daily_trading_limit:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if(shares_bought>0):
                self.trades.append(trade_info)
        elif action == 2: # and self.total_shares_traded < self.daily_trading_limit:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if(shares_sold>0):
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        slippage = expected_price - actual_price
        time_penalty = 100*transaction_time/1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        # download all trades in a pandas dataframe using .csv
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_ppo.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

In [41]:
# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Create the trading environment
env_SC2 = TradingEnvironment_SC2(ticker_data, daily_trading_limit)  # Adjust window_size if needed

# Define the best hyperparameters
best_hyperparameters = {'learning_rate': 0.0009931989008886031,'n_steps': 512,'batch_size': 128, 'gamma': 0.9916829193042708,'clip_range': 0.21127653449387027,'n_epochs': 6} # type: ignore

# Create the RL model with the best hyperparameters
model_SC2 = PPO('MlpPolicy', env_SC2, verbose=1, **best_hyperparameters)

# Train the model
model_SC2.learn(total_timesteps=10000)

# Save the model
# model.save("trading_agent")

# Evaluate the model
obs = env_SC2.reset()
for _ in range(len(ticker_data)):
    action, _states = model_SC2.predict(obs)
    obs, rewards, done, info = env_SC2.step(action)
    if done:
        break

# Render the final state
env_SC2.render()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 53376, Timestamp: 2023-07-03 16:50:03.129063515, Action: 2, Price: 192.23, Shares: 120.0, Reward: 191.9593322950365, Transaction Cost: 0.25396900496350555, Slippage: -192.23, Time Penalty: 0.0166987
Step: 53377, Timestamp: 2023-07-03 16:50:03.129092995, Action: 2, Price: 192.23, Shares: 137.0, Reward: 192.18765889950362, Transaction Cost: 0.025396900496350554, Slippage: -192.23, Time Penalty: 0.0169442
Step: 53378, Timestamp: 2023-07-03 16:50:03.612858781, Action: 1, Price: 192.22, Shares: 176.0, Reward: 192.16751025887584, Transaction Cost: 0.035916641124178944, Slippage: -192.22, Time Penalty: 0.0165731
Step: 53379, Timestamp: 2023-07-03 16:50:04.714056483, Action: 1, Price: 192.225, Shares: 85.0, Reward: 192.18302929950363, Transaction Cost: 0.025396900496350554, Slippage: -192.225, Time Penalty: 0.0165738
Step: 53380, Timestamp: 2023-07-03 16:50:05.102427312, Action: 2, Price: 192.225, Shares: 222.0, Reward: 192

In [42]:
class TradingEnvironmentwithBlotter_SC2:
    def __init__(self, data, daily_trading_limit, window_size):
        self.data = preprocess_data(data)
        self.daily_trading_limit = daily_trading_limit
        self.window_size = window_size
        self.state_columns = ['price', 'liquidity', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI',
                              'TI', 'LI', 'price_mom','vol_skew', 'relative_vol']
        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = INITIAL_CASH
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        self.portfolio = {'cash': self.balance, 'holdings': {ticker: 0 for ticker in self.data['symbol'].unique()}}
        self.data['RSI'] = calculate_rsi(self.data['price'])
        self.data['pct_change'] = self.data['price'].pct_change()
        self.data['rolling_mean_vol'], self.data['rolling_std_vol'], self.data['rolling_mean_liq'], self.data['rolling_std_liq'] = calculate_vol_and_liquidity(self.data['price'], self.data['liquidity'], self.window_size)

    def step(self):
        row = self.data.iloc[self.current_step]
        current_price = row['price']
        current_time = pd.to_datetime(row['ts_event'])
        current_rsi = row['RSI']
        current_vol = row['pct_change']
        current_liq = row['liquidity']
        mean_vol = row['rolling_mean_vol']
        std_vol = row['rolling_std_vol']
        mean_liq = row['rolling_mean_liq']
        std_liq = row['rolling_std_liq']

        if current_rsi < 30:  # Entry signal based on RSI
            trade_direction = 'BUY'
            trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
            trade_size = (self.portfolio['cash'] * np.random.uniform(0.001, 0.005)) / trade_price
            if self.portfolio['cash'] >= trade_size * trade_price:
                self.portfolio['cash'] -= trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] += trade_size
                trade_status = 'filled'
            else:
                trade_status = 'cancelled'
        elif current_rsi > 70:  # Exit signal based on RSI
            trade_direction = 'SELL'
            if self.portfolio['holdings'][row['symbol']] > 0:
                trade_size = min(self.portfolio['holdings'][row['symbol']], self.portfolio['cash']*np.random.uniform(0.001, 0.005) / current_price)
                trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
                self.portfolio['cash'] += trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] -= trade_size
                trade_status = 'filled'
            else:
                trade_size = 0
                trade_status = 'cancelled'
        else:
            trade_direction = 'HOLD'
            trade_size = 0
            trade_price = current_price
            trade_status = 'skipped'

        if trade_size > 0:
            expected_price = row['simulated_ask_prices']
            actual_price = row['price']
            transaction_time = row['ts_in_delta']
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self.data['Volume'].mean())
            slippage = expected_price - actual_price
            time_penalty = 1000 * transaction_time / 1e9
            reward = - (slippage + time_penalty + transaction_cost)

            self.cumulative_reward += reward
            self.trades.append({
                'step': self.current_step,
                'timestamp': current_time,
                'action': trade_direction,
                'price': trade_price,
                'shares': trade_size,
                'symbol': row['symbol'],
                'reward': reward,
                'transaction_cost': transaction_cost,
                'slippage': slippage,
                'time_penalty': time_penalty
            })



        self.current_step += 1
        if self.current_step >= len(self.data) - 1:
            done=True
            self.current_step = 0

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self):
        print(f'Cumulative reward: {self.cumulative_reward}')
        row = self.data.iloc[self.current_step]
        print(f'Total portfolio value: {self.portfolio["cash"] + self.portfolio["holdings"][row["symbol"]]*row["Close"]}')
        # get trades in a pandas dataframe
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_blotter.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

  and should_run_async(code)


In [43]:
# Filter data for the specified ticker
ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

window_size = 60
daily_trading_limit = 1000
# Create the trading environment
env_SC2 = TradingEnvironmentwithBlotter_SC2(ticker_data, daily_trading_limit=1000, window_size=window_size)  # Daily trading limit of 1000 shares

# Run the environment
cumulative_reward, trades = env_SC2.run()

# Render the results
env_SC2.render()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 49782, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 187.74471674667004, Shares: 0.20986231747201056, Symbol: AAPL, Reward: 191.86888994892288, Transaction Cost: 0.08031205107712859, Slippage: -192.12, Time Penalty: 0.170798
Step: 49783, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 192.2288644876356, Shares: 0.15802915362475423, Symbol: AAPL, Reward: 191.92380509950365, Transaction Cost: 0.025396900496350554, Slippage: -192.12, Time Penalty: 0.170798
Step: 49784, Timestamp: 2023-07-03 16:29:54.454344746, Action: SELL, Price: 183.11491387035113, Shares: 0.19566495976499654, Symbol: AAPL, Reward: 191.69523299503652, Transaction Cost: 0.25396900496350555, Slippage: -192.12, Time Penalty: 0.170798
Step: 49785, Timestamp: 2023-07-03 16:29:54.474421200, Action: SELL, Price: 182.623879394851, Shares: 0.1972483950817583, Symbol: AAPL, Reward: 191.69874399503652, Transaction Cost: 0.2539690

In [44]:
df_SC2=market_features_df.copy()

  and should_run_async(code)


In [45]:
df_SC2['timestamp']=pd.to_datetime(df_SC2['ts_recv'])

In [46]:
df_SC2.head()

  and should_run_async(code)


Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Volume,High,Low,Open,TI,LI,price_mom,bid_vola,ask_vola,vol_skew,avg_vol,relative_vol,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP,volatility,predicted_volatility,simulated_ask_prices,timestamp
68,1688371272250210772,1688371272250045693,1,2,32,T,B,0,194.26,50,130,165079,380451,194.15,194.26,99,50,1,1,AAPL,194.26,50,194.26,194.15,194.26,50.0,0.328859,0.08,0.013416,0.040988,-0.027571,13.4,3.731343,76.268706,0.043789,0.032184,0.011605,100.0,100.0,-2903.0,194.244265,194.1485,194.052735,0.11,0.094064,0.066819,0.067874,0.09256,95.281211,16.935724,0.183994,83.333333,0.0,194.108611,194.083518,0.562756,0.563167,79.937432,2023-07-03 08:01:12.250210772
69,1688371272377490201,1688371272377325167,1,2,32,T,N,0,194.28,1,130,165034,380534,194.15,194.29,99,400,1,1,AAPL,194.28,1,194.29,194.15,194.26,7.856729e-09,-0.603206,0.1,0.0,0.046043,-0.046043,13.4,0.074627,78.176224,0.048246,0.035396,0.012849,98.148148,99.382716,-2902.0,194.267003,194.1565,194.045997,0.14,0.117032,0.081455,0.075086,0.094932,95.487853,17.493132,0.161167,85.106383,0.000103,194.113243,194.083553,0.562756,0.0,99.571871,2023-07-03 08:01:12.377490201
70,1688371272692670751,1688371272692506346,1,2,32,T,A,0,194.15,99,130,164405,380698,194.15,194.29,99,400,1,1,AAPL,194.15,99,194.29,194.15,194.28,3.141057e-07,-0.603206,-0.03,0.0,0.036742,-0.036742,32.0,3.09375,50.027687,0.040817,0.036481,0.004337,72.222222,90.123457,-3001.0,194.267003,194.1565,194.045997,0.14,0.128516,0.093164,0.081578,0.097185,95.679734,15.431361,0.142171,-59.52381,-0.000669,194.114211,194.086876,0.562756,0.0,87.000329,2023-07-03 08:01:12.692670751
71,1688371273530115653,1688371273529950930,1,2,32,T,N,0,194.22,1,130,164723,381395,194.12,194.29,10,400,1,1,AAPL,194.22,1,194.29,194.12,194.15,1.194108e-09,-0.95122,0.02,0.013416,0.016432,-0.003015,32.0,0.03125,58.659405,0.040116,0.037208,0.002909,59.259259,76.54321,-3000.0,194.273112,194.1625,194.051888,0.17,0.149258,0.108531,0.09042,0.100826,93.753283,13.370593,2.479847,-47.101449,0.00036,194.116923,194.086905,0.562756,0.0,89.392061,2023-07-03 08:01:13.530115653
72,1688371273720868350,1688371273720703771,1,2,32,T,N,0,194.22,8,130,164579,381539,194.12,194.29,10,400,1,1,AAPL,194.22,8,194.29,194.12,194.22,4.193912e-08,-0.95122,-0.04,0.016432,0.013416,0.003015,31.8,0.251572,58.659405,0.03911,0.037588,0.001522,48.148148,59.876543,-3000.0,194.277912,194.1685,194.059088,0.17,0.159629,0.120825,0.098378,0.104285,91.964437,11.689454,2.168045,-31.914894,0.0,194.1195,194.087137,0.562756,0.0,116.468743,2023-07-03 08:01:13.720868350
