In [4]:
"""Module installation"""
# !pip install gymnasium stable-baselines3 pyarrow

"""Module imports"""
import math
import ta.momentum
import ta.trend
import pandas as pd
import datetime as dt
import numpy as np
import gymnasium as gym
# import stable_baselines3 as sb

from enum import Enum

In [5]:
"""Mount a Google Drive folder to access uploaded data files: Avoid re-uploading multple times"""
ROOT_DATA_FOLDER = "./data-out" #data
TIME_COLUMN = "time" #timestamp
TIME_UNIT= None #"s"

In [192]:
"""Load the data to be used for the training of the agent"""
SYMBOL = "#AAPL_otc"
PERIOD = 60
data = pd.read_csv(f"{ROOT_DATA_FOLDER}/{SYMBOL}_{PERIOD}_1.csv", engine="pyarrow", usecols=[TIME_COLUMN, "high", "low", "close"])
data

Unnamed: 0,time,high,low,close
0,2019-07-10 07:51:00,201.16,200.81,200.87
1,2019-07-10 07:52:00,201.04,200.65,200.73
2,2019-07-10 07:53:00,201.05,200.54,201.05
3,2019-07-10 07:54:00,201.21,200.72,201.21
4,2019-07-10 07:55:00,201.34,201.06,201.25
...,...,...,...,...
3298,2019-07-12 14:49:00,201.96,201.54,201.54
3299,2019-07-12 14:50:00,201.83,201.44,201.58
3300,2019-07-12 14:51:00,201.71,201.17,201.71
3301,2019-07-12 14:52:00,201.80,201.21,201.27


In [202]:
"""Perform basic formatting to the data to make sure the data is loaded with the proper data types."""
index = pd.to_datetime(data[TIME_COLUMN], unit=TIME_UNIT)
rsi = ta.momentum.rsi(close=data["close"], window=13)/100
adx = ta.trend.adx(high=data["high"], low=data["low"], close=data["close"], window=15)/100
close = data["close"].pct_change()
rsi, adx

(0            NaN
 1            NaN
 2            NaN
 3            NaN
 4            NaN
           ...   
 3298    0.698397
 3299    0.702689
 3300    0.716874
 3301    0.610133
 3302    0.622585
 Name: rsi, Length: 3303, dtype: float64,
 0       0.000000
 1       0.000000
 2       0.000000
 3       0.000000
 4       0.000000
           ...   
 3298    0.325130
 3299    0.336533
 3300    0.337566
 3301    0.339967
 3302    0.341133
 Name: adx, Length: 3303, dtype: float64)

In [203]:
"""Date time based signals."""
day_of_week = index.apply(lambda x: round(x.day_of_week/6, 3))
hour_of_day = index.apply(lambda x: round(x.hour/24, 3))
day_of_week.name = "day_of_week"
hour_of_day.name = "hour_of_day"

data_norm = pd.concat([index, day_of_week, hour_of_day, close, rsi, adx], axis=1)
data_norm.set_index(TIME_COLUMN, inplace=True)
data_norm.dropna(inplace=True)
data_norm

Unnamed: 0_level_0,day_of_week,hour_of_day,close,rsi,adx
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-07-10 08:03:00,0.333,0.333,-0.003039,0.309768,0.000000
2019-07-10 08:04:00,0.333,0.333,-0.001649,0.261483,0.000000
2019-07-10 08:05:00,0.333,0.333,-0.000150,0.257530,0.000000
2019-07-10 08:06:00,0.333,0.333,-0.001452,0.222329,0.000000
2019-07-10 08:07:00,0.333,0.333,-0.000301,0.215720,0.000000
...,...,...,...,...,...
2019-07-12 14:49:00,0.667,0.583,-0.001387,0.698397,0.325130
2019-07-12 14:50:00,0.667,0.583,0.000198,0.702689,0.336533
2019-07-12 14:51:00,0.667,0.583,0.000645,0.716874,0.337566
2019-07-12 14:52:00,0.667,0.583,-0.002181,0.610133,0.339967


In [210]:
class Actions():
  Hold = 0
  Put = 1
  Call = 2

In [245]:
class TradingEnv(gym.Env):
  metadata = dict(render_modes=['human'])

  def __init__(self, data, capital=100, period=PERIOD, strike_time=120, render_mode=None):
    assert render_mode is None or render_mode in self.metadata['render_modes']
    self.render_mode = render_mode
    """Setup trading environment"""
    self.capital = capital
    self.initial_capital = self.capital
    self.period = period
    self.strike_time = strike_time
    self.data = data.copy()
    self.data_size = len(data)
    self.window_size = int(self.strike_time/self.period)
    self.profit_ratio = .92
    self.balance_at_risk = .1
    """Prepare signal features"""
    self.signal_features, self.signal_outcomes = self._process_data()
    self.shape = (self.window_size, self.signal_features.shape[1])
    """Setup action and observation space"""
    self.action_space = gym.spaces.Discrete(3)
    self.observation_space = gym.spaces.Box(
        low=-1, high=1, shape=self.shape, dtype=np.float32,
    )
    """Risk management factors"""
    self.MAXIMUM_DAILY_ORDERS = 5
    self.TARGET_ACCURACY = 1/(self.profit_ratio + 1)
    self.MAXIMUM_LOSS = 3
    self.current_step = None
    self.daily_orders = None
    self.total_profit = None
    self.total_reward = None
    self.total_predictions = None
    self.correct_predictions = None
    self.current_accuracy = None
    self.loss_streak = None
    self.open_orders = None
    self.order_history = None
    self.has_ended = None


  def _set_new_values(self):
    self.capital = self.initial_capital
    self.current_step = 0
    self.current_risk = 0
    self.current_order = None
    self.order_history = []
    self.daily_orders = 0
    self.total_reward = 0
    self.total_predictions = 0
    self.correct_predictions = 0
    self.current_accuracy = 0
    self.current_day = self.signal_features[0][-5]
    self.loss_streak = 0
    self.has_ended = False


  def _set_new_risk(self):
      self.current_risk = round(self.capital*self.balance_at_risk, 2)


  def _process_data(self):
    self.data.loc[:, "next_close"] = self.data["close"].shift(-self.window_size)
    self.data.loc[:, "trend"] = self.data.apply(lambda x: Actions.Hold if x["next_close"] == 0 else (Actions.Call if x["next_close"] > 0 else Actions.Put), axis=1)
    self.data.reset_index(names="time", inplace=True)
    self.data.loc[:, "delta"] = self.data["time"].diff(1)
    self.data["delta"].fillna(pd.Timedelta(seconds=self.period))
    self.data.set_index("time", inplace=True)
    self.data.dropna(inplace=True)
    """Determine the amount of data that can be used for the training"""
    runnable_data_count = self.data_size if self.data_size%self.window_size==0 else math.floor(self.data_size/self.window_size)*self.window_size
    runnable_data = self.data.copy()[:runnable_data_count]
    """Compile group based signals flattened into one single array"""
    signals = []
    outcomes = []
    for i_index in range(0, runnable_data_count, self.window_size):
      j_index = i_index + self.window_size
      signal_group = runnable_data.iloc[i_index:j_index+1].copy()
      if len(signal_group) == self.window_size:
        """No points to predict beyond this point."""
        break
      for signal_item in signal_group["delta"]:
        if signal_item != pd.Timedelta(seconds=self.period):
          continue
      signal_group.drop(signal_group.iloc[self.window_size].name, inplace=True)
      if i_index >= runnable_data.shape[0]:
        continue
      outcomes.append(runnable_data.iloc[i_index]["trend"])
      signals.append(signal_group[["day_of_week", "hour_of_day", "close", "rsi", "adx"]].values)
    signals = np.array(signals)
    num_samples = signals.shape[0]
    sample_height, sample_width = signals.shape[1:]
    return signals.reshape(num_samples, sample_height * sample_width).astype(np.float32), np.array(outcomes).astype(np.int32)


  def _get_observation(self):
    self._set_new_risk()
    return self.signal_features[self.current_step:self.current_step+1]


  def _get_info(self, action: int):
    return dict(actual_outcome=self.signal_outcomes[self.current_step], predicted_outcome=action, total_reward=self.total_reward, current_capital=self.capital)


  def _get_actual_outcome(self):
    return self.signal_outcomes[self.current_step]


  def _get_reward(self, predicted_outcome, actual_outcome):
    reward = 0
    if self.daily_orders > self.MAXIMUM_DAILY_ORDERS:
      reward -= 0.1
    if self.current_accuracy < self.TARGET_ACCURACY:
      reward -= 0.1
    if predicted_outcome == actual_outcome:
      reward += 1
    else:
      reward = -1
    return reward


  def reset(self):
    self._set_new_values()
    return self._get_observation()


  def step(self, prediction):
    actual_outcome = self._get_actual_outcome()
    if prediction != Actions.Hold:
      self.current_position = prediction
      self.order_history.append(self.current_position)
      self.total_predictions += 1
      if prediction == actual_outcome:
        self.correct_predictions += 1
        self.capital += self.current_risk*self.profit_ratio
      else:
        self.capital -= self.current_risk
      self.current_accuracy = self.correct_predictions/self.total_predictions
      self.daily_orders += 1
    """Evaulate the action to determine the reward"""
    step_reward = self._get_reward(prediction, actual_outcome)
    self.total_reward += step_reward
    """Prepare for the next step"""
    self.current_step += 1
    if self.current_step == len(self.signal_features)-1 or self.capital < 0.5:
      self.has_ended = True
    
    info = self._get_info(prediction)
    next_observation = self._get_observation()
    """Check if we have a day change progression"""
    if next_observation[0][-5] != self.current_day:
      self.daily_orders = 0
    return next_observation, step_reward, self.has_ended, info

In [232]:
env = TradingEnv(data_norm)

In [244]:
obs = env.reset()
while True:
    obs, step_reward, has_ended, info = env.step(env.action_space.sample())
    if has_ended:
        break
    print(info)

[[ 3.3300000e-01  3.3300000e-01 -1.4518148e-03  2.2232893e-01
   0.0000000e+00  3.3300000e-01  3.3300000e-01 -3.0081219e-04
   2.1571995e-01  0.0000000e+00]]
{'actual_outcome': 1, 'predicted_outcome': 2, 'total_reward': -1, 'current_capital': 90.0}
[[ 0.333       0.333      -0.00100301  0.19480835  0.          0.333
   0.333       0.00040161  0.2272683   0.        ]]
{'actual_outcome': 2, 'predicted_outcome': 2, 'total_reward': -2, 'current_capital': 81.0}
[[0.333      0.333      0.00140506 0.3297232  0.         0.333
  0.333      0.00085187 0.38348806 0.        ]]
{'actual_outcome': 2, 'predicted_outcome': 1, 'total_reward': -3, 'current_capital': 72.9}
[[3.3300000e-01 3.3300000e-01 3.0541231e-03 5.3002888e-01 0.0000000e+00
  3.3300000e-01 3.3300000e-01 2.9949087e-04 5.4163826e-01 0.0000000e+00]]
{'actual_outcome': 1, 'predicted_outcome': 2, 'total_reward': -2.1, 'current_capital': 79.6068}
[[ 3.330000e-01  3.330000e-01 -2.495010e-04  5.298228e-01  0.000000e+00
   3.330000e-01  3.3300

In [242]:
pd.DataFrame(env.signal_features.reshape((-1,5)))

Unnamed: 0,0,1,2,3,4
0,0.333,0.333,-0.001649,0.261483,0.000000
1,0.333,0.333,-0.000150,0.257530,0.000000
2,0.333,0.333,-0.001452,0.222329,0.000000
3,0.333,0.333,-0.000301,0.215720,0.000000
4,0.333,0.333,-0.001003,0.194808,0.000000
...,...,...,...,...,...
3281,0.667,0.583,0.001246,0.671572,0.271343
3282,0.667,0.583,0.000398,0.682079,0.280711
3283,0.667,0.583,0.001443,0.717560,0.291581
3284,0.667,0.583,0.002733,0.770245,0.308823
