In [1]:
# This notebook is for creating and testing method on curating datasets on stock trading data for offline reinforcement learning with decision transformer model
# This will first create a custom dataset class that can provide sliding window data
# Then it will use those datasets to create gym environments and sample state, action, reward which then store as a replay buffer
# Group these replay buffers and export as a dataset

In [3]:
# import helper function for getting stock data
from getstock import get_stock_data_yf_between_with_indicators
# import time library
from datetime import datetime, timedelta
# get stock data with technical indicators
stock_name = 'AAPL'

# period of data to get
period = 600
# start_date in format 'YYYY-MM-DD'
start_date = '2019-01-01'
# calculate end date being x days after start date
start_date_obj = datetime.strptime(start_date, '%Y-%m-%d')
end_date_obj = start_date_obj + timedelta(days=period)
end_date = end_date_obj.strftime('%Y-%m-%d')


interval = '1d'
indicators = ['volume_obv', 'trend_macd', 'momentum_rsi']

stockdata = get_stock_data_yf_between_with_indicators(stock_name, start_date, end_date, interval, indicators)


[*********************100%***********************]  1 of 1 completed


  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


In [10]:
print(end_date)

2020-08-23


In [4]:
cust_signals = list(stockdata.columns[5:])

In [5]:
# create a custom dataset class that can provide sliding window data
import torch
from torch.utils.data import Dataset

class SlidingDataset(Dataset):
    def __init__(self, data, window_size, stride=1, transform=None):
        self.data = data
        self.window_size = window_size
        self.stride = stride
        self.transform = transform

    def __len__(self):
        return (len(self.data) - self.window_size) // self.stride + 1

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        start = idx * self.stride
        end = start + self.window_size
        sample = self.data[start:end]

        if self.transform:
            sample = self.transform(sample)

        return sample

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# create a sliding dataset and its dataloader
window_size = 30
stride = 1
dataset = SlidingDataset(stockdata, window_size, stride)



In [12]:
len(dataset[0])

30

In [7]:
# customise the trading environment signal features
def process_data(df,window_size, frame_bound, price_feature, signal_features):
    start = frame_bound[0] - window_size
    end = frame_bound[1]
    prices = df.loc[:, price_feature].to_numpy()[start:end]
    signals = df.loc[:, signal_features].to_numpy()[start:end]
    return prices, signals

# create a gym environment from loading sliding dataset and sample state, action, reward
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv

# define the custom environment with signal features
class CustomStocksEnv(StocksEnv):
    def __init__(self, prices, signal_features, **kwargs):
        self._prices = prices
        self._signal_features = signal_features
        super().__init__(**kwargs)

    def _process_data(self):
        return self._prices, self._signal_features

envwindow_size = 0


In [8]:
# create dictionary with state, action, reward as keys and store the values in a list
# then create a huggingface dataset from the dictionary
# then save the huggingface dataset to a file
import numpy as np
from datasets import Dataset as huggingfaceDataset

price_feature = ['Low']
signal_features = ['Low', 'Volume'] + cust_signals
data = {'data':[]}
# repeat the process for 5 episodes
for j in range(5):
    # create list for storing state, action, reward
    dict = {'state': [], 'action': [], 'reward': [], 'timestep': []}
    # loop through the sliding dataset
    for i in range(len(dataset)):
        prices, signals = process_data(dataset[i], envwindow_size, (envwindow_size, window_size), price_feature, signal_features)
        env = CustomStocksEnv(prices, signals, df=dataset[i], window_size=envwindow_size, frame_bound=(envwindow_size, window_size))
        state = env.reset()
        state = state.tolist()
        dict['state'].append(state[0])
        timestep = 0
        # sample a state, action, reward from the environment until the episode is done
        while True:
            action = env.action_space.sample()
            nextstate, reward, done, _ = env.step(action)
            # get reward item from the reward numpy array
            if type(reward) is not int:
                reward = reward.item()
            # store state, action, reward, nextstate, done in dictionary
            dict['action'].append(action)
            dict['reward'].append(reward)
            dict['timestep'].append(timestep)
            timestep += 1

            print(_)
            if done:
                print('done')
                break
            else:
                nextstate = nextstate.tolist()
                dict['state'].append(nextstate[0])
        # close the environment
        env.close()
    # store the state, action, reward list in the dictionary
    data['data'].append(dict)


{'total_reward': 0.0, 'total_profit': 1.0, 'position': 1}
{'total_reward': array([0.45000076], dtype=float32), 'total_profit': array([0.9975366], dtype=float32), 'position': 0}
{'total_reward': array([0.45000076], dtype=float32), 'total_profit': array([0.9975366], dtype=float32), 'position': 0}
{'total_reward': array([0.45000076], dtype=float32), 'total_profit': array([0.9975366], dtype=float32), 'position': 0}
{'total_reward': array([0.45000076], dtype=float32), 'total_profit': array([0.9975366], dtype=float32), 'position': 0}
{'total_reward': array([0.45000076], dtype=float32), 'total_profit': array([0.9975366], dtype=float32), 'position': 0}
{'total_reward': array([0.45000076], dtype=float32), 'total_profit': array([0.9975366], dtype=float32), 'position': 0}
{'total_reward': array([0.45000076], dtype=float32), 'total_profit': array([0.9975366], dtype=float32), 'position': 1}
{'total_reward': array([0.6575012], dtype=float32), 'total_profit': array([0.98808914], dtype=float32), 'posi

In [9]:
file_name = stock_name + '_replaybuffer.json'
# output the dictionary to a json file
import json
with open(file_name, 'w') as fp:
    json.dump(data, fp)

In [25]:
i = 0
dict = {'state': [], 'action': [], 'reward': [], 'timestep': []}
print(envwindow_size)
print(window_size)

prices, signals = process_data(dataset[i], envwindow_size, (envwindow_size, window_size), price_feature, signal_features)
env = CustomStocksEnv(prices, signals, df=dataset[i], window_size=envwindow_size, frame_bound=(envwindow_size, window_size))

print(len(prices))
# check the size of environment
print(len(env.prices))

state = env.reset()
state = state.tolist()
dict['state'].append(state[0])
timestep = 0
# sample a state, action, reward from the environment until the episode is done
while True:
    action = env.action_space.sample()
    nextstate, reward, done, _ = env.step(action)
    # get reward item from the reward numpy array
    if type(reward) is not int:
        reward = reward.item()
    # store state, action, reward, nextstate, done in dictionary
    dict['action'].append(action)
    dict['reward'].append(reward)
    dict['timestep'].append(timestep)
    timestep += 1
    #print(_)
    if done:
        print('done')
        break
    else:
        nextstate = nextstate.tolist()
        dict['state'].append(nextstate[0])

1
30
30
30
done


In [15]:
print(timestep)

28
