In [None]:
# This notebook is for creating and testing method on curating datasets on stock trading data for offline reinforcement learning with decision transformer model
# This will first create a custom dataset class that can provide sliding window data
# Then it will use those datasets to create gym environments and sample state, action, reward which then store as a replay buffer
# Group these replay buffers and export as a dataset

In [1]:
# import helper function for getting stock data
from getstock import get_stock_data_yf_between_with_indicators

# get stock data with technical indicators
stock_name = 'AAPL'
start_date = '2019-01-01'
end_date = '2021-12-31'
interval = '1d'
indicators = ['volume_obv', 'trend_macd', 'momentum_rsi']

stockdata = get_stock_data_yf_between_with_indicators(stock_name, start_date, end_date, interval, indicators)


[*********************100%***********************]  1 of 1 completed


  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


In [17]:
cust_signals = list(stockdata.columns[5:])

['volume_obv', 'trend_macd', 'momentum_rsi']

In [2]:
# create a custom dataset class that can provide sliding window data
import torch
from torch.utils.data import Dataset

class SlidingDataset(Dataset):
    def __init__(self, data, window_size, stride=1, transform=None):
        self.data = data
        self.window_size = window_size
        self.stride = stride
        self.transform = transform

    def __len__(self):
        return (len(self.data) - self.window_size) // self.stride + 1

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        start = idx * self.stride
        end = start + self.window_size
        sample = self.data[start:end]

        if self.transform:
            sample = self.transform(sample)

        return sample

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from datasets import Dataset as huggingfaceDataset

In [4]:
# create a sliding dataset and its dataloader
window_size = 30
stride = 1
dataset = SlidingDataset(stockdata, window_size, stride)



In [18]:
list1 = ['a', 'b', 'c']
list2 = ['d', 'e', 'f'] + list1

print(list2)

['d', 'e', 'f', 'a', 'b', 'c']


In [5]:
# customise the trading environment signal features
def process_data(df,window_size, frame_bound, price_feature, signal_features):
    start = frame_bound[0] - window_size
    end = frame_bound[1]
    prices = df.loc[:, price_feature].to_numpy()[start:end]
    signals = df.loc[:, signal_features].to_numpy()[start:end]
    return prices, signals

# create a gym environment from loading sliding dataset and sample state, action, reward
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv

# define the custom environment with signal features
class CustomStocksEnv(StocksEnv):
    def __init__(self, df, window_size, frame_bound, price_feature, signal_features, **kwargs):
        self._prices, self._signal_features = process_data(df, window_size, frame_bound, price_feature, signal_features)
        super().__init__(**kwargs)

    def _process_data(self):
        return self._prices, self._signal_features

envwindow_size = 1


In [6]:
# create dictionary with state, action, reward as keys and store the values in a list
# then create a huggingface dataset from the dictionary
# then save the huggingface dataset to a file
import numpy as np
from datasets import Dataset as huggingfaceDataset
dict = {'state': [], 'action': [], 'reward': []}
price_feature = ['Low']
signal_features = ['Low', 'Volumn'] + cust_signals

# repeat the process for 5 episodes
for i in range(5):
    # loop through the sliding dataset
    for i in range(len(dataset)):
        # sample a state, action, reward from the environment until the episode is done
        while True:
            
            env = CustomStocksEnv(df=dataset[i], window_size=envwindow_size, frame_bound=(0, len(dataset[i])), price_feature=price_feature, signal_features=signal_features)
            env.reset()
            state = env._get_obs()
            action = env.action_space.sample()
            nextstate, reward, done, _ = env.step(action)
            # store state, action, reward, nextstate, done in dictionary
            dict['state'].append(state)
            dict['action'].append(action)
            dict['reward'].append(reward)
            print(_)
            if done:
                print('done')
                break

# create huggingface dataset from dictionary
dataset = huggingfaceDataset.from_dict(dict)



KeyError: "['SMA', 'RSI', 'OBV'] not in index"