In [1]:
import datetime as dt
import os
import pickle
import time

import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from indicators import get_rsi, get_momentum, get_ppo, get_so, get_bbp, get_fibonacci, get_macd
from stock_image_dataset import StockImageDataset
from utils import get_stock_data, clean_stock_data

In [2]:
# Hyperparameters

# The number of symbols must divide the batch size evenly to prevent peeking into the future
symbol = 'IBM'
# Single-stock alternative: symbols = ['IBM']

symbols = [
    'XRX', 'IBM', 'HP', 'BMY',
    'PFE', 'UNH', 'LLY', 'JPM',
    'AXP', 'AIG', 'NKE', 'PEP',
    'HD', 'GE', 'CAT', 'HON',
]

# Indicator settings: `window` is passed to every indicator, `std` to get_bbp
# and `rp` to get_fibonacci
window = 20
std = 2
rp = 0.618

# Image dimensions handed to StockImageDataset
height = 9
width = 9

# Date ranges for the three datasets
train_start_date = dt.datetime(2000, 1, 1)
train_end_date = dt.datetime(2005, 12, 31)
test_start_date = dt.datetime(2008, 1, 1)
test_end_date = dt.datetime(2008, 12, 31)
val_start_date = dt.datetime(2016, 1, 1)
val_end_date = dt.datetime(2016, 12, 31)

batch_size = 16

# Fail fast if the batch-size constraint described above is violated
assert batch_size % len(symbols) == 0, (
    f"batch_size ({batch_size}) must be a multiple of the number of symbols ({len(symbols)})"
)

In [3]:
# Create Training Dataset
# The windowed ("w_") frames start before train_start_date; the indicators are
# computed on them and then re-aligned to the training index further down.
dates = pd.date_range(train_start_date, train_end_date)
mod_start_date = train_start_date - dt.timedelta(days=(height*width+height+width))
mod_dates = pd.date_range(mod_start_date, train_end_date)

price_columns = ['Adj Close', 'Open', 'High', 'Low', 'Close']

# Load the stock data for standard and windowed
(adj_stock_data, open_stock_data, high_stock_data,
 low_stock_data, close_stock_data) = [
    get_stock_data(symbols, dates, col_name=column) for column in price_columns
]
(w_adj_stock_data, w_open_stock_data, w_high_stock_data,
 w_low_stock_data, w_close_stock_data) = [
    get_stock_data(symbols, mod_dates, col_name=column) for column in price_columns
]

# Back fill and forward fill
(train_adj_stock_data, open_stock_data, high_stock_data,
 low_stock_data, close_stock_data) = [
    clean_stock_data(frame)
    for frame in (adj_stock_data, open_stock_data, high_stock_data,
                  low_stock_data, close_stock_data)
]
(w_adj_stock_data, w_open_stock_data, w_high_stock_data,
 w_low_stock_data, w_close_stock_data) = [
    clean_stock_data(frame)
    for frame in (w_adj_stock_data, w_open_stock_data, w_high_stock_data,
                  w_low_stock_data, w_close_stock_data)
]

# Setup all of the indicators
rsi = get_rsi(w_adj_stock_data, window=window)
momentum = get_momentum(w_adj_stock_data, window=window)
ppo = get_ppo(w_adj_stock_data, window=window)
so = get_so(w_high_stock_data, w_low_stock_data, w_close_stock_data, window=window)
bb = get_bbp(w_adj_stock_data, window=window, std=std)
fibonacci = get_fibonacci(w_adj_stock_data, rp, window)
macd = get_macd(w_adj_stock_data, window // 2, window)

# Adjust index to desired date range.
# dropna() returns a new frame, so the indicator frames above are left
# untouched and re-running this cell gives the same result.
indicator_list = [
    indicator.dropna().reindex(train_adj_stock_data.index)
    for indicator in (rsi, momentum, ppo, so, bb, fibonacci, macd)
]

price_list = [train_adj_stock_data, open_stock_data, high_stock_data, low_stock_data, close_stock_data]
train_dataset = StockImageDataset(train_adj_stock_data, symbols, price_list, indicator_list, height, width)

In [4]:
# Create Test Dataset
# The windowed ("w_") frames start before test_start_date; the indicators are
# computed on them and then re-aligned to the test index further down.
# NOTE(review): the training cell looks back height*width+height+width days,
# while this cell uses a fixed 45 days - confirm the shorter look-back is intended.
dates = pd.date_range(test_start_date, test_end_date)
mod_start_date = test_start_date - dt.timedelta(days=45)
mod_dates = pd.date_range(mod_start_date, test_end_date)

price_columns = ['Adj Close', 'Open', 'High', 'Low', 'Close']

# Load the stock data for standard and windowed
(adj_stock_data, open_stock_data, high_stock_data,
 low_stock_data, close_stock_data) = [
    get_stock_data(symbols, dates, col_name=column) for column in price_columns
]
(w_adj_stock_data, w_open_stock_data, w_high_stock_data,
 w_low_stock_data, w_close_stock_data) = [
    get_stock_data(symbols, mod_dates, col_name=column) for column in price_columns
]

# Back fill and forward fill
(test_adj_stock_data, open_stock_data, high_stock_data,
 low_stock_data, close_stock_data) = [
    clean_stock_data(frame)
    for frame in (adj_stock_data, open_stock_data, high_stock_data,
                  low_stock_data, close_stock_data)
]
(w_adj_stock_data, w_open_stock_data, w_high_stock_data,
 w_low_stock_data, w_close_stock_data) = [
    clean_stock_data(frame)
    for frame in (w_adj_stock_data, w_open_stock_data, w_high_stock_data,
                  w_low_stock_data, w_close_stock_data)
]

# Setup all of the indicators
rsi = get_rsi(w_adj_stock_data, window=window)
momentum = get_momentum(w_adj_stock_data, window=window)
ppo = get_ppo(w_adj_stock_data, window=window)
so = get_so(w_high_stock_data, w_low_stock_data, w_close_stock_data, window=window)
bb = get_bbp(w_adj_stock_data, window=window, std=std)
fibonacci = get_fibonacci(w_adj_stock_data, rp, window)
macd = get_macd(w_adj_stock_data, window // 2, window)

# Adjust index to desired date range.
# dropna() returns a new frame, so the indicator frames above are left
# untouched and re-running this cell gives the same result.
indicator_list = [
    indicator.dropna().reindex(test_adj_stock_data.index)
    for indicator in (rsi, momentum, ppo, so, bb, fibonacci, macd)
]

price_list = [test_adj_stock_data, open_stock_data, high_stock_data, low_stock_data, close_stock_data]

test_dataset = StockImageDataset(test_adj_stock_data, symbols, price_list, indicator_list, height, width)

In [5]:
# Create Validation Dataset
# The windowed ("w_") frames start before val_start_date; the indicators are
# computed on them and then re-aligned to the validation index further down.
# NOTE(review): the training cell looks back height*width+height+width days,
# while this cell uses a fixed 45 days - confirm the shorter look-back is intended.
dates = pd.date_range(val_start_date, val_end_date)
mod_start_date = val_start_date - dt.timedelta(days=45)
mod_dates = pd.date_range(mod_start_date, val_end_date)

price_columns = ['Adj Close', 'Open', 'High', 'Low', 'Close']

# Load the stock data for standard and windowed
(adj_stock_data, open_stock_data, high_stock_data,
 low_stock_data, close_stock_data) = [
    get_stock_data(symbols, dates, col_name=column) for column in price_columns
]
(w_adj_stock_data, w_open_stock_data, w_high_stock_data,
 w_low_stock_data, w_close_stock_data) = [
    get_stock_data(symbols, mod_dates, col_name=column) for column in price_columns
]

# Back fill and forward fill
(val_adj_stock_data, open_stock_data, high_stock_data,
 low_stock_data, close_stock_data) = [
    clean_stock_data(frame)
    for frame in (adj_stock_data, open_stock_data, high_stock_data,
                  low_stock_data, close_stock_data)
]
(w_adj_stock_data, w_open_stock_data, w_high_stock_data,
 w_low_stock_data, w_close_stock_data) = [
    clean_stock_data(frame)
    for frame in (w_adj_stock_data, w_open_stock_data, w_high_stock_data,
                  w_low_stock_data, w_close_stock_data)
]

# Setup all of the indicators
rsi = get_rsi(w_adj_stock_data, window=window)
momentum = get_momentum(w_adj_stock_data, window=window)
ppo = get_ppo(w_adj_stock_data, window=window)
so = get_so(w_high_stock_data, w_low_stock_data, w_close_stock_data, window=window)
bb = get_bbp(w_adj_stock_data, window=window, std=std)
fibonacci = get_fibonacci(w_adj_stock_data, rp, window)
macd = get_macd(w_adj_stock_data, window // 2, window)

# Adjust index to desired date range.
# dropna() returns a new frame, so the indicator frames above are left
# untouched and re-running this cell gives the same result.
indicator_list = [
    indicator.dropna().reindex(val_adj_stock_data.index)
    for indicator in (rsi, momentum, ppo, so, bb, fibonacci, macd)
]

price_list = [val_adj_stock_data, open_stock_data, high_stock_data, low_stock_data, close_stock_data]

val_dataset = StockImageDataset(val_adj_stock_data, symbols, price_list, indicator_list, height, width)

In [6]:
# Convert to PyTorch Datasets
print(train_dataset.images.shape)
print(test_dataset.images.shape)
print(val_dataset.images.shape)

# All three loaders are created with shuffle=False and the shared batch_size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

train_filename = "dataset/dqn_train_data.pkl"
test_filename = "dataset/dqn_test_data.pkl"
val_filename = "dataset/dqn_val_data.pkl"

train_stock_filename = "dataset/train_stock.pkl"
test_stock_filename = "dataset/test_stock.pkl"
val_stock_filename = "dataset/val_stock.pkl"

# Create the output directory up front so a missing folder does not abort the
# cell with FileNotFoundError after the datasets have already been built
os.makedirs("dataset", exist_ok=True)

# Pickle each loader and each adjusted-close frame to its own file
pickle_targets = [
    (train_filename, train_loader),
    (test_filename, test_loader),
    (val_filename, val_loader),
    (train_stock_filename, train_adj_stock_data),
    (test_stock_filename, test_adj_stock_data),
    (val_stock_filename, val_adj_stock_data),
]
for target_path, payload in pickle_targets:
    with open(target_path, 'wb') as file:
        pickle.dump(payload, file)

torch.Size([22832, 12, 9, 9])
torch.Size([2752, 12, 9, 9])
torch.Size([2736, 12, 9, 9])
