In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import time
from configs import TIMEZONE, LOG_FILE_NAME, set_logger
from datetime import datetime, timedelta
import pytz
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from alpaca.trading.client import TradingClient
from alpaca.trading.requests import MarketOrderRequest
from alpaca.trading.enums import OrderSide, TimeInForce
from alpaca.trading.requests import GetAssetsRequest
from alpaca.data.historical import StockHistoricalDataClient, CryptoHistoricalDataClient
from alpaca.data.requests import StockLatestQuoteRequest, StockBarsRequest, CryptoLatestQuoteRequest
from alpaca.data.requests import CryptoBarsRequest
from alpaca.trading.models import Order
from alpaca.data.timeframe import TimeFrame
from my_secrets import ALPACA_API_BASE_URL, PAPER_API_ID, PAPER_SECRET_KEY
import logging
import plotly.express as px
import plotly.graph_objects as go
set_logger()

In [3]:
import torch
import torch.nn as nn

In [4]:
from Trade_Class import Stock_Trader, Crypto_Trader
from Market_Monitor import Market_Monitor
from ALGO_crossover import bars_df_filter_dates, add_sma_columns, add_sma_crossovers

In [5]:
stock_trader = Stock_Trader(PAPER_API_ID, PAPER_SECRET_KEY, paper=True)
monitor = Market_Monitor(stock_trader.trading_client, TIMEZONE)

In [6]:
start = datetime(year=2015, month=1, day=1, hour=0, minute=0, second=0)
end = datetime(year=2023, month=2, day=1, hour=0, minute=0, second=0)
bars_df = stock_trader.get_bars('AAPL', start=start, end=end, time_resolution='day')
bars_df.reset_index(inplace=True)
bars_df.sort_values(by=['timestamp'], ascending=True, inplace=True)
print(bars_df.shape)
display(bars_df.head())

(1804, 9)


Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap
0,AAPL,2015-12-01 05:00:00+00:00,118.75,118.81,116.86,117.34,34852374.0,187129.0,117.75676
1,AAPL,2015-12-02 05:00:00+00:00,117.05,118.11,116.08,116.28,33385643.0,180616.0,117.151198
2,AAPL,2015-12-03 05:00:00+00:00,116.55,116.79,114.22,115.2,41560785.0,245330.0,115.434888
3,AAPL,2015-12-04 05:00:00+00:00,115.29,119.25,115.11,119.03,57776977.0,307788.0,118.18729
4,AAPL,2015-12-07 05:00:00+00:00,118.98,119.86,117.81,118.28,32080754.0,190809.0,118.509111


In [7]:
def prep_RNN_data(bars_df, col='close', test_pct=0.25, scaler=None):
    array = bars_df[col].values
    if scaler!=None:
        array = scaler.fit_transform(array.reshape(-1, 1))
    train_size = len(array) - int(len(array)*test_pct)
    train_array = array[:train_size]
    test_array = array[train_size:]
    return train_array, test_array
    

In [8]:
SEQUENCE_LENGTH = 50
NUM_EXAMPLES = 949
NUM_LAYERS = 2
HIDDEN_SIZE = 256
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
TEST_PCT = 0.25
BATCH_SIZE = 32
EPOCHS = 100

In [9]:
train_array, test_array = prep_RNN_data(bars_df, col='close', test_pct=TEST_PCT, scaler=MinMaxScaler())
len(train_array), len(test_array)

(1353, 451)

In [10]:
class RNNDataset(torch.utils.data.Dataset):
    def __init__(self, data, seq_len):
        """
        Takes in `data` which is a numpy array and `seq_len` which is an int.
        """
        self.data = data
        self.seq_len = seq_len
        self.data = torch.from_numpy(data).float()

    def __len__(self):
        return (len(self.data) - self.seq_len) - 1
    def __getitem__(self, index):
        return self.data[index : index+self.seq_len], self.data[index+self.seq_len]

In [11]:
train_dataset = RNNDataset(train_array, SEQUENCE_LENGTH)
test_dataset = RNNDataset(test_array, SEQUENCE_LENGTH)

len(train_dataset), len(test_dataset)

(1302, 400)

In [12]:
train_loader = torch.utils.data.DataLoader(train_dataset, BATCH_SIZE, drop_last=True, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, BATCH_SIZE, drop_last=True)

In [49]:
x, y = train_dataset[0]
x.shape, y.shape

(torch.Size([50, 1]), torch.Size([1]))

In [50]:
x = x.unsqueeze(0)
x.shape #creating a "batch" of 1

torch.Size([1, 50, 1])

Here we have `torch.Size([1, 50, 1])`. <br>We will set `batch_first=True` in the `nn.RNN` such that we have the data provided as (*batch*,*seq*,*feature*).<br>
So in our case we have *batch* size 1, *seq* size 50, *feature* size 1. The `input_size` to the RNN should be the same as *feature* size.

In [51]:
rnn = nn.RNN(input_size=1, hidden_size=200, num_layers=1, batch_first=True)
fc = nn.Linear(200, 1)

In [52]:
rnn_out, hn = rnn(x)

In [55]:
rnn_out.shape

torch.Size([1, 50, 200])

In [54]:
hn.shape

torch.Size([1, 1, 200])

In [45]:
class LSTM_model(nn.Module):
    def __init__(self, in_size, hidden_size, num_layers, batch_size, device):
        super(LSTM_model, self).__init__()
        self.in_size = in_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.device = device
        self.lstm = nn.LSTM(input_size=self.in_size, hidden_size=self.hidden_size, num_layers=self.num_layers)
        self.fc = nn.Linear(self.hidden_size, 1)

    def forward(self, x, hn, cn):
        out, (hn, cn) = self.lstm(x, (hn, cn))
        final_out = self.fc(out[-1])
        return final_out, hn, cn
    
    def predict(self, x):
        hn, cn = self.init()
        final_out = self.fc(out[-1])
        return final_out
    
    def init(self):
        h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(self.device)
        c0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(self.device)
        return h0.to(self.device), c0.to(self.device)

In [46]:
class RNN(nn.Module):
    def __init__(self, in_size, num_layers, hidden_size, device):
        super(RNN, self).__init__()
        self.in_size= in_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.device = device

        self.rnn = nn.RNN(input_size=self.in_size, hidden_size=self.hidden_size, num_layers=self.num_layers)
        self.fc = nn.Linear(self.hidden_size*self.in_size, 1)
        

    def forward(self, x):
        #assert x.shape == (self.in_size,)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(self.device)
        out, _ = self.rnn(x, h0)
        out = out.reshape(out.shape[0], -1)
        out = self.fc(out)
        return out


        





In [52]:
model = LSTM_model(1, HIDDEN_SIZE, NUM_LAYERS, BATCH_SIZE, DEVICE) #so I think 1 gets passed as the input size which would be like the number of features. 
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model.to(DEVICE)

LSTM_model(
  (lstm): LSTM(1, 256, num_layers=2)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

In [56]:
def pytorch_fit(model, train_loader, optimizer, loss_fn):
    hn, cn = model.init()
    model.train()
    for i, batch in enumerate(train_loader):
        x, y = batch
        x = x.to(DEVICE)
        y = y.to(DEVICE)
        out, hn, cn = model(x.reshape(SEQUENCE_LENGTH, BATCH_SIZE, 1), hn, cn)
        loss = loss_fn(out.reshape(BATCH_SIZE), y)
        hn = hn.detach()
        cn = cn.detach()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i == len(train_loader)-1:
            print(f"Train Loss({i}): {loss:.4f}")


In [57]:
def pytorch_test(model, test_loader, loss_fn):
    hn, cn = model.init()
    model.eval()
    for i, batch in enumerate(test_loader):
        x, y = batch
        x = x.to(DEVICE)
        y = y.to(DEVICE)
        out, hn, cn = model(x.reshape(SEQUENCE_LENGTH, BATCH_SIZE, 1), hn, cn)
        loss = loss_fn(out.reshape(BATCH_SIZE), y)
        if i == len(test_loader)-1:
            print(f"Test Loss({i}): {loss:.4f}")

In [59]:
for epoch in range(EPOCHS):
    print(f"Epoch: {epoch}")
    pytorch_fit(model, train_loader, optimizer, loss_fn)
    pytorch_test(model, test_loader, loss_fn)

Epoch: 0


  return F.l1_loss(input, target, reduction=self.reduction)


Train Loss(39): 0.1207
Test Loss(11): 0.0212
Epoch: 1
Train Loss(39): 0.1570
Test Loss(11): 0.0433
Epoch: 2
Train Loss(39): 0.1148
Test Loss(11): 0.0910
Epoch: 3
Train Loss(39): 0.1298
Test Loss(11): 0.0659
Epoch: 4
Train Loss(39): 0.1282
Test Loss(11): 0.0349
Epoch: 5
Train Loss(39): 0.1307
Test Loss(11): 0.1361
Epoch: 6
Train Loss(39): 0.2266
Test Loss(11): 0.1831
Epoch: 7
Train Loss(39): 0.1514
Test Loss(11): 0.0533
Epoch: 8
Train Loss(39): 0.1263
Test Loss(11): 0.0522
Epoch: 9
Train Loss(39): 0.1461
Test Loss(11): 0.0225
Epoch: 10
Train Loss(39): 0.2020
Test Loss(11): 0.2466
Epoch: 11
Train Loss(39): 0.1940
Test Loss(11): 0.0795
Epoch: 12
Train Loss(39): 0.1606
Test Loss(11): 0.1267
Epoch: 13
Train Loss(39): 0.1962
Test Loss(11): 0.0338
Epoch: 14
Train Loss(39): 0.2705
Test Loss(11): 0.1215
Epoch: 15
Train Loss(39): 0.1646
Test Loss(11): 0.0396
Epoch: 16
Train Loss(39): 0.1682
Test Loss(11): 0.1207
Epoch: 17
Train Loss(39): 0.2259
Test Loss(11): 0.1391
Epoch: 18
Train Loss(39): 0.1