In [None]:
import torch
import time
# Pick the compute device once: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
from stock_analyzer import (csv_loader, data_ext)
from stock_analyzer.models import StockLSTM

# Load the raw data set used by the rest of the notebook; the result is later
# indexed by column name and sliced by row.
# NOTE(review): the arguments look like (data directory, ticker symbol, bar
# interval, year) — confirm against csv_loader.load_symbol.
raw_data = csv_loader.load_symbol(
    './data/raw/',
    'TSLA',
    '1min',
    '2020',
)

In [None]:
from torch.utils.data import (TensorDataset, DataLoader)
def create_dataloader(data, lookback, bs, shuffle=True, target_device=None):
    """Build a DataLoader of sliding-window samples for next-step prediction.

    Sample ``i`` pairs the input window ``data[i:i+lookback, :]`` with the
    target ``data[i+lookback, 0]``, i.e. column 0 of the row immediately
    following the window.

    Args:
        data: 2-D array-like (rows are consecutive steps, columns are
            features) that ``torch.as_tensor`` accepts, e.g. a NumPy array.
        lookback: Number of consecutive rows in each input window (>= 1).
        bs: Batch size passed to the DataLoader.
        shuffle: Whether the DataLoader reshuffles samples every epoch.
            Defaults to True (the previous hard-coded behaviour); pass False
            for validation/test loaders to get a deterministic order.
        target_device: Device the tensors are moved to. When None, the
            module-level ``device`` is used (the previous behaviour).

    Returns:
        A DataLoader yielding ``(x, y)`` float32 batches with shapes
        ``(batch, lookback, n_features)`` and ``(batch, 1)``.

    Raises:
        ValueError: If ``lookback`` is smaller than 1, ``data`` is not 2-D,
            or ``data`` has too few rows to form a single sample.
    """
    if lookback < 1:
        raise ValueError("lookback must be >= 1, got %r" % (lookback,))

    # Convert once up front. Building a tensor from a Python list of ndarrays
    # (the previous approach) is the slow path PyTorch warns about.
    series = torch.as_tensor(data).float()
    if series.dim() != 2:
        raise ValueError("data must be 2-D (steps, features), got %d-D" % series.dim())

    n_samples = series.shape[0] - lookback
    if n_samples < 1:
        raise ValueError(
            "need more than %d rows to build a sample, got %d" % (lookback, series.shape[0])
        )

    if target_device is None:
        target_device = device

    # torch.stack copies the window views into one contiguous tensor.
    x = torch.stack([series[i:i + lookback] for i in range(n_samples)])
    # Keep a trailing dimension of size 1; clone so the targets never alias `data`.
    y = series[lookback:, 0:1].clone()

    ds = TensorDataset(x.to(target_device), y.to(target_device))
    return DataLoader(ds, shuffle=shuffle, batch_size=bs)

In [None]:
from sklearn.preprocessing import MinMaxScaler
# One scaler object per split is kept so each name stays usable on its own,
# but ALL of them are fitted on the training rows only (see below).
train_scaler = MinMaxScaler()
val_scaler = MinMaxScaler()
test_scaler = MinMaxScaler()

seq_length = 20
batch_size = 512

train_count, val_count, test_count = data_ext.split_counts(raw_data.shape[0], seq_length, train_per=0.7, val_per=0.2)

# 'close' is required and must be at index 0 (column 0 is used as the
# prediction target when the loaders are built); everything else is optional.
features = ['close', 'volume']
feature_values = raw_data[features].values

# Split boundaries. Each slice keeps one extra row (+1), so neighbouring
# splits share a single boundary row, exactly as before.
val_end = train_count + val_count
test_end = val_end + test_count
train_values = feature_values[:train_count + 1]
val_values = feature_values[train_count:val_end + 1]
test_values = feature_values[val_end:test_end + 1]

# Learn the min/max from the training rows only and reuse that mapping for the
# validation and test rows. Fitting a separate scaler on each split (the
# previous behaviour) leaks held-out statistics into preprocessing and puts
# every split on a different scale from the one the model was trained on.
train_data = train_scaler.fit_transform(train_values)
val_data = val_scaler.fit(train_values).transform(val_values)
test_data = test_scaler.fit(train_values).transform(test_values)

train_loader = create_dataloader(train_data, seq_length, batch_size)
val_loader = create_dataloader(val_data, seq_length, batch_size)
test_loader = create_dataloader(test_data, seq_length, batch_size)

In [None]:
import os
import numpy as np

# --- Checkpoint bookkeeping -------------------------------------------------
ckpt_dir = 'stock'
min_valid_loss = float('inf')  # best (lowest) mean validation loss seen so far
os.makedirs(ckpt_dir, exist_ok=True)  # no-op when the directory already exists

# --- Hyper-parameters -------------------------------------------------------
num_epochs = 5000
learning_rate = 0.005
print_every = 100  # progress is printed every this many epochs (when no checkpoint is written)

input_size = len(features) #number of features
hidden_size = 2 #number of features in hidden state
num_layers = 1 #number of stacked lstm layers

num_classes = 1 #number of output classes

# --- Model, loss and optimizer ----------------------------------------------
lstm1 = StockLSTM.StockLSTM(num_classes, input_size, hidden_size, num_layers, seq_length, device) #our lstm class
criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate)
lstm1.to(device)
lstm1.train()

# --- Training loop ----------------------------------------------------------
for epoch in range(1, num_epochs+1):
    # Training pass: one optimizer step per batch.
    train_losses = []
    for x_tensor, y_tensor in train_loader:
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run.
        out = lstm1(x_tensor)
        loss = criterion(out, y_tensor)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    # Validation pass: eval mode and no autograd graph, since nothing is
    # back-propagated here.
    lstm1.eval()
    val_losses = []
    with torch.no_grad():
        for x_tensor, y_tensor in val_loader:
            prediction = lstm1(x_tensor)
            val_losses.append(criterion(prediction, y_tensor).item())

    # Mean of the per-batch losses, computed once per epoch.
    train_loss = np.mean(train_losses)
    val_loss = np.mean(val_losses)

    # Checkpointing logic: save whenever the validation loss improves.
    # The file name only encodes the thousand-epoch bucket, so a newer best
    # within the same bucket overwrites the older file.
    if val_loss < min_valid_loss:
        min_valid_loss = val_loss
        ckpt_tag = str(epoch // 1000) + 'xxx'
        torch.save(lstm1.state_dict(), ckpt_dir + '/stocklearner_e' + ckpt_tag + '_lr' + str(learning_rate) + '.pth')
        print("Saving epoch: %d, Training Loss : %1.20f,  Validation Loss: %1.20f ... saving checkpoint %s" % (epoch, train_loss, val_loss, ckpt_tag))

    elif epoch % print_every == 0:
        # Progress report only; nothing is saved on this branch.
        print("Epoch: %d, Training Loss : %1.20f,  Validation Loss: %1.20f" % (epoch, train_loss, val_loss))

    # Back to training mode for the next epoch (and for code after the loop).
    lstm1.train()
