In [1]:
import pandas as pd
import numpy as np
import helpers.functions as helpers

### Load Data

In [2]:
# Read the raw candle data from disk and preview the first rows.
CSV_PATH = "out.csv"
df = pd.read_csv(CSV_PATH)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2022-08-04 05:50:00,23780.0,23780.0,23198.0,23198.0,84.921
1,2022-08-04 05:51:00,23228.2,23900.0,23198.0,23900.0,84.941
2,2022-08-04 05:52:00,23852.2,23898.0,23198.0,23898.0,132.174
3,2022-08-04 05:53:00,23898.0,23900.0,23198.0,23228.2,74.257
4,2022-08-04 05:54:00,23520.4,23946.0,23198.0,23653.4,83.717


### Preprocess data

Here we take the data from the source (in this case a CSV file), then add or remove columns as needed for the model to work properly.

In [3]:
# Scale
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

## preprocess

# Number of rows ahead of each sample to use as the prediction target.
# The CSV preview shows one row per minute, so 1 row is presumably
# 1 minute into the future -- raise this value to look further ahead.
FUTURE_PRICE_ROWS = 1

# Keep the preprocessed frame under its own name so that `data` is only ever
# the NumPy matrix that the following cells pass to helpers.split_np_matrix.
data_df = helpers.preprocess_df(df, future_n_rows=FUTURE_PRICE_ROWS, scaler=scaler)
data = data_df.to_numpy()

# Inverse scale
# print(data[0,:2])
# print(scaler.inverse_transform([data[0,:2]]))


### Split and load data

Split data and load them into dataloaders.

Dataloaders are a great way to abstract and handle data for the training process: instead of working with raw matrices, you work with objects (dataloaders).

In [4]:
# Partition the preprocessed matrix into train and test subsets
# (test_percent=5 is forwarded to the helper), then report each shape.
train_data, test_data = helpers.split_np_matrix(data, test_percent=5)
for subset in (train_data, test_data):
    print(subset.shape)


(9500, 3)
(500, 3)


In [5]:
## Dataloader
from helpers.dataloaders import StockDataloader
from torch.utils.data import DataLoader
# Wrap each split in the project dataset class.
train_dataset = StockDataloader(train_data)
test_dataset = StockDataloader(test_data)

BATCH_SIZE = 1

# Each loader must be built from its own dataset; the test loader has to read
# from the held-out split, otherwise evaluation runs on training samples.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

### Set up model and training loop

Here we instantiate the model. In this case we use a simple recurrent (LSTM) model, but any other architecture or combination of layers could be used instead.

We also set up the training process. This will be the same most of the time, so it's easier to let a library handle it for us.

Training data is uploaded to [wandb.ai](https://wandb.ai/). In there you can visualize the results

In [6]:
import models.recursive_nets as model

# Build the network. The repr printed by this cell shows how the positional
# arguments end up in the layers (LSTM(2, 64) followed by a Linear head).
RNNModel = model.LSTM(2, 64, 60, 1)

# Pull one batch from the training loader so its tensors can be inspected.
first_batch = next(iter(train_dataloader))
X, Y = first_batch

RNNModel

LSTM(
  (rnn): LSTM(2, 64, batch_first=True)
  (linear): Linear(in_features=3840, out_features=1, bias=True)
)

In [7]:
from torchest.trainer import Trainer
import torch
from tqdm.notebook import tqdm

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

class RNNTrainer(Trainer):
    """
    `Trainer` subclass that overrides `train`.

    NOTE(review): the `train` body below only sets up the epoch loop; it never
    iterates over `data_train`, computes a loss, or uses `data_dev`,
    `data_test` or `early_stoping` -- it looks unfinished. Confirm before use.
    """
    def train(self, 
        data_train: DataLoader,
        data_dev: DataLoader | None = None,
        data_test: DataLoader | None = None,
        epochs: int = 100,
        early_stoping: float = 0.0
    ) -> None:
        """
        Move the model to `device` and loop over `epochs` with a progress bar.

        Inputs
        ------
        data_train: training DataLoader (not read by the current body)
        data_dev: optional validation DataLoader (not read by the current body)
        data_test: optional test DataLoader (not read by the current body)
        epochs: number of iterations of the outer loop
        early_stoping: not read by the current body
        """
        # Move the model's parameters to the selected device
        self.model.to(device)
        with tqdm(range(epochs), unit="epoch") as t:
            for epoch in t:
                # Put the model in training mode at the start of every epoch
                self.model.train()
                # Fresh, empty list for this epoch; nothing is appended to it here
                train_losses = list()


In [25]:
import torch.nn as nn
import torch.optim as optim
import wandb

# Training Loop
def train(model: nn.Module,
    data: DataLoader,
    optimizer: optim.Optimizer,
    loss_fn: nn.Module,
    epochs: int = 10) -> None:
    """
    Trains the RNN model for the specified number of epochs and prints the
    mean batch loss after every epoch.

    For each batch ``(X, Y)`` the model output for ``X`` is compared against
    ``Y[:, -1]`` with ``loss_fn``. The model and the batch tensors are moved
    to the notebook-level ``device`` before use.

    Inputs
    ------
    model: RNN model to train (should inherit from nn.Module)
    data: Iterable DataLoader yielding (X, Y) batches
    optimizer: Optimizer used to update the model parameters after each batch
    loss_fn: Loss function called as loss_fn(prediction, target)
    epochs: Number of epochs to train the model
    """
    model.to(device)

    with tqdm(range(epochs), unit="epoch") as t:
        for epoch in t:
            model.train()
            epoch_losses = []
            for X, Y in data:
                # 1. Send tensors to the same device as the model
                X, Y = X.to(device), Y.to(device)

                # 2. Clear gradients left over from the previous batch
                model.zero_grad()

                # 3. Forward pass on the whole input; the target is the last
                #    entry along dim 1 of Y
                out = model(X)
                loss = loss_fn(out, Y[:, -1])

                # 4. Compute gradients
                loss.backward()

                # 5. Adjust learnable parameters; clip the gradient norm
                #    first to guard against exploding gradients
                nn.utils.clip_grad_norm_(model.parameters(), 3)
                optimizer.step()

                # The loss covers a single target step, so it is logged as-is;
                # scaling it by the sequence length would under-report it.
                epoch_losses.append(loss.detach().item())

            epoch_loss = torch.tensor(epoch_losses).mean()
            print(f'=> epoch: {epoch + 1}, loss: {epoch_loss}')

In [26]:
# Hyperparameters for this run
LEARNING_RATE = 1e-2
EPOCHS = 20

# Mean-squared-error objective optimised with Adam over all model parameters
loss_function = nn.MSELoss()
optimizer = optim.Adam(RNNModel.parameters(), lr=LEARNING_RATE)

train(
    model=RNNModel,
    data=train_dataloader,
    optimizer=optimizer,
    loss_fn=loss_function,
    epochs=EPOCHS,
)

  0%|          | 0/20 [00:00<?, ?epoch/s]

tensor(2.0590, grad_fn=<MseLossBackward0>)
tensor(2.1346, grad_fn=<MseLossBackward0>)
tensor(4.2560, grad_fn=<MseLossBackward0>)
tensor(0.5145, grad_fn=<MseLossBackward0>)
tensor(0.7653, grad_fn=<MseLossBackward0>)
tensor(0.0139, grad_fn=<MseLossBackward0>)
tensor(2.6274, grad_fn=<MseLossBackward0>)
tensor(0.8232, grad_fn=<MseLossBackward0>)
tensor(1.2226, grad_fn=<MseLossBackward0>)
tensor(0.2685, grad_fn=<MseLossBackward0>)
tensor(0.0135, grad_fn=<MseLossBackward0>)
tensor(1.1600, grad_fn=<MseLossBackward0>)
tensor(3.5354, grad_fn=<MseLossBackward0>)
tensor(0.6457, grad_fn=<MseLossBackward0>)
tensor(4.4695, grad_fn=<MseLossBackward0>)
tensor(1.7715, grad_fn=<MseLossBackward0>)
tensor(1.3488, grad_fn=<MseLossBackward0>)
tensor(5.6651, grad_fn=<MseLossBackward0>)
tensor(5.1716, grad_fn=<MseLossBackward0>)
tensor(2.6560, grad_fn=<MseLossBackward0>)
tensor(1.0213, grad_fn=<MseLossBackward0>)
tensor(1.0522, grad_fn=<MseLossBackward0>)
tensor(0.1353, grad_fn=<MseLossBackward0>)
tensor(1.37

KeyboardInterrupt: 

### Save model