# Workflow for PyTorch Model Construction

### Tensor Attributes

- shape, dtype, device


In [None]:
import torch  # imported here so this first cell runs without relying on a later import cell

# Build a 3x4 tensor of random values and print it together with the three
# attributes named above: shape, element dtype, and the device holding the data.
sample_tensor = torch.rand(3, 4)
print(sample_tensor, [sample_tensor.shape, sample_tensor.dtype, sample_tensor.device])

- NumPy arrays default to `int64` / `float64`
- PyTorch tensors default to `float32`
- NumPy -> PyTorch: `tensor = torch.from_numpy(array).type(torch.float32)`
- PyTorch -> NumPy: `array = tensor.numpy().astype("float64")`

In [None]:
# Converting with .numpy() keeps the tensor's dtype; chain .astype("float64")
# onto the conversion when a float64 array is wanted instead.
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
(tensor.dtype, numpy_tensor.dtype)

### Device-agnostic code
https://pytorch.org/docs/main/notes/cuda.html#device-agnostic-code

In [None]:
import argparse
import torch

# One command-line switch: --disable-cuda forces CPU even when CUDA is present.
parser = argparse.ArgumentParser(description='PyTorch Example')
parser.add_argument('--disable-cuda', action='store_true',
                    help='Disable CUDA')
# parse_known_args() skips arguments this parser does not declare. When the
# code runs inside a notebook kernel (or any other host process), sys.argv
# holds that host's own options, and parse_args() would exit with an
# "unrecognized arguments" error.
args = parser.parse_known_args()[0]
# Select CUDA only when it is both allowed and actually available.
if not args.disable_cuda and torch.cuda.is_available():
    args.device = torch.device('cuda')
else:
    args.device = torch.device('cpu')

### Data

- EDA
- Preprocessing
- Train/Validation/Test Split
- Visualize

## Model

### Model Setup

In [None]:
import torch
from torchinfo import summary
import torch.nn as nn
import torch.utils.data as data
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Choose the device-type string used by the rest of the notebook.
# torch.accelerator is only present in newer PyTorch releases; when the
# namespace is missing, fall back to the plain CUDA availability check.
if hasattr(torch, "accelerator"):
    device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Hyperparameter table shared by the model definition and the training code.
hp = {}

# Model architecture
hp.update({
    "input_size": 1,    # passed to nn.LSTM as input_size
    "hidden_size": 64,  # passed to nn.LSTM as hidden_size and to nn.Linear as in_features
    "num_layers": 2,    # passed to nn.LSTM as num_layers
    "output_dim": 1,    # passed to nn.Linear as out_features
    "dropout": 0.2,     # NOTE(review): no code visible in this file reads this key - confirm
    "h0": None,         # initial hidden state given to the model in train(); None means zeros
    "c0": None,         # initial cell state given to the model in train(); None means zeros
})

# Loss / optimizer setup
hp.update({
    "loss": nn.MSELoss(),
    "optimizer": "Adam",
    "learning_rate": 0.001,
})

# Training loop
hp.update({
    "batch_size": 20,
    "num_epochs": 15,
})


class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Layer sizes are read from the module-level ``hp`` dictionary at
    construction time.
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=hp["input_size"], hidden_size=hp["hidden_size"], num_layers=hp["num_layers"],
                            batch_first=True)
        self.linear = nn.Linear(in_features=hp["hidden_size"], out_features=hp["output_dim"])

    def forward(self, x, h0=None, c0=None):
        """Run one batch through the network.

        Args:
            x: Batch-first input (the LSTM is built with ``batch_first=True``),
                so ``x.size(0)`` is taken as the batch size.
            h0: Optional initial hidden state; zeros are used when ``None``.
            c0: Optional initial cell state; zeros are used when ``None``.

        Returns:
            Tuple ``(out, hn, cn)``: the linear layer's output for the last
            time step, then the final hidden and cell states from the LSTM.
        """
        # Size the default states from the layer that was actually built, and
        # allocate them with x.new_zeros so they share x's device and dtype
        # instead of depending on a module-level device variable.
        state_shape = (self.lstm.num_layers, x.size(0), self.lstm.hidden_size)
        # Each state is defaulted on its own, so a caller-supplied h0 (or c0)
        # is kept even when the other one is omitted.
        if h0 is None:
            h0 = x.new_zeros(state_shape)
        if c0 is None:
            c0 = x.new_zeros(state_shape)
        output, (hn, cn) = self.lstm(x, (h0, c0))
        # Only the features of the final time step feed the output layer.
        out = self.linear(output[:, -1, :])
        return out, hn, cn

# Instantiate the network, move it to the selected device, then hand it to
# torchinfo's summary().
model = LSTM()
model = model.to(device)
summary(model)

### Train & Evaluate Model

In [None]:
# Both the loss and the optimizer come from the hyperparameter table. The
# optimizer class is looked up by name in torch.optim, so hp["optimizer"]
# ("Adam") is honoured rather than being shadowed by a hard-coded class.
loss_fn = hp["loss"]
optimizer = getattr(torch.optim, hp["optimizer"])(model.parameters(), lr=hp["learning_rate"])

In [None]:
def validate(model, val_loader, loss_fn):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        model: Module whose call returns a 3-tuple; only its first element
            (the predictions) is used here.
        val_loader: Sized iterable yielding ``(x_batch, y_batch)`` tensor pairs.
        loss_fn: Callable ``loss_fn(preds, y_batch)`` returning a scalar tensor.

    Returns:
        Tuple ``(avg_loss, rmse, mae)``: the mean of the per-batch losses, and
        the root-mean-square / mean-absolute error over all collected samples.

    Raises:
        ValueError: If ``val_loader`` contains no batches.
    """
    num_batches = len(val_loader)
    if num_batches == 0:
        # Fail with a clear message instead of a ZeroDivisionError further down.
        raise ValueError("val_loader is empty; cannot compute validation metrics")

    # Run on the device the model's parameters live on, so the function does
    # not depend on a module-level device variable. A parameter-free model
    # leaves the batches where they are.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    model.eval()
    total_loss = 0
    all_preds = []
    all_targets = []
    with torch.inference_mode():
        for x_batch, y_batch in val_loader:
            if model_device is not None:
                x_batch, y_batch = x_batch.to(model_device), y_batch.to(model_device)
            preds, _, _ = model(x_batch)
            loss = loss_fn(preds, y_batch)
            total_loss += loss.item()
            all_preds.append(preds.detach().cpu().numpy())
            all_targets.append(y_batch.detach().cpu().numpy())

    # Mean of batch losses (each batch weighs the same regardless of its size).
    avg_loss = total_loss / num_batches
    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)
    errors = all_targets - all_preds
    rmse = np.sqrt(np.mean(errors ** 2))
    mae = np.mean(np.abs(errors))
    return avg_loss, rmse, mae

def plot_loss_curves(train_losses, val_losses):
    """Plot the training and validation loss histories on one figure and show it.

    Args:
        train_losses: Sequence of training losses, one entry per epoch.
        val_losses: Sequence of validation losses, one entry per epoch.
    """
    plt.figure(figsize=(8, 6))
    # Draw both curves in the same order as before: training first.
    for curve_label, history in (("Train Loss", train_losses), ("Validation Loss", val_losses)):
        plt.plot(history, label=curve_label)
    plt.title("Loss Curves Over Epochs")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()


def train():
    """Train the module-level ``model`` and checkpoint its best epoch.

    Runs ``hp["num_epochs"]`` epochs over ``train_loader``, validates after
    each epoch with ``validate``, saves the state dict to
    ``models/best_model.pth`` whenever the validation loss strictly improves,
    and plots both loss histories once training ends.

    Relies on names defined at module level: ``model``, ``optimizer``,
    ``loss_fn``, ``train_loader``, ``val_loader``, ``device`` and ``hp``.
    """
    # Imported locally so this function does not depend on another cell's import.
    from pathlib import Path

    checkpoint_path = "models/best_model.pth"
    # torch.save does not create missing parent directories, so make sure the
    # target folder exists before the first checkpoint is written.
    Path(checkpoint_path).parent.mkdir(parents=True, exist_ok=True)

    best_val_loss = float('inf')
    train_losses = []
    val_losses = []
    for epoch in range(1, hp["num_epochs"] + 1):
        model.train()
        running_loss = 0

        with tqdm(train_loader, desc=f"Epoch {epoch}", unit="batch") as t:
            for x_batch, y_batch in t:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                # Every batch starts from hp["h0"] / hp["c0"]; the states the
                # model returns are not carried over to the next batch, so
                # they are discarded here.
                y_pred, _, _ = model(x_batch, h0=hp["h0"], c0=hp["c0"])
                loss = loss_fn(y_pred, y_batch)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                batch_loss = loss.item()
                running_loss += batch_loss
                t.set_postfix(loss=batch_loss)

        # Epoch loss is the mean of the per-batch losses.
        train_loss = running_loss / len(train_loader)
        train_losses.append(train_loss)

        # Run validation and record loss
        val_loss, val_rmse, val_mae = validate(model, val_loader, loss_fn)
        val_losses.append(val_loss)

        print(f"Epoch {epoch} Train Loss: {train_loss:.5f}")
        print(f"Epoch {epoch} Validation Loss: {val_loss:.5f}, RMSE: {val_rmse:.5f}, MAE: {val_mae:.5f}")

        # Save the best model based on validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print("Model saved")

    # After training, plot loss curves
    plot_loss_curves(train_losses, val_losses)
def test(window_size=50):
    """Reload the best checkpoint and predict over ``test_loader``.

    Args:
        window_size: Number of leading rows of ``raw`` to skip when building
            the true values. Defaults to 50, the value that used to be
            hard-coded here.

    Returns:
        Tuple ``(y_pred, y_true)``: the model outputs of all test batches
        concatenated along dimension 0, and ``raw[window_size:]`` converted to
        a float32 tensor on ``device``.

    Relies on module-level ``model``, ``device``, ``test_loader`` and ``raw``.
    """
    # Load the best model. map_location remaps the stored tensors onto the
    # current device, so a checkpoint written on an accelerator still loads
    # on a CPU-only machine.
    model.load_state_dict(torch.load("models/best_model.pth", map_location=device))
    model.eval()

    y_pred_list = []
    with torch.inference_mode():
        for batch in test_loader:
            # Only the first element of each batch (the inputs) is used.
            x = batch[0].to(device)
            outputs, _, _ = model(x)
            y_pred_list.append(outputs)

    # Concatenate all predictions and obtain the final prediction tensor
    y_pred = torch.cat(y_pred_list, dim=0)

    # Per the original author's note (the construction of X_test is not
    # visible here - confirm): X_test was created as
    # [raw[i-window_size:i, 0] for i in range(window_size, raw.shape[0])],
    # so the matching true values are raw[window_size:].
    y_true_np = raw[window_size:]  # raw is expected to be defined in the global scope as the scaled array
    y_true = torch.from_numpy(y_true_np).type(torch.float32).to(device)

    return y_pred, y_true

def plot_results():
    """Plot test-set predictions against the true values on the original scale.

    Calls ``test()``, undoes the scaling of both series with
    ``df_instance.scaler.inverse_transform`` and draws them as two seaborn
    line plots. Relies on module-level ``df_instance``, ``pd``, ``sns`` and
    ``plt``.
    """
    predictions, targets = test()

    # Turn each tensor into a single-column array, the layout handed to the scaler.
    pred_column, true_column = (
        tensor.cpu().detach().numpy().flatten().reshape(-1, 1)
        for tensor in (predictions, targets)
    )

    # Undo the scaling (predictions first, then true values) and go back to 1-D.
    scaler = df_instance.scaler
    y_pred_original = scaler.inverse_transform(pred_column).flatten()
    y_true_original = scaler.inverse_transform(true_column).flatten()

    # One row per sample; the index column doubles as the x axis.
    df_plot = pd.DataFrame({
        "Index": range(len(y_true_original)),
        "True Value": y_true_original,
        "Predicted Value": y_pred_original
    })

    plt.figure(figsize=(10, 6))
    for column, legend_label in (("True Value", "True"), ("Predicted Value", "Predicted")):
        sns.lineplot(x="Index", y=column, data=df_plot, label=legend_label)
    plt.xlabel("Sample")
    plt.ylabel("Close Price")
    plt.title("LSTM Predictions vs Original True Values")
    plt.legend()
    plt.show()


## Inference

In [None]:
# 1. Set the model in evaluation mode
# Put the model into evaluation mode before predicting.
model_0.eval()

# Predict under torch.inference_mode().
# Nothing in this cell moves the model or the data, so both must already be on
# the same device; to run elsewhere, first call model_0.to(device) and
# X_test = X_test.to(device).
# NOTE(review): model_0, X_test and plot_predictions are not defined in the
# visible part of this file - confirm where they come from.
with torch.inference_mode():
    y_preds = model_0(X_test)
y_preds
plot_predictions(predictions=y_preds)

## Save/Load Model

In [None]:
from pathlib import Path

# Directory for saved checkpoints; created if missing, including parents.
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Full path of the checkpoint file inside that directory.
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# Save only the state_dict (the model's learned parameters), not the module itself.
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)
!ls -l models/01_pytorch_workflow_model_0.pth

