# N-BEATS Model Training Notebook
This notebook implements the N-BEATS architecture for ridership prediction


### 1. Import Required Libraries

In [26]:
import torch.nn as nn
from torch import optim
from typing import Tuple, Optional
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import pytorch_lightning as pl
import torch
import os

### 1. Custom module imports

In [29]:
from torch.utils.data import Dataset, DataLoader
from model.models.train_nbeats import NBeatsBlock, GenericBasis

### 2. Custom Data Loading and Feature Engineering Modules

In [5]:
class RidershipDataset(Dataset):
    """Custom Dataset for ridership data"""
    def __init__(self, features: np.ndarray, targets: np.ndarray):
        """
        Args:
            features: Input features array
            targets: Target ridership values
        """
        self.features = features
        self.targets = targets

    def __len__(self) -> int:
        return len(self.features)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        return (
            torch.tensor(self.features[idx], dtype=torch.float32),
            torch.tensor(self.targets[idx], dtype=torch.float32)
        )

def load_data(path: str) -> pd.DataFrame:
    """Load data from CSV and create datetime column"""
    df = pd.read_csv(path)
    df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
    return df.sort_values(['origin', 'destination', 'datetime'])

def add_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add time-based features to dataframe"""
    df['hour_sin'] = np.sin(2 * np.pi * df['datetime'].dt.hour / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['datetime'].dt.hour / 24)
    df['day_of_week'] = df['datetime'].dt.weekday
    df['is_weekend'] = df['day_of_week'].apply(lambda x: 1 if x >= 5 else 0)

    # Add lag features (1 week)
    df['lag_1_week'] = df.groupby(['origin', 'destination'])['ridership'].shift(24 * 7)
    df['lag_1_week'].fillna(0, inplace=True)

    return df

def prepare_data(df: pd.DataFrame, test_size: float = 0.2) -> Tuple[DataLoader, DataLoader, MinMaxScaler]:
    """Prepare data loaders and feature scaler"""
    # Ensure no NaN values
    df = df.dropna()

    # Separate features and target
    feature_cols = ['hour_sin', 'hour_cos', 'lag_1_week', 'day_of_week', 'is_weekend']
    features = df[feature_cols].values
    target = df['ridership'].values.reshape(-1, 1)

    # Normalize features
    scaler = MinMaxScaler()
    features = scaler.fit_transform(features)

    # Split data
    X_train, X_val, y_train, y_val = train_test_split(
        features, target, test_size=test_size, random_state=42
    )

    # Create datasets and dataloaders
    train_dataset = RidershipDataset(X_train, y_train)
    val_dataset = RidershipDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

    return train_loader, val_loader, scaler

### 3. Define the Complete N-BEATS Model

In [7]:
class NBeats(pl.LightningModule):
    """Complete N-BEATS model implemented as PyTorch Lightning module"""
    def __init__(self,
                 input_size: int,
                 output_size: int = 1,
                 n_stacks: int = 30,
                 n_layers: int = 4,
                 layer_width: int = 512,
                 learning_rate: float = 1e-3):
        super().__init__()
        self.save_hyperparameters()
        self.learning_rate = learning_rate

        self.stacks = nn.ModuleList()
        for _ in range(n_stacks):
            block = NBeatsBlock(
                input_size=input_size,
                theta_size=input_size + output_size,
                basis_function=GenericBasis(input_size, output_size),
                n_layers=n_layers,
                layer_width=layer_width
            )
            self.stacks.append(block)

        self.final_layer = nn.Linear(n_stacks * output_size, output_size)
        self.loss_fn = nn.MSELoss()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        forecasts = []
        for stack in self.stacks:
            backcast, forecast = stack(x)
            forecasts.append(forecast)
            x = x - backcast
        forecast = torch.cat(forecasts, dim=1)
        return self.final_layer(forecast)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_fn(y_hat, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss_fn(y_hat, y)
        self.log("val_loss", loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.learning_rate)

### 4. Training Function

In [21]:
def train_model(data_path="data/cleaned_data.csv",
                n_stacks: int = 30,
                n_layers: int = 4,
                layer_width: int = 512,
                learning_rate: float = 1e-3,
                max_epochs: int = 25,
                patience: int = 25) -> Tuple[NBeats, MinMaxScaler]:
    """Complete training pipeline"""
    # Load and prepare data
    print("Loading and preprocessing data...")
    df = load_data(data_path)
    df = add_features(df)

    # Visualize sample data
    print("\nSample training data:")
    display(df.head())

    train_loader, val_loader, scaler = prepare_data(df)

    # Initialize model
    input_size = train_loader.dataset[0][0].shape[0]
    print(f"\nInitializing model with input size: {input_size}")

    model = NBeats(
        input_size=input_size,
        n_stacks=n_stacks,
        n_layers=n_layers,
        layer_width=layer_width,
        learning_rate=learning_rate
    )

    # Configure trainer
    trainer = pl.Trainer(
        max_epochs=max_epochs,
        callbacks=[
            EarlyStopping(monitor="val_loss", patience=patience, mode="min"),
            ModelCheckpoint(
                monitor="val_loss",
                dirpath="../saved_models/",
                filename="nbeats-best",
                save_top_k=1
            )
        ],
        logger=TensorBoardLogger("lightning_logs", name="nbeats"),
        accelerator="auto",
        devices="auto"
    )

    # Train model
    print("\nStarting training...")
    trainer.fit(model, train_loader, val_loader)

    return model, scaler

### 5. Execute Training

In [28]:
if __name__ == "__main__":
    # Configuration
    config = {
        "data_path": "../data/cleaned_data.csv",
        "n_stacks": 30,
        "n_layers": 4,
        "layer_width": 512,
        "learning_rate": 1e-3,
        "max_epochs": 50,
        "patience": 10,
        "enable_logging": False  # Set to False to skip TensorBoard
    }

    # Create output directory
    os.makedirs("../saved_models", exist_ok=True)

    try:
        # Handle TensorBoard dependencies
        if config["enable_logging"]:
            try:
                import tensorboard
                from pytorch_lightning.loggers import TensorBoardLogger

                logger = TensorBoardLogger("lightning_logs", name="nbeats")
                print("✅ TensorBoard logging enabled")
            except ImportError:
                print("⚠️ TensorBoard not available - install with:")
                print("!pip install tensorboard tensorboardX")
                config["enable_logging"] = False

        # Fix pandas warning
        pd.options.mode.chained_assignment = None  # Suppress the warning

        # Load and prepare data
        print("\n🔍 Loading and preprocessing data...")
        df = load_data(config["data_path"])
        df = add_features(df)

        # Show sample data without copy warnings
        with pd.option_context('mode.chained_assignment', None):
            print("\n📊 Sample training data:")
            display(df.head())

        # Prepare data loaders
        train_loader, val_loader, scaler = prepare_data(df)
        print(f"\n🧠 Initializing model with input size: {train_loader.dataset[0][0].shape[0]}")

        # Initialize model
        model = NBeats(
            input_size=train_loader.dataset[0][0].shape[0],
            n_stacks=config["n_stacks"],
            n_layers=config["n_layers"],
            layer_width=config["layer_width"],
            learning_rate=config["learning_rate"]
        )

        # Configure trainer
        trainer = pl.Trainer(
            max_epochs=config["max_epochs"],
            callbacks=[
                EarlyStopping(monitor="val_loss", patience=config["patience"], mode="min"),
                ModelCheckpoint(
                    monitor="val_loss",
                    dirpath="../saved_models/",
                    filename="nbeats-best",
                    save_top_k=1
                )
            ],
            logger=logger if config["enable_logging"] else False,
            enable_progress_bar=True,
            accelerator="auto",
            devices="auto"
        )

        # Train model
        print("\n🚀 Starting training...")
        trainer.fit(model, train_loader, val_loader)

        # Save additional artifacts
        torch.save(model.state_dict(), "../saved_models/nbeats-weights.pt")
        torch.save(scaler, "../saved_models/nbeats-scaler.pt")

        print("\n✅ Training completed successfully!")
        print("Saved artifacts:")
        print(f"  • Model weights: ../saved_models/nbeats-weights.pt")
        print(f"  • Feature scaler: ../saved_models/nbeats-scaler.pt")
        print(f"  • Full checkpoint: ../saved_models/nbeats-best.ckpt")

    except Exception as e:
        print("\n❌ Training failed!")
        print(f"Error: {str(e)}")
        raise e


🔍 Loading and preprocessing data...

📊 Sample training data:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['lag_1_week'].fillna(0, inplace=True)


Unnamed: 0,date,time,origin,destination,ridership,day_of_week,is_weekend,is_holiday,datetime,hour_sin,hour_cos,lag_1_week
21554,2025-01-03,22:00,Abdullah Hukum,Abdullah Hukum,2,4,0,0,2025-01-03 22:00:00,-0.5,0.866025,0.0
30734,2025-01-05,14:00,Abdullah Hukum,Abdullah Hukum,1,6,1,0,2025-01-05 14:00:00,-0.5,-0.866025,0.0
40019,2025-01-06,19:00,Abdullah Hukum,Abdullah Hukum,2,0,0,0,2025-01-06 19:00:00,-0.965926,0.258819,0.0
45529,2025-01-07,16:00,Abdullah Hukum,Abdullah Hukum,1,1,0,0,2025-01-07 16:00:00,-0.866025,-0.5,0.0
46881,2025-01-07,19:00,Abdullah Hukum,Abdullah Hukum,1,1,0,0,2025-01-07 19:00:00,-0.965926,0.258819,0.0



🧠 Initializing model with input size: 5


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/homebrew/Caskroom/miniconda/base/envs/ML-Assignment/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/ahanbilbo/Documents/University/University Malaya/Semester 2/Courses/ML/Assignment/ML-Assignment/model/saved_models exists and is not empty.

  | Name        | Type       | Params | Mode 
---------------------------------------------------
0 | stacks      | ModuleList | 23.8 M | train
1 | final_layer | Linear     | 31     | train
2 | loss_fn     | MSELoss    | 0      | train
---------------------------------------------------
23.8 M    Trainable params
0         Non-trainable params
23.8 M    Total params
95.294    Total estimated model params size (MB)
363       Modules in train mode
0         Modules in eval mode



🚀 Starting training...
Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/opt/homebrew/Caskroom/miniconda/base/envs/ML-Assignment/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


                                                                           

/opt/homebrew/Caskroom/miniconda/base/envs/ML-Assignment/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 0:   3%|▎         | 322/9760 [00:19<09:19, 16.86it/s, train_loss=70.20]  


Detected KeyboardInterrupt, attempting graceful shutdown ...



❌ Training failed!
Error: name 'exit' is not defined


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x103db1f10>>
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniconda/base/envs/ML-Assignment/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 

KeyboardInterrupt

