In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import os


def is_colab():
    """Report whether this notebook is running inside Google Colab.

    Detection is done by attempting to import ``google.colab``.

    Returns:
        bool: ``True`` when the import succeeds, ``False`` when it raises
        ``ImportError``.
    """
    try:
        # Imported purely as an availability probe; the module itself is unused here.
        import google.colab  # noqa: F401
    except ImportError:
        return False
    else:
        return True


COLAB_ROOT_PATH = "/content/drive/MyDrive/final_project"

if is_colab():
    from google.colab import drive
    import sys

    drive.mount("/content/drive")
    if os.path.exists(COLAB_ROOT_PATH):
        os.chdir(COLAB_ROOT_PATH)
        sys.path.append(COLAB_ROOT_PATH)
    else:
        print(f"{COLAB_ROOT_PATH} is not exist")

    print("Running on Google Colab")
else:
    print("Not running on Google Colab")


!pip install -r requirements.txt

Mounted at /content/drive
Running on Google Colab
Collecting torchmetrics (from -r requirements.txt (line 9))
  Downloading torchmetrics-1.7.3-py3-none-any.whl.metadata (21 kB)
Collecting tensorboardX (from -r requirements.txt (line 13))
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics->-r requirements.txt (line 9))
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->torchmetrics->-r requirements.txt (line 9))
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->torchmetrics->-r requirements.txt (line 9))
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->torchmetrics->-r requirements.txt (line 9))
  Down

# Final Project Problem 2

![Image](./image.png)

In this jupyter notebook, we train a `neural network-based model` on the time series dataset from Problem 1.  
The conditions are as follows:

- Must use the same feature engineering method as the final model in problem 1 (since our goal is to compare ensemble methods vs neural networks on the same dataset)
- Use pytorch freely to build the model, as used in assignments #3 and #4
- Specifically:

  1. Use `problem_2.modeling.PandasDataset` to wrap the pandas dataset into a torch Dataset
  2. Build your model freely in `problem_2.modeling.load_model`
     - Input shape must match the features of final_train_dataset
     - Feel free to use anything including MLP or transformers
  3. Define optimizer and scheduler in `problem_2.modeling.load_optimizer` and `problem_2.modeling.load_scheduler`
     - Can use any optimizer defined in torch (ref: [link](https://docs.pytorch.org/docs/stable/optim.html#algorithms))
     - Can use any scheduler provided by torch (ref: [link](https://docs.pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate))
  4. Write training code in `problem_2.modeling.train`
     - Can write internal code freely but **must not modify the args**:

  ```python
  def train(
    model: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    scheduler: torch.optim.lr_scheduler.LRScheduler,
    train_loader: torch.utils.data.DataLoader,
    val_loader: torch.utils.data.DataLoader,
    device: Optional[torch.device] = None,
    ) -> torch.nn.Module:
    ....
  ```

  5. Model evaluation will be done using the `validate` function below


In [3]:
"""
The code below evaluates and saves the trained model.
Do not modify under any circumstances.
"""
import json
from typing import Callable, Optional, Tuple
import numpy as np
from torchmetrics.regression import MeanSquaredError
import torch
from pathlib import Path

from problem_2.modeling import get_device


def validate(
    model: torch.nn.Module,
    val_loader: torch.utils.data.DataLoader,
    invese_y: Callable[[np.ndarray], np.ndarray],
    device: Optional[torch.device] = None,
):
    """Compute the mean squared error of ``model`` over ``val_loader`` after inverse-transforming y.

    Args:
        model: Network to evaluate. It is moved to ``device`` and put in eval mode
            (it is not switched back to train mode afterwards).
        val_loader: Loader whose batches unpack as ``(x, y)``.
        invese_y: Callable applied to the flattened NumPy predictions and to the
            flattened NumPy targets before they are compared.
        device: Device to run on; when falsy, ``get_device()`` is used.

    Returns:
        The value of ``MeanSquaredError.compute()`` accumulated over all batches
        (displayed as a scalar tensor in this notebook's output).
    """
    device = device or get_device()
    mse = MeanSquaredError()
    model.to(device)
    model.eval()
    # Inference only: gradients are disabled for the whole pass.
    with torch.no_grad():
        for batch in val_loader:
            x, y = batch
            x = x.to(device)
            y = y.to(device)
            pred_y = model(x)

            # Both sides are moved to CPU, flattened to 1-D, passed through the
            # inverse transform and wrapped back into tensors for the metric.
            invese_y_pred = torch.tensor(invese_y(pred_y.cpu().numpy().ravel()))
            invese_y_true = torch.tensor(invese_y(y.cpu().numpy().ravel()))
            mse.update(invese_y_pred, invese_y_true)

    return mse.compute()


def save_model(
    model: torch.nn.Module,
    optim: torch.optim.Optimizer,
    scheduler: torch.optim.lr_scheduler.LRScheduler,
    test_loader: torch.utils.data.DataLoader,
    train_loader: torch.utils.data.DataLoader,
    invese_y: Callable[[np.ndarray], np.ndarray],
    dst_path: str = "problem_2_cache",
    model_name: str = "trained_model",
) -> str:
    """Score the model on both loaders and write its state to a new directory.

    The directory is created under ``dst_path`` and its name encodes
    ``model_name``, the train/test scores (4 decimals) and both batch sizes.
    It receives ``model.pth``, ``optim.pth``, ``scheduler.pth`` (state dicts)
    and ``performance.json`` (keys ``tr_performance`` / ``te_performance``).

    Args:
        model: Model whose ``state_dict`` is saved.
        optim: Optimizer whose ``state_dict`` is saved.
        scheduler: Scheduler whose ``state_dict`` is saved.
        test_loader: Loader used for the reported test score.
        train_loader: Loader used for the reported train score.
        invese_y: Inverse target transform forwarded to ``validate``.
        dst_path: Parent directory for the saved artifacts.
        model_name: Prefix of the artifact directory name.

    Returns:
        The directory the artifacts were written to.
        NOTE(review): the annotation says ``str`` but the returned object is a
        ``pathlib.Path``.
    """
    # validate() is called without a device, so it falls back to its own default.
    train_performance = validate(model, train_loader, invese_y)
    train_batch_size = train_loader.batch_size
    print(f"train performance: {train_performance}")
    test_performance = validate(model, test_loader, invese_y)
    test_batch_size = test_loader.batch_size
    print(f"test performance: {test_performance}")

    dst_path = (
        Path(dst_path)
        / f"{model_name}_tr_{train_performance:.4f}_te_{test_performance:.4f}_bs_{train_batch_size}_{test_batch_size}"
    )
    # On a name collision, switch to "<model_name>_<k>" where k is one more than the
    # number of entries in the parent directory whose name starts with model_name.
    # NOTE(review): the fallback name is not itself checked for existence; with
    # exist_ok=True below an existing directory of that name would be reused.
    if dst_path.exists():
        cnt_exist = len(list(dst_path.parent.glob(f"{model_name}*")))
        version = cnt_exist + 1
        dst_path = dst_path.parent / f"{model_name}_{version}"
    dst_path.mkdir(parents=True, exist_ok=True)
    print(f"saving model to {dst_path}")

    torch.save(model.state_dict(), dst_path / "model.pth")
    torch.save(optim.state_dict(), dst_path / "optim.pth")
    torch.save(scheduler.state_dict(), dst_path / "scheduler.pth")
    with open(dst_path / "performance.json", "w") as f:
        json.dump(
            {
                # .item() turns the metric tensors into plain Python numbers for JSON.
                "tr_performance": train_performance.item(),
                "te_performance": test_performance.item(),
            },
            f,
        )
    return dst_path


def load_trained_model(
    model_path: str,
    load_model: Callable[..., torch.nn.Module],
    load_optimizer: Callable[..., torch.optim.Optimizer],
    load_scheduler: Callable[..., torch.optim.lr_scheduler.LRScheduler],
) -> Tuple[
    torch.nn.Module,
    torch.optim.Optimizer,
    torch.optim.lr_scheduler.LRScheduler,
    float,
    float,
]:
    """Rebuild model, optimizer and scheduler from a directory written by ``save_model``.

    Args:
        model_path: Directory containing ``model.pth``, ``optim.pth``,
            ``scheduler.pth`` and ``performance.json``.
        load_model: Factory called with no arguments to build the model.
        load_optimizer: Factory called with the model to build the optimizer.
        load_scheduler: Factory called with the optimizer to build the scheduler.

    Returns:
        ``(model, optim, scheduler, tr_performance, te_performance)`` where the
        two performance values are read from ``performance.json``.

    NOTE(review): ``torch.load`` is called without ``map_location``; confirm
    that loading works when the checkpoint was saved on a different device
    than the one available at load time.
    """
    path_pth = Path(model_path)
    # The ``model_path`` parameter is rebound here to the weights file inside the directory.
    model_path = path_pth / "model.pth"
    optim_path = path_pth / "optim.pth"
    scheduler_path = path_pth / "scheduler.pth"
    performance_path = path_pth / "performance.json"
    with open(performance_path, "r") as f:
        performance = json.load(f)
    tr_performance = performance["tr_performance"]
    te_performance = performance["te_performance"]
    # Each object is freshly constructed by its factory, then its saved state is restored.
    model = load_model()
    model.load_state_dict(torch.load(model_path, weights_only=True))
    optim = load_optimizer(model)
    optim.load_state_dict(torch.load(optim_path, weights_only=True))
    scheduler = load_scheduler(optim)
    scheduler.load_state_dict(torch.load(scheduler_path, weights_only=True))
    return model, optim, scheduler, tr_performance, te_performance


In [4]:
import pandas as pd
from feature_engineering import final_feature_engineering

# Raw train/test tables, read from the current working directory.
training_dataset = pd.read_csv("ml_2025_final_project_training_dataset.csv")
test_dataset = pd.read_csv("ml_2025_final_project_test_dataset.csv")

# final_feature_engineering returns the two engineered tables plus the callable
# that is later passed to validate() as the inverse target transform.
(
    final_train_dataset,
    final_test_dataset,
    final_inverse_y,
) = final_feature_engineering(training_dataset, test_dataset)

In [5]:
from torch.utils.data import DataLoader
from problem_2.modeling import PandasDataset

train_batch_size = 128  # Feel free to modify
test_batch_size = 128  # Feel free to modify

# NOTE: `training_dataset` / `test_dataset` held the raw DataFrames in the
# CSV-loading cell; from here on they refer to the torch dataset wrappers.

# Training split: wrapped with "y" as the target column, reshuffled on iteration.
training_dataset = PandasDataset(final_train_dataset, "y")
training_loader = DataLoader(
    dataset=training_dataset, batch_size=train_batch_size, shuffle=True
)

# Test split: same wrapping, fixed order.
test_dataset = PandasDataset(final_test_dataset, "y")
test_loader = DataLoader(
    dataset=test_dataset, batch_size=test_batch_size, shuffle=False
)

# Sanity check: pull one training batch and show the tensor shapes.
sample_batch = next(iter(training_loader))
x, y = sample_batch
print(f"x.shape: {x.shape}")
print(f"y.shape: {y.shape}")


x.shape: torch.Size([128, 16])
y.shape: torch.Size([128])


## 1. Model Setup

---

- Please freely write your model in problem_2/modeling.py's `load_model`.
- Refer to `sample_load_model` and `sample_type_2_load_model` in the same file.


In [22]:
from problem_2.modeling import sample_load_model, load_model

# Reference implementation shipped with the assignment (kept for comparison):
# model = sample_load_model()
# Build the submission model; load_model is called with no arguments here, the
# same way load_trained_model calls its model factory.
model = load_model()
print(model)

ETSformerWrapper(
  (actual_model): ETSformer(
    (embedding): Linear(in_features=16, out_features=64, bias=True)
    (layers): ModuleList(
      (0-1): 2 x ETSLayer(
        (smooth): Sequential(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): ReLU()
          (2): Linear(in_features=64, out_features=64, bias=True)
        )
        (growth): Linear(in_features=64, out_features=64, bias=True)
        (fourier): FourierLayer()
        (seasonal): Linear(in_features=20, out_features=64, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (decoder): Linear(in_features=6400, out_features=1, bias=True)
  )
)


## 2. Optimizer and Scheduler

---

- Please freely write your optimizer and scheduler in problem_2/modeling.py's `load_optimizer` and `load_scheduler`
- Refer to `sample_load_optimizer` and `sample_load_scheduler` in the same file.


In [23]:
from problem_2.modeling import (
    sample_load_optimizer,
    sample_load_scheduler,
    load_optimizer,
    load_scheduler,
)

# Reference optimizer shipped with the assignment (kept for comparison):
# optim = sample_load_optimizer(model=model)
# Build the submission optimizer for the model created above.
optim = load_optimizer(model=model)
print(optim)

# Reference scheduler shipped with the assignment (kept for comparison):
# lr_scheduler = sample_load_scheduler(optimizer=optim)
# Build the submission scheduler on top of that optimizer.
lr_scheduler = load_scheduler(optimizer=optim)
print(lr_scheduler)

AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0.01
)
<torch.optim.lr_scheduler.CosineAnnealingLR object at 0x7cb3b3a06550>


## 3. Training

---

- Please freely write your training loop in problem_2/modeling.py's `train`
- Refer to `sample_train` in the same file.


In [25]:
from problem_2.modeling import sample_train, train

# Reference training loop shipped with the assignment (kept for comparison):
# trained_model = sample_train(
#     model=model,
#     optimizer=optim,
#     scheduler=lr_scheduler,
#     train_loader=training_loader,
#     val_loader=test_loader,
# )
# Run the submission training loop. The test loader doubles as the validation loader.
# NOTE(review): the later validation and save cells use `model`, not the
# `trained_model` returned here — confirm train() leaves the final weights in `model`.
trained_model = train(
    model=model,
    optimizer=optim,
    scheduler=lr_scheduler,
    train_loader=training_loader,
    val_loader=test_loader,
)

--- Training Started ---
  Epoch 1/20 | Processing batch 50/391
  Epoch 1/20 | Processing batch 100/391
  Epoch 1/20 | Processing batch 150/391
  Epoch 1/20 | Processing batch 200/391
  Epoch 1/20 | Processing batch 250/391
  Epoch 1/20 | Processing batch 300/391
  Epoch 1/20 | Processing batch 350/391
Epoch 1/20 | Train Loss: 1.8466 | Val Loss: 0.7659
  -> Found new best model with Val Loss: 0.7659
  Epoch 2/20 | Processing batch 50/391
  Epoch 2/20 | Processing batch 100/391
  Epoch 2/20 | Processing batch 150/391
  Epoch 2/20 | Processing batch 200/391
  Epoch 2/20 | Processing batch 250/391
  Epoch 2/20 | Processing batch 300/391
  Epoch 2/20 | Processing batch 350/391
Epoch 2/20 | Train Loss: 1.6307 | Val Loss: 0.2058
  -> Found new best model with Val Loss: 0.2058
  Epoch 3/20 | Processing batch 50/391
  Epoch 3/20 | Processing batch 100/391
  Epoch 3/20 | Processing batch 150/391
  Epoch 3/20 | Processing batch 200/391
  Epoch 3/20 | Processing batch 250/391
  Epoch 3/20 | Proce

## 4. Validation

---

- Measure the final performance of your model.
- Note: the internal test dataset will be evaluated in the same way:

```python
validate(your_model, internal_test_loader, your_final_inverse_y)
```

- Therefore, no errors should occur.


In [26]:
# Final score: MSE over test_loader, computed after predictions and targets are
# passed through final_inverse_y.
# NOTE(review): this scores `model`, not the `trained_model` returned by train() —
# confirm both refer to the same weights.
validate(model, test_loader, final_inverse_y)

tensor(0.0044)

## 5. Save Trained Model

---

- Save your model and submit it

- This cell is executed as-is during grading, so it must run without errors; do not modify the code.


In [27]:
# Persist model / optimizer / scheduler state plus train and test scores.
# dst_path and model_name are left at their defaults; save_path is the directory
# that was written and is reused by the reload cell.
save_path = save_model(
    model=model,
    optim=optim,
    scheduler=lr_scheduler,
    train_loader=training_loader,
    test_loader=test_loader,
    invese_y=final_inverse_y,
)

train performance: 0.0043458654545247555
test performance: 0.00439171539619565
saving model to problem_2_cache/trained_model_tr_0.0043_te_0.0044_bs_128_128


## 6. Load trained Model

---

Load your saved model


In [28]:
# Round-trip check: rebuild everything from the directory written by save_model.
# NOTE: this rebinds `model` and `optim` to freshly constructed objects, and the
# scheduler is bound to `scheduler` here (earlier cells used `lr_scheduler`).
model, optim, scheduler, tr_performance, te_performance = load_trained_model(
    model_path=save_path,
    load_model=load_model,
    load_optimizer=load_optimizer,
    load_scheduler=load_scheduler,
)
print(model)
print(optim)
print(scheduler)
# Scores as stored in performance.json at save time.
print(tr_performance)
print(te_performance)

ETSformerWrapper(
  (actual_model): ETSformer(
    (embedding): Linear(in_features=16, out_features=64, bias=True)
    (layers): ModuleList(
      (0-1): 2 x ETSLayer(
        (smooth): Sequential(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): ReLU()
          (2): Linear(in_features=64, out_features=64, bias=True)
        )
        (growth): Linear(in_features=64, out_features=64, bias=True)
        (fourier): FourierLayer()
        (seasonal): Linear(in_features=20, out_features=64, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (decoder): Linear(in_features=6400, out_features=1, bias=True)
  )
)
AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.001
    lr: 0.0001
    maximize: False
    weight_decay: 0.01
)
<torch.optim.lr_scheduler.Cosine

### Problem Summary

- Final submission file structure
- problem_2 /
  - best_model
    - model.pth
    - optim.pth
    - performance.json
    - scheduler.pth
- modeling.py
