In [1]:
import os
import sys

# Make the parent of the current working directory importable so that the
# project-local `src` package can be imported from this notebook.
notebook_dir = os.getcwd()
project_root_path = os.path.dirname(notebook_dir)
# Only prepend when the root is not already the first entry, so re-running
# this cell does not keep stacking duplicate entries onto sys.path.
if sys.path[:1] != [project_root_path]:
    sys.path.insert(0, project_root_path)

from src.models import ModelXtoCResNet  # noqa: E402
from src.preprocessing.RIVAL10 import preprocessing_rival10  # noqa: E402
from src.utils import *  # noqa: E402, F403

In [3]:
# Run the RIVAL10 preprocessing helper and unpack the concept labels together
# with the train / validation / test loaders it returns.
(
    concept_labels,
    train_loader,
    val_loader,
    test_loader,
) = preprocessing_rival10(
    training=False,
    class_concepts=True,
    verbose=True,
)

Found 26384 unique images.
Found 18 unique concepts.
Generated one-hot training matrix with shape: (21098, 10)
Found 21098 images.
Processing in 330 batches of size 64 (for progress reporting)...


Processing batches: 100%|██████████| 330/330 [00:45<00:00,  7.18it/s]



Finished processing.
Successfully transformed: 21098 images.
Found 5286 images.
Processing in 83 batches of size 64 (for progress reporting)...


Processing batches: 100%|██████████| 83/83 [00:17<00:00,  4.70it/s]



Finished processing.
Successfully transformed: 5286 images.
Dataset initialized with 21098 pre-sorted items.
Dataset initialized with 5286 pre-sorted items.
Split train dataset: 16878 training samples, 4220 validation samples


# Training Implementation

In [4]:
from src.config import RIVAL10_CONFIG as config_dict
import torch
import torch.nn as nn
import torch.optim as optim

In [5]:
# Read the two model-size settings from the RIVAL10 config mapping.
N_TRIMMED_CONCEPTS, N_CLASSES = (
    config_dict[key] for key in ('N_TRIMMED_CONCEPTS', 'N_CLASSES')
)

**Find device to run model on (CPU or GPU).**

In [6]:
# Pick the compute device in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: mps


**Instantiate the model.**

In [7]:
# Build the ResNet-based model from the config-derived sizes and move it onto
# the selected device in one step.
model = ModelXtoCResNet(
    pretrained=True,
    freeze=True,
    n_concepts=N_TRIMMED_CONCEPTS,
    label_mode=True,
    n_classes=N_CLASSES,
).to(device)
print("Model Instantiated (X -> C)")



Model Instantiated (X -> C)


### Loss
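We use weighted loss. **Note:** the code cell below currently instantiates a plain (unweighted) `nn.CrossEntropyLoss()`, not `BCEWithLogitsLoss()`; the description that follows is for reference.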

`BCEWithLogitsLoss()` performs 2 steps:
1. $\sigma(x)$
    - Applies the sigmoid function to the logits to get probabilities.
2. $\text{BCE}(\sigma(x), y) = -\left[\, y \cdot \log(\sigma(x)) + (1-y) \cdot \log(1-\sigma(x)) \,\right]$
    - Compute binary cross-entropy between output probabilities ($\sigma(x)$) and ground truths ($y$)

In [8]:
# Criterion passed to run_epoch_x_to_y for training, validation and testing.
# NOTE(review): the markdown above describes a weighted BCEWithLogitsLoss, but
# this is an unweighted CrossEntropyLoss — confirm which one is intended.
attr_criterion = nn.CrossEntropyLoss()

### Optimiser
Use the same settings as in the CBM repo.

In [9]:
# SGD hyper-parameters; weight_decay is the lambda of the L2-regularisation term.
lr = 0.01
weight_decay = 4e-5

# Hand the optimiser only those parameters that still require gradients.
optimizer = optim.SGD(
    [param for param in model.parameters() if param.requires_grad],
    lr=lr,
    momentum=0.9,
    weight_decay=weight_decay,
)

# scheduler_step = n -> the LR is multiplied by gamma every n epochs
scheduler_step = 1000
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=scheduler_step,
    gamma=0.1,
)

print("Optimizer and Scheduler Ready")

Optimizer and Scheduler Ready


### Training and Validation Loops

In [10]:
# Best validation accuracy seen so far; updated by the training loop.
best_val_acc: float = 0.0

# Number of full passes over the training loader.
epochs: int = 2
# Logging interval setting (not referenced by the loop in this notebook section).
log_interval: int = 50

In [11]:
from src.training import run_epoch_x_to_y

# Train for `epochs` passes. After each pass, evaluate on the validation loader
# (when one is available), keep a snapshot of the best-scoring weights, and
# advance the LR scheduler.
for epoch in range(epochs):
    print(f"--- Epoch {epoch+1}/{epochs} ---")

    # Train
    train_loss, train_acc = run_epoch_x_to_y(
        model, train_loader, attr_criterion, optimizer,
        is_training=True, device=device, verbose=True,
    )
    print(f'Epoch {epoch+1} Train Summary | Loss: {train_loss:.4f} | Acc: {train_acc:.3f}')

    # Validate
    if val_loader:
        # Gradients are not needed for evaluation. is_training is passed
        # explicitly so this call never depends on the helper's default.
        with torch.no_grad():
            val_loss, val_acc = run_epoch_x_to_y(
                model, val_loader, attr_criterion, optimizer,
                is_training=False, device=device, verbose=True,
            )

        print(f'Epoch {epoch+1} Val Summary   | Loss: {val_loss:.4f} | Acc: {val_acc:.3f}')

        # Save best model based on validation accuracy
        if val_acc > best_val_acc:
            print(f"Validation accuracy improved ({best_val_acc:.3f} -> {val_acc:.3f}). Saving model...")
            best_val_acc = val_acc
            # Keep an in-memory copy of the best weights so the message above is
            # true. Cloning detaches the snapshot from later optimiser updates.
            # To persist it to disk, pass `best_model_state` to torch.save.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

    # Scheduler step
    scheduler.step()
    print(f"Current LR: {optimizer.param_groups[0]['lr']}")

--- Epoch 1/2 ---


Training:   9%|▊         | 23/264 [01:31<03:27,  1.16it/s, acc=0.8308, loss=0.6878] libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x11f05a8e0>
Traceback (most recent call last):
  File "/Users/pb/.pyenv/versions/3.11.9/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/Users/pb/.pyenv/versions/3.11.9/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 1582, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/Users/pb/.pyenv/versions/3.11.9/lib/python3.11/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/pb/.pyenv/versions/3.11.9/lib/python3.11/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
           ^^^^^^^^^^^^^^^^^^^^^^^^^^

KeyboardInterrupt: 

In [None]:
# Evaluate on the test loader. No checkpoint is reloaded in this cell, so
# `model` is scored with whatever weights it currently holds.
if test_loader:
    # Gradients are not needed for evaluation. is_training is passed explicitly
    # so this call never depends on the helper's default.
    with torch.no_grad():
        test_loss, test_acc = run_epoch_x_to_y(
            model, test_loader, attr_criterion, optimizer,
            is_training=False, device=device, verbose=True,
        )

    # Report inside the guard: test_loss / test_acc only exist when the
    # evaluation above actually ran.
    print(f'Best Model Summary   | Loss: {test_loss:.4f} | Acc: {test_acc:.3f}')
else:
    print("No test loader available; skipping test evaluation.")