In [1]:
from torch.utils.data import DataLoader
from torchvision import transforms

from src.concept_bottleneck.dataset import (
    CUB200ImageToAttributes,
    NUM_ATTRIBUTES,
)

# Loader settings shared by the training and evaluation pipelines.
batch_size = 16
num_workers = 2

# Per-channel statistics handed to Normalize below.
# NOTE(review): these match the widely used ImageNet values, presumably because
# the backbone is pretrained -- confirm against get_inception().
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training-time augmentation: random crop resized to 299 px, random horizontal
# flip, tensor conversion, then per-channel normalisation.
training_steps = [
    transforms.RandomResizedCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
training_preprocess = transforms.Compose(training_steps)

training_data = CUB200ImageToAttributes(transform=training_preprocess, train=True)
training_dataloader = DataLoader(
    training_data,
    shuffle=True,
    batch_size=batch_size,
    num_workers=num_workers,
)

# No transform is passed for the evaluation split.
# NOTE(review): presumably the dataset class applies its own default
# preprocessing when none is given -- TODO confirm.
test_data = CUB200ImageToAttributes(train=False)
test_dataloader = DataLoader(
    test_data,
    batch_size=batch_size,
    num_workers=num_workers,
)


In [2]:
import torch
from src.concept_bottleneck.networks import get_inception

# Seed torch's global RNG before anything stochastic runs so that a
# Restart & Run All starts from the same random state every time.
# NOTE(review): this covers torch-driven randomness only; it does not by itself
# guarantee bit-identical GPU results.
SEED = 0
torch.manual_seed(SEED)

# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Build the network with the project helper and move its parameters to `device`.
model: torch.nn.Module = get_inception().to(device)


Using cuda device


Using cache found in /home/shuangwu/.cache/torch/hub/pytorch_vision_v0.10.0


In [3]:
import numpy.typing as npt
import numpy as np


def train(
    model: torch.nn.Module,
    dataloader: DataLoader[tuple[torch.Tensor, npt.NDArray[np.float32]]],
    loss_fn: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    device: str,
    aux_loss_weight: float = 0.4,
    log_interval: int = 100,
) -> None:
    """Run one optimisation pass over ``dataloader``.

    The model is switched to training mode and is expected to return a
    ``(logits, aux_logits)`` pair for each batch. The optimised loss is
    ``loss_fn(logits, y) + aux_loss_weight * loss_fn(aux_logits, y)``.

    Args:
        model: Network to optimise; updated in place through ``optimizer``.
        dataloader: Yields ``(x, y)`` batches; must expose ``.dataset`` with a
            length (used only for the progress message).
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: Optimiser already bound to ``model``'s parameters.
        device: Device string that each batch is moved to.
        aux_loss_weight: Multiplier applied to the auxiliary-head loss.
            Defaults to 0.4, the value that was previously hard-coded.
        log_interval: Print the current loss every ``log_interval`` batches.
            A value of zero or less disables the progress output.
    """
    model.train()
    size = len(dataloader.dataset)  # type: ignore
    for batch, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)

        # Training mode yields two outputs; the auxiliary one contributes a
        # down-weighted term to the total loss.
        logits, aux_logits = model(x)
        loss = loss_fn(logits, y) + aux_loss_weight * loss_fn(aux_logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The positivity check also protects against modulo-by-zero.
        if log_interval > 0 and batch % log_interval == 0:
            print(f"loss: {loss.item():>7f} [{batch * len(x):>5d}/{size:>5d}]")


def test(
    model: torch.nn.Module,
    dataloader: DataLoader[tuple[torch.Tensor, npt.NDArray[np.float32]]],
    loss_fn: torch.nn.Module,
    device: str,
):
    model.eval()
    test_loss = 0
    correct = 0
    total_correct = 0
    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            y = y.to(device)

            logits = model(x)
            test_loss += loss_fn(logits, y).item()

            correct_attributes = (
                ((torch.sigmoid(logits) >= 0.5) == (y >= 0.5)).sum().item()
            )
            correct += correct_attributes / NUM_ATTRIBUTES

            total_correct += ( # Count the number of images with all attributes correct
                torch.all((torch.sigmoid(logits) >= 0.5) == (y >= 0.5), dim=1)
                .sum()
                .item()
            )

    test_loss /= len(dataloader)
    accuracy = correct / len(dataloader.dataset)  # type: ignore
    total_accuracy = total_correct / len(dataloader.dataset)  # type: ignore
    print(f"Total accuracy: {total_accuracy:>0.10f}%")

    return test_loss, accuracy


In [4]:
from src.concept_bottleneck.train import TrainFn, TestFn, run_epochs
from src.concept_bottleneck.inference import INDEPENDENT_IMAGE_TO_ATTRIBUTES_MODEL_NAME

# Upper bound on the number of passes handed to run_epochs.
epochs = 1000

# Objective and optimiser shared by the callbacks below.
loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=0.1)


def _train_one_epoch(model):
    """Train ``model`` on the training loader with the shared loss and optimiser."""
    return train(model, training_dataloader, loss_fn, optimizer, device)


def _evaluate(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` with the shared loss."""
    return test(model, dataloader, loss_fn, device)


# Bind the helpers to the callback names, keeping the project's type aliases.
train_fn: TrainFn = _train_one_epoch
test_fn: TestFn = _evaluate

run_epochs(
    epochs,
    model,
    train_fn,
    test_fn,
    training_dataloader,
    test_dataloader,
    save_name=INDEPENDENT_IMAGE_TO_ATTRIBUTES_MODEL_NAME,
)


Epoch 1/1000-------------------
loss: 0.990696 [    0/ 5994]
loss: 0.310962 [ 1600/ 5994]
loss: 0.338918 [ 3200/ 5994]
loss: 0.324636 [ 4800/ 5994]
Total accuracy: 0.0000000000%
Training Loss: 0.2262, Training Accuracy: 91.1702%
Total accuracy: 0.0000000000%
Test Loss: 0.2262, Test Accuracy: 91.1572%
Saving model to independent-image-to-attributes.pth with accuracy 91.1572%
Epoch 2/1000-------------------
loss: 0.298379 [    0/ 5994]
loss: 0.308300 [ 1600/ 5994]
loss: 0.339199 [ 3200/ 5994]
loss: 0.347442 [ 4800/ 5994]
Total accuracy: 0.0000000000%
Training Loss: 0.2135, Training Accuracy: 91.5791%
Total accuracy: 0.0000000000%
Test Loss: 0.2138, Test Accuracy: 91.5685%
Saving model to independent-image-to-attributes.pth with accuracy 91.5685%
Epoch 3/1000-------------------
loss: 0.315007 [    0/ 5994]
loss: 0.300422 [ 1600/ 5994]
loss: 0.296928 [ 3200/ 5994]
loss: 0.308102 [ 4800/ 5994]
Total accuracy: 0.0000000000%
Training Loss: 0.2075, Training Accuracy: 91.7547%
Total accuracy: 0

KeyboardInterrupt: 