In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import os
import sys

# ---------------------------------------------------------------------------
# Data pipeline: device selection, dataset discovery, transforms, a
# leak-free train/validation split, and the three DataLoaders.
# ---------------------------------------------------------------------------

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

base_dir = os.path.join(os.getcwd(), "data")


def _resolve_split_dir(root, split_name):
    """Return ``root/split_name/split_name`` if it exists, else ``root/split_name``."""
    nested = os.path.join(root, split_name, split_name)
    return nested if os.path.exists(nested) else os.path.join(root, split_name)


train_dir = _resolve_split_dir(base_dir, "seg_train")
test_dir = _resolve_split_dir(base_dir, "seg_test")

print("Train dir:", train_dir)
print("Test dir:", test_dir)

IMG_SIZE = 150
BATCH_SIZE = 32
SEED = 42  # fixes the train/validation partition so reruns are comparable

# Per-channel normalisation statistics shared by every transform below.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training images get light augmentation (flip + small rotation).
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Validation / test images are only resized and normalised.
val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Two views of the same training folder: one augmented (for training) and one
# deterministic (for validation). Both index the same files in the same order.
full_train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
val_dataset         = datasets.ImageFolder(root=train_dir, transform=val_transform)
test_dataset        = datasets.ImageFolder(root=test_dir,  transform=val_transform)

val_size   = int(0.1 * len(full_train_dataset))
train_size = len(full_train_dataset) - val_size

# Both splits MUST use the same permutation: with two independent random
# draws the "validation" indices overlap the training indices and validation
# accuracy is inflated by leakage. Seeding two generators identically makes
# random_split produce the same partition for both dataset views.
train_subset, _ = random_split(
    full_train_dataset, [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)
_, val_subset = random_split(
    val_dataset, [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)
assert set(train_subset.indices).isdisjoint(val_subset.indices), \
    "train/validation split overlaps"

train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_subset,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print("Classes:", full_train_dataset.classes)
print(f"Train subset: {len(train_subset)} | Val subset: {len(val_subset)} | Test set: {len(test_dataset)}")

Using device: cpu
Train dir: /Users/stone/Desktop/project files/repos/homework3_DL/data/seg_train/seg_train
Test dir: /Users/stone/Desktop/project files/repos/homework3_DL/data/seg_test/seg_test
Classes: ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']
Train subset: 12631 | Val subset: 1403 | Test set: 3000


Let's build a minimal, lightweight CNN.

In [10]:
class SimpleCNN(nn.Module):
    """Small three-stage CNN classifier for square RGB images.

    Each stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2)``, so
    every stage halves (floors) the spatial size. The flattened features go
    through a 256-unit hidden layer with dropout and a final linear layer
    that produces ``num_classes`` logits.

    Args:
        num_classes: Number of output logits.
        dropout: Dropout probability applied after the hidden layer.
        img_size: Height/width of the (square) input images. Defaults to 150,
            which reproduces the original fixed-size architecture.

    Raises:
        ValueError: If ``img_size`` is too small to survive three 2x poolings.
    """

    def __init__(self, num_classes=6, dropout=0.5, img_size=150):
        super().__init__()
        # Three floor-halvings of the input size equal one floor-division by 8.
        feat_size = img_size // 8
        if feat_size < 1:
            raise ValueError(
                f"img_size must be at least 8 for three 2x poolings, got {img_size}"
            )

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(128 * feat_size * feat_size, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        # Spatial sizes for the default 150px input: 150 -> 75 -> 37 -> 18.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

Now we define a basic training loop that we will apply to each experiment. It keeps track of the per-epoch loss and accuracy history that we will need later; the model checkpoints are saved by the experiment loop in the next cell.

In [11]:
def train_model(config, train_loader, val_loader, num_epochs=10):
    """Train a fresh ``SimpleCNN`` for one experiment configuration.

    Args:
        config: Dict describing the run. Required keys: ``"name"`` (used in
            log lines) and ``"lr"`` (Adam learning rate). Optional keys:
            ``"dropout"`` (default 0.5), ``"weight_decay"`` (default 0) and
            ``"scheduler"`` (truthy enables ``StepLR(step_size=5, gamma=0.5)``).
            Any other key has no effect on training; such keys are reported
            so that a run is not mistaken for something it is not.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        ``(model, history)`` where ``history`` maps ``"train_loss"``,
        ``"val_loss"`` and ``"val_acc"`` to per-epoch lists. Losses are
        per-sample means; accuracy is a percentage.
    """
    print(f"\n=== Training: {config['name']} ===")

    # Surface config keys this function does not consume instead of silently
    # training the baseline under a different name.
    supported_keys = {"name", "lr", "dropout", "weight_decay", "scheduler"}
    ignored_keys = sorted(set(config) - supported_keys)
    if ignored_keys:
        print(f"[warning] config keys ignored by train_model: {ignored_keys}")

    model = SimpleCNN(num_classes=6, dropout=config.get("dropout", 0.5)).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=config["lr"],
                           weight_decay=config.get("weight_decay", 0))

    scheduler = None
    if config.get("scheduler", False):
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    history = {"train_loss": [], "val_loss": [], "val_acc": []}

    num_batches = len(train_loader)
    progress_every = max(1, num_batches // 10)  # roughly ten updates per epoch

    for epoch in range(num_epochs):
        # ---- training pass ------------------------------------------------
        model.train()
        loss_sum, correct, total = 0.0, 0, 0

        for batch_idx, (imgs, labels) in enumerate(train_loader, 1):
            imgs, labels = imgs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # The criterion returns a batch mean; weight it by the batch size
            # so a short final batch does not skew the epoch average.
            batch_n = labels.size(0)
            loss_sum += loss.item() * batch_n
            _, preds = torch.max(outputs, 1)
            total += batch_n
            correct += (preds == labels).sum().item()

            if batch_idx % progress_every == 0:
                percent = 100 * batch_idx / num_batches
                sys.stdout.write(
                    f"\rEpoch {epoch+1}/{num_epochs} [{percent:5.1f}%] "
                    f"Batch {batch_idx}/{num_batches} "
                    f"Train Loss: {loss_sum/total:.4f}"
                )
                sys.stdout.flush()

        # An empty loader yields NaN metrics rather than a ZeroDivisionError.
        train_loss = loss_sum / total if total else float("nan")
        train_acc = 100 * correct / total if total else float("nan")

        # ---- validation pass ----------------------------------------------
        model.eval()
        val_loss_sum, val_correct, val_total = 0.0, 0, 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                batch_n = labels.size(0)
                val_loss_sum += loss.item() * batch_n
                _, preds = torch.max(outputs, 1)
                val_total += batch_n
                val_correct += (preds == labels).sum().item()

        val_loss = val_loss_sum / val_total if val_total else float("nan")
        val_acc = 100 * val_correct / val_total if val_total else float("nan")

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        # Read the learning rate BEFORE stepping the scheduler so the log line
        # shows the rate this epoch was actually trained with.
        current_lr = optimizer.param_groups[0]['lr']
        if scheduler:
            scheduler.step()

        print(f"\nEpoch [{epoch+1}/{num_epochs}] "
              f"| Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% "
              f"| Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}% "
              f"| LR: {current_lr:.5f}")
        print("-" * 90)

    print(f"Finished training {config['name']}")
    return model, history

Now we run through all experiment configurations and save the results and history in a DataFrame.

In [None]:
import os
import pandas as pd

# Run every experiment configuration, checkpoint each trained model, and
# summarise the best validation accuracy per run in a DataFrame / CSV.

os.makedirs("checkpoints", exist_ok=True)

# NOTE(review): train_model only reads "name", "lr", "dropout",
# "weight_decay" and "scheduler". The "use_bn", "augment" and "model_size"
# keys below are not consumed by it, so those three runs currently train the
# same architecture and data pipeline as "baseline".
experiments = [
    {"name": "baseline",       "lr": 1e-3},
    {"name": "lr_5e4",         "lr": 5e-4},
    {"name": "lr_2e3",         "lr": 2e-3},
    {"name": "dropout_0.3",    "lr": 1e-3, "dropout": 0.3},
    {"name": "dropout_0.7",    "lr": 1e-3, "dropout": 0.7},
    {"name": "weight_decay",   "lr": 1e-3, "weight_decay": 1e-4},
    {"name": "scheduler",      "lr": 1e-3, "scheduler": True},
    {"name": "batchnorm",      "lr": 1e-3, "use_bn": True},
    {"name": "augment",        "lr": 1e-3, "augment": True},
    {"name": "larger_model",   "lr": 1e-3, "model_size": "large"},
]

results = []
histories = {}  # run name -> per-epoch history returned by train_model

# Track the best run while looping so only two models are alive at a time.
best_model, best_hist, best_val_acc = None, None, float("-inf")

for cfg in experiments:
    run_model, run_hist = train_model(cfg, train_loader, val_loader, num_epochs=10)
    best_acc = max(run_hist["val_acc"])
    histories[cfg["name"]] = run_hist

    results.append({
        "name": cfg["name"],
        "lr": cfg["lr"],
        "dropout": cfg.get("dropout", 0.5),
        "weight_decay": cfg.get("weight_decay", 0),
        "scheduler": cfg.get("scheduler", False),
        "best_val_acc": best_acc
    })

    torch.save(run_model.state_dict(), f"checkpoints/{cfg['name']}.pth")

    # Strict ">" keeps the first run on ties, matching max() over `results`.
    if best_acc > best_val_acc:
        best_val_acc = best_acc
        best_model, best_hist = run_model, run_hist

# Expose the BEST run (not merely the last one trained) as `model` / `hist`,
# since later cells report the best configuration and then use these names.
model, hist = best_model, best_hist

df = pd.DataFrame(results)
df.to_csv("results_summary.csv", index=False)
df


=== Training: baseline ===
Epoch 1/10 [ 98.7%] Batch 390/395 Train Loss: 1.0424

Let's plot the training and validation curves.

In [None]:
import matplotlib.pyplot as plt

# Report which configuration achieved the highest validation accuracy.
best_run = max(results, key=lambda run: run["best_val_acc"])["name"]
print(f"Best configuration: {best_run}")

# Two side-by-side panels: validation accuracy (left) and loss curves (right),
# both drawn from the history currently bound to `hist`.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 4))

acc_ax.plot(hist["val_acc"], label="Validation Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy (%)")
acc_ax.set_title("Validation Accuracy")
acc_ax.legend()

for series_key, series_label in (("train_loss", "Training Loss"),
                                 ("val_loss", "Validation Loss")):
    loss_ax.plot(hist[series_key], label=series_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.set_title("Loss Curves")
loss_ax.legend()

fig.tight_layout()
plt.show()

We also want to add a confusion matrix and see where the model tends to get stumped.

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np

# Evaluate `model` on the held-out test set: confusion matrix plus per-class
# accuracy. Class names come from the dataset the test labels index into
# (the previously referenced `train_data` is not defined in this notebook).
class_names = test_dataset.classes

model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        _, preds = torch.max(outputs, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Pin the label set so the matrix always has one row/column per class, even
# if some class never appears among the true or predicted labels.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))
ConfusionMatrixDisplay(cm, display_labels=class_names).plot(
    cmap="Blues", xticks_rotation=45)
plt.show()

# Per-class accuracy
y_true, y_pred = np.array(y_true), np.array(y_pred)
for i, cls in enumerate(class_names):
    mask = (y_true == i)
    if not mask.any():
        # Avoid a NaN (mean of an empty array) for classes with no samples.
        print(f"{cls:10s}: n/a (no test samples)")
        continue
    acc = (y_pred[mask] == y_true[mask]).mean()
    print(f"{cls:10s}: {acc*100:.2f}%")