In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split
import torch.optim as optim
from sklearn.model_selection import train_test_split
import numpy as np
import os

from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from torch.nn.functional import softmax

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm


In [20]:
import torch

# Report the installed PyTorch build and whether a CUDA device is usable.
print(torch.__version__)
print(torch.cuda.is_available())

# torch.cuda.get_device_name(0) raises when no CUDA device is present, so
# only query it once availability has been confirmed.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device detected; running on CPU")


2.7.1+cu118
True
NVIDIA GeForce RTX 3050 6GB Laptop GPU


In [21]:
# Logical CPU count as reported by the OS (os.cpu_count() may return None).
cpu_cores = os.cpu_count()
print(f"Number of CPU cores available: {cpu_cores}")

Number of CPU cores available: 20


In [22]:
# Root folder handed to datasets.ImageFolder below. Set the
# PAPILLEDEMA_DATA_DIR environment variable to point the notebook at a
# different copy of the data without editing this cell; when it is unset the
# original local path is used.
data_dir = os.environ.get(
    "PAPILLEDEMA_DATA_DIR",
    "C:/Users/LOQ/Desktop/Grad Project/pap_backup/papilledema",
)

In [23]:
# Deterministic preprocessing (no augmentation): resize to 224x224, convert
# to a tensor, then normalise each channel with the mean/std values
# conventionally used with ImageNet-pretrained torchvision models.
base_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
base_transform = transforms.Compose(base_steps)



In [24]:
# Training-time preprocessing: same resize / tensor / normalise steps as the
# evaluation pipeline, with a random horizontal flip and a small random
# rotation (up to 10 degrees either way) applied before tensor conversion.
augmentation_steps = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
]
train_transform = transforms.Compose(
    [transforms.Resize((224, 224))]
    + augmentation_steps
    + [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [25]:
# Two views over the same image folder, one per preprocessing pipeline.
#
# Subsets produced by random_split all point at ONE underlying dataset
# object, so assigning `subset.dataset.transform` on the val/test subsets
# also overwrites the transform used by the training subset (the last
# assignment wins and augmentation is silently disabled). Separate
# ImageFolder instances keep the augmented and deterministic pipelines apart.
full_dataset = datasets.ImageFolder(root=data_dir, transform=train_transform)
eval_dataset = datasets.ImageFolder(root=data_dir, transform=base_transform)

# Both views must enumerate exactly the same files for shared indices to be valid.
assert len(eval_dataset) == len(full_dataset)

n_total = len(full_dataset)
n_train = int(0.7 * n_total)
n_val   = int(0.15 * n_total)
n_test  = n_total - n_train - n_val   # remainder, so the three sizes sum to n_total

# Seeded permutation: the same images land in the same split on every run,
# so test images cannot drift into the training split between sessions.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(n_total, generator=split_generator).tolist()

train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:n_train + n_val]
test_indices = shuffled_indices[n_train + n_val:]

train_ds = Subset(full_dataset, train_indices)   # augmented pipeline
val_ds = Subset(eval_dataset, val_indices)       # deterministic pipeline
test_ds = Subset(eval_dataset, test_indices)     # deterministic pipeline

In [26]:
batch_size = 32

# Options shared by all three loaders.
loader_kwargs = dict(batch_size=batch_size, num_workers=8, pin_memory=True)

# Only the training split is reshuffled; validation and test keep a fixed order.
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_ds, shuffle=False, **loader_kwargs)

In [31]:
class allPap_Grade(nn.Module):
    """Five-stage convolutional classifier that outputs raw class logits.

    Each stage is a run of ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU``
    units followed by a 2x2 max-pool. A global average pool feeds a
    three-layer fully connected head with dropout after each hidden layer.

    Args:
        num_classes: Width of the final linear layer (number of logits per
            sample). Defaults to 6, the value previously hard-coded here.
    """

    def __init__(self, num_classes=6):
        super().__init__()

        # (in_channels, out_channels, number of conv units) per stage.
        # Spatial sizes in the comments assume a 3 x 224 x 224 input; every
        # stage ends with MaxPool2d(2), which halves height and width.
        stage_specs = [
            (3, 32, 2),      # Block 1: 224 -> 112
            (32, 64, 2),     # Block 2: 112 -> 56
            (64, 128, 2),    # Block 3: 56 -> 28
            (128, 256, 3),   # Block 4: 28 -> 14
            (256, 512, 3),   # Block 5: 14 -> 7
        ]

        layers = []
        for in_channels, out_channels, num_convs in stage_specs:
            for conv_idx in range(num_convs):
                # Only the first conv of a stage changes the channel count.
                conv_in = in_channels if conv_idx == 0 else out_channels
                layers.append(nn.Conv2d(conv_in, out_channels, kernel_size=3, padding=1))
                layers.append(nn.BatchNorm2d(out_channels))
                layers.append(nn.ReLU(inplace=True))
            layers.append(nn.MaxPool2d(2))

        # A single flat Sequential keeps the module indices (features.0 ...
        # features.40) identical to the hand-written layer list, so existing
        # state_dict keys still match.
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),       # 512 x 1 x 1
            nn.Flatten(),                  # 512
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)    # raw logits; no dropout on the output layer
        )

    def forward(self, x):
        """Map an image batch ``[B, 3, H, W]`` to logits ``[B, num_classes]``."""
        x = self.features(x)
        x = self.classifier(x)
        return x


In [32]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model = allPap_Grade()
model = model.to(device)

# Multi-class cross-entropy on raw logits; Adam with a small weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)


In [33]:
def run_epoch(loader, train=True):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Uses the notebook-level globals ``model``, ``criterion``, ``optimizer``
    and ``device``.

    Args:
        loader: Iterable of ``(inputs, targets)`` batches, where ``targets``
            holds integer class indices.
        train: When truthy the model is put in training mode and the
            optimizer is stepped after every batch. Otherwise the model is
            put in eval mode and gradients are disabled.

    Returns:
        Tuple ``(loss, accuracy)``: the sample-weighted mean loss and the
        fraction of correctly classified samples over the whole pass.

    Raises:
        ValueError: If ``loader`` yields no samples (previously this surfaced
            as a bare ZeroDivisionError).
    """
    if train:
        model.train()
    else:
        model.eval()

    total_loss, correct, total = 0.0, 0, 0

    for xb, yb in loader:
        # non_blocking lets host-to-device copies overlap with compute when
        # the source tensors are in pinned memory; otherwise it is a no-op.
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)  # [B] class indices

        with torch.set_grad_enabled(train):
            logits = model(xb)           # [B, num_classes]
            loss = criterion(logits, yb)

            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # get an exact per-sample average even when the last batch is short.
        n_batch = yb.size(0)
        total_loss += loss.item() * n_batch
        preds = logits.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total += n_batch

    if total == 0:
        raise ValueError("run_epoch received a loader that produced no samples")

    return total_loss / total, correct / total


In [34]:

num_epochs = 60

for epoch in range(num_epochs):
    # One optimisation pass over the training split, then an evaluation
    # pass over the validation split.
    train_loss, train_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc = run_epoch(val_loader, train=False)

    epoch_summary = (
        f"Epoch {epoch+1:02d} | "
        f"train_loss={train_loss:.4f} acc={train_acc:.3f} | "
        f"val_loss={val_loss:.4f} acc={val_acc:.3f}"
    )
    print(epoch_summary)


Epoch 01 | train_loss=1.7076 acc=0.337 | val_loss=1.7885 acc=0.208
Epoch 02 | train_loss=1.4877 acc=0.635 | val_loss=1.6850 acc=0.509
Epoch 03 | train_loss=1.2867 acc=0.703 | val_loss=1.5905 acc=0.509
Epoch 04 | train_loss=1.1048 acc=0.727 | val_loss=1.4791 acc=0.491
Epoch 05 | train_loss=0.9905 acc=0.755 | val_loss=1.3268 acc=0.547
Epoch 06 | train_loss=0.8564 acc=0.763 | val_loss=1.1110 acc=0.623
Epoch 07 | train_loss=0.7799 acc=0.759 | val_loss=0.9795 acc=0.717
Epoch 08 | train_loss=0.7356 acc=0.779 | val_loss=0.9969 acc=0.679
Epoch 09 | train_loss=0.7289 acc=0.759 | val_loss=1.0856 acc=0.623
Epoch 10 | train_loss=0.6878 acc=0.779 | val_loss=0.9371 acc=0.698
Epoch 11 | train_loss=0.6376 acc=0.799 | val_loss=1.0556 acc=0.566
Epoch 12 | train_loss=0.6326 acc=0.775 | val_loss=0.9841 acc=0.698
Epoch 13 | train_loss=0.6111 acc=0.783 | val_loss=1.2703 acc=0.585
Epoch 14 | train_loss=0.6345 acc=0.771 | val_loss=1.0491 acc=0.660
Epoch 15 | train_loss=0.5297 acc=0.819 | val_loss=0.9239 acc=0

KeyboardInterrupt: 

In [13]:
# Plateau scheduler settings:
#   mode="min"  - the monitored quantity (validation loss) should decrease
#   factor=0.1  - multiply the learning rate by 0.1 on a plateau
#   patience=3  - epochs with no improvement before reducing
plateau_kwargs = {"mode": "min", "factor": 0.1, "patience": 3}
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, **plateau_kwargs)

In [15]:
for epoch in range(60):
    # Train for one epoch, then measure loss/accuracy on the validation split.
    train_loss, train_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc = run_epoch(val_loader, train=False)

    epoch_summary = (
        f"Epoch {epoch+1:02d} | "
        f"train_loss={train_loss:.4f} acc={train_acc:.3f} | "
        f"val_loss={val_loss:.4f} acc={val_acc:.3f}"
    )
    print(epoch_summary)

    # The plateau scheduler is driven by this epoch's validation loss.
    scheduler.step(val_loss)


NameError: name 'run_epoch' is not defined

In [18]:
class allPap_Grade2(nn.Module):
    """Four-stage convolutional classifier that outputs raw class logits.

    Each stage is a run of ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU``
    units followed by a 2x2 max-pool. A global average pool feeds a
    two-layer fully connected head with one dropout layer.

    Args:
        num_classes: Width of the final linear layer (number of logits per
            sample). Defaults to 6, the value previously hard-coded here.
    """

    def __init__(self, num_classes=6):
        super().__init__()

        # (in_channels, out_channels, number of conv units) per stage.
        # Spatial sizes in the comments assume a 3 x 224 x 224 input; every
        # stage ends with MaxPool2d(2), which halves height and width.
        stage_specs = [
            (3, 32, 3),      # Block 1: 224 -> 112
            (32, 64, 3),     # Block 2: 112 -> 56
            (64, 128, 3),    # Block 3: 56 -> 28
            (128, 256, 2),   # Block 4: 28 -> 14
        ]

        layers = []
        for in_channels, out_channels, num_convs in stage_specs:
            for conv_idx in range(num_convs):
                # Only the first conv of a stage changes the channel count.
                conv_in = in_channels if conv_idx == 0 else out_channels
                layers.append(nn.Conv2d(conv_in, out_channels, kernel_size=3, padding=1))
                layers.append(nn.BatchNorm2d(out_channels))
                layers.append(nn.ReLU(inplace=True))
            layers.append(nn.MaxPool2d(2))

        # A single flat Sequential keeps the module indices (features.0 ...
        # features.36) identical to the hand-written layer list, so existing
        # state_dict keys still match.
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),   # 256 x 1 x 1
            nn.Flatten(),              # 256
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)   # raw logits
        )

    def forward(self, x):
        """Map an image batch ``[B, 3, H, W]`` to logits ``[B, num_classes]``."""
        x = self.features(x)
        x = self.classifier(x)
        return x


In [19]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = allPap_Grade2().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

# A ReduceLROnPlateau scheduler adjusts only the optimizer it was constructed
# with. The training loop that follows calls scheduler.step(val_loss), so the
# scheduler has to be rebuilt for this new optimizer; otherwise the learning
# rate of the model being trained here would never be reduced.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",       # the monitored quantity is the validation loss
    factor=0.1,       # multiply the learning rate by 0.1 on a plateau
    patience=3        # epochs with no improvement before reducing
)


In [27]:
for epoch in range(100):
    # Train for one epoch, then measure loss/accuracy on the validation split.
    train_loss, train_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc = run_epoch(val_loader, train=False)

    epoch_summary = (
        f"Epoch {epoch+1:02d} | "
        f"train_loss={train_loss:.4f} acc={train_acc:.3f} | "
        f"val_loss={val_loss:.4f} acc={val_acc:.3f}"
    )
    print(epoch_summary)

    # The plateau scheduler is driven by this epoch's validation loss.
    scheduler.step(val_loss)


Epoch 01 | train_loss=1.4809 acc=0.530 | val_loss=1.4868 acc=0.585
Epoch 02 | train_loss=1.1970 acc=0.659 | val_loss=1.0035 acc=0.698
Epoch 03 | train_loss=1.1246 acc=0.655 | val_loss=1.0738 acc=0.660
Epoch 04 | train_loss=1.0695 acc=0.663 | val_loss=0.9756 acc=0.698
Epoch 05 | train_loss=0.9656 acc=0.695 | val_loss=1.0793 acc=0.623
Epoch 06 | train_loss=0.9145 acc=0.703 | val_loss=0.9757 acc=0.660
Epoch 07 | train_loss=0.9166 acc=0.707 | val_loss=1.3212 acc=0.585
Epoch 08 | train_loss=0.8875 acc=0.723 | val_loss=0.8444 acc=0.698
Epoch 09 | train_loss=0.8704 acc=0.715 | val_loss=0.9770 acc=0.642
Epoch 10 | train_loss=0.8136 acc=0.735 | val_loss=0.7791 acc=0.755
Epoch 11 | train_loss=0.7819 acc=0.739 | val_loss=0.9430 acc=0.642
Epoch 12 | train_loss=0.8249 acc=0.731 | val_loss=0.8392 acc=0.717
Epoch 13 | train_loss=0.7771 acc=0.735 | val_loss=0.7592 acc=0.755
Epoch 14 | train_loss=0.7962 acc=0.723 | val_loss=0.7910 acc=0.736
Epoch 15 | train_loss=0.7640 acc=0.739 | val_loss=1.2361 acc=0

In [24]:
from torchvision import models
from torchvision.models import EfficientNet_B1_Weights, efficientnet_b1

num_classes = 6  # normal + 5 grades

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# EfficientNet-B1 initialised from the IMAGENET1K_V1 weights, using the
# weights-enum torchvision API.
weights = EfficientNet_B1_Weights.IMAGENET1K_V1
model = efficientnet_b1(weights=weights)

Downloading: "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 89.6MB/s]


In [25]:
# Show the stock head so the layer being replaced is visible in the output.
print(model.classifier)

# classifier[1] is the final Linear layer (Dropout -> Linear in the printed
# structure). Keep its input width and swap in a num_classes-way output.
final_linear = model.classifier[1]
in_features = final_linear.in_features
model.classifier[1] = nn.Linear(in_features, num_classes)

model = model.to(device)


Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)


In [None]:
# Multi-class cross-entropy on raw logits.
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=3e-4,
    weight_decay=1e-4,
)

# Multiply the learning rate by 0.1 once the monitored value has failed to
# improve for more than `patience` epochs.
plateau_kwargs = {"mode": "min", "factor": 0.1, "patience": 3}
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **plateau_kwargs)


In [27]:
def run_epoch(loader, train=True):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Uses the notebook-level globals ``model``, ``criterion``, ``optimizer``
    and ``device``.

    Args:
        loader: Iterable of ``(inputs, targets)`` batches, where ``targets``
            holds integer class indices.
        train: When truthy the model is put in training mode and the
            optimizer is stepped after every batch. Otherwise the model is
            put in eval mode and gradients are disabled.

    Returns:
        Tuple ``(loss, accuracy)``: the sample-weighted mean loss and the
        fraction of correctly classified samples over the whole pass.

    Raises:
        ValueError: If ``loader`` yields no samples (previously this surfaced
            as a bare ZeroDivisionError).
    """
    if train:
        model.train()
    else:
        model.eval()

    total_loss, correct, total = 0.0, 0, 0

    for xb, yb in loader:
        # non_blocking lets host-to-device copies overlap with compute when
        # the source tensors are in pinned memory; otherwise it is a no-op.
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)  # [B] class indices

        with torch.set_grad_enabled(train):
            logits = model(xb)           # [B, num_classes]
            loss = criterion(logits, yb)

            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # get an exact per-sample average even when the last batch is short.
        n_batch = yb.size(0)
        total_loss += loss.item() * n_batch
        preds = logits.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total += n_batch

    if total == 0:
        raise ValueError("run_epoch received a loader that produced no samples")

    return total_loss / total, correct / total

num_epochs = 80

for epoch in range(num_epochs):
    # Train for one epoch, then measure loss/accuracy on the validation split.
    train_loss, train_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc = run_epoch(val_loader, train=False)

    epoch_summary = (
        f"Epoch {epoch+1:02d} | "
        f"train_loss={train_loss:.4f} acc={train_acc:.3f} | "
        f"val_loss={val_loss:.4f} acc={val_acc:.3f}"
    )
    print(epoch_summary)

    # The plateau scheduler is driven by this epoch's validation loss.
    scheduler.step(val_loss)


Epoch 01 | train_loss=1.2219 acc=0.602 | val_loss=0.8238 acc=0.679
Epoch 02 | train_loss=0.5746 acc=0.815 | val_loss=0.7288 acc=0.755
Epoch 03 | train_loss=0.3394 acc=0.920 | val_loss=0.6395 acc=0.755
Epoch 04 | train_loss=0.2419 acc=0.940 | val_loss=0.5239 acc=0.868
Epoch 05 | train_loss=0.1810 acc=0.960 | val_loss=0.6222 acc=0.774
Epoch 06 | train_loss=0.1360 acc=0.964 | val_loss=0.5068 acc=0.811
Epoch 07 | train_loss=0.1663 acc=0.948 | val_loss=0.6034 acc=0.868
Epoch 08 | train_loss=0.1756 acc=0.964 | val_loss=0.7047 acc=0.830
Epoch 09 | train_loss=0.1420 acc=0.948 | val_loss=0.6931 acc=0.792
Epoch 10 | train_loss=0.1472 acc=0.948 | val_loss=0.4153 acc=0.811
Epoch 11 | train_loss=0.1472 acc=0.948 | val_loss=0.2927 acc=0.925
Epoch 12 | train_loss=0.0777 acc=0.984 | val_loss=0.5205 acc=0.868
Epoch 13 | train_loss=0.0797 acc=0.976 | val_loss=0.5048 acc=0.830
Epoch 14 | train_loss=0.0296 acc=0.996 | val_loss=0.5206 acc=0.868
Epoch 15 | train_loss=0.0290 acc=0.996 | val_loss=0.4990 acc=0

In [28]:
# Single evaluation pass over the held-out test split.
test_metrics = run_epoch(test_loader, train=False)
test_loss, test_acc = test_metrics
print(f"TEST | loss={test_loss:.4f} acc={test_acc:.3f}")


TEST | loss=0.8434 acc=0.855


In [38]:
import torch
import torch.nn as nn
from torchvision import models

num_classes = 6  # normal + 5 grades

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# ResNet-18 initialised from the IMAGENET1K_V1 weights.
weights = models.ResNet18_Weights.IMAGENET1K_V1
model = models.resnet18(weights=weights)

# Replace the final fully connected layer with a num_classes-way output,
# keeping the input width of the layer it replaces.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)

model = model.to(device)


In [None]:
# Multi-class cross-entropy on raw logits.
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=3e-4,
    weight_decay=5e-4,
)

# Multiply the learning rate by 0.1 once the monitored value has failed to
# improve for more than `patience` epochs.
plateau_kwargs = {"mode": "min", "factor": 0.1, "patience": 3}
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **plateau_kwargs)


In [40]:
def run_epoch(loader, train=True):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Uses the notebook-level globals ``model``, ``criterion``, ``optimizer``
    and ``device``.

    Args:
        loader: Iterable of ``(inputs, targets)`` batches, where ``targets``
            holds integer class indices.
        train: When truthy the model is put in training mode and the
            optimizer is stepped after every batch. Otherwise the model is
            put in eval mode and gradients are disabled.

    Returns:
        Tuple ``(loss, accuracy)``: the sample-weighted mean loss and the
        fraction of correctly classified samples over the whole pass.

    Raises:
        ValueError: If ``loader`` yields no samples (previously this surfaced
            as a bare ZeroDivisionError).
    """
    if train:
        model.train()
    else:
        model.eval()

    total_loss, correct, total = 0.0, 0, 0

    for xb, yb in loader:
        # non_blocking lets host-to-device copies overlap with compute when
        # the source tensors are in pinned memory; otherwise it is a no-op.
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)  # [B] class indices

        with torch.set_grad_enabled(train):
            logits = model(xb)           # [B, num_classes]
            loss = criterion(logits, yb)

            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # get an exact per-sample average even when the last batch is short.
        n_batch = yb.size(0)
        total_loss += loss.item() * n_batch
        preds = logits.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total += n_batch

    if total == 0:
        raise ValueError("run_epoch received a loader that produced no samples")

    return total_loss / total, correct / total

num_epochs = 80

for epoch in range(num_epochs):
    # Train for one epoch, then measure loss/accuracy on the validation split.
    train_loss, train_acc = run_epoch(train_loader, train=True)
    val_loss, val_acc = run_epoch(val_loader, train=False)

    epoch_summary = (
        f"Epoch {epoch+1:02d} | "
        f"train_loss={train_loss:.4f} acc={train_acc:.3f} | "
        f"val_loss={val_loss:.4f} acc={val_acc:.3f}"
    )
    print(epoch_summary)

    # The plateau scheduler is driven by this epoch's validation loss.
    scheduler.step(val_loss)


Epoch 01 | train_loss=1.0607 acc=0.647 | val_loss=1.0680 acc=0.585
Epoch 02 | train_loss=0.5014 acc=0.811 | val_loss=0.6734 acc=0.717
Epoch 03 | train_loss=0.2661 acc=0.920 | val_loss=1.0697 acc=0.623
Epoch 04 | train_loss=0.3661 acc=0.867 | val_loss=0.8503 acc=0.717
Epoch 05 | train_loss=0.1882 acc=0.944 | val_loss=1.1665 acc=0.717
Epoch 06 | train_loss=0.1782 acc=0.944 | val_loss=0.8187 acc=0.717
Epoch 00006: reducing learning rate of group 0 to 3.0000e-05.
Epoch 07 | train_loss=0.0827 acc=0.976 | val_loss=0.7193 acc=0.717
Epoch 08 | train_loss=0.0461 acc=0.996 | val_loss=0.7629 acc=0.717
Epoch 09 | train_loss=0.0369 acc=0.996 | val_loss=0.6136 acc=0.736
Epoch 10 | train_loss=0.0408 acc=0.996 | val_loss=0.6012 acc=0.736
Epoch 11 | train_loss=0.0302 acc=0.992 | val_loss=0.6053 acc=0.755
Epoch 12 | train_loss=0.0205 acc=1.000 | val_loss=0.5873 acc=0.755
Epoch 13 | train_loss=0.0178 acc=1.000 | val_loss=0.6269 acc=0.717
Epoch 14 | train_loss=0.0644 acc=0.972 | val_loss=0.7643 acc=0.717


KeyboardInterrupt: 