In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import torchvision.models as models
from sklearn.utils.class_weight import compute_class_weight
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler
import numpy as np
import os

# ---------------- Setup ----------------
# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if DEVICE.type == 'cuda':
    # Let cuDNN pick convolution algorithms by benchmarking; inputs are a
    # fixed 224x224 so the choice is reused across batches.
    # NOTE(review): benchmark mode can make GPU runs non bit-reproducible.
    cudnn.benchmark = True

# ---------------- Hyperparameters ----------------
BATCH_SIZE   = 32     # mini-batch size shared by the train/val/test loaders
EPOCHS       = 30     # number of training epochs (also the cosine schedule length)
INITIAL_LR   = 1e-4   # starting learning rate for AdamW
WEIGHT_DECAY = 1e-4   # AdamW weight decay
NUM_CLASSES  = 7      # width of the final classification layer
RANDOM_SEED  = 42     # seed for the train/val split and the global RNGs below

# ---------------- Reproducibility ----------------
# Seed the global generators so weight initialisation, loader shuffling and
# random augmentations start from the same state on every Restart & Run All.
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [24]:
# ---------------- Transforms ----------------
# Training pipeline: resize to 224x224, apply light random augmentation
# (horizontal flip, rotation up to 10 degrees, colour jitter), then normalise
# with the ImageNet channel statistics used by torchvision's pretrained models.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Evaluation pipeline: deterministic resize + normalise only, no augmentation.
val_test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# ---------------- Datasets & Loaders ----------------
# Two views over the same ./train folder: one augmented (for training) and
# one deterministic (for validation). A single ImageFolder + random_split
# would hand the *training* transforms to the validation subset as well,
# which makes the validation loss noisy and biases checkpoint selection.
train_val_dataset = datasets.ImageFolder(root="./train", transform=train_transforms)
val_view_dataset  = datasets.ImageFolder(root="./train", transform=val_test_transforms)
if val_view_dataset.samples != train_val_dataset.samples:
    # The split indices below are only valid if both scans list the same files.
    raise RuntimeError("./train changed between scans; train/val indices would not line up")

# 75% / 25% split, reproducible through the seeded generator.
train_size = int(0.75 * len(train_val_dataset))
val_size   = len(train_val_dataset) - train_size
train_dataset, val_split = random_split(
    train_val_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(RANDOM_SEED)
)
# Same held-out indices, but read through the un-augmented view.
val_dataset = torch.utils.data.Subset(val_view_dataset, val_split.indices)

test_dataset = datasets.ImageFolder(root="./test", transform=val_test_transforms)

# Use multiple workers and pin memory on CUDA
num_workers = min(4, os.cpu_count() or 1)  # cpu_count() may return None
pin_memory  = DEVICE.type == 'cuda'
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=num_workers, pin_memory=pin_memory)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=num_workers, pin_memory=pin_memory)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=num_workers, pin_memory=pin_memory)

# ---------------- Class Weights ----------------
# 'balanced' weights are inversely proportional to class frequency. They are
# computed over every label in ./train (train + val portions) so that every
# class present in the folder receives a weight.
all_labels = train_val_dataset.targets
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(all_labels),
    y=all_labels
)
class_weights = torch.tensor(class_weights, dtype=torch.float, device=DEVICE)

In [26]:
# ---------------- Model ----------------
# ImageNet-pretrained ResNet-18. Newer torchvision releases deprecate
# `pretrained=True` in favour of a weights enum; use the enum when it exists
# and fall back to the legacy flag otherwise (both select the same weights).
_resnet18_weights = getattr(models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    model = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)
for param in model.parameters():  # freeze backbone
    param.requires_grad = False
# NOTE(review): the frozen backbone's BatchNorm layers still update their
# running statistics while model.train() is active — confirm this is intended.

# Replace the classifier with a small trainable head (new layers default to
# requires_grad=True, so only the head is optimised).
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, NUM_CLASSES)
)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.AdamW(
    model.fc.parameters(), lr=INITIAL_LR, weight_decay=WEIGHT_DECAY
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Mixed precision is only meaningful on CUDA; on CPU the scaler and autocast
# are explicitly disabled so they become pass-throughs instead of warning.
USE_AMP = DEVICE.type == 'cuda'
scaler = GradScaler(enabled=USE_AMP)

# ---------------- Training Loop ----------------
# Each epoch: one pass over train_loader with optimisation, one pass over
# val_loader without gradients, then checkpoint if validation loss improved.
best_val_loss = float('inf')
for epoch in range(1, EPOCHS+1):
    model.train()
    running_loss = 0.0
    correct = total = 0
    loader = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS}", unit='batch')
    for imgs, lbls in loader:
        # non_blocking lets the copy overlap with compute when memory is pinned
        imgs = imgs.to(DEVICE, non_blocking=True)
        lbls = lbls.to(DEVICE, non_blocking=True)
        optimizer.zero_grad()
        with autocast(enabled=USE_AMP):
            outputs = model(imgs)
            loss = criterion(outputs, lbls)
        # Scale the loss before backward so fp16 gradients do not underflow;
        # scaler.step() unscales before calling optimizer.step().
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Accumulate sample-weighted loss so the epoch figure is a true mean
        # even when the last batch is smaller.
        running_loss += loss.item() * imgs.size(0)
        preds = outputs.argmax(dim=1)
        correct += (preds == lbls).sum().item()
        total += lbls.size(0)
        loader.set_postfix(loss=running_loss/total, acc=correct/total)

    scheduler.step()  # cosine schedule advances once per epoch
    train_loss = running_loss / total
    train_acc  = correct / total

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = val_total = 0
    with torch.no_grad():
        for imgs, lbls in val_loader:
            imgs = imgs.to(DEVICE, non_blocking=True)
            lbls = lbls.to(DEVICE, non_blocking=True)
            with autocast(enabled=USE_AMP):
                outputs = model(imgs)
                loss = criterion(outputs, lbls)
            val_loss += loss.item() * imgs.size(0)
            preds = outputs.argmax(dim=1)
            val_correct += (preds == lbls).sum().item()
            val_total += lbls.size(0)

    val_loss /= val_total
    val_acc  = val_correct / val_total
    # Read after scheduler.step(), so this is the rate the *next* epoch will use.
    current_lr = optimizer.param_groups[0]['lr']

    print(f"Epoch {epoch}/{EPOCHS} | LR: {current_lr:.6f} | "
          f"Train: loss={train_loss:.4f}, acc={train_acc:.4f} | "
          f"Val: loss={val_loss:.4f}, acc={val_acc:.4f}")

    # Keep only the weights with the lowest validation loss seen so far.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_resnet.pth")
        print("✅ Saved best model")
  scaler = GradScaler()
  with autocast():
Epoch 1/30:  54%|█████▍    | 362/673 [02:10<01:52,  2.77batch/s, acc=0.209, loss=1.92]


KeyboardInterrupt: 

In [None]:
# ---------------- Test Evaluation ----------------
# Restore the checkpoint written by the training loop and measure top-1
# accuracy on the held-out test set.
# NOTE(review): `model` must be the ResNet-18 built in the training cell; the
# recorded error for this cell suggests a different model was left in the
# kernel at the time — re-run the model cell before this one.
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
state_dict = torch.load("best_resnet.pth", map_location=DEVICE)
model.load_state_dict(state_dict)
model.eval()
correct = total = 0
with torch.no_grad():
    for imgs, lbls in test_loader:
        imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
        # Mixed precision only on CUDA; disabled (pass-through) on CPU.
        with autocast(enabled=(DEVICE.type == 'cuda')):
            outputs = model(imgs)
        preds = outputs.argmax(dim=1)
        correct += (preds == lbls).sum().item()
        total += lbls.size(0)

test_acc = correct / total
print(f"\n🎯 Final Test Accuracy: {test_acc * 100:.2f}%")

RuntimeError: Given groups=1, weight of size [32, 1, 3, 3], expected input[32, 3, 224, 224] to have 1 channels, but got 3 channels instead