# FGSC-23 Baseline (ResNet50, PyTorch)

This notebook trains a baseline fine-grained ship classifier on FGSC-23 using transfer learning.

- Dataset: `AFP Ideathon/FGSC-23/`
- Classes: 23 (see mapping in `readme.txt`)
- Model: ResNet50 (ImageNet pretrained)
- Metrics: accuracy, confusion matrix, per-class classification report
- Output: best model checkpoint and a simple inference demo

If running on Windows without CUDA, training will be slower; consider reducing batch size/epochs.


In [None]:
# Environment checks and installs (run once if needed)
import importlib
import importlib.util
import sys, subprocess, pkgutil

# pip distribution names of the packages this notebook needs.
required = [
    'torch', 'torchvision', 'tqdm', 'matplotlib', 'scikit-learn'
]
# Some distributions are imported under a different module name; without this
# mapping 'scikit-learn' never matches an importable module and pip would be
# invoked on every run.
import_names = {'scikit-learn': 'sklearn'}

for pkg in required:
    module_name = import_names.get(pkg, pkg)
    # find_spec returns None for a top-level module that cannot be imported.
    if importlib.util.find_spec(module_name) is None:
        print(f'Installing {pkg}...')
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg])
        # Let the running interpreter notice the freshly installed package.
        importlib.invalidate_caches()

import os
import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
from tqdm import tqdm

print('Python:', sys.version)
print('PyTorch:', torch.__version__)
_cuda_ok = torch.cuda.is_available()
print('CUDA available:', _cuda_ok)

# Prefer the GPU, but fall back to the CPU instead of aborting: the notebook
# header states that running without CUDA is supported (just slower).
if _cuda_ok:
    # Let cuDNN auto-tune its kernels.
    torch.backends.cudnn.benchmark = True
    DEVICE = 'cuda'
else:
    print('CUDA not available; falling back to CPU. Install CUDA-enabled PyTorch '
          'or select a GPU runtime for faster training.')
    DEVICE = 'cpu'

# Paths
# The project root defaults to the original author's location; set the
# FGSC23_PROJECT_ROOT environment variable to run the notebook elsewhere.
# In a raw string a single backslash is already literal, so they are not doubled.
PROJECT_ROOT = os.environ.get('FGSC23_PROJECT_ROOT', r'C:\Users\LESTER\Desktop\Codes')
DATA_ROOT = os.path.join(PROJECT_ROOT, 'AFP Ideathon', 'FGSC-23')
TRAIN_DIR = os.path.join(DATA_ROOT, 'train')
TEST_DIR = os.path.join(DATA_ROOT, 'test')
NUM_CLASSES = 23

# Raise explicitly (asserts are stripped under `python -O`) and name the
# folder that is missing.
for _split_dir in (TRAIN_DIR, TEST_DIR):
    if not os.path.isdir(_split_dir):
        raise FileNotFoundError(f'FGSC-23 train/test folders not found: {_split_dir}')


In [None]:
# Transforms and Datasets
IMG_SIZE = 224
BATCH_SIZE = 64
NUM_WORKERS = 6  # increase if you have more CPU cores

_target_size = (IMG_SIZE, IMG_SIZE)


def _tensor_and_normalize():
    """Return the tensor-conversion and normalization steps both pipelines end with."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Training pipeline: resize, light augmentation, then the shared tail.
train_tfms = transforms.Compose([
    transforms.Resize(_target_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    *_tensor_and_normalize(),
])

# Evaluation pipeline: deterministic, no augmentation.
test_tfms = transforms.Compose([
    transforms.Resize(_target_size),
    *_tensor_and_normalize(),
])

# ImageFolder derives the class list from the sub-folder names of each split.
train_ds = datasets.ImageFolder(TRAIN_DIR, transform=train_tfms)
val_ds = datasets.ImageFolder(TEST_DIR, transform=test_tfms)

# Settings shared by both loaders; only shuffling differs.
_loader_opts = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_opts)

class_names = train_ds.classes
print('Classes:', class_names)
print('Train images:', len(train_ds), ' Val images:', len(val_ds))


In [None]:
# Model: ResNet50 transfer learning
from torchvision.models import resnet50
import torch.nn as nn

model = resnet50(weights='IMAGENET1K_V1')

# Freeze every pretrained parameter so that only the replacement head created
# below is trainable.
for frozen_param in model.parameters():
    frozen_param.requires_grad_(False)

# Swap the final fully connected layer for one sized to this dataset; the new
# layer is created after the freeze, so its parameters keep requiring gradients.
in_features = model.fc.in_features
new_head = nn.Linear(in_features, NUM_CLASSES)
model.fc = new_head

model = model.to(DEVICE)
print('Device:', DEVICE)


In [None]:
# Training utilities
import torch.optim as optim

# Hyper-parameters
epochs = 20
lr = 1e-3

criterion = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=lr)

# Best validation accuracy seen so far and where its checkpoint is written.
best_val_acc = 0.0
best_path = os.path.join(DATA_ROOT, 'resnet50_fgsc23_best.pt')


def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and report epoch averages.

    Args:
        model: Network to train; it is switched to training mode first.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A ``(mean_loss, accuracy)`` tuple computed over every sample seen.
    """
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0

    progress = tqdm(loader, desc='Train', leave=False)
    for batch_images, batch_labels in progress:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the epoch figure is a
        # per-sample average (assumes the criterion returns a batch mean).
        loss_sum += batch_loss.item() * batch_images.size(0)
        predicted = logits.argmax(dim=1)
        hits += predicted.eq(batch_labels).sum().item()
        seen += batch_labels.size(0)

    mean_loss = loss_sum / seen
    accuracy = hits / seen
    return mean_loss, accuracy


def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without computing gradients.

    Args:
        model: Network to evaluate; it is switched to eval mode first.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A ``(mean_loss, accuracy, predictions, labels)`` tuple. The first two
        are computed over every sample seen; the last two are flat Python
        lists in loader order.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0
    predictions, targets = [], []

    with torch.no_grad():
        progress = tqdm(loader, desc='Val', leave=False)
        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)
            predicted = logits.argmax(dim=1)

            # Weight the batch loss by its size so the final figure is a
            # per-sample average (assumes the criterion returns a batch mean).
            loss_sum += batch_loss.item() * batch_images.size(0)
            hits += predicted.eq(batch_labels).sum().item()
            seen += batch_labels.size(0)

            # Keep plain Python ints so callers can pass them on directly.
            predictions += predicted.cpu().tolist()
            targets += batch_labels.cpu().tolist()

    mean_loss = loss_sum / seen
    accuracy = hits / seen
    return mean_loss, accuracy, predictions, targets


# Train for the configured number of epochs, keeping the checkpoint with the
# highest accuracy on val_loader.
for epoch in range(1, epochs + 1):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    # Only the loss and accuracy are needed here; predictions/labels are dropped.
    val_loss, val_acc = evaluate(model, val_loader, criterion, DEVICE)[:2]
    print(f'Epoch {epoch:02d}/{epochs} | train_loss={train_loss:.4f} acc={train_acc:.4f} | val_loss={val_loss:.4f} acc={val_acc:.4f}')

    # Nothing to save unless this epoch strictly beats the best so far.
    if not val_acc > best_val_acc:
        continue

    best_val_acc = val_acc
    torch.save(model.state_dict(), best_path)
    print(f'  Saved new best to {best_path} (val_acc={best_val_acc:.4f})')


In [None]:
# Load best model and evaluate on the test set with confusion matrix
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay

# Rebuild the same architecture without pretrained weights; the saved state
# dict supplies the parameters, including the replaced classification head.
best = resnet50(weights=None)
in_features = best.fc.in_features
best.fc = nn.Linear(in_features, NUM_CLASSES)
best = best.to(DEVICE)
best.load_state_dict(torch.load(best_path, map_location=DEVICE))

_, test_acc, test_preds, test_labels = evaluate(best, val_loader, criterion, DEVICE)
print('Test accuracy:', test_acc)

# Pin the label set to every known class index. Without `labels=`, a class
# that appears in neither the labels nor the predictions is dropped, and the
# matrix/report size then no longer matches `class_names`.
label_ids = list(range(len(class_names)))

cm = confusion_matrix(test_labels, test_preds, labels=label_ids)
fig, ax = plt.subplots(figsize=(10, 10))
ConfusionMatrixDisplay(cm, display_labels=class_names).plot(ax=ax, cmap='Blues', colorbar=False, xticks_rotation=90)
plt.tight_layout()
plt.show()

print('\nClassification report:\n')
print(classification_report(test_labels, test_preds, labels=label_ids, target_names=class_names))


In [None]:
# Quick inference demo on a few test images
from PIL import Image
import random

# File extensions considered when sampling demo images. Matching is
# case-insensitive, and more than '.jpg' is accepted so that classes stored
# in another common raster format are not silently skipped.
image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

best.eval()
sample_paths = []
for cls in class_names[:5]:  # sample from first 5 classes for brevity
    cls_dir = os.path.join(TEST_DIR, cls)
    imgs = [f for f in os.listdir(cls_dir) if f.lower().endswith(image_exts)]
    if imgs:
        sample_paths.append(os.path.join(cls_dir, random.choice(imgs)))

for p in sample_paths:
    # Close the file handle as soon as the pixels are decoded; convert()
    # returns an independent in-memory image.
    with Image.open(p) as raw_img:
        img = raw_img.convert('RGB')
    # Add a batch dimension of 1 before moving the tensor to the model's device.
    inp = test_tfms(img).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        logits = best(inp)
        pred = logits.argmax(dim=1).item()
    print(f'Image: {os.path.basename(p)} | Pred: {class_names[pred]}')
