In [1]:
import torch
import timm
import os
import random
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image

# Configurable RegNetY models to benchmark
regnety_models = {
    "RegNetY-4G": {"timm_name": "regnety_004", "image_size": 224},
    "RegNetY-8G": {"timm_name": "regnety_008", "image_size": 224},
    "RegNetY-16G": {"timm_name": "regnety_016", "image_size": 224},
}

# Correct root dataset directory (matches your screenshot)
data_root = "."  # the current folder contains 'images' and 'annotations'

# Device config
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Confirm configuration
print("Models to run:")
for name, meta in regnety_models.items():
    print(f" - {name}: {meta['timm_name']} | {meta['image_size']}x{meta['image_size']}")

print(f"\nDataset root: {data_root}")
print(f"Device: {device}")


Models to run:
 - RegNetY-4G: regnety_004 | 224x224
 - RegNetY-8G: regnety_008 | 224x224
 - RegNetY-16G: regnety_016 | 224x224

Dataset root: .
Device: cuda


In [2]:
class OxfordPetsDataset(Dataset):
    def __init__(self, root_dir, split='train', image_size=224, transform=None, split_ratio=0.8, seed=42):
        self.root_dir = root_dir
        self.split = split
        self.image_size = image_size

        # List all .jpg files in ./images/
        image_dir = os.path.join(root_dir, "images")
        all_files = sorted([
            f[:-4] for f in os.listdir(image_dir)
            if f.endswith(".jpg")
        ])

        # Create train/val split
        random.seed(seed)
        random.shuffle(all_files)
        split_idx = int(len(all_files) * split_ratio)
        self.image_ids = all_files[:split_idx] if split == 'train' else all_files[split_idx:]

        # Extract class names from filenames
        self.class_names = sorted(list(set([img_id.rsplit('_', 1)[0] for img_id in all_files])))
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.class_names)}

        # Define transforms
        if transform:
            self.transform = transform
        else:
            self.transform = transforms.Compose([
                transforms.Resize((image_size, image_size)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        img_id = self.image_ids[idx]
        img_path = os.path.join(self.root_dir, 'images', f'{img_id}.jpg')
        image = Image.open(img_path).convert('RGB')

        class_name = img_id.rsplit('_', 1)[0]
        label = self.class_to_idx[class_name]

        image = self.transform(image)
        return image, label


def get_loaders(root_dir, image_size, batch_size=64, num_workers=0, seed=42):
    train_dataset = OxfordPetsDataset(root_dir, split='train', image_size=image_size, seed=seed)
    val_dataset   = OxfordPetsDataset(root_dir, split='val', image_size=image_size, seed=seed)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader   = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, val_loader


In [3]:
import torch.nn as nn

def load_model(model_name, num_classes):
    model_info = regnety_models[model_name]
    timm_name = model_info["timm_name"]

    # Load pretrained model from timm
    model = timm.create_model(timm_name, pretrained=True)

    # Replace classifier head based on structure
    if hasattr(model, "head") and isinstance(model.head, nn.Linear):
        in_features = model.head.in_features
        model.head = nn.Linear(in_features, num_classes)
    elif hasattr(model, "classifier") and isinstance(model.classifier, nn.Linear):
        in_features = model.classifier.in_features
        model.classifier = nn.Linear(in_features, num_classes)
    else:
        raise ValueError("Unknown model head structure for replacement.")

    return model.to(device)


In [4]:
import time
from tqdm import tqdm

def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss, correct, total = 0, 0, 0
    start_time = time.time()

    loop = tqdm(dataloader, desc="Training", leave=False)
    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)
        _, preds = outputs.max(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

        loop.set_postfix(loss=loss.item(), acc=100.0 * correct / total)

    avg_loss = total_loss / total
    accuracy = 100.0 * correct / total
    elapsed = time.time() - start_time
    return avg_loss, accuracy, elapsed

def validate(model, dataloader, criterion, device):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    start_time = time.time()

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Validating", leave=False):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            total_loss += loss.item() * images.size(0)
            _, preds = outputs.max(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    avg_loss = total_loss / total
    accuracy = 100.0 * correct / total
    elapsed = time.time() - start_time
    return avg_loss, accuracy, elapsed


In [5]:
from torch import nn, optim
import pandas as pd
import time
import warnings
import contextlib
import io
from fvcore.nn import FlopCountAnalysis

results = []
num_epochs = 5
learning_rate = 1e-3
batch_size = 64
num_workers = 0

for model_name, config in regnety_models.items():
    print(f"\n🔍 Running model: {model_name}")

    image_size = config["image_size"]
    train_loader, val_loader = get_loaders(data_root, image_size=image_size, batch_size=batch_size, num_workers=num_workers)

    # Initialize model and classifier head
    model = timm.create_model(config["timm_name"], pretrained=True)
    num_classes = len(train_loader.dataset.class_to_idx)
    model.reset_classifier(num_classes)
    model = model.to(device)

    # Compute FLOPs using fvcore, suppressing warnings and stderr
    sample_input = torch.randn(1, 3, image_size, image_size).to(device)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with contextlib.redirect_stderr(io.StringIO()):
            flops = FlopCountAnalysis(model, sample_input).total() / 1e9  # GFLOPs

    # Optimizer and loss
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # Store epoch history
    history = []

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        
        train_loss, train_acc, train_time = train(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc, val_time = validate(model, val_loader, criterion, device)

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss:   {val_loss:.4f}, Val Acc:   {val_acc:.2f}%")
        print(f"Train Time: {train_time:.2f}s, Val Time: {val_time:.2f}s\n")

        history.append((train_loss, train_acc, val_loss, val_acc, train_time, val_time))

    # Final epoch results for reporting
    final_metrics = history[-1]
    params_m = round(sum(p.numel() for p in model.parameters()) / 1e6)  # Round to int M
    flops_g = round(flops, 1)  # 0.4G format
    throughput = round(len(val_loader.dataset) / final_metrics[5], 1)
    top1_acc = round(final_metrics[3], 1)

    results.append({
        "method": model_name,
        "image size": f"{image_size}²",
        "#params": f"{params_m}M",
        "FLOPs": f"{flops_g}G",
        "throughput (image / s)": throughput,
        "ImageNet top-1 acc.": top1_acc,
    })

# Final table
results_df = pd.DataFrame(results)
print("\n✅ RegNet Benchmark Summary:")
display(results_df)



🔍 Running model: RegNetY-4G
Epoch 1/5


                                                                               

Train Loss: 0.9926, Train Acc: 71.97%
Val Loss:   0.6959, Val Acc:   77.60%
Train Time: 24.22s, Val Time: 5.38s

Epoch 2/5


                                                                               

Train Loss: 0.3831, Train Acc: 87.77%
Val Loss:   0.6063, Val Acc:   81.12%
Train Time: 24.67s, Val Time: 5.36s

Epoch 3/5


                                                                                

Train Loss: 0.2301, Train Acc: 92.78%
Val Loss:   0.7511, Val Acc:   77.94%
Train Time: 24.65s, Val Time: 5.56s

Epoch 4/5


                                                                                

Train Loss: 0.2045, Train Acc: 93.30%
Val Loss:   0.6391, Val Acc:   80.04%
Train Time: 24.57s, Val Time: 5.41s

Epoch 5/5


                                                                                

Train Loss: 0.1652, Train Acc: 94.79%
Val Loss:   0.6200, Val Acc:   82.61%
Train Time: 24.69s, Val Time: 5.49s


🔍 Running model: RegNetY-8G
Epoch 1/5


                                                                               

Train Loss: 0.8523, Train Acc: 76.29%
Val Loss:   0.6772, Val Acc:   77.81%
Train Time: 25.01s, Val Time: 5.22s

Epoch 2/5


                                                                                

Train Loss: 0.2762, Train Acc: 91.04%
Val Loss:   0.8037, Val Acc:   76.79%
Train Time: 24.92s, Val Time: 5.21s

Epoch 3/5


                                                                                

Train Loss: 0.1725, Train Acc: 94.82%
Val Loss:   0.8603, Val Acc:   77.94%
Train Time: 25.26s, Val Time: 5.12s

Epoch 4/5


                                                                                

Train Loss: 0.1686, Train Acc: 94.69%
Val Loss:   0.5242, Val Acc:   84.10%
Train Time: 25.44s, Val Time: 5.26s

Epoch 5/5


                                                                                

Train Loss: 0.1164, Train Acc: 96.33%
Val Loss:   0.6921, Val Acc:   83.42%
Train Time: 25.01s, Val Time: 5.19s


🔍 Running model: RegNetY-16G
Epoch 1/5


                                                                               

Train Loss: 0.8276, Train Acc: 77.11%
Val Loss:   1.1695, Val Acc:   66.78%
Train Time: 26.57s, Val Time: 5.25s

Epoch 2/5


                                                                               

Train Loss: 0.3247, Train Acc: 89.67%
Val Loss:   0.8332, Val Acc:   77.06%
Train Time: 26.46s, Val Time: 5.25s

Epoch 3/5


                                                                                

Train Loss: 0.2225, Train Acc: 92.96%
Val Loss:   0.6601, Val Acc:   80.99%
Train Time: 26.39s, Val Time: 5.28s

Epoch 4/5


                                                                                

Train Loss: 0.1373, Train Acc: 95.57%
Val Loss:   0.5628, Val Acc:   85.25%
Train Time: 26.35s, Val Time: 5.29s

Epoch 5/5


                                                                                

Train Loss: 0.1713, Train Acc: 94.69%
Val Loss:   0.7016, Val Acc:   81.80%
Train Time: 26.19s, Val Time: 5.26s


✅ RegNet Benchmark Summary:




Unnamed: 0,method,image size,#params,FLOPs,throughput (image / s),ImageNet top-1 acc.
0,RegNetY-4G,224²,4M,0.4G,269.1,82.6
1,RegNetY-8G,224²,6M,0.8G,285.0,83.4
2,RegNetY-16G,224²,10M,1.7G,281.2,81.8
