In [1]:
# Check GPU availability and install correct PyTorch version
!nvidia-smi

# Check if we have GPU hardware but PyTorch can't see it
import subprocess
import sys

result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
has_nvidia_gpu = result.returncode == 0

print(f"NVIDIA GPU detected: {has_nvidia_gpu}")

# Try importing torch to check current version
try:
    import torch
    print(f"Current PyTorch version: {torch.__version__}")
    print(f"CUDA available in PyTorch: {torch.cuda.is_available()}")
    
    # If we have a GPU but PyTorch can't use it, we need to reinstall
    if has_nvidia_gpu and not torch.cuda.is_available():
        print("\n⚠️ GPU detected but PyTorch is CPU-only!")
        print("Installing PyTorch with CUDA support...")
        %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 --force-reinstall
        print("\n✅ PyTorch with CUDA installed. Please restart the runtime: Runtime -> Restart runtime")
    elif not has_nvidia_gpu:
        print("\n⚠️ No GPU detected! In Colab, go to: Runtime -> Change runtime type -> Select GPU (e.g., T4 GPU)")
    else:
        print("\n✅ PyTorch with CUDA is properly configured!")
        
except ImportError:
    print("PyTorch not installed yet")


Fri Oct 17 19:49:51 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 581.57                 Driver Version: 581.57         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4080 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   40C    P8             10W /  320W |    2008MiB /  16376MiB |      1%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [2]:
# Check if GPU is available in Colab
!nvidia-smi

import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import os
import time

# Import PyTorch libraries
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Report the PyTorch build and whether (and which) CUDA device it can see.
print("Libraries imported - ready to use PyTorch", torch.__version__)
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

def show_image(image, label):
    """Plot a single image tensor with its label in the title.

    Args:
        image: Tensor whose axes are reordered with ``permute(1, 2, 0)`` before
            plotting (presumably channels-first (C, H, W) -- TODO confirm).
        label: Value interpolated into the plot title.
    """
    # Move the first axis to the end, then drop any size-1 axes for imshow.
    plottable = image.permute(1, 2, 0).squeeze()
    plt.imshow(plottable)
    plt.title(f'Label: {label}')
    plt.show()

Fri Oct 17 19:50:01 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 581.57                 Driver Version: 581.57         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4080 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   40C    P8              9W /  320W |    2018MiB /  16376MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Pick the compute device: prefer CUDA, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Uncomment to force CPU execution:
# device="cpu"

print(f"Using {device} device")

# When True, a later cell restores model/optimizer state from an existing checkpoint.
resume_training = True

class Params:
    """Hyper-parameters and settings for one training run.

    Instances are stored inside checkpoints and compared on resume, so equality
    is defined field-by-field over the instance attributes.
    """

    def __init__(self):
        self.batch_size = 16            # samples per training batch
        self.name = "resnet_152_sgd1"   # run name; used for checkpoint/log folders
        self.workers = 4                # DataLoader worker processes
        self.lr = 0.1                   # initial SGD learning rate
        self.momentum = 0.9             # SGD momentum
        self.weight_decay = 1e-4        # SGD weight decay
        self.lr_step_size = 30          # StepLR: epochs between decays
        self.lr_gamma = 0.1             # StepLR: multiplicative decay factor

    def __repr__(self):
        """Return the attribute dictionary rendered as a string."""
        return str(self.__dict__)

    def __eq__(self, other):
        """Compare all attributes; defer to Python for non-``Params`` operands.

        Returning ``NotImplemented`` (instead of touching ``other.__dict__``)
        keeps comparisons such as ``params == None`` from raising
        ``AttributeError``; they simply evaluate to ``False``.
        """
        if not isinstance(other, Params):
            return NotImplemented
        return self.__dict__ == other.__dict__

# `device` is already selected at the top of this cell. It is deliberately not
# recomputed here, so a manual override (e.g. device="cpu") stays in effect.
params = Params()
params, params.batch_size

In [None]:
import os, shutil
from pathlib import Path
import scipy.io as sio  # pip install scipy
from tqdm import tqdm    # pip install tqdm

# Re-organise the flat ImageNet validation folder into one sub-folder per class
# (val_sorted/<WNID>/...), using the devkit's ground-truth labels.

# 1) CHANGE THIS to your actual local path.
# Must be a raw string: in a normal literal "\a" (from "\archive") is the BEL
# control character and "\e" is an invalid escape, which corrupts the path.
IMAGENET_ROOT = Path(r"Z:\era-v4\era4-assign9 - org\archive\ILSVRC2012")

VAL_FLAT  = IMAGENET_ROOT / "val"                # flat folder (source)
DEVKIT    = IMAGENET_ROOT / "devkit"
VAL_OUT   = IMAGENET_ROOT / "val_sorted"         # output target folder

# 2) Load mapping: ILSVRC_ID → WNID (only IDs 1..1000 are kept)
meta = sio.loadmat(str(DEVKIT / "data" / "meta.mat"), squeeze_me=True)['synsets']
id_to_wnid = {
    int(e['ILSVRC2012_ID']) : str(e['WNID'])
    for e in meta
    if 1 <= int(e['ILSVRC2012_ID']) <= 1000
}

# 3) Read the ground-truth class for each image (alphabetically sorted order)
gt_file = DEVKIT / "data" / "ILSVRC2012_validation_ground_truth.txt"
with open(gt_file) as gt_handle:  # context manager so the handle is closed
    gt_ids = [int(x) for x in gt_handle.read().strip().splitlines()]

val_files = sorted(VAL_FLAT.glob("*.JPEG"))
assert len(val_files) == 50000, f"Expected 50k val images, got {len(val_files)}"
assert len(val_files) == len(gt_ids), "Mismatch between files and labels"

# Create the output folder only after the inputs have been validated.
VAL_OUT.mkdir(parents=True, exist_ok=True)

print("Reorganizing validation images into class folders...")

# 4) Copy each image into val_sorted/<WNID>/
for img_path, ilsvrc_id in tqdm(zip(val_files, gt_ids), total=len(val_files)):
    wnid = id_to_wnid[ilsvrc_id]
    class_dir = VAL_OUT / wnid
    class_dir.mkdir(exist_ok=True)
    dst = class_dir / img_path.name
    shutil.copy2(img_path, dst)

print("✅ DONE — validation images are now in", VAL_OUT)


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%mkdir data

In [None]:
# Copy the dataset archives from the mounted Drive folder into data/ ...
!cp drive/MyDrive/data/imagenet/imagenet_subset.zip data/
!cp drive/MyDrive/data/imagenet/imagenet_val.zip data/
# ... and extract them into /content/data/.
# NOTE(review): archive.zip is not one of the files copied above (those are
# imagenet_subset.zip and imagenet_val.zip) -- confirm this archive exists or
# whether imagenet_subset.zip was intended here.
!unzip /content/data/archive.zip -d /content/data/
!unzip /content/data/imagenet_val.zip -d /content/data/

In [None]:
!pwd

In [None]:
!unzip /content/data/imagenet_subset.zip -d /content/data/

In [None]:
# Extract every .tar file found under data\ into content\data\.
# NOTE(review): `for %f in (...) do` is Windows cmd.exe syntax, while the
# neighbouring cells use POSIX commands and /content paths -- confirm which
# environment this cell is meant to run in.
! for %f in (data\*.tar) do tar -xvf "%f" -C content\data\

In [None]:
! ls /content/data/

In [None]:
!ls /content/data/imagenet_subtrain/

In [None]:
training_folder_name = 'data-train'
val_folder_name = 'data-val'

In [None]:
os.listdir(training_folder_name)

In [None]:
# The previous cell already lists the training folder; list the validation
# folder here so both dataset roots are inspected.
os.listdir(val_folder_name)

In [None]:
# Training pipeline: convert to tensor, random-resized-crop to 224, random
# horizontal flip, then per-channel normalisation.
train_transformation = transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomResizedCrop(224, interpolation=transforms.InterpolationMode.BILINEAR, antialias=True),
        transforms.RandomHorizontalFlip(0.5),
        # Normalize the pixel values (in R, G, and B channels) with the standard
        # ImageNet statistics. The green-channel mean is 0.456 (it had been
        # mistyped as 0.485, a copy of the red-channel value).
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

train_dataset = torchvision.datasets.ImageFolder(
    root=training_folder_name,
    transform=train_transformation
)
# RandomSampler draws the training samples in random order.
train_sampler = torch.utils.data.RandomSampler(train_dataset)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=params.batch_size,
    sampler=train_sampler,
    num_workers = params.workers,
    pin_memory=True,
)
# Pull a single batch as a smoke test of the pipeline, then display one sample.
for X, y in train_loader:
    break
print(X.shape)
show_image(X[0], y[0])

In [None]:
train_dataset[1337]

In [None]:
# Validation pipeline: convert to tensor, resize to 256, centre-crop to 224,
# then per-channel normalisation (no random augmentation).
val_transformation = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(size=256, antialias=True),
        transforms.CenterCrop(224),
        # Normalize the pixel values (in R, G, and B channels) with the standard
        # ImageNet statistics. The green-channel mean is 0.456 (it had been
        # mistyped as 0.485, a copy of the red-channel value).
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
val_dataset = torchvision.datasets.ImageFolder(
    root=val_folder_name,
    transform=val_transformation
)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=64,
    num_workers=params.workers,
    shuffle=False,
    pin_memory=True
)
# Pull a single batch as a smoke test of the pipeline, then display one sample.
# Note: this rebinds the notebook-level `X` and `y`.
for X, y in val_loader:
    break
print(X.shape)
show_image(X[0], y[0])

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import seaborn as sns

def dataset_eda(train_dataset, val_dataset,
                mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Print summary statistics and plot a quick visual overview of the datasets.

    Args:
        train_dataset: Dataset exposing ``classes``, ``targets``, ``__len__`` and
            integer indexing that returns ``(image_tensor, label)``.
        val_dataset: Dataset; only its length is reported.
        mean: Per-channel mean used to undo normalisation for display.
        std: Per-channel standard deviation used to undo normalisation.

    Side effects:
        Prints to stdout and shows two matplotlib figures (a bar chart of the
        per-class sample counts and a 2x5 grid of random training images).
    """
    print("📊 EDA on ImageNet Subset")
    print("-" * 40)
    print(f"Total Training Images: {len(train_dataset)}")
    print(f"Total Validation Images: {len(val_dataset)}")
    print(f"Number of Classes: {len(train_dataset.classes)}\n")

    # Print first few class names
    print("Sample Class Names:", train_dataset.classes[:10], "\n")

    # Count images per class directly from the label list.
    train_counts = Counter(train_dataset.targets)
    train_freqs = [train_counts[i] for i in range(len(train_dataset.classes))]

    # Plot class distribution (only the first 30 class indices, for readability)
    shown_freqs = train_freqs[:30]
    plt.figure(figsize=(12, 6))
    sns.barplot(x=list(range(len(shown_freqs))), y=shown_freqs)
    plt.title("Training Samples per Class (Top 30)")
    plt.xlabel("Class Index")
    plt.ylabel("Image Count")
    plt.show()

    # Per-channel statistics broadcast over the trailing (channel) axis of an
    # (H, W, C) array; a single scalar pair would tint the G and B channels.
    mean_arr = np.asarray(mean, dtype=np.float32)
    std_arr = np.asarray(std, dtype=np.float32)

    # Show a few sample images
    print("\n🖼️ Sample Images from Training Set:")
    fig, axes = plt.subplots(2, 5, figsize=(12, 5))
    for ax in axes.flat:
        idx = np.random.randint(len(train_dataset))
        img, label = train_dataset[idx]
        img = img.permute(1, 2, 0).numpy()
        img = np.clip(img * std_arr + mean_arr, 0, 1)  # De-normalize for viewing
        ax.imshow(img)
        ax.set_title(train_dataset.classes[label][:15])
        ax.axis("off")
    plt.tight_layout()
    plt.show()

# Run EDA
dataset_eda(train_dataset, val_dataset)


In [None]:
from math import sqrt
def train(dataloader, model, loss_fn, optimizer, epoch, writer):
    """Run one training epoch and log progress every 100 batches.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``dataloader.dataset`` must
            support ``len()``.
        model: Module to optimise; switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer bound to the model's parameters.
        epoch: Zero-based epoch index, used to compute the logging step.
        writer: Object with ``add_scalar(tag, value, step)``, or ``None`` to
            skip scalar logging.
    """
    size = len(dataloader.dataset)
    model.train()
    epoch_start = time.time()
    window_start = time.time()
    samples_seen = 0
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Clear gradients before the backward pass. Doing it up front means
        # gradients left over from an interrupted earlier run can never be
        # accumulated into this step.
        optimizer.zero_grad()

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        batch_size = len(X)
        # Running count stays correct even if a batch is smaller than the rest.
        samples_seen += batch_size
        if batch % 100 == 0:
            loss_value, current = loss.item(), samples_seen
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}], {(current/size * 100):>4f}%")
            step = epoch * size + current
            if writer is not None:
                writer.add_scalar('training loss',
                                loss_value,
                                step)
            now = time.time()
            delta = now - window_start
            window_start = now
            if batch != 0:
                # `delta` covers the last 100 batches; extrapolate to the rest.
                print("Done in ", delta, " seconds")
                remaining_steps = size - current
                speed = 100 * batch_size / delta
                remaining_time = remaining_steps / speed
                print("Remaining time (seconds): ", remaining_time)
    print("Entire epoch done in ", time.time() - epoch_start, " seconds")

In [None]:
def test(dataloader, model, loss_fn, epoch, writer, train_dataloader, calc_acc5=False):
    """Evaluate ``model`` on ``dataloader`` and report loss and accuracy.

    Args:
        dataloader: Iterable of ``(X, y)`` evaluation batches.
        model: Module to evaluate; switched to eval mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        epoch: Epoch index; multiplied by the training-set size to obtain the
            logging step.
        writer: Object with ``add_scalar(tag, value, step)``, or ``None`` to
            skip scalar logging.
        train_dataloader: Training loader; only ``len(train_dataloader.dataset)``
            is used, to compute the logging step.
        calc_acc5: When True, top-5 accuracy is computed, printed and logged too.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct, correct_top5 = 0, 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            if calc_acc5:
                # topk raises if k exceeds the number of classes; clamp it.
                k = min(5, pred.size(1))
                _, pred_top5 = pred.topk(k, 1, largest=True, sorted=True)
                correct_top5 += pred_top5.eq(y.view(-1, 1).expand_as(pred_top5)).sum().item()
    test_loss /= num_batches  # mean of the per-batch losses
    step = epoch * len(train_dataloader.dataset)
    correct /= size
    correct_top5 /= size
    if writer is not None:
        writer.add_scalar('test loss',
                            test_loss,
                            step)
        writer.add_scalar('test accuracy',
                            100*correct,
                            step)
        if calc_acc5:
            writer.add_scalar('test accuracy5',
                            100*correct_top5,
                            step)
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    if calc_acc5:
        print(f"Test Error: \n Accuracy-5: {(100*correct_top5):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
## Sanity check: evaluate a pretrained ResNet-18 on the validation loader to
## validate the dataset, the transforms and the metrics code.
pretrained_model = torchvision.models.resnet18(weights='ResNet18_Weights.DEFAULT')
pretrained_model = pretrained_model.to(device)
start = time.time()
loss_fn = nn.CrossEntropyLoss()
test(
    val_loader,
    pretrained_model,
    loss_fn,
    epoch=0,
    writer=None,
    train_dataloader=train_loader,
    calc_acc5=True,
)
print("Elapsed: ", time.time() - start)

In [None]:
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1) -> nn.Conv2d:
    """Create a bias-free 3x3 convolution with padding 1.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.
        groups: Number of channel groups.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Create a bias-free 1x1 (pointwise) convolution.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.
    """
    pointwise = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise

In [None]:
from functools import partial
from typing import Any, Callable, List, Optional, Type, Union
from torch import Tensor
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, plus shortcut.

    The stride is applied in the 3x3 convolution (``conv2``), as torchvision
    does, rather than in the first 1x1 convolution as in the original paper
    ("Deep residual learning for image recognition",
    https://arxiv.org/abs/1512.03385). This variant is also known as
    ResNet V1.5 and improves accuracy according to
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
    """

    # Output channels of the block are ``planes * expansion``.
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        base_width: int = 64,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Build the three conv/norm pairs and store the optional shortcut module.

        Args:
            inplanes: Channels of the block input.
            planes: Base channel count; the block outputs ``planes * expansion``.
            stride: Stride of the 3x3 convolution.
            downsample: Optional module applied to the input to form the
                shortcut; when None the input itself is the shortcut.
            base_width: Scales the inner width as ``planes * base_width / 64``.
            norm_layer: Normalisation layer factory; ``nn.BatchNorm2d`` if None.
        """
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        inner = int(planes * (base_width / 64.0))
        expanded = planes * self.expansion

        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, inner)
        self.bn1 = make_norm(inner)
        self.conv2 = conv3x3(inner, inner, stride)
        self.bn2 = make_norm(inner)
        self.conv3 = conv1x1(inner, expanded)
        self.bn3 = make_norm(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the bottleneck path, add the shortcut, and apply the final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Shortcut: the raw input, or its projection when a downsample module is set.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

In [None]:
class ResNet(nn.Module):
    """ResNet backbone: 7x7 stem, four residual stages, global pooling, linear head."""

    def __init__(
        self,
        block: Type[Bottleneck],
        layers: List[int],
        num_classes: int = 1000,
        width_per_group: int = 64,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Build the network and initialise convolution / normalisation weights.

        Args:
            block: Residual block class; must expose ``expansion``.
            layers: Number of blocks in each of the four stages.
            num_classes: Size of the final linear layer's output.
            width_per_group: Passed to each block as ``base_width``.
            norm_layer: Normalisation layer factory; ``nn.BatchNorm2d`` if None.
        """
        super().__init__()
        self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer

        self.inplanes = 64
        self.base_width = width_per_group

        # Stem
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = self._norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages, registered as layer1..layer4 in that order:
        # (base planes, stride of the stage's first block).
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for stage_idx, (stage_planes, stage_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, stage_planes, layers[stage_idx], stride=stage_stride)
            setattr(self, f"layer{stage_idx + 1}", stage)

        # Head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal init for convolutions; unit scale / zero shift for norms.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def _make_layer(
        self,
        block: Type[Union[Bottleneck]],
        planes: int,
        blocks: int,
        stride: int = 1,
    ) -> nn.Sequential:
        """Assemble one stage of ``blocks`` residual blocks and advance ``self.inplanes``.

        Only the first block receives ``stride`` and, when the stride is not 1
        or the channel count changes, a 1x1-conv + norm projection shortcut.
        """
        norm_layer = self._norm_layer
        out_planes = planes * block.expansion

        projection = None
        if not (stride == 1 and self.inplanes == out_planes):
            projection = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                norm_layer(out_planes),
            )

        stage_blocks = [
            block(self.inplanes, planes, stride, projection, self.base_width, norm_layer)
        ]
        # Every following block sees the expanded channel count as its input.
        self.inplanes = out_planes
        stage_blocks.extend(
            block(
                self.inplanes,
                planes,
                base_width=self.base_width,
                norm_layer=norm_layer,
            )
            for _ in range(1, blocks)
        )

        return nn.Sequential(*stage_blocks)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Stem -> four stages -> global average pool -> flatten -> linear head."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)

    def forward(self, x: Tensor) -> Tensor:
        """Delegate to ``_forward_impl``."""
        return self._forward_impl(x)

In [None]:
# Use the device that was set earlier (should be cuda if available)
if not torch.cuda.is_available():
    raise RuntimeError("⚠️ CUDA is not available! Please enable GPU runtime in Colab: Runtime -> Change runtime type -> GPU")

# Stage depths [3, 8, 36, 3] of Bottleneck blocks.
model = ResNet(Bottleneck, [3, 8, 36, 3]).to(device)
print(f"Model is on device: {next(model.parameters()).device}")

# Shape-only smoke test on the most recently loaded batch `X`. Run it under
# no_grad: otherwise the notebook-level `preds` keeps the whole autograd graph
# (all intermediate activations) alive on the device for the rest of the session.
with torch.no_grad():
    preds = model(X.to(device))
preds.shape

In [None]:
# Objective, optimiser and learning-rate schedule, all configured from `params`.
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(
    model.parameters(),
    lr=params.lr,
    momentum=params.momentum,
    weight_decay=params.weight_decay,
)

# StepLR multiplies the learning rate by `gamma` every `step_size` scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.lr_step_size,
    gamma=params.lr_gamma,
)

In [None]:
# Resume from the latest checkpoint of this run, if requested and present.
start_epoch = 0
checkpoint_path = os.path.join("checkpoints", params.name, "checkpoint.pth")
if resume_training and os.path.exists(checkpoint_path):
    # The checkpoint contains a pickled `Params` object, which the restricted
    # weights-only loader rejects, so full unpickling is requested explicitly.
    # SECURITY: full unpickling can execute arbitrary code -- only load
    # checkpoints written by this notebook / from a trusted source.
    # map_location places the tensors on the currently selected device.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

    # Validate the hyper-parameters BEFORE mutating model/optimizer state, so a
    # mismatch does not leave the session half-restored.
    assert params == checkpoint["params"], (
        f"Checkpoint was created with different params: {checkpoint['params']} != {params}"
    )

    model.load_state_dict(checkpoint["model"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
    # The stored epoch is the last one completed; continue with the next.
    start_epoch = checkpoint["epoch"] + 1

In [None]:
from torch.utils.tensorboard import SummaryWriter
from pathlib import Path
# Training driver: evaluate once, then train / checkpoint / evaluate per epoch.
checkpoint_dir = os.path.join("checkpoints", params.name)
Path(checkpoint_dir).mkdir(parents=True, exist_ok=True)
writer = SummaryWriter('runs/' + params.name)

# Baseline evaluation before (further) training. Logged at `start_epoch` so a
# resumed run does not write its metrics at step 0.
test(val_loader, model, loss_fn, epoch=start_epoch, writer=writer, train_dataloader=train_loader, calc_acc5=True)

for epoch in range(start_epoch, 100):
    train(train_loader, model, loss_fn, optimizer, epoch=epoch, writer=writer)

    # Advance the schedule BEFORE saving: a resumed run starts at `epoch + 1`,
    # so the stored scheduler state must already reflect the completed epoch.
    # Saving first would make every resume lag the LR schedule by one epoch.
    lr_scheduler.step()

    checkpoint = {
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "lr_scheduler": lr_scheduler.state_dict(),
        "epoch": epoch,
        "params": params
    }
    # Per-epoch snapshot plus a rolling "latest" file used for resuming.
    torch.save(checkpoint, os.path.join(checkpoint_dir, f"model_{epoch}.pth"))
    torch.save(checkpoint, os.path.join(checkpoint_dir, "checkpoint.pth"))

    test(val_loader, model, loss_fn, epoch + 1, writer, train_dataloader=train_loader, calc_acc5=True)

# Flush pending events to disk and release the log file.
writer.close()

In [None]:
%mkdir /content/drive/MyDrive/projects/imagenet_poc

In [None]:
!cp -r /content/checkpoints/ /content/drive/MyDrive/projects/imagenet_poc

In [None]:
!ls /content/drive/MyDrive/projects/imagenet_poc/

In [None]:
!cp -r /content/runs /content/drive/MyDrive/projects/imagenet_poc