Mount Google Drive and Link the Validation Tar + Devkit

In [1]:
# Install & setup
!pip -q install -U torch torchvision tqdm cupy-cuda12x

import os, sys, torch
from google.colab import drive

print("Python:", sys.version.split()[0], "| Torch:", torch.__version__,
      "| CUDA available:", torch.cuda.is_available())

drive.mount('/content/drive', force_remount=True)

VAL_TAR    = "/content/drive/MyDrive/AI 231/ILSVRC2012_img_val.tar"
DEVKIT_TAR = "/content/drive/MyDrive/AI 231/ILSVRC2012_devkit_t12.tar.gz"

IMAGENET_ROOT = "/content/imagenet_root"
os.makedirs(IMAGENET_ROOT, exist_ok=True)

# Symlink (or copy) the archives with the exact filenames torchvision expects
!ln -sf "$VAL_TAR"    "$IMAGENET_ROOT/ILSVRC2012_img_val.tar"
!ln -sf "$DEVKIT_TAR" "$IMAGENET_ROOT/ILSVRC2012_devkit_t12.tar.gz"

!ls -lh "$IMAGENET_ROOT"

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.9/112.9 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25hPython: 3.12.11 | Torch: 2.8.0+cu126 | CUDA available: True
Mounted at /content/drive
total 0
lrwxrwxrwx 1 root root 58 Sep 18 01:27 ILSVRC2012_devkit_t12.tar.gz -> '/content/drive/MyDrive/AI 231/ILSVRC2012_devkit_t12.tar.gz'
lrwxrwxrwx 1 root root 52 Sep 18 01:27 ILSVRC2012_img_val.tar -> '/content/drive/MyDrive/AI 231/ILSVRC2012_img_val.tar'


Load Validation Dataset

In [2]:
# Dataset + transforms
import scipy

from torchvision.datasets import ImageNet
from torchvision.models import alexnet, AlexNet_Weights
from torch.utils.data import DataLoader
import numpy as np

# Preprocessing bundled with the pretrained AlexNet weights
# (resize 256 -> center-crop 224 -> normalize)
weights = AlexNet_Weights.IMAGENET1K_V1
preprocess = weights.transforms()

# Building the dataset parses the devkit and extracts the val archive on first run
val_ds = ImageNet(IMAGENET_ROOT, split="val", transform=preprocess)
print("Val size:", len(val_ds))

# Collate to NumPy (CHW float32) + labels (int64)
def collate_fn_numpy(batch):
    """Collate a list of (image_tensor, label) samples into NumPy arrays.

    Returns a tuple ``(images, labels)`` where ``images`` is the samples'
    tensors stacked along a new leading axis and ``labels`` is an int64
    vector, both converted to ``numpy.ndarray``.
    """
    import torch

    images, targets = [], []
    for sample in batch:
        images.append(sample[0])
        targets.append(sample[1])

    stacked = torch.stack(images, dim=0)                 # (N, ...) torch tensor
    target_vec = torch.tensor(targets, dtype=torch.long)
    return stacked.numpy(), target_vec.numpy()

batch_size  = 512
num_workers = 2

# Sequential (unshuffled) loader that yields NumPy batches via collate_fn_numpy
loader_kwargs = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=False,
    collate_fn=collate_fn_numpy,
)
val_loader_np = DataLoader(val_ds, **loader_kwargs)

# Smoke test: pull a single batch and show its shapes
xb, yb = next(iter(val_loader_np))
print(xb.shape, yb.shape)

Val size: 50000
(512, 3, 224, 224) (512,)


NumPy AlexNet Implementation

In [3]:
# NumPy AlexNet model
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def kaiming_normal(shape, fan_in=None, rng=None):
    """Draw float32 weights from N(0, 2 / fan_in) (He/Kaiming normal init).

    Args:
        shape: shape of the array to create.
        fan_in: explicit fan-in; when None it is derived from ``shape``:
            4-D ``(C_out, C_in, KH, KW)`` -> ``C_in*KH*KW``,
            2-D ``(out, in)`` -> ``in``, anything else -> product of all dims.
        rng: object exposing ``standard_normal(size=...)``; defaults to the
            global ``np.random`` module.

    Returns:
        float32 ndarray of the requested shape.
    """
    source = rng or np.random

    if fan_in is None:
        rank = len(shape)
        if rank == 4:      # (C_out, C_in, KH, KW)
            _, c_in, kh, kw = shape
            fan_in = c_in * kh * kw
        elif rank == 2:    # (out, in)
            fan_in = shape[1]
        else:
            fan_in = np.prod(shape)

    std = np.sqrt(2.0 / fan_in)
    samples = source.standard_normal(size=shape).astype(np.float32)
    scaled = samples * std
    return scaled.astype(np.float32)

def xavier_normal(shape, rng=None):
    """Draw float32 weights from N(0, 2 / (fan_in + fan_out)) (Xavier/Glorot normal).

    For a weight of shape ``(out, in, *kernel)`` the receptive-field size is the
    product of the kernel dims (1 for a plain ``(out, in)`` matrix), and

        fan_in  = in  * receptive_field
        fan_out = out * receptive_field

    so convolution kernels account for their spatial extent in *both* fans.

    Args:
        shape: weight shape with at least two dimensions, ``(out, in, ...)``.
        rng: object exposing ``standard_normal(size=...)``; defaults to the
            global ``np.random`` module.

    Returns:
        float32 ndarray of the requested shape.
    """
    rng = rng or np.random
    # np.prod of an empty tuple is 1, which covers the 2-D (Linear) case.
    receptive_field = int(np.prod(shape[2:]))
    fan_in = shape[1] * receptive_field
    fan_out = shape[0] * receptive_field
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return (rng.standard_normal(size=shape).astype(np.float32) * std).astype(np.float32)

def pad2d(x, pad):
    """Zero-pad the last two axes of a 4-D array by ``pad`` on every side.

    The input is returned unchanged (same object) when ``pad`` is 0.
    """
    if pad != 0:
        widths = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
        x = np.pad(x, widths, mode='constant')
    return x

class Conv2D:
    """2-D convolution (forward only) with a square kernel, computed as a batched GEMM.

    Parameters are stored as float32: ``W`` has shape (out_ch, in_ch, k, k)
    and ``b`` has shape (out_ch,). When not supplied, ``W`` is drawn with
    ``kaiming_normal`` and ``b`` is zero.
    """

    def __init__(self, in_ch, out_ch, k, stride=1, padding=0, W=None, b=None, rng=None):
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.k = k
        self.stride = stride
        self.padding = padding

        if W is None:
            self.W = kaiming_normal((out_ch, in_ch, k, k), rng=rng)
        else:
            self.W = W.astype(np.float32)

        if b is None:
            self.b = np.zeros((out_ch,), dtype=np.float32)
        else:
            self.b = b.astype(np.float32)

    def __call__(self, x):
        """Convolve ``x`` of shape (N, C, H, W); returns (N, out_ch, H_out, W_out)."""
        padded = pad2d(x, self.padding)
        batch, chans, _, _ = padded.shape          # input must be 4-D
        ksize, step = self.k, self.stride

        # All k x k patches, then keep every `step`-th one along both spatial axes:
        # (N, C, H_out, W_out, k, k)
        patches = sliding_window_view(padded, (ksize, ksize), axis=(-2, -1))
        patches = patches[:, :, ::step, ::step, :, :]
        out_h, out_w = patches.shape[2], patches.shape[3]

        # Flatten each patch into a row so the convolution becomes a matrix product
        patch_len = chans * ksize * ksize
        rows = patches.transpose(0, 2, 3, 1, 4, 5).reshape(batch, out_h * out_w, patch_len)
        kernel = self.W.reshape(self.out_ch, patch_len)      # (C_out, C*k*k)

        flat = rows @ kernel.T + self.b.reshape(1, 1, -1)    # (N, H_out*W_out, C_out)
        return flat.transpose(0, 2, 1).reshape(batch, self.out_ch, out_h, out_w)

class ReLU:
    """Element-wise rectifier: max(x, 0)."""

    def __call__(self, x):
        rectified = np.maximum(x, 0)
        return rectified

class MaxPool2D:
    """Max pooling over square k x k windows with the given stride (no padding)."""

    def __init__(self, k=3, stride=2):
        self.k = k
        self.stride = stride

    def __call__(self, x):
        """Pool ``x`` of shape (N, C, H, W) over its last two axes."""
        _n, _c, _h, _w = x.shape      # input must be 4-D
        size, step = self.k, self.stride
        # (N, C, H-k+1, W-k+1, k, k): every window position
        windows = sliding_window_view(x, (size, size), axis=(-2, -1))
        # keep every `step`-th window along both spatial axes
        strided = windows[:, :, ::step, ::step, :, :]
        # reduce over the two window axes
        return strided.max(axis=(-2, -1))

class Dropout:
    """Inverted dropout: zero elements with probability ``p`` and rescale the rest.

    At inference (``train=False``) or when ``p <= 0`` the input is returned
    unchanged. When ``p >= 1`` every element is dropped and an all-zero array
    is returned (avoids the 0/0 -> NaN of the scaling formula).
    """

    def __init__(self, p=0.5): self.p = p

    def __call__(self, x, train=False, rng=None):
        """Apply dropout to ``x``.

        Args:
            x: input ndarray.
            train: dropout is only active when True.
            rng: random source exposing ``random(size)`` — the global
                ``np.random`` module (default), a ``RandomState`` or a
                ``np.random.Generator`` all qualify.

        Returns:
            ``x`` itself when inactive, otherwise a new array of the same shape.
        """
        if not train or self.p <= 0.0: return x
        if self.p >= 1.0:
            return np.zeros_like(x)
        rng = rng or np.random
        # `random(size)` exists on both the legacy and the Generator API
        # (unlike `rand`), and draws the same stream as `rand` on the legacy one.
        mask = (rng.random(x.shape).astype(np.float32) > self.p).astype(np.float32)
        return x * mask / (1.0 - self.p)

class Flatten:
    """Collapse every axis after the first: (N, C, H, W) -> (N, C*H*W)."""

    def __call__(self, x):
        batch = x.shape[0]
        return x.reshape((batch, -1))

class Linear:
    """Fully connected layer computing ``x @ W.T + b``.

    ``W`` has shape (out_f, in_f) and ``b`` has shape (out_f,), both stored as
    float32. When not supplied, ``W`` is drawn with ``xavier_normal`` and
    ``b`` is zero.
    """

    def __init__(self, in_f, out_f, W=None, b=None, rng=None):
        if W is None:
            self.W = xavier_normal((out_f, in_f), rng=rng)
        else:
            self.W = W.astype(np.float32)

        if b is None:
            self.b = np.zeros((out_f,), dtype=np.float32)
        else:
            self.b = b.astype(np.float32)

    def __call__(self, x):
        projected = np.matmul(x, self.W.T)
        return projected + self.b

class AlexNetNP:
    """
    NumPy-only AlexNet forward pass (no LRN). Input: (N,3,224,224)
    Matches torchvision's canonical AlexNet used for ImageNet-1K.

    Layers are randomly initialised on construction; call ``load_from_npz``
    to replace the parameters with exported pretrained weights.
    """

    # (attribute name on this model, key prefix inside the exported .npz)
    _NPZ_LAYOUT = (
        ("conv1", "features.0"),
        ("conv2", "features.3"),
        ("conv3", "features.6"),
        ("conv4", "features.8"),
        ("conv5", "features.10"),
        ("fc1",   "classifier.1"),
        ("fc2",   "classifier.4"),
        ("fc3",   "classifier.6"),
    )

    def __init__(self, num_classes=1000, seed=0):
        """Build all layers.

        Args:
            num_classes: width of the final logits layer.
            seed: seeds NumPy's *global* RNG, which the layer constructors
                draw their initial weights from.
        """
        np.random.seed(seed)
        # Feature extractor
        self.conv1 = Conv2D(3,   64, k=11, stride=4, padding=2)
        self.relu1 = ReLU()
        self.pool1 = MaxPool2D(k=3, stride=2)

        self.conv2 = Conv2D(64, 192, k=5, stride=1, padding=2)
        self.relu2 = ReLU()
        self.pool2 = MaxPool2D(k=3, stride=2)

        self.conv3 = Conv2D(192, 384, k=3, stride=1, padding=1)
        self.relu3 = ReLU()
        self.conv4 = Conv2D(384, 256, k=3, stride=1, padding=1)
        self.relu4 = ReLU()
        self.conv5 = Conv2D(256, 256, k=3, stride=1, padding=1)
        self.relu5 = ReLU()
        self.pool5 = MaxPool2D(k=3, stride=2)

        self.flatten = Flatten()

        # Classifier
        self.drop1 = Dropout(0.5)
        self.fc1   = Linear(256*6*6, 4096)
        self.relu6 = ReLU()
        self.drop2 = Dropout(0.5)
        self.fc2   = Linear(4096, 4096)
        self.relu7 = ReLU()
        self.fc3   = Linear(4096, num_classes)

    def forward(self, x, train=False):
        """Run the network on ``x`` and return the logits.

        Args:
            x: input batch, (N,3,224,224).
            train: forwarded to the dropout layers; dropout is a no-op when False.
        """
        x = self.conv1(x); x = self.relu1(x); x = self.pool1(x)
        x = self.conv2(x); x = self.relu2(x); x = self.pool2(x)
        x = self.conv3(x); x = self.relu3(x)
        x = self.conv4(x); x = self.relu4(x)
        x = self.conv5(x); x = self.relu5(x); x = self.pool5(x)   # -> (N,256,6,6)
        x = self.flatten(x)                                       # -> (N,9216)
        x = self.drop1(x, train=train); x = self.fc1(x); x = self.relu6(x)
        x = self.drop2(x, train=train); x = self.fc2(x); x = self.relu7(x)
        x = self.fc3(x)                                           # logits
        return x

    def load_from_npz(self, npz_path):
        """Replace all conv/linear parameters with arrays stored in an ``.npz`` file.

        The archive must contain ``<prefix>.weight`` and ``<prefix>.bias`` for
        every prefix in ``_NPZ_LAYOUT``. Arrays are cast to float32, matching
        what the layer constructors store.

        Raises:
            KeyError: if any expected key is missing; no parameter is modified
                in that case.
        """
        # The context manager closes the underlying zip file once loading is done.
        with np.load(npz_path) as data:
            available = set(data.files)
            missing = [
                f"{prefix}.{suffix}"
                for _, prefix in self._NPZ_LAYOUT
                for suffix in ("weight", "bias")
                if f"{prefix}.{suffix}" not in available
            ]
            if missing:
                raise KeyError(f"{npz_path} is missing expected arrays: {missing}")

            for attr, prefix in self._NPZ_LAYOUT:
                layer = getattr(self, attr)
                layer.W = np.asarray(data[f"{prefix}.weight"], dtype=np.float32)
                layer.b = np.asarray(data[f"{prefix}.bias"], dtype=np.float32)

Export AlexNet Weights

In [4]:
# Export torchvision AlexNet weights to .npz and load into NumPy AlexNet
import numpy as np
import torch
from torchvision.models import alexnet, AlexNet_Weights

# Export: dump every entry of the pretrained state dict as a NumPy array
torch_model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
torch_model.eval()
sd = torch_model.state_dict()
arrays_by_name = {name: tensor.cpu().numpy() for name, tensor in sd.items()}
np.savez("alexnet_imagenet1k_v1.npz", **arrays_by_name)
print("Saved alexnet_imagenet1k_v1.npz")

# Build the NumPy model and overwrite its random init with the exported weights
model_np = AlexNetNP(num_classes=1000, seed=42)
model_np.load_from_npz("alexnet_imagenet1k_v1.npz")
print("NumPy model ready.")

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth


100%|██████████| 233M/233M [00:01<00:00, 207MB/s]


Saved alexnet_imagenet1k_v1.npz
NumPy model ready.


Evaluate NumPy AlexNet on Validation Dataset

In [5]:
# Evaluation loop (Top-1 / Top-5) with NumPy model
from tqdm import tqdm
import numpy as np

def topk_hits(logits, labels, k):
    """Count samples whose true label is among the ``k`` highest-scoring classes.

    Args:
        logits: (N, num_classes) array of scores.
        labels: (N,) array of integer class indices.
        k: number of top classes to consider. Values larger than
            ``num_classes`` are clamped; ``k <= 0`` yields 0 hits.

    Returns:
        Number of hits as a plain Python ``int``.
    """
    logits = np.asarray(logits)
    labels = np.asarray(labels)
    if k <= 0:
        return 0
    if k == 1:
        preds = np.argmax(logits, axis=1)
        return int((preds == labels).sum())
    # argpartition requires kth < num_classes
    k = min(k, logits.shape[1])
    # partial top-k (unordered) is faster than full sort
    topk = np.argpartition(-logits, kth=k-1, axis=1)[:, :k]
    # vectorised membership test: does any of the k candidates equal the label?
    return int((topk == labels[:, None]).any(axis=1).sum())

NUM_VAL_SAMPLES = None  # None = full validation set (slow on CPU); set an int to cap the sample count

# Number of batches the loop will actually process, for an accurate progress bar.
num_batches = len(val_loader_np)
if NUM_VAL_SAMPLES is not None:
    capped_batches = -(-NUM_VAL_SAMPLES // val_loader_np.batch_size)  # ceil division
    num_batches = min(num_batches, max(capped_batches, 0))

top1 = top5 = n = 0

if NUM_VAL_SAMPLES is None or NUM_VAL_SAMPLES > 0:
    for xb, yb in tqdm(val_loader_np, total=num_batches):
        # Trim last batch if we would exceed the cap
        if NUM_VAL_SAMPLES is not None and n + yb.shape[0] > NUM_VAL_SAMPLES:
            keep = NUM_VAL_SAMPLES - n
            xb, yb = xb[:keep], yb[:keep]

        logits = model_np.forward(xb, train=False)  # NumPy forward
        top1 += topk_hits(logits, yb, k=1)
        top5 += topk_hits(logits, yb, k=5)
        n += yb.shape[0]

        # Stop as soon as the cap is reached so no extra batch gets loaded
        if NUM_VAL_SAMPLES is not None and n >= NUM_VAL_SAMPLES:
            break

seen = n  # kept for code that reads `seen`; always equal to n

if n == 0:
    # Avoid a ZeroDivisionError when nothing was evaluated (e.g. a cap of 0)
    print("[NumPy AlexNet] No samples were evaluated; accuracy is undefined.")
else:
    print(f"[NumPy AlexNet] Evaluated {n} samples "
          f"| Top-1: {top1/n:.4f} | Top-5: {top5/n:.4f}")
    print("\n")
    print(f"Validation Accuracy: {top1/n:.4f}")

100%|██████████| 98/98 [21:28<00:00, 13.15s/it]

[NumPy AlexNet] Evaluated 50000 samples | Top-1: 0.5656 | Top-5: 0.7908


Validation Accuracy: 0.5656



