In [18]:
# Cell 1: imports and configuration 
from pathlib import Path
import os, random, numpy as np, pandas as pd
import torch, torch.nn as nn, torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm
from PIL import Image
from torchsummary import summary

# Reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Device (CPU only)
DEVICE = "cpu"
print("Running on:", DEVICE)

Running on: cpu


In [3]:
# Cell 2: project paths
NB_DIR = Path.cwd()              
PROJ_ROOT = NB_DIR.parent         

DATA_DIR = PROJ_ROOT / "data"
CLASS_DIR = DATA_DIR / "classification_data"
TRAIN_DIR = CLASS_DIR / "train_data"
VAL_DIR   = CLASS_DIR / "val_data"
TEST_DIR  = CLASS_DIR / "test_data"

MODEL_DIR = PROJ_ROOT / "models"
ARTIFACT_DIR = PROJ_ROOT / "artifacts"
MODEL_DIR.mkdir(exist_ok=True)
ARTIFACT_DIR.mkdir(exist_ok=True)

print("Project root:", PROJ_ROOT)
print("Data:", DATA_DIR.exists())
print("Models folder:", MODEL_DIR.exists())
print("Artifacts folder:", ARTIFACT_DIR.exists())


Project root: d:\study\cos30082\final_asm
Data: True
Models folder: True
Artifacts folder: True


In [4]:
#  Cell 3: data transforms and loaders
IMG_SIZE = 160
BATCH_SIZE = 64  # moderate batch size for CPU

train_tf = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

val_tf = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

train_ds = datasets.ImageFolder(TRAIN_DIR, transform=train_tf)
val_ds   = datasets.ImageFolder(VAL_DIR,   transform=val_tf)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

num_classes = len(train_ds.classes)
print(f"Classes: {num_classes}")
print(f"Train images: {len(train_ds)}")
print(f"Val images: {len(val_ds)}")


Classes: 4000
Train images: 380638
Val images: 8000


In [5]:
# Cell 4: verify one sample
img, label = train_ds[0]
print("Image tensor shape:", img.shape)
print("Label index:", label, "->", train_ds.classes[label])


Image tensor shape: torch.Size([3, 160, 160])
Label index: 0 -> n000003


In [None]:
# Cell 5 — Model architecture (pretrained + fine-tuning)
from torchvision.models import ResNet18_Weights

class FaceNetSoftmax(nn.Module):
    def __init__(self, num_classes, emb_dim=128):
        super().__init__()
        backbone = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
        feat_dim = backbone.fc.in_features
        # Fine-tune only higher layers for speed/accuracy trade-off
        for name, param in backbone.named_parameters():
            param.requires_grad = False
            if "layer4" in name or "bn1" in name or "conv1" in name:
                param.requires_grad = True
        backbone.fc = nn.Identity()
        self.backbone = backbone
        self.embed = nn.Linear(feat_dim, emb_dim)
        self.classifier = nn.Linear(emb_dim, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x, return_embedding=False):
        f = self.backbone(x)
        e = nn.functional.normalize(self.embed(f))
        logits = self.classifier(e)
        probs = self.softmax(logits)
        if return_embedding:
            return e
        return logits, probs


In [19]:
# Cell 6 — Initialize model and load previous weights safely
EMB_DIM = 128
model = FaceNetSoftmax(num_classes=num_classes, emb_dim=EMB_DIM).to(DEVICE)

MODEL_PATH = MODEL_DIR / "classifier_embed_resnet18_full_cpu.pt"  # your previous checkpoint
if MODEL_PATH.exists():
    state = torch.load(MODEL_PATH, map_location=DEVICE)
    model.load_state_dict(state, strict=False)  # ignores new softmax layer
    print("Loaded previous weights (added Softmax for compliance)")
else:
    print("No checkpoint found. Training from scratch.")

summary(model, input_size=(3, 160, 160), device="cpu")

Loaded previous weights (added Softmax for compliance)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 80, 80]           9,408
       BatchNorm2d-2           [-1, 64, 80, 80]             128
              ReLU-3           [-1, 64, 80, 80]               0
         MaxPool2d-4           [-1, 64, 40, 40]               0
            Conv2d-5           [-1, 64, 40, 40]          36,864
       BatchNorm2d-6           [-1, 64, 40, 40]             128
              ReLU-7           [-1, 64, 40, 40]               0
            Conv2d-8           [-1, 64, 40, 40]          36,864
       BatchNorm2d-9           [-1, 64, 40, 40]             128
             ReLU-10           [-1, 64, 40, 40]               0
       BasicBlock-11           [-1, 64, 40, 40]               0
           Conv2d-12           [-1, 64, 40, 40]          36,864
      BatchNorm2d-13           [-1, 64, 40, 40] 

In [9]:
# Cell 7 — Training loop
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
EPOCHS = 2  # short fine-tune to adjust new softmax layer

for epoch in range(EPOCHS):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}", ncols=90):
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        logits, probs = model(imgs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        preds = probs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    train_acc = correct / total
    print(f"Epoch {epoch+1}: loss={running_loss/len(train_loader):.4f} | train_acc={train_acc:.4f}")


Epoch 1/2: 100%|████████████████████████████████████| 5948/5948 [1:58:22<00:00,  1.19s/it]


Epoch 1: loss=2.1436 | train_acc=0.5800


Epoch 2/2: 100%|████████████████████████████████████| 5948/5948 [1:59:21<00:00,  1.20s/it]

Epoch 2: loss=1.9249 | train_acc=0.6201





In [10]:
# Cell 8 — Validation accuracy
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        logits, probs = model(imgs)
        preds = probs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
val_acc = correct / total if total > 0 else 0
print(f"Validation accuracy: {val_acc:.4f}")


Validation accuracy: 0.5040


In [11]:
# Cell 9 — Save compliant model
SAVE_PATH = MODEL_DIR / "classifier_embed_resnet18_softmax_cpu.pt"
torch.save(model.state_dict(), SAVE_PATH)
print("Saved model with explicit softmax →", SAVE_PATH)


Saved model with explicit softmax → d:\study\cos30082\final_asm\models\classifier_embed_resnet18_softmax_cpu.pt


In [12]:
# Cell 10 — Embedding extraction utilities
def embed_image(path):
    model.eval()
    x = val_tf(Image.open(path).convert("RGB")).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        emb = model(x, return_embedding=True).cpu().numpy().squeeze()
    return emb


In [34]:
# Cell 11 — Test embedding extraction & similarity
from random import choice
from numpy.linalg import norm
import numpy as np

def cosine_similarity(e1, e2):
    return np.dot(e1, e2) / (norm(e1) * norm(e2))

# pick random samples
cls1, cls2 = choice(train_ds.classes), choice(train_ds.classes)
p1 = next((TRAIN_DIR / cls1).glob("*"))
p2 = next((TRAIN_DIR / cls2).glob("*"))
e1, e2 = embed_image(p1), embed_image(p2)
print(f"Different people similarity: {cosine_similarity(e1, e2):.4f}")

# same person test
cls = choice(train_ds.classes)
imgs = list((TRAIN_DIR / cls).glob("*"))
if len(imgs) >= 2:
    e1, e2 = embed_image(imgs[0]), embed_image(imgs[1])
    print(f"Same person similarity: {cosine_similarity(e1, e2):.4f}")


Different people similarity: -0.0632
Same person similarity: 0.8728
