In [None]:
# ========================================
# 🧠 Cell 1 — Train Custom CNN Model
# ========================================
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import os

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ======================
# Transforms
# ======================
# Training pipeline: resize to the 224x224 network input, apply light geometric and
# colour augmentation, then normalise with the ImageNet channel statistics.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    # `scale` is the crop area as a fraction of the source image area, so the upper
    # bound is capped at 1.0: a crop larger than the image can never be sampled.
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Validation/test pipeline: deterministic resize + the same normalisation, no augmentation.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# ======================
# Datasets & Loaders
# ======================
# Expected layout: dataset/{train,val,test}/<class_name>/<image files>.
data_dir = "dataset"

train_ds = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform)
val_ds   = datasets.ImageFolder(os.path.join(data_dir, "val"), transform=val_transform)
test_ds  = datasets.ImageFolder(os.path.join(data_dir, "test"), transform=val_transform)

batch_size = 64

# Only the training loader shuffles; evaluation order is kept fixed.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=4)

# Class names come from the training sub-folder names; their order defines the label indices.
class_names = train_ds.classes
print("Classes:", class_names)

# ======================
# Model Definition
# ======================
class AntiSpoofNet(nn.Module):
    """VGG-style CNN classifier used for real-vs-spoof face classification.

    The feature extractor is five stages; each stage is two 3x3 convolutions
    (BatchNorm + LeakyReLU(0.1) after each) followed by a 2x2 max-pool, so the
    spatial size is halved five times. The classifier expects 512 * 7 * 7
    flattened features, which corresponds to a 224x224 input.

    Args:
        num_classes: Number of output logits.
    """

    # Output channels of each convolutional stage, in order.
    _STAGE_WIDTHS = (32, 64, 128, 256, 512)

    def __init__(self, num_classes=2):
        super().__init__()

        # Layers are appended in a flat list so that the nn.Sequential indices
        # (and therefore the state-dict keys) are features.0 ... features.34.
        layers = []
        in_channels = 3
        for width in self._STAGE_WIDTHS:
            layers.append(nn.Conv2d(in_channels, width, 3, padding=1))
            layers.append(nn.BatchNorm2d(width))
            layers.append(nn.LeakyReLU(0.1))
            layers.append(nn.Conv2d(width, width, 3, padding=1))
            layers.append(nn.BatchNorm2d(width))
            layers.append(nn.LeakyReLU(0.1))
            layers.append(nn.MaxPool2d(2))
            in_channels = width
        self.features = nn.Sequential(*layers)

        head = [
            nn.Dropout(0.5),
            nn.Linear(512 * 7 * 7, 512),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for an image batch."""
        feature_maps = self.features(x)
        # Flatten everything except the batch dimension before the dense head.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)

# ======================
# Training setup
# ======================
model = AntiSpoofNet(num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
# The scheduler is stepped with validation *accuracy* (higher is better), so it has to
# run in 'max' mode. In 'min' mode every improvement would be counted as a bad epoch
# and the learning rate would be halved while the model is still getting better.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, factor=0.5)

# ======================
# Training Loop
# ======================
# Each epoch: one pass over train_loader with gradient updates, then one pass over
# val_loader without gradients. The weights with the best validation accuracy so far
# are written to "best_customcnn.pth".
best_val_acc = 0.0
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]")
    for imgs, labels in loop:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running totals for the progress bar (accuracy is cumulative over the epoch).
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        loop.set_postfix(loss=loss.item(), acc=100*correct/total)

    # Validation
    model.eval()  # disables dropout and uses BatchNorm running statistics
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100 * val_correct / val_total
    scheduler.step(val_acc)
    print(f"Epoch {epoch+1} done. Train Acc: {100*correct/total:.2f}% | Val Acc: {val_acc:.2f}%")

    # Checkpoint only on a strict improvement of validation accuracy.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_customcnn.pth")
        print("✅ Saved best model")

print("Training complete. Best Val Accuracy:", best_val_acc)

In [None]:
# ========================================
# 🧪 Cell 2 — Test on test set
# ========================================
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Rebuild the network and restore the checkpoint saved during training, so the test
# metrics describe the best-validation weights rather than the last epoch.
model = AntiSpoofNet(num_classes=2).to(device)
model.load_state_dict(
    torch.load("best_customcnn.pth", map_location=device)
)
model.eval()

# Ground-truth and predicted class indices, accumulated over the whole test set.
all_labels, all_preds = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc="Testing"):
        outputs = model(imgs.to(device))
        preds = outputs.max(dim=1).indices  # arg-max logit per sample
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

cm = confusion_matrix(all_labels, all_preds)
report = classification_report(
    all_labels,
    all_preds,
    target_names=class_names,
    output_dict=True,
)

# Per-class "accuracy" here is the class recall; the report is keyed by class name,
# so this assumes the training folders are called "real" and "spoof".
real_acc = 100 * report["real"]["recall"]
spoof_acc = 100 * report["spoof"]["recall"]
# Overall accuracy = correctly classified (matrix diagonal) / all samples.
overall_acc = (cm.trace() / cm.sum()) * 100

print(f"\n✅ Overall Accuracy: {overall_acc:.2f}%")
print(f"🎭 Real Accuracy: {real_acc:.2f}%")
print(f"🕵️ Spoof Accuracy: {spoof_acc:.2f}%")

# Plot confusion matrix (rows: true class, columns: predicted class)
plt.figure(figsize=(5, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix (Test Set)")
plt.show()

In [None]:
# ========================================
# 🚀 Cell 3 — Evaluate on Custom Folder
# ========================================
custom_dir = "custom_folder"  # <-- Your custom unseen dataset folder

# Same deterministic preprocessing as validation; labels come from the sub-folder names.
# NOTE(review): label indices are assigned from this folder's own class names, so they
# only line up with the model's outputs if the sub-folders match the training ones.
custom_ds = datasets.ImageFolder(custom_dir, transform=val_transform)
custom_loader = DataLoader(
    custom_ds,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)
print("Custom classes:", custom_ds.classes)

# Ground-truth and predicted class indices for every image in the custom folder.
all_labels, all_preds = [], []

model.eval()
with torch.no_grad():
    for imgs, labels in tqdm(custom_loader, desc="Evaluating Custom Folder"):
        outputs = model(imgs.to(device))
        preds = outputs.max(dim=1).indices  # arg-max logit per sample
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

cm_custom = confusion_matrix(all_labels, all_preds)
report_custom = classification_report(
    all_labels,
    all_preds,
    target_names=custom_ds.classes,
    output_dict=True,
)

# Per-class "accuracy" is the class recall, looked up by the class names
# "real" and "spoof" in the report.
real_acc = 100 * report_custom["real"]["recall"]
spoof_acc = 100 * report_custom["spoof"]["recall"]
overall_acc = (cm_custom.trace() / cm_custom.sum()) * 100

print(f"\n🧾 Custom Folder Evaluation Results:")
print(f"✅ Overall Accuracy: {overall_acc:.2f}%")
print(f"🎭 Real Accuracy: {real_acc:.2f}%")
print(f"🕵️ Spoof Accuracy: {spoof_acc:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
plt.figure(figsize=(5, 4))
sns.heatmap(
    cm_custom,
    annot=True,
    fmt='d',
    cmap='Greens',
    xticklabels=custom_ds.classes,
    yticklabels=custom_ds.classes,
)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix (Custom Folder)")
plt.show()

In [None]:
# ===========================
# Robust Webcam Inference Cell
# ===========================
# Paste into one Jupyter notebook cell and run.
# Make sure your model weights "best_customcnn.pth" are in the working directory (or change path below).

import cv2
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from collections import deque
from tqdm import tqdm
import math
import sys

# -----------------------
# Device
# -----------------------
# Prefer the GPU when PyTorch can see one; otherwise run inference on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print("Device:", device)

# -----------------------
# Model architecture (must match your trained model)
# -----------------------
class AntiSpoofNet(nn.Module):
    """VGG-style CNN classifier used for real-vs-spoof face classification.

    The feature extractor is five stages; each stage is two 3x3 convolutions
    (BatchNorm + LeakyReLU(0.1) after each) followed by a 2x2 max-pool, so the
    spatial size is halved five times. The classifier expects 512 * 7 * 7
    flattened features, which corresponds to a 224x224 input.

    The layer order must stay exactly as it is: the state-dict keys
    (features.0 ... features.34, classifier.1, classifier.4) are what a saved
    checkpoint is matched against.

    Args:
        num_classes: Number of output logits.
    """

    # Output channels of each convolutional stage, in order.
    _STAGE_WIDTHS = (32, 64, 128, 256, 512)

    def __init__(self, num_classes=2):
        super().__init__()

        # Layers are appended in a flat list so the nn.Sequential indices stay flat.
        layers = []
        in_channels = 3
        for width in self._STAGE_WIDTHS:
            layers.append(nn.Conv2d(in_channels, width, 3, padding=1))
            layers.append(nn.BatchNorm2d(width))
            layers.append(nn.LeakyReLU(0.1))
            layers.append(nn.Conv2d(width, width, 3, padding=1))
            layers.append(nn.BatchNorm2d(width))
            layers.append(nn.LeakyReLU(0.1))
            layers.append(nn.MaxPool2d(2))
            in_channels = width
        self.features = nn.Sequential(*layers)

        head = [
            nn.Dropout(0.5),
            nn.Linear(512 * 7 * 7, 512),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for an image batch."""
        feature_maps = self.features(x)
        # Flatten everything except the batch dimension before the dense head.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)

# -----------------------
# Load model weights
# -----------------------
model = AntiSpoofNet(num_classes=2).to(device)
weights_path = "best_customcnn.pth"  # change if needed
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
state = torch.load(weights_path, map_location=device)
# Accept both a bare state dict and a checkpoint that nests it under 'state_dict'.
# Unwrapping explicitly (instead of catching every exception from a first load attempt)
# means a genuine key/shape mismatch is raised once, from a single load_state_dict call.
if isinstance(state, dict) and 'state_dict' in state:
    state = state['state_dict']
model.load_state_dict(state)
model.eval()  # inference mode: dropout off, BatchNorm uses running statistics
print("Loaded model weights from", weights_path)

# -----------------------
# Transforms (same normalization used during training)
# -----------------------
# Inference-time preprocessing for a single numpy image crop: convert to PIL, resize to
# the 224x224 network input, convert to a tensor and apply channel normalisation.
# This rebinds `val_transform`; unlike the evaluation pipeline defined earlier in the
# notebook it starts with ToPILImage, so it takes arrays/tensors rather than PIL images.
# NOTE(review): ToPILImage treats a 3-channel array as RGB, whereas OpenCV frames are
# BGR - crops should be converted to RGB before they are passed through this transform.
val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# -----------------------
# Face detector: try RetinaFace, fallback to MTCNN
# -----------------------
# `use_retina` records which backend was selected; `detector` holds the MTCNN instance
# and is only created when the RetinaFace import fails (it stays None otherwise).
use_retina = False
detector = None
try:
    from retinaface import RetinaFace
    use_retina = True
    print("Using RetinaFace for detection (retinaface package).")
except Exception as e:
    # Any failure while importing retinaface (not just a missing package) triggers the fallback.
    print("RetinaFace not available, falling back to MTCNN from facenet-pytorch.")
    try:
        from facenet_pytorch import MTCNN
        # keep_all=True asks MTCNN for every detected face rather than a single one.
        detector = MTCNN(keep_all=True, device=device if str(device).startswith("cuda") else "cpu")
        print("Using MTCNN for detection.")
    except Exception as e2:
        # Neither backend could be set up: stop here, chaining the MTCNN failure as the cause.
        raise RuntimeError("No face detector available. Install retinaface or facenet-pytorch.") from e2

# -----------------------
# Utilities: IoU, expand box, FFT score
# -----------------------
def iou(boxA, boxB):
    """Return the intersection-over-union of two boxes given as (x1, y1, x2, y2).

    Degenerate boxes contribute zero area, and the result is 0.0 when the
    union has no area.
    """
    def _area(b):
        return max(0, b[2] - b[0]) * max(0, b[3] - b[1])

    # Corners of the overlap rectangle (may be "inverted" when the boxes are disjoint).
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])
    overlap = max(0, right - left) * max(0, bottom - top)

    union = _area(boxA) + _area(boxB) - overlap
    if union > 0:
        return overlap / union
    return 0.0

def expand_box(box, scale, img_w, img_h):
    """Scale a (x1, y1, x2, y2) box about its centre and clip it to the image.

    Args:
        box: Source box as (x1, y1, x2, y2).
        scale: Multiplier applied to both width and height.
        img_w: Image width, used as the upper bound for x2.
        img_h: Image height, used as the upper bound for y2.

    Returns:
        The scaled box as a tuple of ints (x1, y1, x2, y2).
    """
    left, top, right, bottom = box
    width = right - left
    height = bottom - top
    center_x = left + width / 2
    center_y = top + height / 2
    half_w = width * scale / 2
    half_h = height * scale / 2
    # Clip against the image borders first, then truncate to integer pixels.
    return (
        int(max(0, center_x - half_w)),
        int(max(0, center_y - half_h)),
        int(min(img_w, center_x + half_w)),
        int(min(img_h, center_y + half_h)),
    )

def fft_spoof_score(bgr_crop):
    """Return a heuristic 0..1 spectral score; higher means more "spoof-like".

    The crop is converted to grayscale and lightly blurred, then the share of
    log-magnitude spectrum energy lying outside a central low-frequency disc
    is linearly rescaled from [0.05, 0.45] to [0, 1] and clipped.
    """
    # Light blur to reduce the influence of pixel noise on the spectrum.
    smoothed = cv2.GaussianBlur(cv2.cvtColor(bgr_crop, cv2.COLOR_BGR2GRAY), (3, 3), 0)

    # Centred 2-D spectrum; log1p compresses the range and avoids log(0).
    spectrum = np.fft.fftshift(np.fft.fft2(smoothed))
    log_mag = np.log1p(np.abs(spectrum))

    rows, cols = log_mag.shape
    center_col, center_row = cols // 2, rows // 2
    row_idx, col_idx = np.ogrid[:rows, :cols]
    radius = np.sqrt((col_idx - center_col) ** 2 + (row_idx - center_row) ** 2)

    # Everything farther from the centre than 25% of the larger dimension is
    # treated as high frequency.
    cutoff = max(rows, cols) * 0.25
    high_freq_energy = log_mag[radius > cutoff].sum()
    all_energy = log_mag.sum() + 1e-8  # epsilon guards against division by zero
    high_freq_share = high_freq_energy / all_energy

    # Heuristic: screens and prints often add periodic energy, raising the share.
    rescaled = np.clip((high_freq_share - 0.05) / (0.45 - 0.05), 0.0, 1.0)
    return float(rescaled)

# -----------------------
# Tracking small state per face with simple IOU linking
# -----------------------
class Track:
    """Per-face tracking state carried from frame to frame.

    Attributes are plain fields updated by the caller: the current box, the
    track id, a frames-since-last-match counter, a bounded history of recent
    spoof probabilities and the centre of the current box.
    """

    @staticmethod
    def _center(box):
        """Return the (x, y) midpoint of a (x1, y1, x2, y2) box."""
        return ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)

    def __init__(self, box, track_id):
        self.id = track_id
        self.box = box
        self.last_seen = 0
        # Only the 7 most recent spoof probabilities are kept for smoothing.
        self.pred_queue = deque(maxlen=7)
        self.last_center = self._center(box)

# Module-level tracking state shared with the main loop.
tracks = []              # Track objects currently being followed
next_track_id = 0        # id handed to the next newly created Track
FRAME_COUNTER = 0        # number of frames read from the camera so far
IOU_MATCH_THRESH = 0.35  # minimum IoU for a detection to be linked to an existing track

# -----------------------
# Inference helpers
# -----------------------
# Converts a (batch, num_classes) logit tensor into per-class probabilities.
softmax = torch.nn.Softmax(dim=1)

def predict_on_crop(bgr_crop, upsample_small=True):
    """Return the CNN's spoof probability for a single face crop.

    Args:
        bgr_crop: Numpy image in OpenCV's BGR channel order (e.g. a slice of a
            cv2.VideoCapture frame).
        upsample_small: When True, crops whose longer side is under 80 px are
            enlarged with bicubic interpolation before preprocessing.

    Returns:
        Softmax probability of class index 1 as a float in 0..1. With
        ImageFolder's alphabetical ordering of ['real', 'spoof'] that index
        is the 'spoof' class.
    """
    h, w = bgr_crop.shape[:2]
    # Very small crops are enlarged first; this adds no information but brings the
    # texture scale closer to what the network saw during training.
    if upsample_small and max(h, w) < 80:
        scale = int(np.ceil(224 / max(h, w)))
        bgr_crop = cv2.resize(bgr_crop, (w * scale, h * scale), interpolation=cv2.INTER_CUBIC)

    # ToPILImage interprets a 3-channel array as RGB and the training images were
    # loaded as RGB, so the OpenCV BGR crop must be converted before preprocessing;
    # otherwise the red and blue channels are swapped relative to training.
    rgb_crop = cv2.cvtColor(bgr_crop, cv2.COLOR_BGR2RGB)
    img_t = val_transform(rgb_crop).unsqueeze(0).to(device)  # add the batch dimension

    with torch.no_grad():
        logits = model(img_t)
        probs = softmax(logits).cpu().numpy()[0]  # [prob_real, prob_spoof]
    return float(probs[1])

def multi_scale_predict(frame, box, scales=(1.0, 1.5), fft_weight=0.35):
    """Combine multi-scale CNN predictions with the FFT cue for one face box.

    Args:
        frame: Full BGR frame the box refers to.
        box: Face box as (x1, y1, x2, y2).
        scales: Box enlargement factors; the CNN is run once per factor and the
            spoof probabilities are averaged. (An immutable default is used so
            the default can never be mutated between calls.)
        fft_weight: Weight of the FFT score in the final blend; the CNN average
            gets ``1 - fft_weight``.

    Returns:
        Tuple ``(final_spoof, avg_p, fft_score)``: the blended spoof
        probability, the mean CNN spoof probability (0.0 if no crop was valid)
        and the FFT heuristic score (0.0 if the base crop is empty).
    """
    h_img, w_img = frame.shape[:2]

    # CNN spoof probability for every scale that yields a non-empty crop.
    probs = []
    for s in scales:
        bx = expand_box(box, s, w_img, h_img)
        crop = frame[bx[1]:bx[3], bx[0]:bx[2]]
        if crop.size == 0:
            continue
        probs.append(predict_on_crop(crop))
    avg_p = float(np.mean(probs)) if probs else 0.0

    # Spectral cue is computed on the unscaled box (clipped to the frame).
    bx0 = expand_box(box, 1.0, w_img, h_img)
    crop0 = frame[bx0[1]:bx0[3], bx0[0]:bx0[2]]
    fft_score = fft_spoof_score(crop0) if crop0.size != 0 else 0.0

    # final_spoof_prob = (1 - alpha) * cnn + alpha * fft_score
    alpha = fft_weight
    final_spoof = alpha * fft_score + (1 - alpha) * avg_p
    return final_spoof, avg_p, fft_score

# -----------------------
# Video capture and main loop
# -----------------------
# Detector failures are reported once per (detector, error type) instead of being
# silently swallowed on every frame.
_reported_detection_errors = set()


def _report_detection_error(source, err):
    """Print a detector failure the first time this (source, error type) pair occurs."""
    key = (source, type(err).__name__)
    if key not in _reported_detection_errors:
        _reported_detection_errors.add(key)
        print(f"{source} detection failed: {err!r}")


def _retina_boxes(frame):
    """Detect faces with RetinaFace; return integer (x1, y1, x2, y2) tuples."""
    # RetinaFace.detect_faces returns a dict keyed by face id with 'facial_area' entries.
    faces = RetinaFace.detect_faces(frame)
    found = []
    if isinstance(faces, dict):
        for face in faces.values():
            if 'facial_area' in face:
                x1, y1, x2, y2 = face['facial_area']
                found.append((int(x1), int(y1), int(x2), int(y2)))
    return found


def _mtcnn_boxes(frame):
    """Detect faces with MTCNN; return integer (x1, y1, x2, y2) tuples."""
    # MTCNN expects RGB input, while OpenCV delivers BGR frames.
    bbs, _ = detector.detect(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if bbs is None:
        return []
    return [(int(x1), int(y1), int(x2), int(y2)) for x1, y1, x2, y2 in bbs]


def _detect_boxes(frame):
    """Return the face boxes for one BGR frame; a failing detector yields no boxes."""
    if use_retina:
        try:
            return _retina_boxes(frame)
        except Exception as e:  # best-effort: one bad frame must not stop the stream
            _report_detection_error("RetinaFace", e)
            if detector is None:
                return []
            # otherwise fall through to MTCNN
    try:
        return _mtcnn_boxes(frame)
    except Exception as e:  # best-effort, see above
        _report_detection_error("MTCNN", e)
        return []


# Per frame: detect faces, link detections to existing tracks by IoU, score every
# track with the CNN + FFT blend, smooth the score over the track history and draw it.
cap = cv2.VideoCapture(0)  # change index if you have multiple cams
if not cap.isOpened():
    raise RuntimeError("Cannot open camera. Change camera index or check permissions.")

print("Starting webcam. Press 'q' to quit.")
fps_time = time.time()
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to read frame")
            break
        FRAME_COUNTER += 1
        h_img, w_img = frame.shape[:2]

        # ---------- 1) Detect faces ----------
        boxes = _detect_boxes(frame)

        # ---------- 2) Track linking (simple IOU) ----------
        # age every track; matched tracks are reset to 0 below
        for tr in tracks:
            tr.last_seen += 1

        for box in boxes:
            # find best matching existing track by IOU
            best_iou = 0.0
            best_track = None
            for tr in tracks:
                overlap = iou(box, tr.box)
                if overlap > best_iou:
                    best_iou = overlap
                    best_track = tr
            if best_iou >= IOU_MATCH_THRESH and best_track is not None:
                # update track
                best_track.box = box
                best_track.last_seen = 0
                best_track.last_center = ((box[0]+box[2])/2, (box[1]+box[3])/2)
            else:
                # create new track
                # (this runs at module level, so no `global` statement is needed - declaring
                # one after the name was already assigned in the cell is a SyntaxError)
                tracks.append(Track(box, next_track_id))
                next_track_id += 1

        # remove old tracks > 30 frames not seen
        tracks = [tr for tr in tracks if tr.last_seen <= 30]

        # ---------- 3) Per-face prediction and smoothing ----------
        # NOTE(review): tracks that were not matched in this frame are still scored and
        # drawn at their last known box until they expire - confirm this is intended.
        labels_for_draw = []
        for tr in tracks:
            # compute combined spoof probability via multi-scale cnn + fft
            spoof_prob, cnn_p, fft_p = multi_scale_predict(frame, tr.box, scales=[1.0, 1.5], fft_weight=0.35)
            # push into track queue and compute smoothed prob
            tr.pred_queue.append(spoof_prob)
            smoothed_spoof = float(np.mean(tr.pred_queue))
            # variance of the recent history; high variance lowers the confidence
            var = float(np.var(tr.pred_queue))
            # convert to label: threshold 0.5 -> spoof
            label = "SPOOF" if smoothed_spoof >= 0.5 else "REAL"
            conf = smoothed_spoof if label == "SPOOF" else (1.0 - smoothed_spoof)
            # reduce confidence if track is short or high variance or small box
            box_w = tr.box[2] - tr.box[0]
            box_h = tr.box[3] - tr.box[1]
            if len(tr.pred_queue) < 3:
                conf *= 0.7
            if var > 0.02:
                conf *= 0.85
            if max(box_w, box_h) < 80:
                conf *= 0.8

            labels_for_draw.append((tr.box, label, conf, smoothed_spoof, cnn_p, fft_p, tr.id))

        # ---------- 4) Draw boxes and labels ----------
        # drawing happens after all predictions so overlays never end up inside a crop
        for box, label, conf, smoothed_spoof, cnn_p, fft_p, tid in labels_for_draw:
            x1, y1, x2, y2 = box
            color = (0, 255, 0) if label == "REAL" else (0, 0, 255)
            thickness = 2
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
            text = f"ID:{tid} {label} {conf*100:.0f}%"
            cv2.putText(frame, text, (x1, max(15, y1-8)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
            # small debug overlay: cnn prob and fft
            dbg = f"CNN:{cnn_p:.2f} FFT:{fft_p:.2f}"
            cv2.putText(frame, dbg, (x1, y2+15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)

        # ---------- 5) Show frame and handle quit ----------
        cv2.imshow("AntiSpoof Live", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # cleanup: always release the camera and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()
print("Done.")