In [1]:
import kagglehub
import os

# =============================
# Download FaceForensics++ (FF++)
# =============================
ffpp_path = kagglehub.dataset_download("xdxd003/ff-c23")
print("✅ Path to FF++ dataset:", ffpp_path)

# Show the folder structure of FF++
for root, dirs, files in os.walk(ffpp_path):
    level = root.replace(ffpp_path, "").count(os.sep)
    indent = " " * 4 * (level)
    print(f"{indent}{os.path.basename(root)}/")
    subindent = " " * 4 * (level + 1)
    for f in files[:10]:  # show only first 10 files per directory
        print(f"{subindent}{f}")
    if len(files) > 10:
        print(f"{subindent}... ({len(files)} files total)")

# =============================
# Download CASIA-FASD
# =============================
casia_path = kagglehub.dataset_download("minhnh2107/casiafasd")
print("\n✅ Path to CASIA-FASD dataset:", casia_path)

# Show the folder structure of CASIA-FASD
for root, dirs, files in os.walk(casia_path):
    level = root.replace(casia_path, "").count(os.sep)
    indent = " " * 4 * (level)
    print(f"{indent}{os.path.basename(root)}/")
    subindent = " " * 4 * (level + 1)
    for f in files[:10]:  # show only first 10 files per directory
        print(f"{subindent}{f}")
    if len(files) > 10:
        print(f"{subindent}... ({len(files)} files total)")

✅ Path to FF++ dataset: C:\Users\mujta\.cache\kagglehub\datasets\xdxd003\ff-c23\versions\1
1/
    FaceForensics++_C23/
        csv/
            DeepFakeDetection.csv
            Deepfakes.csv
            Face2Face.csv
            FaceShifter.csv
            FaceSwap.csv
            FF++_Metadata.csv
            FF++_Metadata_Shuffled.csv
            Mean_Data.csv
            NeuralTextures.csv
            original.csv
        DeepFakeDetection/
            01_02__meeting_serious__YVGY8LOK.mp4
            01_02__outside_talking_still_laughing__YVGY8LOK.mp4
            01_02__talking_against_wall__YVGY8LOK.mp4
            01_02__walking_down_indoor_hall_disgust__YVGY8LOK.mp4
            01_02__walk_down_hall_angry__YVGY8LOK.mp4
            01_03__hugging_happy__ISF9SP4G.mp4
            01_03__kitchen_pan__JZUXXFRB.mp4
            01_03__podium_speech_happy__480LQD1C.mp4
            01_03__talking_against_wall__JZUXXFRB.mp4
            01_04__hugging_happy__GBC7ZGDP.mp4
            ... (1

100%|█████████████████████████████████████████████████████████████████████████████| 70.6M/70.6M [00:07<00:00, 9.98MB/s]

Extracting files...






✅ Path to CASIA-FASD dataset: C:\Users\mujta\.cache\kagglehub\datasets\minhnh2107\casiafasd\versions\1
1/
    test_img/
        test_img/
            color/
                10_1.avi_100_real.jpg
                10_1.avi_125_real.jpg
                10_1.avi_25_real.jpg
                10_1.avi_50_real.jpg
                10_1.avi_75_real.jpg
                10_2.avi_100_real.jpg
                10_2.avi_125_real.jpg
                10_2.avi_150_real.jpg
                10_2.avi_175_real.jpg
                10_2.avi_25_real.jpg
                ... (2408 files total)
            depth/
                10_1.avi_100_real.jpg
                10_1.avi_125_real.jpg
                10_1.avi_25_real.jpg
                10_1.avi_50_real.jpg
                10_1.avi_75_real.jpg
                10_2.avi_100_real.jpg
                10_2.avi_125_real.jpg
                10_2.avi_150_real.jpg
                10_2.avi_175_real.jpg
                10_2.avi_25_real.jpg
                ... (2408 files 

In [2]:
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import random

# ======================
# CONFIG
# ======================
ffpp_path = r"C:\Users\mujta\.cache\kagglehub\datasets\xdxd003\ff-c23\versions\1\FaceForensics++_C23"
casia_path = r"C:\Users\mujta\.cache\kagglehub\datasets\minhnh2107\casiafasd\versions\1"

output_data = "processed_dataset"  # final dataset folder
os.makedirs(output_data, exist_ok=True)

# Target size for Inception
IMG_SIZE = 299

# ======================
# HELPER: Extract frames from videos
# ======================
def extract_frames(video_path, save_dir, label, max_frames=10):
    os.makedirs(save_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = sorted(random.sample(range(total_frames), min(max_frames, total_frames)))

    for i, idx in enumerate(frame_indices):
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(frame)
            img = img.resize((IMG_SIZE, IMG_SIZE))
            img.save(os.path.join(save_dir, f"{label}_{i}.jpg"))
    cap.release()

# ======================
# PREPARE DATASET
# ======================

# FF++ REAL (original videos)
real_out = os.path.join(output_data, "real")
deepfake_out = os.path.join(output_data, "deepfake")
os.makedirs(real_out, exist_ok=True)
os.makedirs(deepfake_out, exist_ok=True)

print("Extracting FF++ REAL...")
for f in os.listdir(os.path.join(ffpp_path, "original"))[:50]:  # limit for testing
    video_path = os.path.join(ffpp_path, "original", f)
    extract_frames(video_path, real_out, "real")

print("Extracting FF++ DEEPFAKE...")
for folder in ["Deepfakes", "Face2Face", "FaceSwap", "FaceShifter", "NeuralTextures"]:
    for f in os.listdir(os.path.join(ffpp_path, folder))[:50]:
        video_path = os.path.join(ffpp_path, folder, f)
        extract_frames(video_path, deepfake_out, "deepfake")

print("Copying CASIA-FASD (as REAL)...")
casia_train = os.path.join(casia_path, "train_img", "train_img", "color")
for f in os.listdir(casia_train)[:500]:
    img = Image.open(os.path.join(casia_train, f)).convert("RGB").resize((IMG_SIZE, IMG_SIZE))
    img.save(os.path.join(real_out, f"casia_{f}"))

Extracting FF++ REAL...
Extracting FF++ DEEPFAKE...
Copying CASIA-FASD (as REAL)...


In [3]:
# ======================
# DATALOADER
# ======================
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

dataset = datasets.ImageFolder(output_data, transform=transform)
train_loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=2)

# ======================
# MODEL
# ======================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.inception_v3(pretrained=True)
model.aux_logits = False
model.fc = nn.Linear(model.fc.in_features, 2)  # binary classification
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# ======================
# TRAIN
# ======================
EPOCHS = 5
for epoch in range(EPOCHS):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    print(f"Epoch [{epoch+1}/{EPOCHS}] Loss: {running_loss/len(train_loader):.4f}, Acc: {100.*correct/total:.2f}%")



Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to C:\Users\mujta/.cache\torch\hub\checkpoints\inception_v3_google-0cc3c7bd.pth


100%|███████████████████████████████████████████████████████████████████████████████| 104M/104M [00:09<00:00, 11.2MB/s]


Epoch [1/5] Loss: 0.2303, Acc: 92.88%
Epoch [2/5] Loss: 0.0416, Acc: 98.08%
Epoch [3/5] Loss: 0.0338, Acc: 98.27%
Epoch [4/5] Loss: 0.0322, Acc: 98.46%
Epoch [5/5] Loss: 0.0291, Acc: 99.23%


In [25]:
def predict_face_image(img_path):
    model.eval()
    img = Image.open(img_path).convert("RGB")
    face = mtcnn(img)
    if face is None:
        return "No face detected"
    
    # Ensure consistency: MTCNN → PIL → training transform
    face_img = transforms.ToPILImage()(face)
    face = transform(face_img).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(face)
        pred = torch.argmax(outputs, dim=1).item()
    return "DEEPFAKE" if pred == 0 else "REAL"


print(predict_face_image(r"C:\Users\mujta\OneDrive\Desktop\FaceSwap_10.jpg"))

REAL


In [20]:
print(predict_image(os.path.join(real_out, os.listdir(real_out)[0])))
print(predict_image(os.path.join(deepfake_out, os.listdir(deepfake_out)[0])))

print(predict_image(r"C:\Users\mujta\OneDrive\Desktop\DALL-E_0004.jpg"))

REAL
DEEPFAKE
REAL


In [11]:
def predict_own_image(img_path):
    model.eval()
    img = Image.open(img_path).convert("RGB")
    img = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(img)
        pred = torch.argmax(outputs, dim=1).item()
    return "DEEPFAKE" if pred == 0 else "REAL"

In [16]:
my_image = r"C:\Users\mujta\OneDrive\Desktop\DALL-E_0004.jpg"  # <-- replace with your file
print("Prediction:", predict_own_image(my_image))

Prediction: REAL


In [None]:
# Pick a sample deepfake video from FF++
test_video = r"C:\Users\mujta\OneDrive\Desktop\me.jpg"

# Extract 1 frame from it
cap = cv2.VideoCapture(test_video)
cap.set(cv2.CAP_PROP_POS_FRAMES, 50)  # pick frame number 50
ret, frame = cap.read()
cap.release()

if ret:
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    test_img = Image.fromarray(frame).resize((IMG_SIZE, IMG_SIZE))
    test_img.save("deepfake_test.jpg")

print("Prediction:", predict_image("deepfake_test.jpg"))

In [26]:
from torch.utils.data import random_split

# total size of dataset
total_size = len(dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

# split dataset
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

print(f"Train size: {len(train_dataset)}, Test size: {len(test_dataset)}")


Train size: 416, Test size: 104


In [27]:
model.eval()
correct, total = 0, 0
all_preds, all_labels = [], []

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

acc = 100. * correct / total
print(f"Test Accuracy: {acc:.2f}%")

Test Accuracy: 99.04%


In [29]:
import kagglehub

# Download CASIA-FASD dataset
casia_path = kagglehub.dataset_download("minhnh2107/casiafasd")

print("✅ CASIA-FASD downloaded to:", casia_path)

# Check structure
import os

for root, dirs, files in os.walk(casia_path):
    level = root.replace(casia_path, '').count(os.sep)
    indent = ' ' * 4 * level
    print(f"{indent}{os.path.basename(root)}/")
    subindent = ' ' * 4 * (level + 1)
    for f in files[:5]:  # show first 5 files per folder
        print(f"{subindent}{f}")

✅ CASIA-FASD downloaded to: C:\Users\mujta\.cache\kagglehub\datasets\minhnh2107\casiafasd\versions\1
1/
    test_img/
        test_img/
            color/
                10_1.avi_100_real.jpg
                10_1.avi_125_real.jpg
                10_1.avi_25_real.jpg
                10_1.avi_50_real.jpg
                10_1.avi_75_real.jpg
            depth/
                10_1.avi_100_real.jpg
                10_1.avi_125_real.jpg
                10_1.avi_25_real.jpg
                10_1.avi_50_real.jpg
                10_1.avi_75_real.jpg
    train_img/
        train_img/
            color/
                10_1.avi_100_real.jpg
                10_1.avi_25_real.jpg
                10_1.avi_50_real.jpg
                10_1.avi_75_real.jpg
                10_2.avi_100_real.jpg
            depth/
                10_1.avi_100_real.jpg
                10_1.avi_25_real.jpg
                10_1.avi_50_real.jpg
                10_1.avi_75_real.jpg
                10_2.avi_100_real.jpg


In [32]:
import os, shutil

casia_base = r"C:\Users\mujta\.cache\kagglehub\datasets\minhnh2107\casiafasd\versions\1"
output_path = os.path.join(casia_base, "processed_casia")

# source paths
train_src = os.path.join(casia_base, "train_img", "train_img", "color")
test_src  = os.path.join(casia_base, "test_img", "test_img", "color")

for split, src in [("train", train_src), ("test", test_src)]:
    real_dir = os.path.join(output_path, split, "real")
    fake_dir = os.path.join(output_path, split, "fake")
    os.makedirs(real_dir, exist_ok=True)
    os.makedirs(fake_dir, exist_ok=True)
    
    for file in os.listdir(src):
        if file.endswith(".jpg"):
            src_file = os.path.join(src, file)
            if "real" in file.lower():
                shutil.copy(src_file, os.path.join(real_dir, file))
            elif "fake" in file.lower():
                shutil.copy(src_file, os.path.join(fake_dir, file))

print("✅ CASIA reorganized into:", output_path)

✅ CASIA reorganized into: C:\Users\mujta\.cache\kagglehub\datasets\minhnh2107\casiafasd\versions\1\processed_casia


In [33]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

IMG_SIZE = 299

casia_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

casia_test_dataset = datasets.ImageFolder(
    os.path.join(output_path, "test"),
    transform=casia_transform
)

casia_test_loader = DataLoader(casia_test_dataset, batch_size=16, shuffle=False)

print("Classes:", casia_test_dataset.classes)   # should print ['fake', 'real']
print("Total Test Images:", len(casia_test_dataset))

Classes: ['fake', 'real']
Total Test Images: 2408


In [34]:
import torch
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# make sure model is in eval mode
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for imgs, labels in casia_test_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1)
        
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Metrics
acc = accuracy_score(all_labels, all_preds)
print(f"✅ CASIA Test Accuracy: {acc*100:.2f}%\n")

print("Classification Report:")
print(classification_report(all_labels, all_preds, target_names=casia_test_dataset.classes))

print("Confusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


✅ CASIA Test Accuracy: 24.54%

Classification Report:
              precision    recall  f1-score   support

        fake       0.00      0.00      0.00      1817
        real       0.25      1.00      0.39       591

    accuracy                           0.25      2408
   macro avg       0.12      0.50      0.20      2408
weighted avg       0.06      0.25      0.10      2408

Confusion Matrix:
[[   0 1817]
 [   0  591]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [None]:
# Cell 1 -- Setup + extract frames (FF++) + reorganize CASIA if not already done
import os, random, shutil, math
from pathlib import Path
from PIL import Image
import cv2
from tqdm import tqdm

# === CONFIG ===
ffpp_path = r"C:\Users\mujta\.cache\kagglehub\datasets\xdxd003\ff-c23\versions\1\FaceForensics++_C23"
casia_base = r"C:\Users\mujta\.cache\kagglehub\datasets\minhnh2107\casiafasd\versions\1"
output_data = r"C:\Users\mujta\processed_dataset_v2"   # change if you want
os.makedirs(output_data, exist_ok=True)

IMG_SIZE = 299
FRAMES_PER_VIDEO = 50      # increase to extract more frames (set <= total frames)
MAX_VIDEOS_PER_FOLDER = None  # None = use all, or set an integer for debugging

# Helper: evenly spaced frame indices
def pick_frame_indices(total_frames, k):
    if total_frames <= 0:
        return []
    k = min(k, total_frames)
    step = total_frames / k
    return [int(i * step) for i in range(k)]

# Extract frames (face cropping will be applied later via MTCNN during dataset)
def extract_frames_from_video(video_path, save_dir, prefix, frames_to_extract=50):
    os.makedirs(save_dir, exist_ok=True)
    cap = cv2.VideoCapture(str(video_path))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
    indices = pick_frame_indices(total_frames, frames_to_extract)
    saved = 0
    for i, idx in enumerate(indices):
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if not ret:
            continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(frame).resize((IMG_SIZE, IMG_SIZE))  # temporary resize; final face crop uses MTCNN
        fname = f"{prefix}_{Path(video_path).stem}_{i:03d}.jpg"
        img.save(os.path.join(save_dir, fname))
        saved += 1
    cap.release()
    return saved

# Prepare folders
real_out = os.path.join(output_data, "train", "real")
fake_out = os.path.join(output_data, "train", "fake")
os.makedirs(real_out, exist_ok=True)
os.makedirs(fake_out, exist_ok=True)

# 1) Extract from FF++ originals -> REAL
print("Extracting FF++ originals (REAL)...")
orig_dir = os.path.join(ffpp_path, "original")
orig_videos = sorted(os.listdir(orig_dir))
if MAX_VIDEOS_PER_FOLDER:
    orig_videos = orig_videos[:MAX_VIDEOS_PER_FOLDER]
for v in tqdm(orig_videos):
    p = os.path.join(orig_dir, v)
    extract_frames_from_video(p, real_out, "ffpp_orig", FRAMES_PER_VIDEO)

# 2) Extract from FF++ manipulated -> FAKE
manip_folders = ["Deepfakes", "Face2Face", "FaceSwap", "FaceShifter", "NeuralTextures"]
print("Extracting FF++ manipulated (DEEPFAKE)...")
for folder in manip_folders:
    folder_path = os.path.join(ffpp_path, folder)
    vids = sorted(os.listdir(folder_path))
    if MAX_VIDEOS_PER_FOLDER:
        vids = vids[:MAX_VIDEOS_PER_FOLDER]
    for v in tqdm(vids, desc=folder):
        p = os.path.join(folder_path, v)
        extract_frames_from_video(p, fake_out, f"ffpp_{folder}", FRAMES_PER_VIDEO)

# 3) Add CASIA images as REAL (use both train/test color sets)
print("Copying CASIA images (as REAL)...")
casia_color_train = os.path.join(casia_base, "train_img", "train_img", "color")
casia_color_test  = os.path.join(casia_base, "test_img", "test_img", "color")
for src in [casia_color_train, casia_color_test]:
    if not os.path.isdir(src):
        continue
    for f in os.listdir(src):
        if not f.lower().endswith(".jpg"):
            continue
        # treat both 'real' and 'fake' as REAL? No — in your spec earlier, CASIA spoofed reals should be real.
        # But CASIA contains *_fake.jpg naming (these are replay/print attacks) — earlier you wanted spoofed real to be REAL.
        # So treat *_fake.jpg from CASIA as REAL (presentation attacks = real). If you want them as fake, change logic.
        src_fp = os.path.join(src, f)
        # If filename indicates "real" -> copy to real; if "fake" -> also copy to real (per your requirement)
        target = real_out
        shutil.copy(src_fp, os.path.join(target, f"casia_{f}"))

print("Done extraction and CASIA copy. Train real images:", len(os.listdir(real_out)), "Train fake images:", len(os.listdir(fake_out)))

Extracting FF++ originals (REAL)...


 38%|█████████████████████████████▋                                               | 385/1000 [46:14<2:02:03, 11.91s/it]

In [None]:
# Cell 2 -- dataset + transforms + dataloaders
import torch
from torch.utils.data import Dataset, DataLoader, random_split, WeightedRandomSampler
from torchvision import transforms
from facenet_pytorch import MTCNN
from PIL import Image
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# MTCNN face detector; returns face tensors (C,H,W) in range 0-1
mtcnn = MTCNN(image_size=IMG_SIZE, margin=20, keep_all=False, device=device)

# Transforms: training with augmentation, validation/test only resize+norm
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(0.1, 0.1, 0.1, 0.05),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

class FaceFolderDataset(Dataset):
    """Loads images from folder structure root/real and root/fake.
       Uses MTCNN to crop the face. If no face found, falls back to center-crop.
    """
    def __init__(self, root_dir, transform=None, mtcnn=None):
        self.root_dir = root_dir
        self.transform = transform
        self.mtcnn = mtcnn
        self.samples = []
        for label_name, label_idx in [("real", 0), ("fake", 1)]:  # choose mapping: 0=real,1=fake
            folder = os.path.join(root_dir, label_name)
            if not os.path.isdir(folder):
                continue
            for f in os.listdir(folder):
                if f.lower().endswith((".jpg",".jpeg",".png")):
                    self.samples.append((os.path.join(folder, f), label_idx))
        # shuffle stable
        random.shuffle(self.samples)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = Image.open(path).convert("RGB")
        face = None
        if self.mtcnn is not None:
            # mtcnn returns tensor or None
            try:
                face = self.mtcnn(img)  # tensor CxHxW in 0..1
            except Exception as e:
                face = None
        if face is not None:
            # convert back to PIL so same transforms apply
            face_img = transforms.ToPILImage()(face)
        else:
            # fallback: center crop from original
            w, h = img.size
            min_side = min(w,h)
            left = (w-min_side)//2
            top = (h-min_side)//2
            face_img = img.crop((left, top, left+min_side, top+min_side))
        if self.transform:
            face_img = self.transform(face_img)
        return face_img, label

# Create datasets
train_root = os.path.join(output_data, "train")
os.makedirs(train_root, exist_ok=True)  # should exist

# Create dataset, then split into train/val
full_dataset = FaceFolderDataset(train_root, transform=train_transform, mtcnn=mtcnn)
val_ratio = 0.15
val_size = int(len(full_dataset) * val_ratio)
train_size = len(full_dataset) - val_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# For val dataset, use val_transform (we need to replace transform in subset)
# hack: override transform for items in val_dataset
val_dataset.dataset.transform = val_transform

# Weighted sampler to handle class imbalance
labels = [label for _, label in full_dataset.samples]
class_sample_count = np.array([labels.count(0), labels.count(1)])  # [real_count, fake_count]
print("Counts (real,fake):", class_sample_count)
weight = 1. / class_sample_count
samples_weight = np.array([weight[t] for t in labels])
sampler = WeightedRandomSampler(weights=samples_weight, num_samples=len(samples_weight), replacement=True)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

print("Train size:", len(train_dataset), "Val size:", len(val_dataset))

In [None]:
# Cell 3 -- model build, optimizer, criterion, scheduler
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Load pretrained Inception v3 (use weights interface if torchvision version supports it)
model = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1) if hasattr(models, "Inception_V3_Weights") else models.inception_v3(pretrained=True)
model.aux_logits = False

# Replace fc with dropout + classifier
in_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(in_features, 512),
    nn.ReLU(),
    nn.Dropout(p=0.4),
    nn.Linear(512, 2)   # 2 classes: 0=REAL, 1=DEEPFAKE
)

model = model.to(device)

# Option: freeze early layers for first epochs (uncomment if you want)
# for name, param in model.named_parameters():
#     if "fc" not in name:
#         param.requires_grad = False

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

In [None]:
# Cell 4 -- training loop
import time
from sklearn.metrics import accuracy_score

EPOCHS = 10
best_val_loss = float("inf")
best_model_path = os.path.join(output_data, "best_inception_v3.pth")

for epoch in range(1, EPOCHS+1):
    t0 = time.time()
    model.train()
    train_losses = []
    all_preds = []
    all_labels = []

    for imgs, labels in train_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())
        preds = outputs.argmax(dim=1).detach().cpu().numpy()
        all_preds.extend(list(preds))
        all_labels.extend(list(labels.cpu().numpy()))

    train_loss = sum(train_losses)/len(train_losses)
    train_acc = accuracy_score(all_labels, all_preds)

    # Validation
    model.eval()
    val_losses = []
    val_preds = []
    val_labels = []
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            val_losses.append(loss.item())
            preds = outputs.argmax(dim=1).cpu().numpy()
            val_preds.extend(list(preds))
            val_labels.extend(list(labels.cpu().numpy()))

    val_loss = sum(val_losses)/len(val_losses)
    val_acc = accuracy_score(val_labels, val_preds)
    scheduler.step(val_loss)

    print(f"Epoch {epoch}/{EPOCHS}  Time: {time.time()-t0:.1f}s  TrainLoss:{train_loss:.4f} TrainAcc:{train_acc*100:.2f}%  ValLoss:{val_loss:.4f} ValAcc:{val_acc*100:.2f}%")

    # Save best
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_loss,
        }, best_model_path)
        print("  Saved best model ->", best_model_path)


In [None]:
# Cell 5 -- load best model & evaluate on CASIA test (processed_casia/test should be ready)
import torch
from sklearn.metrics import classification_report, confusion_matrix

# load best model
ck = torch.load(best_model_path, map_location=device)
model.load_state_dict(ck['model_state_dict'])
model.eval()

# CASIA processed test path (if you used earlier 'processed_casia' path)
casia_test_path = os.path.join(casia_base, "processed_casia", "test")  # update if different
if not os.path.isdir(casia_test_path):
    print("Warning: CASIA processed test path not found:", casia_test_path)
else:
    # create DataLoader for CASIA test using FaceFolderDataset but val_transform + mtcnn
    casia_test_dataset = FaceFolderDataset(casia_test_path, transform=val_transform, mtcnn=mtcnn)
    casia_loader = DataLoader(casia_test_dataset, batch_size=32, shuffle=False, num_workers=4)

    y_true, y_pred = [], []
    with torch.no_grad():
        for imgs, labels in casia_loader:
            imgs = imgs.to(device)
            out = model(imgs)
            preds = out.argmax(dim=1).cpu().numpy().tolist()
            y_pred.extend(preds)
            y_true.extend(labels.numpy().tolist())

    print("CASIA Test Accuracy: %.2f%%" % (100.0 * (sum(1 for i,j in zip(y_true,y_pred) if i==j) / len(y_true))))
    print(classification_report(y_true, y_pred, target_names=["real","fake"]))
    print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))

# Optional: evaluate all images in a folder (external deepfakes)
def predict_folder(folder_path):
    preds = []
    for f in os.listdir(folder_path):
        if not f.lower().endswith(('.jpg','.png','.jpeg')):
            continue
        p = os.path.join(folder_path, f)
        # get cropped face with MTCNN
        try:
            img = Image.open(p).convert("RGB")
        except:
            continue
        face = mtcnn(img)
        if face is None:
            # fallback to center crop + transform
            w,h = img.size
            min_side = min(w,h)
            left = (w-min_side)//2
            top = (h-min_side)//2
            face_img = img.crop((left, top, left+min_side, top+min_side))
            inp = val_transform(face_img).unsqueeze(0).to(device)
        else:
            face_img = transforms.ToPILImage()(face)
            inp = val_transform(face_img).unsqueeze(0).to(device)
        with torch.no_grad():
            out = model(inp)
            pred = out.argmax(dim=1).item()
        preds.append((f, "real" if pred==0 else "fake"))
    return preds

In [None]:
# Example usage:
# external_preds = predict_folder(r"C:\Users\mujta\OneDrive\Desktop\deepfake_test_folder")
# print(external_preds[:20])
