<a href="https://colab.research.google.com/github/rohingarg12/Deepfake-detection/blob/main/deepfake_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Download the DFD dataset through kagglehub and keep the value it returns.
# NOTE(review): the later cells read videos from hard-coded "/kaggle/input/..."
# paths instead of this variable — confirm both point at the same data in the
# runtime this notebook is executed in.
sanikatiwarekar_deep_fake_detection_dfd_entire_original_dataset_path = kagglehub.dataset_download('sanikatiwarekar/deep-fake-detection-dfd-entire-original-dataset')

print('Data source import complete.')


In [None]:
!pip install retina-face --quiet
!pip install opencv-python-headless --quiet


In [None]:
import os
import cv2
from retinaface import RetinaFace
from PIL import Image
from tqdm import tqdm

# Sampling settings: every FRAME_SKIP-th frame is inspected and extraction
# for a video stops once MAX_FACES crops have been written.
FRAME_SKIP = 30
MAX_FACES = 5

# Source video folders (one per class) and the root folder for the crops.
REAL_PATH = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences"
FAKE_PATH = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_manipulated_sequences/DFD_manipulated_sequences"
SAVE_PATH = "/kaggle/working/faces"

# One output sub-folder per class; re-running the cell is harmless.
os.makedirs(f"{SAVE_PATH}/fake", exist_ok=True)
os.makedirs(f"{SAVE_PATH}/real", exist_ok=True)

def extract_faces(video_path, output_dir, video_name):
    """Save up to MAX_FACES face crops sampled from one video.

    Every FRAME_SKIP-th frame is passed to ``RetinaFace.extract_faces`` and
    each returned crop is written to ``{output_dir}/{video_name}_{k}.jpg``,
    where ``k`` is the number of crops already saved for this video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        output_dir: Existing directory that receives the JPEG files.
        video_name: Prefix used for the output file names.

    Returns:
        The number of crops written (0 if the video could not be read or no
        face was found). Earlier callers ignored the return value.
    """
    cap = cv2.VideoCapture(video_path)
    frame_idx = 0
    saved = 0

    try:
        while cap.isOpened() and saved < MAX_FACES:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % FRAME_SKIP == 0:
                try:
                    faces = RetinaFace.extract_faces(frame, align=True)
                    for face in faces:
                        # NOTE(review): this assumes the detector returns BGR crops;
                        # confirm against the retina-face documentation.
                        img = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                        img.save(f"{output_dir}/{video_name}_{saved}.jpg")
                        saved += 1
                        if saved >= MAX_FACES:
                            break
                except Exception as exc:
                    # Still best effort (a bad frame must not abort the whole run),
                    # but the failure is reported and Ctrl-C is no longer swallowed.
                    print(f"[skip] {video_name} frame {frame_idx}: {exc}")
            frame_idx += 1
    finally:
        # Always hand the capture back, even when something unexpected propagates.
        cap.release()
    return saved

# Crop faces from the first 5 entries (sorted by name) of each class folder,
# showing one progress bar per class.
for label, path in (('real', REAL_PATH), ('fake', FAKE_PATH)):
    print(f"\n📦 Extracting {label.upper()} videos:")
    videos = sorted(os.listdir(path))[:5]
    for video in tqdm(videos, desc=label.upper()):
        video_name = os.path.splitext(video)[0]
        video_path = os.path.join(path, video)
        extract_faces(video_path, f"{SAVE_PATH}/{label}", video_name)


In [None]:
import os
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch import nn, optim
import matplotlib.pyplot as plt

# Face crops written by the extraction cell (one sub-folder per class) and
# the destination of the fine-tuned weights.
data_path = "/kaggle/working/faces"
model_save_path = "/kaggle/working/resnet_model.pth"

# Training-time preprocessing: fixed 160x160 input, light augmentation, then
# tensor conversion and normalisation with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(160, 160)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

dataset = ImageFolder(root=data_path, transform=transform)
train_loader = DataLoader(dataset, batch_size=4, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ImageNet-pretrained ResNet50 whose classifier is swapped for a 2-way head.
model = models.resnet50(weights='IMAGENET1K_V1')
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Per-epoch history; the plotting cell reads these two lists.
epochs = 5
train_loss, train_acc = [], []

for epoch in range(epochs):
    model.train()
    total_loss, correct, total = 0, 0, 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running totals for the epoch summary.
        total_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # Mean batch loss and accuracy in percent over everything seen this epoch.
    avg_loss = total_loss / len(train_loader)
    accuracy = 100 * correct / total
    train_loss.append(avg_loss)
    train_acc.append(accuracy)

    print(f"Epoch {epoch+1}/{epochs} | Loss: {avg_loss:.4f} | Accuracy: {accuracy:.2f}%")

# Persist only the weights; later cells rebuild the architecture before loading.
torch.save(model.state_dict(), model_save_path)
print(f"\n✅ ResNet model saved to: {model_save_path}")


In [None]:
# Side-by-side training curves built from the history lists of the training cell.
plt.figure(figsize=(12, 4))

# Left panel: mean loss per epoch.
plt.subplot(1, 2, 1)
plt.title("Training Loss")
plt.plot(train_loss, label="Loss")
plt.ylabel("Loss")
plt.xlabel("Epoch")
plt.legend()
plt.grid(True)

# Right panel: accuracy (percent) per epoch.
plt.subplot(1, 2, 2)
plt.title("Training Accuracy")
plt.plot(train_acc, color="green", label="Accuracy")
plt.ylabel("Accuracy (%)")
plt.xlabel("Epoch")
plt.legend()
plt.grid(True)

plt.show()


In [None]:
!pip install pretrainedmodels --quiet


In [None]:
import torch
import pretrainedmodels
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# Face crops (one sub-folder per class) and the destination of the weights.
data_path = "/kaggle/working/faces"
model_save_path = "/kaggle/working/xception_model.pth"

# Training-time preprocessing for the 299x299 Xception input.
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # Xception expects 299x299
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Dataset
dataset = ImageFolder(data_path, transform=transform)
train_loader = DataLoader(dataset, batch_size=4, shuffle=True)

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ImageNet-pretrained Xception whose classifier is swapped for a 2-way head.
xception = pretrainedmodels.__dict__['xception'](pretrained='imagenet')
xception.last_linear = nn.Linear(xception.last_linear.in_features, 2)
xception = xception.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(xception.parameters(), lr=1e-4)

# Per-epoch history; the plotting cell reads these two lists.
epochs = 5
train_loss, train_acc = [], []

for epoch in range(epochs):
    xception.train()
    total_loss, correct, total = 0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = xception(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    acc = 100 * correct / total
    avg_loss = total_loss / len(train_loader)
    train_loss.append(avg_loss)
    train_acc.append(acc)

    # Report the mean batch loss — the same number that is stored in the
    # history and that the ResNet cell prints — not the raw per-epoch sum.
    print(f"Epoch {epoch+1}/{epochs} | Loss: {avg_loss:.4f} | Accuracy: {acc:.2f}%")

# Persist only the weights; later cells rebuild the architecture before loading.
torch.save(xception.state_dict(), model_save_path)
print(f"\n✅ Xception model saved to: {model_save_path}")


In [None]:
# Training curves for the Xception run, laid out like the ResNet plots.
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(train_loss, label="Loss")
plt.title("Xception Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.legend()  # without this call the label= above is never displayed

plt.subplot(1, 2, 2)
plt.plot(train_acc, label="Accuracy", color="orange")
plt.title("Xception Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy (%)")
plt.grid(True)
plt.legend()

plt.show()


In [None]:
# Make sure the folder that receives the extracted feature tensors exists.
os.makedirs("/kaggle/working/features", exist_ok=True)


In [None]:
import os
import torch
import torch.nn as nn
import torchvision.models as models
import pretrainedmodels
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

# --- Paths ---
data_path = "/kaggle/working/faces"
resnet_path = "/kaggle/working/resnet_model.pth"
xception_path = "/kaggle/working/xception_model.pth"
save_path = "/kaggle/working/features"
os.makedirs(save_path, exist_ok=True)

# --- Device ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Transforms ---
# Deterministic preprocessing (no augmentation) at each backbone's input size.
resnet_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

xception_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# --- Load Datasets ---
resnet_dataset = datasets.ImageFolder(data_path, transform=resnet_transform)
xception_dataset = datasets.ImageFolder(data_path, transform=xception_transform)

# shuffle=False on both loaders keeps the two feature matrices and the label
# vector row-aligned with each other.
resnet_loader = DataLoader(resnet_dataset, batch_size=4, shuffle=False)
xception_loader = DataLoader(xception_dataset, batch_size=4, shuffle=False)

# --- Load ResNet with trained weights ---
# The 2-way head is attached first so the checkpoint keys match, then it is
# replaced by Identity so the network returns the pooled features.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
resnet = models.resnet50(weights=None)
resnet.fc = nn.Linear(2048, 2)
resnet.load_state_dict(torch.load(resnet_path, map_location=device))
resnet.fc = nn.Identity()
resnet = resnet.to(device)
resnet.eval()

# --- Load XceptionNet with trained weights ---
xception = pretrainedmodels.__dict__['xception'](pretrained=None)
xception.last_linear = nn.Linear(xception.last_linear.in_features, 2)
xception.load_state_dict(torch.load(xception_path, map_location=device))
xception.last_linear = nn.Identity()
xception = xception.to(device)
xception.eval()

# --- Extract ResNet Features ---
# Labels are collected once, during the ResNet pass.
resnet_features = []
labels = []

with torch.no_grad():
    for images, lbls in resnet_loader:
        images = images.to(device)
        feats = resnet(images)
        resnet_features.append(feats.cpu())
        labels.append(lbls)

# --- Extract Xception Features ---
xception_features = []

with torch.no_grad():
    for images, _ in xception_loader:
        images = images.to(device)
        feats = xception(images)
        xception_features.append(feats.cpu())

# --- Save Features ---
# Concatenate the per-batch pieces into one tensor per artefact.
resnet_features = torch.cat(resnet_features)
xception_features = torch.cat(xception_features)
labels = torch.cat(labels)

torch.save(resnet_features, f"{save_path}/resnet_features.pt")
torch.save(xception_features, f"{save_path}/xception_features.pt")
torch.save(labels, f"{save_path}/labels.pt")

print("✅ Features extracted and saved successfully.")


In [None]:
import torch
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Load the per-image features and labels written by the extraction cell.
resnet_feats = torch.load("/kaggle/working/features/resnet_features.pt")
xception_feats = torch.load("/kaggle/working/features/xception_features.pt")
labels = torch.load("/kaggle/working/features/labels.pt")

# Stack both backbones' features column-wise: one row per face crop.
combined_features = torch.cat((resnet_feats, xception_feats), dim=1).numpy()
y = labels.numpy()

# Shuffled, stratified split. The features were extracted with shuffle=False
# over an ImageFolder, so the rows are grouped by class; a positional slice
# such as [:8] puts a single class in the training set, which
# LogisticRegression refuses to fit.
X_train, X_test, y_train, y_test = train_test_split(
    combined_features, y, test_size=0.3, stratify=y, random_state=42
)

# Train Meta Classifier (Logistic Regression)
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Predict & Evaluate
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"✅ Meta-classifier Accuracy: {acc * 100:.2f}%")

# Confusion Matrix
# ImageFolder indexes the class folders alphabetically, so 0 = fake, 1 = real;
# the tick labels must follow that order.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
disp = ConfusionMatrixDisplay(cm, display_labels=["Fake", "Real"])
disp.plot(cmap='Blues')
plt.title("Confusion Matrix - Ensemble Output")
plt.show()


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. stratify=y keeps the class ratio equal
# in both parts and random_state=42 makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    combined_features, y, test_size=0.3, stratify=y, random_state=42
)


In [None]:
# Sanity check of the split: number of samples per label value in each part.
print("Train class counts:", {i: list(y_train).count(i) for i in set(y_train)})
print("Test class counts:", {i: list(y_test).count(i) for i in set(y_test)})


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Fit the stacking classifier on the combined CNN features.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Predict and Evaluate
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"✅ Ensemble (LogReg) Accuracy: {acc * 100:.2f}%")

# Confusion Matrix
# The labels originate from ImageFolder, which indexes class folders
# alphabetically (0 = fake, 1 = real); the tick labels must use that order.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Fake", "Real"])
disp.plot(cmap='Blues')
plt.title("Confusion Matrix - Ensemble Output")
plt.grid(False)
plt.show()


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Binary metrics for the held-out rows; pos_label=1 is scikit-learn's default,
# spelled out here.
# NOTE(review): y comes from ImageFolder labels (folders indexed
# alphabetically, presumably fake=0 / real=1), so these numbers presumably
# describe the "real" class — confirm that is the intended positive class.
f1 = f1_score(y_test, y_pred, pos_label=1)
recall = recall_score(y_test, y_pred, pos_label=1)
precision = precision_score(y_test, y_pred, pos_label=1)

print(f"🔎 Precision: {precision:.2f}")
print(f"🔄 Recall:    {recall:.2f}")
print(f"🎯 F1 Score:  {f1:.2f}")


In [None]:
import os
import cv2
import torch
import torchvision.models as models
import pretrainedmodels
import torch.nn as nn
from PIL import Image
from torchvision import transforms
from retinaface import RetinaFace
from tqdm import tqdm

# Source video folders (one per class) and the root for the saved sequences.
REAL_VIDEOS = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences"
FAKE_VIDEOS = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_manipulated_sequences/DFD_manipulated_sequences"
SAVE_PATH = "/kaggle/working/lstm_features"

# Settings: inspect every FRAME_SKIP-th frame, keep at most MAX_FRAMES of them.
FRAME_SKIP = 30
MAX_FRAMES = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create output dirs
os.makedirs(f"{SAVE_PATH}/real", exist_ok=True)
os.makedirs(f"{SAVE_PATH}/fake", exist_ok=True)

# Deterministic preprocessing at each backbone's input size.
resnet_tf = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

xception_tf = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Load ResNet: attach the 2-way head so the checkpoint keys match, load, then
# swap the head for Identity so the network returns features.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
resnet = models.resnet50(weights=None)
resnet.fc = nn.Linear(2048, 2)
resnet.load_state_dict(torch.load("/kaggle/working/resnet_model.pth", map_location=DEVICE))
resnet.fc = nn.Identity()
resnet = resnet.to(DEVICE).eval()

# Load Xception the same way.
xception = pretrainedmodels.__dict__['xception'](pretrained=None)
xception.last_linear = nn.Linear(xception.last_linear.in_features, 2)
xception.load_state_dict(torch.load("/kaggle/working/xception_model.pth", map_location=DEVICE))
xception.last_linear = nn.Identity()
xception = xception.to(DEVICE).eval()

# Feature extractor per frame
def get_frame_features(face_img):
    """Return one row of concatenated ResNet and Xception features.

    Args:
        face_img: Face crop accepted by ``resnet_tf`` and ``xception_tf``
            (``process_video`` passes a PIL image).

    Returns:
        CPU tensor with a leading batch dimension of 1; [1, 4096] assuming
        both headless backbones emit 2048 values each.
    """
    branches = ((resnet_tf, resnet), (xception_tf, xception))

    # Each backbone gets its own resized copy, as a batch of one on DEVICE.
    batches = [tf(face_img).unsqueeze(0).to(DEVICE) for tf, _ in branches]

    with torch.no_grad():
        feats = [net(batch) for (_, net), batch in zip(branches, batches)]

    return torch.cat([feat.cpu() for feat in feats], dim=1)

# Video → [T, 4096] feature sequence
def process_video(video_path, save_dir, vid_name):
    """Turn one video into a saved sequence of per-frame feature vectors.

    Every FRAME_SKIP-th frame is passed to ``RetinaFace.extract_faces``; the
    first returned crop is fed to ``get_frame_features`` and the resulting
    vector appended to the sequence until MAX_FRAMES vectors are collected.
    A non-empty sequence is stacked and saved as ``{save_dir}/{vid_name}.pt``.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        save_dir: Existing directory that receives the ``.pt`` file.
        vid_name: File name (without extension) of the saved tensor.

    Returns:
        Number of frames in the sequence (0 means nothing was saved).
        Earlier callers ignored the return value.
    """
    cap = cv2.VideoCapture(video_path)
    frame_idx = 0
    sequence = []

    try:
        while cap.isOpened() and len(sequence) < MAX_FRAMES:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % FRAME_SKIP == 0:
                try:
                    faces = RetinaFace.extract_faces(frame, align=True)
                    if faces:
                        face = faces[0]
                        face_img = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                        feature_vec = get_frame_features(face_img)
                        sequence.append(feature_vec.squeeze(0))
                except Exception as exc:
                    # Still best effort (one bad frame must not abort the run),
                    # but the failure is reported and Ctrl-C is no longer swallowed.
                    print(f"[skip] {vid_name} frame {frame_idx}: {exc}")
            frame_idx += 1
    finally:
        # Always hand the capture back, even when something unexpected propagates.
        cap.release()

    if sequence:
        sequence_tensor = torch.stack(sequence)  # shape: [T, feature_dim]
        torch.save(sequence_tensor, f"{save_dir}/{vid_name}.pt")
    return len(sequence)

# Build feature sequences for the first 100 entries (sorted by name) of each
# class folder.
for label, path in (('real', REAL_VIDEOS), ('fake', FAKE_VIDEOS)):
    print(f"\n📦 Processing {label.upper()} videos:")
    videos = sorted(os.listdir(path))[:100]
    for vid in tqdm(videos):
        name = os.path.splitext(vid)[0]
        save_dir = f"{SAVE_PATH}/{label}"
        video_path = os.path.join(path, vid)
        process_video(video_path, save_dir, name)


In [None]:
from torch.utils.data import Dataset
import torch
import os

class LSTMVideoDataset(Dataset):
    """In-memory dataset of per-video feature sequences stored as ``.pt`` files.

    ``root_dir`` must contain ``real`` and ``fake`` sub-folders. Files under
    ``real`` get label 0 and files under ``fake`` get label 1. Only tensors
    whose first dimension equals ``seq_len`` are kept, so every item has the
    same number of time steps.

    Args:
        root_dir: Folder holding the ``real`` and ``fake`` sub-folders.
        seq_len: Required number of time steps per sequence. Defaults to 5,
            the value that used to be hard-coded.
    """

    def __init__(self, root_dir, seq_len=5):
        self.seq_len = seq_len
        self.data = []
        self.labels = []
        for label, subfolder in enumerate(['real', 'fake']):
            folder = os.path.join(root_dir, subfolder)
            # Sorted listing keeps the item order stable across runs.
            for file_name in sorted(os.listdir(folder)):
                if not file_name.endswith('.pt'):
                    continue
                tensor = torch.load(os.path.join(folder, file_name))
                if tensor.size(0) == seq_len:  # Only keep fixed-length
                    self.data.append(tensor)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of sequences that passed the length filter."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(sequence_tensor, int_label)`` for item ``idx``."""
        return self.data[idx], self.labels[idx]


In [None]:
import torch.nn as nn

class DeepfakeLSTM(nn.Module):
    """LSTM classifier over per-frame feature sequences.

    The batch-first LSTM reads ``[batch, seq, input_size]`` inputs and a
    linear layer maps the output of the last time step to 2 logits.

    Args:
        input_size: Width of each per-frame feature vector.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=4096, hidden_size=256, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=2)

    def forward(self, x):
        """Return ``[batch, 2]`` logits for a ``[batch, seq, features]`` input."""
        per_step, _state = self.lstm(x)
        final_step = per_step[:, -1, :]
        return self.fc(final_step)


In [None]:
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Load the saved sequences and hold out 20% of them for testing.
full_dataset = LSTMVideoDataset("/kaggle/working/lstm_features")
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Init model
model = DeepfakeLSTM().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Per-epoch history; the plotting cell reads these two lists.
train_losses = []
test_accuracies = []

# Training loop
for epoch in range(15):
    model.train()
    running_loss = 0
    for x, y in train_loader:
        # The DataLoader already collates the int labels into a tensor, so it
        # only has to be moved; torch.tensor(y) would copy and warn per batch.
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # max(..., 1) avoids a ZeroDivisionError when the training split is empty.
    train_losses.append(running_loss / max(len(train_loader), 1))

    # Eval: y_true / y_pred are rebuilt every epoch, so after the loop they
    # hold the predictions of the final epoch.
    model.eval()
    correct = 0
    total = 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            output = model(x)
            preds = output.argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)
            y_true.extend(y.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())
    # An empty test split reports 0.0 instead of dividing by zero.
    acc = correct / total if total else 0.0
    test_accuracies.append(acc)
    print(f"Epoch {epoch+1}: Train Loss={train_losses[-1]:.4f}, Test Acc={acc:.2f}")


In [None]:
# LSTM curves: mean training loss and test accuracy (a fraction) per epoch.
plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.plot(train_losses, label="Loss")
plt.title("Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid()
plt.legend()  # without this call the label= above is never displayed

plt.subplot(1,2,2)
plt.plot(test_accuracies, label="Accuracy", color='green')
plt.title("Test Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.grid()
plt.legend()
plt.show()


In [None]:
from sklearn.metrics import classification_report

print("🔍 LSTM Classification Report:\n")
# labels=[0, 1] pins the two classes (0 = real, 1 = fake in LSTMVideoDataset).
# Without it the report raises when the small test split happens to contain
# a single class, because target_names would no longer match.
print(classification_report(
    y_true, y_pred, labels=[0, 1], target_names=["Real", "Fake"], zero_division=0
))


In [None]:
!pip install timm --quiet


In [None]:
import timm


In [None]:
import torch
import timm

# Run on the GPU when one is visible, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ViT-Base used as a frozen feature extractor: the classification
# head is replaced by Identity and the model is put in eval mode on DEVICE.
vit = timm.create_model("vit_base_patch16_224", pretrained=True)
vit.head = torch.nn.Identity()
vit.to(DEVICE)
vit.eval()


In [None]:
from torchvision import transforms

# Preprocessing for the ViT branch: 224x224 input, tensor conversion, then
# per-channel normalisation with mean 0.5 / std 0.5.
vit_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

def get_frame_features(face_img):
    """Return one row of concatenated ResNet, Xception and ViT features.

    Args:
        face_img: Face crop accepted by ``resnet_tf``, ``xception_tf`` and
            ``vit_transform`` (``process_video`` passes a PIL image).

    Returns:
        CPU tensor with a leading batch dimension of 1.
        NOTE(review): with ``vit.head`` set to Identity the ViT-Base branch
        presumably emits 768 values, which would make the row 4864 wide
        rather than 6144 — confirm before sizing the LSTM input.
    """
    branches = (
        (resnet_tf, resnet),
        (xception_tf, xception),
        (vit_transform, vit),
    )

    # Each backbone gets its own resized copy, as a batch of one on DEVICE.
    batches = [tf(face_img).unsqueeze(0).to(DEVICE) for tf, _ in branches]

    with torch.no_grad():
        feats = [net(batch) for (_, net), batch in zip(branches, batches)]

    return torch.cat([feat.cpu() for feat in feats], dim=1)


In [None]:
import os
import cv2
import torch
import torchvision.models as models
import pretrainedmodels
import torch.nn as nn
import timm
from PIL import Image
from torchvision import transforms
from retinaface import RetinaFace
from tqdm import tqdm

# Source video folders (one per class) and the root for the saved sequences.
# NOTE(review): SAVE_PATH is the folder the two-backbone extraction cell also
# writes to, so files with the same name are replaced — confirm that is intended.
REAL_VIDEOS = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences"
FAKE_VIDEOS = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_manipulated_sequences/DFD_manipulated_sequences"
SAVE_PATH = "/kaggle/working/lstm_features"
os.makedirs(f"{SAVE_PATH}/real", exist_ok=True)
os.makedirs(f"{SAVE_PATH}/fake", exist_ok=True)

# Constants: inspect every FRAME_SKIP-th frame, keep at most MAX_FRAMES of them.
FRAME_SKIP = 30
MAX_FRAMES = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deterministic preprocessing at each backbone's input size.
resnet_tf = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

xception_tf = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

vit_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# Load ResNet: attach the 2-way head so the checkpoint keys match, load, then
# swap the head for Identity so the network returns features.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
resnet = models.resnet50(weights=None)
resnet.fc = nn.Linear(2048, 2)
resnet.load_state_dict(torch.load("/kaggle/working/resnet_model.pth", map_location=DEVICE))
resnet.fc = nn.Identity()
resnet = resnet.to(DEVICE).eval()

# Load Xception the same way.
xception = pretrainedmodels.__dict__['xception'](pretrained=None)
xception.last_linear = nn.Linear(xception.last_linear.in_features, 2)
xception.load_state_dict(torch.load("/kaggle/working/xception_model.pth", map_location=DEVICE))
xception.last_linear = nn.Identity()
xception = xception.to(DEVICE).eval()

# Load ViT with its pretrained weights and no classification head.
vit = timm.create_model("vit_base_patch16_224", pretrained=True)
vit.head = nn.Identity()
vit = vit.to(DEVICE).eval()

# Feature extractor function
def get_frame_features(face_img):
    """Return one row of concatenated ResNet, Xception and ViT features.

    Args:
        face_img: Face crop accepted by ``resnet_tf``, ``xception_tf`` and
            ``vit_tf`` (``process_video`` passes a PIL image).

    Returns:
        CPU tensor with a leading batch dimension of 1.
        NOTE(review): with ``vit.head`` set to Identity the ViT-Base branch
        presumably emits 768 values, which would make the row 4864 wide
        rather than 6144 — confirm before sizing the LSTM input.
    """
    branches = (
        (resnet_tf, resnet),
        (xception_tf, xception),
        (vit_tf, vit),
    )

    # Each backbone gets its own resized copy, as a batch of one on DEVICE.
    batches = [tf(face_img).unsqueeze(0).to(DEVICE) for tf, _ in branches]

    with torch.no_grad():
        feats = [net(batch) for (_, net), batch in zip(branches, batches)]

    return torch.cat([feat.cpu() for feat in feats], dim=1)

# Process a single video
def process_video(video_path, save_dir, vid_name):
    """Turn one video into a saved sequence of per-frame feature vectors.

    Every FRAME_SKIP-th frame is passed to ``RetinaFace.extract_faces``; the
    first returned crop is fed to ``get_frame_features`` and the resulting
    vector appended to the sequence until MAX_FRAMES vectors are collected.
    A non-empty sequence is stacked and saved as ``{save_dir}/{vid_name}.pt``.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        save_dir: Existing directory that receives the ``.pt`` file.
        vid_name: File name (without extension) of the saved tensor.

    Returns:
        Number of frames in the sequence (0 means nothing was saved).
        Earlier callers ignored the return value.
    """
    cap = cv2.VideoCapture(video_path)
    frame_idx = 0
    sequence = []

    try:
        while cap.isOpened() and len(sequence) < MAX_FRAMES:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % FRAME_SKIP == 0:
                try:
                    faces = RetinaFace.extract_faces(frame, align=True)
                    if faces:
                        face = faces[0]
                        face_img = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                        feature_vec = get_frame_features(face_img)
                        sequence.append(feature_vec.squeeze(0))
                except Exception as exc:
                    # Still best effort (one bad frame must not abort the run),
                    # but the failure is reported and Ctrl-C is no longer swallowed.
                    print(f"[skip] {vid_name} frame {frame_idx}: {exc}")
            frame_idx += 1
    finally:
        # Always hand the capture back, even when something unexpected propagates.
        cap.release()

    if sequence:
        sequence_tensor = torch.stack(sequence)  # shape: [T, feature_dim]
        torch.save(sequence_tensor, f"{save_dir}/{vid_name}.pt")
    return len(sequence)

# Build feature sequences for the first 100 entries (sorted by name) of each
# class folder.
for label, path in (('real', REAL_VIDEOS), ('fake', FAKE_VIDEOS)):
    print(f"\n📦 Extracting {label.upper()} videos...")
    videos = sorted(os.listdir(path))[:100]
    for vid in tqdm(videos):
        name = os.path.splitext(vid)[0]
        save_dir = f"{SAVE_PATH}/{label}"
        video_path = os.path.join(path, vid)
        process_video(video_path, save_dir, name)


In [None]:
from torch.utils.data import Dataset
import torch
import os

class LSTMVideoDataset(Dataset):
    """In-memory dataset of per-video feature sequences stored as ``.pt`` files.

    ``root_dir`` must contain ``real`` and ``fake`` sub-folders. Files under
    ``real`` get label 0 and files under ``fake`` get label 1. Only tensors
    whose first dimension equals ``seq_len`` are kept, so every item has the
    same number of time steps.

    Args:
        root_dir: Folder holding the ``real`` and ``fake`` sub-folders.
        seq_len: Required number of time steps per sequence. Defaults to 5,
            the value that used to be hard-coded.
    """

    def __init__(self, root_dir, seq_len=5):
        self.seq_len = seq_len
        self.data = []
        self.labels = []
        for label, subfolder in enumerate(['real', 'fake']):
            folder = os.path.join(root_dir, subfolder)
            # Sorted listing keeps the item order stable across runs.
            for file_name in sorted(os.listdir(folder)):
                if not file_name.endswith('.pt'):
                    continue
                tensor = torch.load(os.path.join(folder, file_name))
                if tensor.size(0) == seq_len:  # Ensure fixed sequence length
                    self.data.append(tensor)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of sequences that passed the length filter."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(sequence_tensor, int_label)`` for item ``idx``."""
        return self.data[idx], self.labels[idx]


In [None]:
from torch.utils.data import Dataset
import torch
import os

class LSTMVideoDataset(Dataset):
    """In-memory dataset of per-video feature sequences stored as ``.pt`` files.

    ``root_dir`` must contain ``real`` and ``fake`` sub-folders. Files under
    ``real`` get label 0 and files under ``fake`` get label 1. Only tensors
    whose first dimension equals ``seq_len`` are kept, so every item has the
    same number of time steps.

    Args:
        root_dir: Folder holding the ``real`` and ``fake`` sub-folders.
        seq_len: Required number of time steps per sequence. Defaults to 5,
            the value that used to be hard-coded.
    """

    def __init__(self, root_dir, seq_len=5):
        self.seq_len = seq_len
        self.data = []
        self.labels = []
        for label, subfolder in enumerate(['real', 'fake']):
            folder = os.path.join(root_dir, subfolder)
            # Sorted listing keeps the item order stable across runs.
            for file_name in sorted(os.listdir(folder)):
                if not file_name.endswith('.pt'):
                    continue
                tensor = torch.load(os.path.join(folder, file_name))
                if tensor.size(0) == seq_len:  # Ensure fixed sequence length
                    self.data.append(tensor)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of sequences that passed the length filter."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(sequence_tensor, int_label)`` for item ``idx``."""
        return self.data[idx], self.labels[idx]


In [None]:
import torch.nn as nn

class DeepfakeLSTM(nn.Module):
    """LSTM classifier over per-frame feature sequences.

    The batch-first LSTM reads ``[batch, seq, input_size]`` inputs and a
    linear layer maps the output of the last time step to 2 logits.

    Args:
        input_size: Width of each per-frame feature vector.
            NOTE(review): the 6144 default only fits if every backbone
            contributes 2048 values — verify against the extracted features.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=6144, hidden_size=256, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=2)

    def forward(self, x):
        """Return ``[batch, 2]`` logits for a ``[batch, seq, features]`` input."""
        per_step, _state = self.lstm(x)
        final_step = per_step[:, -1, :]
        return self.fc(final_step)


In [None]:
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

# Load the saved sequences and hold out 20% of them for testing.
dataset = LSTMVideoDataset("/kaggle/working/lstm_features")
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_set, test_set = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_set, batch_size=4, shuffle=True)
test_loader = DataLoader(test_set, batch_size=4)

# Size the LSTM input from the stored features instead of assuming 6144.
# The preceding extraction cell concatenates ResNet, Xception and a ViT whose
# head is Identity, so the rows are presumably 2048 + 2048 + 768 = 4864 wide
# and the hard-coded default would fail on the first batch.
feature_dim = dataset[0][0].size(-1) if len(dataset) else 6144
model = DeepfakeLSTM(input_size=feature_dim).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Per-epoch history plus the predictions of the most recent evaluation.
train_losses = []
test_accuracies = []
y_true, y_pred = [], []

# Train
for epoch in range(5):
    model.train()
    running_loss = 0
    for x, y in train_loader:
        # The DataLoader already collates the int labels into a tensor, so it
        # only has to be moved; torch.tensor(y) would copy and warn per batch.
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # max(..., 1) avoids a ZeroDivisionError when the training split is empty.
    train_losses.append(running_loss / max(len(train_loader), 1))

    # Evaluate: the lists are cleared in place so they end up holding the
    # predictions of the final epoch.
    model.eval()
    correct = total = 0
    y_true.clear()
    y_pred.clear()
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            output = model(x)
            preds = output.argmax(dim=1)
            y_true.extend(y.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())
            correct += (preds == y).sum().item()
            total += y.size(0)
    # An empty test split reports 0.0 instead of dividing by zero.
    acc = correct / total if total else 0.0
    test_accuracies.append(acc)
    print(f"Epoch {epoch+1}: Train Loss={train_losses[-1]:.4f}, Test Acc={acc:.2f}")


In [None]:
# This cell held a lone `return torch.cat((resnet_feat.cpu(), xception_feat.cpu(),
# vit_feat.cpu()), dim=1)` outside any function, which is a SyntaxError when the
# cell runs. The statement belongs to get_frame_features, which already ends with
# it, so nothing needs to execute here.


In [None]:
# Pretrained ViT-Base whose classification head is replaced by a
# Linear -> ReLU projection to 2048 values, then moved to DEVICE in eval mode.
# NOTE(review): the projection layer is created here and nothing in the
# visible cells trains or saves it, so its weights are presumably random in
# every session — confirm that is acceptable for the downstream features.
vit = timm.create_model("vit_base_patch16_224", pretrained=True)
vit.head = torch.nn.Sequential(nn.Linear(vit.head.in_features, 2048), nn.ReLU())
vit.to(DEVICE)
vit.eval()


In [None]:
def get_frame_features(face_img):
    """Return one row of concatenated ResNet, Xception and ViT features.

    Args:
        face_img: Face crop accepted by ``resnet_tf``, ``xception_tf`` and
            ``vit_tf`` (``process_video`` passes a PIL image).

    Returns:
        CPU tensor with a leading batch dimension of 1; [1, 6144] when each
        of the three branches emits 2048 values (the ViT branch does so
        through its 2048-wide projection head).
    """
    branches = (
        (resnet_tf, resnet),
        (xception_tf, xception),
        (vit_tf, vit),
    )

    # Each backbone gets its own resized copy, as a batch of one on DEVICE.
    batches = [tf(face_img).unsqueeze(0).to(DEVICE) for tf, _ in branches]

    with torch.no_grad():
        feats = [net(batch) for (_, net), batch in zip(branches, batches)]

    return torch.cat([feat.cpu() for feat in feats], dim=1)


In [None]:
import os
import cv2
import torch
import torchvision.models as models
import pretrainedmodels
import torch.nn as nn
import timm
from PIL import Image
from torchvision import transforms
from retinaface import RetinaFace
from tqdm import tqdm

# Source video folders (one per class) and the root for the saved sequences.
# NOTE(review): SAVE_PATH is shared with the earlier extraction cells, so
# files with the same name are replaced — confirm that is intended.
REAL_VIDEOS = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences"
FAKE_VIDEOS = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_manipulated_sequences/DFD_manipulated_sequences"
SAVE_PATH = "/kaggle/working/lstm_features"
os.makedirs(f"{SAVE_PATH}/real", exist_ok=True)
os.makedirs(f"{SAVE_PATH}/fake", exist_ok=True)

# Constants: inspect every FRAME_SKIP-th frame, keep at most MAX_FRAMES of them.
FRAME_SKIP = 30
MAX_FRAMES = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deterministic preprocessing at each backbone's input size.
resnet_tf = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

xception_tf = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

vit_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# Load ResNet: attach the 2-way head so the checkpoint keys match, load, then
# swap the head for Identity so the network returns features.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
resnet = models.resnet50(weights=None)
resnet.fc = nn.Linear(2048, 2)
resnet.load_state_dict(torch.load("/kaggle/working/resnet_model.pth", map_location=DEVICE))
resnet.fc = nn.Identity()
resnet = resnet.to(DEVICE).eval()

# Load Xception the same way.
xception = pretrainedmodels.__dict__['xception'](pretrained=None)
xception.last_linear = nn.Linear(xception.last_linear.in_features, 2)
xception.load_state_dict(torch.load("/kaggle/working/xception_model.pth", map_location=DEVICE))
xception.last_linear = nn.Identity()
xception = xception.to(DEVICE).eval()

# Load ViT (with projection) so its output is 2048 wide like the other branches.
# NOTE(review): the projection layer is created here and nothing in the
# visible cells trains or saves it, so its weights are presumably random in
# every session — confirm that is acceptable for the downstream features.
vit = timm.create_model("vit_base_patch16_224", pretrained=True)
vit.head = nn.Sequential(
    nn.Linear(vit.head.in_features, 2048),
    nn.ReLU()
)
vit = vit.to(DEVICE).eval()

# Feature extractor function
def get_frame_features(face_img):
    """Return one row of concatenated ResNet, Xception and ViT features.

    Args:
        face_img: Face crop accepted by ``resnet_tf``, ``xception_tf`` and
            ``vit_tf`` (``process_video`` passes a PIL image).

    Returns:
        CPU tensor with a leading batch dimension of 1; [1, 6144] when each
        of the three branches emits 2048 values (the ViT branch does so
        through its 2048-wide projection head).
    """
    branches = (
        (resnet_tf, resnet),
        (xception_tf, xception),
        (vit_tf, vit),
    )

    # Each backbone gets its own resized copy, as a batch of one on DEVICE.
    batches = [tf(face_img).unsqueeze(0).to(DEVICE) for tf, _ in branches]

    with torch.no_grad():
        feats = [net(batch) for (_, net), batch in zip(branches, batches)]

    return torch.cat([feat.cpu() for feat in feats], dim=1)

# Process a single video
def process_video(video_path, save_dir, vid_name):
    """Turn one video into a saved sequence of per-frame feature vectors.

    Every FRAME_SKIP-th frame is passed to ``RetinaFace.extract_faces``; the
    first returned crop is fed to ``get_frame_features`` and the resulting
    vector appended to the sequence until MAX_FRAMES vectors are collected.
    A non-empty sequence is stacked and saved as ``{save_dir}/{vid_name}.pt``.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        save_dir: Existing directory that receives the ``.pt`` file.
        vid_name: File name (without extension) of the saved tensor.

    Returns:
        Number of frames in the sequence (0 means nothing was saved).
        Earlier callers ignored the return value.
    """
    cap = cv2.VideoCapture(video_path)
    frame_idx = 0
    sequence = []

    try:
        while cap.isOpened() and len(sequence) < MAX_FRAMES:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % FRAME_SKIP == 0:
                try:
                    faces = RetinaFace.extract_faces(frame, align=True)
                    if faces:
                        face = faces[0]
                        face_img = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                        feature_vec = get_frame_features(face_img)
                        sequence.append(feature_vec.squeeze(0))
                except Exception as exc:
                    # Still best effort (one bad frame must not abort the run),
                    # but the failure is reported and Ctrl-C is no longer swallowed.
                    print(f"[skip] {vid_name} frame {frame_idx}: {exc}")
            frame_idx += 1
    finally:
        # Always hand the capture back, even when something unexpected propagates.
        cap.release()

    if sequence:
        sequence_tensor = torch.stack(sequence)  # shape: [T, feature_dim]
        torch.save(sequence_tensor, f"{save_dir}/{vid_name}.pt")
    return len(sequence)

# Build feature sequences for the first 100 entries (sorted by name) of each
# class folder.
for label, path in (('real', REAL_VIDEOS), ('fake', FAKE_VIDEOS)):
    print(f"\n📦 Extracting {label.upper()} videos...")
    videos = sorted(os.listdir(path))[:100]
    for vid in tqdm(videos):
        name = os.path.splitext(vid)[0]
        save_dir = f"{SAVE_PATH}/{label}"
        video_path = os.path.join(path, vid)
        process_video(video_path, save_dir, name)


In [None]:
from torch.utils.data import Dataset
import torch
import os

class LSTMVideoDataset(Dataset):
    """In-memory dataset of per-video feature sequences stored as ``.pt`` files.

    ``root_dir`` must contain the sub-folders ``real`` (label 0) and ``fake``
    (label 1). Every ``.pt`` file in them is loaded eagerly; a file is kept
    only if its first dimension equals ``num_frames``, so that all kept
    sequences have the same length.

    Args:
        root_dir: Directory holding the ``real`` and ``fake`` sub-folders.
        num_frames: Required sequence length (size of dim 0). Defaults to 5,
            the value that was previously hard-coded.

    Raises:
        FileNotFoundError: If either sub-folder is missing.
    """

    def __init__(self, root_dir, num_frames=5):
        self.data = []
        self.labels = []
        self.num_frames = num_frames
        for label, subfolder in enumerate(['real', 'fake']):
            folder = os.path.join(root_dir, subfolder)
            # Sorted so the sample order is deterministic across runs.
            for file_name in sorted(os.listdir(folder)):
                if not file_name.endswith('.pt'):
                    continue
                # map_location keeps the whole dataset in host memory no
                # matter which device the tensors were saved from.
                # NOTE: torch.load unpickles the file; only load trusted files.
                tensor = torch.load(
                    os.path.join(folder, file_name), map_location="cpu"
                )
                if tensor.size(0) == num_frames:
                    self.data.append(tensor)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of sequences that passed the length filter."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(sequence_tensor, int_label)`` for sample ``idx``."""
        return self.data[idx], self.labels[idx]


In [None]:
import torch.nn as nn

class DeepfakeLSTM(nn.Module):
    """LSTM over a sequence of frame features with a 2-way linear classifier.

    Args:
        input_size: Width of each per-frame feature vector.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=6144, hidden_size=256, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=2)

    def forward(self, x):
        """Return class logits of shape ``[B, 2]`` for input ``x`` of ``[B, T, input_size]``."""
        per_step, _state = self.lstm(x)
        # Classify from the LSTM output at the final time step only.
        final_step = per_step[:, -1, :]
        return self.fc(final_step)


In [None]:
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

# Load the saved feature sequences and split them 80/20 into train and test.
# NOTE(review): no generator is passed to random_split, so unless a global
# seed is set elsewhere the split differs between runs.
dataset = LSTMVideoDataset("/kaggle/working/lstm_features")
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_set, test_set = random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_set, batch_size=4, shuffle=True)
test_loader = DataLoader(test_set, batch_size=4)

# Model, optimizer and loss
model = DeepfakeLSTM(input_size=6144).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Per-epoch history, plus the labels/predictions of the most recent epoch.
train_losses = []
test_accuracies = []
y_true, y_pred = [], []

# Training loop
for epoch in range(15):
    model.train()
    running_loss = 0
    for x, y in train_loader:
        # The DataLoader's default collate already returns the integer labels
        # as a tensor, so it is moved directly; wrapping it in torch.tensor()
        # would copy it again and emit a UserWarning on every batch.
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Mean loss over the batches of this epoch.
    train_losses.append(running_loss / len(train_loader))

    # Evaluate on the held-out split
    model.eval()
    correct = total = 0
    # Cleared in place so y_true / y_pred always describe the latest epoch.
    y_true.clear()
    y_pred.clear()
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            output = model(x)
            preds = output.argmax(dim=1)
            y_true.extend(y.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())
            correct += (preds == y).sum().item()
            total += y.size(0)
    acc = correct / total
    test_accuracies.append(acc)
    print(f"📊 Epoch {epoch+1}: Train Loss = {train_losses[-1]:.4f} | Test Acc = {acc:.2%}")


In [None]:
# Overlay the per-epoch training loss and test accuracy on one set of axes.
for series, legend_label in (
    (train_losses, "Train Loss"),
    (test_accuracies, "Test Accuracy"),
):
    plt.plot(series, label=legend_label)
plt.title("LSTM Training Progress")
plt.xlabel("Epoch")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
from sklearn.metrics import classification_report

# Precision / recall / F1 for the predictions of the last evaluated epoch.
print("🧠 LSTM + ViT Classification Report:\n")
# labels is pinned to both class ids so the report still works (instead of
# raising ValueError) when only one class appears in y_true / y_pred;
# zero_division=0 reports 0 for a class with no samples without warning.
print(classification_report(
    y_true,
    y_pred,
    labels=[0, 1],
    target_names=["Real", "Fake"],
    zero_division=0,
))


In [None]:
from sklearn.metrics import classification_report

# Precision / recall / F1 for the predictions of the last evaluated epoch.
print("📋 LSTM + ViT Classification Report:\n")
# labels is pinned to both class ids so the report still works (instead of
# raising ValueError) when only one class appears in y_true / y_pred;
# zero_division=0 reports 0 for a class with no samples without warning.
print(classification_report(
    y_true,
    y_pred,
    labels=[0, 1],
    target_names=["Real", "Fake"],
    zero_division=0,
))


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# labels is pinned to both class ids so the matrix is always 2x2 and lines up
# with the two tick labels, even if one class is absent from y_true / y_pred.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=["Real", "Fake"], yticklabels=["Real", "Fake"])
# Rows of the matrix are the true classes, columns the predicted classes.
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - LSTM + ViT")
plt.show()


In [None]:
# Draw the per-epoch training loss and test accuracy as two labelled curves.
for curve, curve_label in (
    (train_losses, "Train Loss"),
    (test_accuracies, "Test Accuracy"),
):
    plt.plot(curve, label=curve_label)
plt.title("Training Progress")
plt.xlabel("Epoch")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# ✅ Accuracy over the labels / predictions of the last evaluated epoch
acc = accuracy_score(y_true, y_pred)
print(f"\n🎯 Final Accuracy (ResNet + Xception + ViT + LSTM): {acc * 100:.2f}%\n")

# ✅ Precision, Recall, F1-Score
print("📋 Classification Report (ResNet + Xcep + ViT + LSTM):\n")
# labels is pinned to both class ids so the report still works (instead of
# raising ValueError) when only one class appears in y_true / y_pred;
# zero_division=0 reports 0 for a class with no samples without warning.
print(classification_report(
    y_true,
    y_pred,
    labels=[0, 1],
    target_names=["Real", "Fake"],
    zero_division=0,
))

# ✅ Optional: Confusion Matrix
# Pinned labels keep the matrix 2x2 so it always lines up with the tick labels.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=["Real", "Fake"], yticklabels=["Real", "Fake"])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - Hybrid Ensemble")
plt.show()
