In [4]:
!pip install torch torchvision torchaudio
!pip install mtcnn
!pip install grad-cam
!pip install scikit-learn
!pip install opencv-python

import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
from mtcnn import MTCNN
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

class FrameExtractor:
    def __init__(self):
        self.detector = MTCNN()
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224,224)),
            transforms.ToTensor()
        ])

    def extract_faces(self, video_path, frame_skip=10):
        cap = cv2.VideoCapture(video_path)
        faces = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if int(cap.get(1)) % frame_skip == 0:
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                detections = self.detector.detect_faces(frame_rgb)
                if detections:
                    x,y,w,h = detections[0]['box']
                    face = frame_rgb[y:y+h, x:x+w]
                    if face.size != 0:
                        faces.append(self.transform(face))
        cap.release()
        return faces

extractor = FrameExtractor()

real_faces = extractor.extract_faces('/content/id0_0001.mp4')    # Real Video
fake_faces = extractor.extract_faces('/content/id0_id1_0001.mp4')  # Fake Video

print(f"Extracted {len(real_faces)} real frames and {len(fake_faces)} fake frames.")

all_faces = real_faces + fake_faces
all_labels = [0]*len(real_faces) + [1]*len(fake_faces)

class DeepfakeDataset(Dataset):
    def __init__(self, faces, labels):
        self.faces = faces
        self.labels = labels

    def __len__(self):
        return len(self.faces)

    def __getitem__(self, idx):
        return self.faces[idx], self.labels[idx]

dataset = DeepfakeDataset(all_faces, all_labels)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=8, shuffle=False)

class DeepfakeDetector(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = models.efficientnet_b0(pretrained=True)
        self.backbone.classifier = nn.Identity()
        self.fc = nn.Sequential(
            nn.Linear(1280, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 2)
        )

    def forward(self, x):
        features = self.backbone(x)
        output = self.fc(features)
        return output

model = DeepfakeDetector().to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}] - Loss: {total_loss/len(train_loader):.4f}")

print("Training completed!")

model.eval()
preds = []
trues = []

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        pred = outputs.argmax(dim=1)
        preds.extend(pred.cpu().numpy())
        trues.extend(labels.cpu().numpy())

acc = accuracy_score(trues, preds)
prec = precision_score(trues, preds)
rec = recall_score(trues, preds)
f1 = f1_score(trues, preds)
auc = roc_auc_score(trues, preds)

print("\n==== Validation Results ====")
print(f"Accuracy: {acc*100:.2f}%")
print(f"Precision: {prec:.2f}")
print(f"Recall: {rec:.2f}")
print(f"F1-Score: {f1:.2f}")
print(f"ROC-AUC: {auc:.2f}")

def gradcam_visualization(model, image_tensor):
    target_layers = [model.backbone.features[-1]]
    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=torch.cuda.is_available())
    grayscale_cam = cam(input_tensor=image_tensor.unsqueeze(0))[0, :]

    img = image_tensor.permute(1,2,0).cpu().numpy()
    visualization = show_cam_on_image(img, grayscale_cam, use_rgb=True)

    plt.figure(figsize=(6,6))
    plt.imshow(visualization)
    plt.axis('off')
    plt.title("Grad-CAM on Detected Frame")
    plt.show()

# Pick one real and one fake frame to visualize
print("\nGradCAM Visualization on sample real frame:")
gradcam_visualization(model, real_faces[0].to(device))

print("\nGradCAM Visualization on sample fake frame:")
gradcam_visualization(model, fake_faces[0].to(device))


Using device: cpu
Extracted 30 real frames and 30 fake frames.




Epoch [1/5] - Loss: 0.6903
Epoch [2/5] - Loss: 0.5962
Epoch [3/5] - Loss: 0.5087
Epoch [4/5] - Loss: 0.3997
Epoch [5/5] - Loss: 0.2964
Training completed!


Exception ignored in: <function BaseCAM.__del__ at 0x7c5afe3f8c20>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/pytorch_grad_cam/base_cam.py", line 212, in __del__
    self.activations_and_grads.release()
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'GradCAM' object has no attribute 'activations_and_grads'



==== Validation Results ====
Accuracy: 58.33%
Precision: 0.58
Recall: 1.00
F1-Score: 0.74
ROC-AUC: 0.50

GradCAM Visualization on sample real frame:


TypeError: GradCAM.__init__() got an unexpected keyword argument 'use_cuda'

In [None]:

def predict_frame(model, frame_tensor):
    model.eval()
    with torch.no_grad():
        output = model(frame_tensor.unsqueeze(0).to(device))
        pred = torch.argmax(output, dim=1).item()
    return pred  # 0: Real, 1: Fake

def live_video_prediction(model, video_path):
    model.eval()
    cap = cv2.VideoCapture(video_path)

    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224,224)),
        transforms.ToTensor()
    ])

    label_map = {0: "Real", 1: "Fake"}
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        face_detector = MTCNN()
        detections = face_detector.detect_faces(frame_rgb)

        if detections:
            x, y, w, h = detections[0]['box']
            face = frame_rgb[y:y+h, x:x+w]
            if face.size != 0:
                face_tensor = transform(face)
                pred = predict_frame(model, face_tensor)

                # Draw Rectangle
                cv2.rectangle(frame, (x,y), (x+w, y+h), (0,255,0) if pred==0 else (0,0,255), 2)
                cv2.putText(frame, label_map[pred], (x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0) if pred==0 else (0,0,255), 2)

        # Resize for display
        frame = cv2.resize(frame, (640,480))
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow('Real-Time Deepfake Detection', frame_bgr)

        frame_count += 1

        # Press 'q' to exit early
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print(f"Total frames processed: {frame_count}")
    cap.release()
    cv2.destroyAllWindows()

print("\nStarting live real-time detection demo...")
live_video_prediction(model, '/content/id0_id1_0001.mp4')  # Example: Fake video
