In [None]:
# Colab setup cell: mount Google Drive, install the face-detection packages,
# and list the Drive root as a quick sanity check that the mount worked.
from google.colab import drive
# Makes the user's Drive available under /content/drive for the rest of the notebook.
drive.mount('/content/drive')
import os
# Shell escapes: install the packages imported by the MTCNN cell below (mtcnn, cv2).
!pip install mtcnn
!pip install opencv-python

# This will list the files in 'My Drive', including the shortcuts.
drive_root = '/content/drive/MyDrive/'
# Last expression of the cell, so the directory listing is shown as the cell output.
os.listdir(drive_root)


Get the frames for the original and manipulated videos and perform MTCNN face detection on them.

In [None]:
import os
import cv2
import time
from mtcnn import MTCNN

# One MTCNN detector instance shared by every process_frame() call in this cell.
detector = MTCNN()
# Plain-text log with one already-processed frame path per line
# (relative path, so it lives in the notebook's current working directory).
log_file = "processed_frames.log"

# Resume support: start from whatever an earlier run recorded in the log.
processed = set()
if os.path.exists(log_file):
    with open(log_file, 'r') as log_handle:
        processed = {entry.strip() for entry in log_handle}

def process_frame(frame_path):
    """Crop the first acceptable face out of a frame and overwrite the frame with it.

    The image is read with OpenCV and passed to the module-level MTCNN
    ``detector``. The first detection that passes the quality filters is
    cropped, outlined with a green rectangle, and written back to
    ``frame_path``. A successfully written path is appended to ``log_file``
    and added to ``processed`` so later calls skip it.

    Args:
        frame_path: Path of the image file. It is overwritten in place when a
            face crop is saved.

    Returns:
        True if a face crop was written by this call. False if the path was
        already in ``processed``, the image could not be read, no detection
        passed the filters, or an error occurred (errors are printed, not raised).
    """
    if frame_path in processed:
        return False  # Not newly processed

    try:
        image = cv2.imread(frame_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            return False

        img_h, img_w = image.shape[:2]
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        faces = detector.detect_faces(rgb_image)

        for face in faces:
            confidence = face['confidence']
            box = face['box']
            keypoints = face['keypoints']

            # Quality filters: confident detection, box at least 50x50 px.
            if confidence < 0.90 or box[2] < 50 or box[3] < 50:
                continue
            # All five landmarks must be present.
            if not all(k in keypoints for k in ['left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right']):
                continue
            # Eyes closer than 15 px horizontally are rejected.
            if abs(keypoints['left_eye'][0] - keypoints['right_eye'][0]) < 15:
                continue

            x, y, w, h = box
            # Compute the far corner from the *unclamped* origin, then clamp
            # both corners to the image. Clamping only the origin (as before)
            # kept the full w/h and shifted the crop past the detected face
            # whenever the detector reported a negative x or y.
            x1, y1 = max(0, x), max(0, y)
            x2, y2 = min(img_w, x + w), min(img_h, y + h)
            if x2 <= x1 or y2 <= y1:
                # Degenerate box after clamping: nothing to crop, try the next face.
                continue

            cropped_face = image[y1:y2, x1:x2]

            # NOTE(review): the rectangle is drawn on the crop that gets saved,
            # so the green outline becomes part of the stored image — confirm
            # this is wanted for the downstream embedding step.
            cv2.rectangle(cropped_face, (0, 0), (x2 - x1, y2 - y1), (0, 255, 0), 2)

            # cv2.imwrite reports failure through its return value; do not log
            # the frame as processed unless the crop really reached the disk.
            if not cv2.imwrite(frame_path, cropped_face):
                raise IOError("cv2.imwrite failed to save the cropped face")

            with open(log_file, 'a') as logf:
                logf.write(frame_path + '\n')
            processed.add(frame_path)

            return True  # Newly processed

    except Exception as e:
        # Best effort: report the problem and let the caller move on to the next frame.
        print(f"💥 Error processing {frame_path}: {e}")

    # Reached only when nothing was written (no acceptable face, or an error).
    time.sleep(0.05)
    return False

def process_all_images(directory_path, max_folders=100):
    """Run process_frame over the .jpg files of each sub-folder of a directory.

    Sub-folders are visited in sorted name order and at most ``max_folders``
    of them are handled; entries that are not directories are ignored and do
    not count toward the limit. A one-line summary is printed per folder.

    Args:
        directory_path: Directory whose immediate sub-folders hold the frames.
        max_folders: Maximum number of sub-folders to handle.
    """
    handled = 0

    for entry in sorted(os.listdir(directory_path)):
        if handled >= max_folders:
            break

        sub_dir = os.path.join(directory_path, entry)
        if not os.path.isdir(sub_dir):
            continue

        frame_names = sorted(name for name in os.listdir(sub_dir) if name.endswith('.jpg'))
        # process_frame returns True only for frames it newly wrote.
        fresh = sum(
            1 for name in frame_names
            if process_frame(os.path.join(sub_dir, name))
        )

        print(f"📂 Done with folder {entry}: {len(frame_names)} total, {fresh} newly processed")
        handled += 1

if __name__ == "__main__":
    # Root folder on Drive whose sub-folders contain the .jpg frames to crop.
    directory_path = '/content/drive/MyDrive/FaceForensicsData/frames_for_original_videos'
    # One value drives both the call and the summary, so the message cannot
    # drift from the limit that was actually applied.
    max_folders = 100
    process_all_images(directory_path, max_folders=max_folders)
    print(f"🎉 Done! Processed up to {max_folders} folders.")


get the labels

Feed the frames into the Inception-ResNet v1 and v2 models.

In [None]:
import torch
import os
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1
from tqdm import tqdm

# Run on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load model: InceptionResnetV1 with the 'vggface2' pretrained weights, in eval mode.
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Transformation: resize to 160x160, convert to a tensor, normalize each channel
# with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# Base paths mapped to class labels (only one video from each is used below).
data_sources = {
    "/content/drive/MyDrive/FaceForensicsData/frames_for_original_videos": 0,   # Real
    "/content/drive/MyDrive/FaceForensicsData/frames_for_FaceSwap": 1 # Fake
}

# Hold all embeddings and labels (parallel lists, one entry per image).
all_embeddings = []
all_labels = []

# Process only the first video from both real and fake datasets.
# NOTE(review): that first video is also among the first 100 folders embedded
# by the "all_embeddings" cell, so this "test" set overlaps it — confirm that
# is acceptable for evaluation.
for folder_path, label in data_sources.items():
    # Keep only real sub-directories BEFORE applying the limit. Slicing first
    # let a stray file that sorts ahead of the video folders take the single
    # slot, which silently produced zero embeddings for that class.
    video_folders = [
        name for name in sorted(os.listdir(folder_path))
        if os.path.isdir(os.path.join(folder_path, name))
    ][:1]

    for video_folder in tqdm(video_folders, desc=f"Processing {folder_path}"):
        video_path = os.path.join(folder_path, video_folder)

        # Embed every .jpg frame inside this folder, in sorted name order.
        for img_name in sorted(os.listdir(video_path)):
            if not img_name.endswith('.jpg'):
                continue

            img_path = os.path.join(video_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Unreadable image: skip it rather than abort the run.
                continue

            # OpenCV loads BGR; convert to RGB before handing the image to PIL.
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_pil = Image.fromarray(img_rgb)
            # unsqueeze(0) adds the batch dimension the model expects.
            img_tensor = transform(img_pil).unsqueeze(0).to(device)

            with torch.no_grad():
                embedding = model(img_tensor).cpu().numpy().flatten()  # shape: (512,)

            # Append the embedding and label to lists
            all_embeddings.append(embedding)
            all_labels.append(label)

# Convert to arrays
X = np.array(all_embeddings)  # shape: (num_images, 512)
y = np.array(all_labels)      # shape: (num_images,)

# Save embeddings and labels to Google Drive
save_path = '/content/drive/MyDrive/embeddings_and_labels/'

# Create the directory if it doesn't exist
os.makedirs(save_path, exist_ok=True)

np.save(os.path.join(save_path, "test_embeddings.npy"), X)
np.save(os.path.join(save_path, "test_labels.npy"), y)

print(f"✅ Finished processing. Embeddings shape: {X.shape}, Labels shape: {y.shape}")


get the labels and save them

In [None]:
import torch
import os
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1
from tqdm import tqdm
from itertools import islice  # Import for limiting folder count

# Run on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load model: InceptionResnetV1 with the 'vggface2' pretrained weights, in eval mode.
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Transformation: resize to 160x160, convert to a tensor, normalize each channel
# with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# Base paths mapped to class labels.
data_sources = {
    "/content/drive/MyDrive/FaceForensicsData/frames_for_original_videos": 0,   # Real
    "/content/drive/MyDrive/FaceForensicsData/frames_for_FaceSwap": 1 # Fake
}

# Hold all embeddings and labels (parallel lists, one entry per image).
all_embeddings = []
all_labels = []

# Process only the first 100 folders from both real and fake datasets.
for folder_path, label in data_sources.items():
    # Filter to real sub-directories BEFORE islice applies the limit. Limiting
    # first let stray files count toward the 100, so fewer than 100 video
    # folders could be embedded without any warning.
    video_dirs = (
        name for name in sorted(os.listdir(folder_path))
        if os.path.isdir(os.path.join(folder_path, name))
    )
    video_folders = list(islice(video_dirs, 100))  # Limit to first 100 folders

    for video_folder in tqdm(video_folders, desc=f"Processing {folder_path}"):
        video_path = os.path.join(folder_path, video_folder)

        # Embed every .jpg frame inside each folder, in sorted name order.
        for img_name in sorted(os.listdir(video_path)):
            if not img_name.endswith('.jpg'):
                continue

            img_path = os.path.join(video_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Unreadable image: skip it rather than abort the run.
                continue

            # OpenCV loads BGR; convert to RGB before handing the image to PIL.
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_pil = Image.fromarray(img_rgb)
            # unsqueeze(0) adds the batch dimension the model expects.
            img_tensor = transform(img_pil).unsqueeze(0).to(device)

            with torch.no_grad():
                embedding = model(img_tensor).cpu().numpy().flatten()  # shape: (512,)

            # Append the embedding and label to lists
            all_embeddings.append(embedding)
            all_labels.append(label)

# Convert to arrays
X = np.array(all_embeddings)  # shape: (num_images, 512)
y = np.array(all_labels)      # shape: (num_images,)

# Save embeddings and labels to Google Drive
save_path = '/content/drive/MyDrive/embeddings_and_labels/'

# Create the directory if it doesn't exist
os.makedirs(save_path, exist_ok=True)

np.save(os.path.join(save_path, "all_embeddings.npy"), X)
np.save(os.path.join(save_path, "all_labels.npy"), y)

print(f"✅ Finished processing. Embeddings shape: {X.shape}, Labels shape: {y.shape}")


In [None]:
# Extract embeddings for v2


In [None]:
import torch
import os
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1
from tqdm import tqdm
from itertools import islice  # Import for limiting folder count

# Run on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load model: InceptionResnetV1 with the 'vggface2' pretrained weights, in eval mode.
# NOTE(review): this is the same model, preprocessing and source folders as the
# cell that writes all_embeddings.npy, so the v2_* files will hold the same
# values unless the frames on disk changed in between — confirm whether a
# different backbone was intended for "v2".
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Transformation: resize to 160x160, convert to a tensor, normalize each channel
# with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# Base paths mapped to class labels.
data_sources = {
    "/content/drive/MyDrive/FaceForensicsData/frames_for_original_videos": 0,   # Real
    "/content/drive/MyDrive/FaceForensicsData/frames_for_FaceSwap": 1 # Fake
}

# Hold all embeddings and labels (parallel lists, one entry per image).
all_embeddings = []
all_labels = []

# Process only the first 100 folders from both real and fake datasets.
for folder_path, label in data_sources.items():
    # Filter to real sub-directories BEFORE islice applies the limit. Limiting
    # first let stray files count toward the 100, so fewer than 100 video
    # folders could be embedded without any warning.
    video_dirs = (
        name for name in sorted(os.listdir(folder_path))
        if os.path.isdir(os.path.join(folder_path, name))
    )
    video_folders = list(islice(video_dirs, 100))  # Limit to first 100 folders

    for video_folder in tqdm(video_folders, desc=f"Processing {folder_path}"):
        video_path = os.path.join(folder_path, video_folder)

        # Embed every .jpg frame inside each folder, in sorted name order.
        for img_name in sorted(os.listdir(video_path)):
            if not img_name.endswith('.jpg'):
                continue

            img_path = os.path.join(video_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Unreadable image: skip it rather than abort the run.
                continue

            # OpenCV loads BGR; convert to RGB before handing the image to PIL.
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_pil = Image.fromarray(img_rgb)
            # unsqueeze(0) adds the batch dimension the model expects.
            img_tensor = transform(img_pil).unsqueeze(0).to(device)

            with torch.no_grad():
                embedding = model(img_tensor).cpu().numpy().flatten()  # shape: (512,)

            # Append the embedding and label to lists
            all_embeddings.append(embedding)
            all_labels.append(label)

# Convert to arrays
X = np.array(all_embeddings)  # shape: (num_images, 512)
y = np.array(all_labels)      # shape: (num_images,)

# Save embeddings and labels to Google Drive
save_path = '/content/drive/MyDrive/embeddings_and_labels/'

# Create the directory if it doesn't exist
os.makedirs(save_path, exist_ok=True)

# Save embeddings and labels to Google Drive as v2 versions
np.save(os.path.join(save_path, "v2_embeddings.npy"), X)
np.save(os.path.join(save_path, "v2_labels.npy"), y)

print(f"✅ Finished processing. Embeddings shape: {X.shape}, Labels shape: {y.shape}")
