In [None]:
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ftfy
Successfully installed ftfy-6.3.1
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-98pfrc0r
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-98pfrc0r
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369489

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
from torchvision import models
import torch.nn as nn
import torch.optim as optim
from tqdm.notebook import tqdm

import clip
import kagglehub

import cv2
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt

import pickle
from google.colab import drive
# Mount Google Drive at /content/drive; the embedding pickles are later written
# below /content/drive/MyDrive, so this must succeed before the save cells run.
drive.mount('/content/drive')

In [None]:
import kagglehub

# Kaggle handle of the real-vs-fake faces dataset; kagglehub fetches its latest version.
DATASET_HANDLE = "xhlulu/140k-real-and-fake-faces"

# `path` is the local directory that holds the downloaded dataset files.
# NOTE(review): the save cells further down reassign `path` to a Google Drive
# folder, so the cells that build the split directories must run before them.
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/xhlulu/140k-real-and-fake-faces?dataset_version_number=2...


100%|██████████| 3.75G/3.75G [02:58<00:00, 22.5MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/xhlulu/140k-real-and-fake-faces/versions/2


# **LOADING CLIP**

In [None]:
# Resolve the device first so the notebook also works on CPU-only runtimes.
# (Calling `model.cuda()` unconditionally raised when no GPU was available and
# made the CPU fallback below meaningless.)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the ViT-B/32 CLIP weights directly onto the selected device.
model, preprocess = clip.load("ViT-B/32", device=device)
model.eval()  # inference only: this notebook never trains the CLIP model

input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

# Total number of scalar parameters across all model tensors.
parameter_count = sum(p.numel() for p in model.parameters())

print("Model parameters:", f"{parameter_count:,}")
print("Input resolution:", input_resolution)
print("Context length:", context_length)
print("Vocab size:", vocab_size)

100%|███████████████████████████████████████| 338M/338M [00:05<00:00, 62.9MiB/s]


Model parameters: 151,277,313
Input resolution: 224
Context length: 77
Vocab size: 49408


In [None]:
# Image transformation applied to every picture before it is fed to CLIP.
#
# CLIP's image encoder expects inputs normalized with its own per-channel
# statistics (the constants below) and resized with bicubic interpolation.
# Normalizing with a generic mean/std of 0.5 feeds the encoder out-of-distribution
# pixel values and degrades the resulting embeddings, so embeddings produced with
# the old 0.5/0.5 normalization should be regenerated.
CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
CLIP_STD = (0.26862954, 0.26130258, 0.27577711)

transform = transforms.Compose([
    # Resize to the 224x224 input resolution used by ViT-B/32.
    transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BICUBIC),
    # Convert the PIL image to a float tensor in [0, 1].
    transforms.ToTensor(),
    # Apply CLIP's channel-wise normalization.
    transforms.Normalize(mean=CLIP_MEAN, std=CLIP_STD),
])

In [None]:
def get_image_paths(data_path):
    """Retrieve the paths of all images below a directory, in sorted order.

    The directory tree rooted at ``data_path`` is walked recursively and every
    file whose (case-insensitive) extension is .png, .jpg, .jpeg, .bmp or .gif
    is collected.

    Args:
        data_path: Root directory to search.

    Returns:
        A lexicographically sorted list of file paths. ``os.walk`` yields
        entries in a filesystem-dependent order, so sorting makes the row order
        of anything derived from this list reproducible across runs. The list
        is empty when nothing matches (including when ``data_path`` does not
        exist, because ``os.walk`` then yields nothing).
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_paths = [
        os.path.join(root, file)
        for root, _, files in os.walk(data_path)
        for file in files
        if file.lower().endswith(image_extensions)
    ]
    image_paths.sort()
    return image_paths

In [None]:
def create_clip_embeddings(image_paths, batch_size=32):
    """Generate CLIP image embeddings for a list of image files.

    Uses the notebook-level ``model``, ``transform`` and ``device`` objects.

    Args:
        image_paths: Sequence of image file paths to encode.
        batch_size: Number of images encoded per forward pass.

    Returns:
        A float32 tensor on the CPU holding the concatenated outputs of
        ``model.encode_image`` for every batch, in the order of ``image_paths``.

    Raises:
        ValueError: If ``image_paths`` is empty (usually a wrong dataset
            directory); previously this surfaced as an opaque ``torch.cat`` error.
    """
    if len(image_paths) == 0:
        raise ValueError("create_clip_embeddings received no image paths; check the dataset directory.")

    embeddings = []
    with torch.no_grad():  # Disable gradient computation for inference
        for start in tqdm(range(0, len(image_paths), batch_size), desc="Creating CLIP Embeddings"):
            batch_paths = image_paths[start:start + batch_size]
            batch_tensors = []
            for image_path in batch_paths:
                # Close each file deterministically instead of relying on garbage
                # collection while iterating over a very large number of images.
                with Image.open(image_path) as img:
                    batch_tensors.append(transform(img.convert("RGB")))
            images = torch.stack(batch_tensors).to(device)
            # Use encode_image to get embeddings; cast to float32 and move each
            # batch to the CPU so results do not accumulate on the device.
            batch_embeddings = model.encode_image(images).float()
            embeddings.append(batch_embeddings.cpu())
    return torch.cat(embeddings)

In [None]:
# The dataset is laid out as <path>/real_vs_fake/real-vs-fake/<split>/<label>,
# where <split> is train / valid / test and <label> is real / fake.
def _split_dir(split, label):
    """Return the directory of one split/label combination inside the dataset."""
    return os.path.join(path, f"real_vs_fake/real-vs-fake/{split}/{label}")


train_real_path = _split_dir("train", "real")
train_fake_path = _split_dir("train", "fake")
val_real_path = _split_dir("valid", "real")
val_fake_path = _split_dir("valid", "fake")
test_real_path = _split_dir("test", "real")
test_fake_path = _split_dir("test", "fake")

In [None]:
# Gather the image files of every split/label folder in one pass; the order of
# the targets on the left matches the order of the folders on the right.
(
    train_real_images,
    train_fake_images,
    test_real_images,
    test_fake_images,
    val_real_images,
    val_fake_images,
) = [
    get_image_paths(folder)
    for folder in (
        train_real_path,
        train_fake_path,
        test_real_path,
        test_fake_path,
        val_real_path,
        val_fake_path,
    )
]

In [None]:
# Generate embeddings for every split/label combination.
def _embed_with_banner(label, image_paths):
    """Announce which subset is being processed, then return its CLIP embeddings."""
    print(f"Generating embeddings for {label} images...")
    return create_clip_embeddings(image_paths)


# Validation split
val_real_embeddings = _embed_with_banner("val REAL", val_real_images)
val_fake_embeddings = _embed_with_banner("val FAKE", val_fake_images)

# Training split
train_real_embeddings = _embed_with_banner("training REAL", train_real_images)
train_fake_embeddings = _embed_with_banner("training FAKE", train_fake_images)

# Test split
test_real_embeddings = _embed_with_banner("test REAL", test_real_images)
test_fake_embeddings = _embed_with_banner("test FAKE", test_fake_images)

Generating embeddings for val REAL images...


Creating CLIP Embeddings:   0%|          | 0/313 [00:00<?, ?it/s]

Generating embeddings for val FAKE images...


Creating CLIP Embeddings:   0%|          | 0/313 [00:00<?, ?it/s]

Generating embeddings for training REAL images...


Creating CLIP Embeddings:   0%|          | 0/1563 [00:00<?, ?it/s]

Generating embeddings for training FAKE images...


Creating CLIP Embeddings:   0%|          | 0/1563 [00:00<?, ?it/s]

Generating embeddings for test REAL images...


Creating CLIP Embeddings:   0%|          | 0/313 [00:00<?, ?it/s]

Generating embeddings for test FAKE images...


Creating CLIP Embeddings:   0%|          | 0/313 [00:00<?, ?it/s]

In [None]:
# Fold the validation embeddings into the training embeddings (rows are stacked
# along the first dimension: training rows first, validation rows after).
train_real_embeddings_2 = torch.cat([train_real_embeddings, val_real_embeddings], dim=0)
train_fake_embeddings_2 = torch.cat([train_fake_embeddings, val_fake_embeddings], dim=0)

In [None]:
# Persist the REAL training embeddings (train + validation rows) to Google Drive.
# NOTE(review): this reassigns `path`, which earlier held the dataset directory.
path = '/content/drive/MyDrive/Colab Notebooks/DL_Project/'
train_real_pickle_path = path + '/NVIDIA_Faces_CLIP_embeddings/NVIDIA_CLIP_train_real_embedded_pickle.pickle'
with open(train_real_pickle_path, 'wb') as pickle_file:
    pickle.dump(train_real_embeddings_2, pickle_file)

In [None]:
# Persist the FAKE training embeddings (train + validation rows) to Google Drive.
# NOTE(review): this reassigns `path`, which earlier held the dataset directory.
path = '/content/drive/MyDrive/Colab Notebooks/DL_Project/'
train_fake_pickle_path = path + '/NVIDIA_Faces_CLIP_embeddings/NVIDIA_CLIP_train_fake_embedded_pickle.pickle'
with open(train_fake_pickle_path, 'wb') as pickle_file:
    pickle.dump(train_fake_embeddings_2, pickle_file)

In [None]:
# Persist the REAL test embeddings to Google Drive.
# NOTE(review): this reassigns `path`, which earlier held the dataset directory.
path = '/content/drive/MyDrive/Colab Notebooks/DL_Project/'
test_real_pickle_path = path + '/NVIDIA_Faces_CLIP_embeddings/NVIDIA_CLIP_test_real_embedded_pickle.pickle'
with open(test_real_pickle_path, 'wb') as pickle_file:
    pickle.dump(test_real_embeddings, pickle_file)

In [None]:
# Persist the FAKE test embeddings to Google Drive.
# NOTE(review): this reassigns `path`, which earlier held the dataset directory.
path = '/content/drive/MyDrive/Colab Notebooks/DL_Project/'
test_fake_pickle_path = path + '/NVIDIA_Faces_CLIP_embeddings/NVIDIA_CLIP_test_fake_embedded_pickle.pickle'
with open(test_fake_pickle_path, 'wb') as pickle_file:
    pickle.dump(test_fake_embeddings, pickle_file)