<a href="https://colab.research.google.com/github/myy04/Real-Fake-Image-Classifier/blob/main/CLIP_NotTuned_YonseiDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install scikit-learn matplotlib
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import AutoModel, AutoProcessor, get_linear_schedule_with_warmup
import os
import torchvision
import torchvision.transforms as transforms
from torch import Tensor
from torch.optim.lr_scheduler import CosineAnnealingLR

import random
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

import warnings
warnings.filterwarnings("ignore")

import kagglehub

# Download latest version
path = kagglehub.dataset_download("ciplab/real-and-fake-face-detection")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/ciplab/real-and-fake-face-detection?dataset_version_number=1...


100%|██████████| 431M/431M [00:24<00:00, 18.7MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/ciplab/real-and-fake-face-detection/versions/1


In [3]:
class ImageDataset(Dataset):
    def __init__(self, path):

        real_images_directory = os.path.join(path, 'training_real')
        fake_images_directory = os.path.join(path, 'training_fake')

        real_images = []
        fake_images = []

        for file in os.listdir(real_images_directory):
            real_images.append(os.path.join(real_images_directory, file))

        for file in os.listdir(fake_images_directory):
            fake_images.append(os.path.join(fake_images_directory, file))

        self.images = []
        for i in range(min(len(real_images), len(fake_images))):
            self.images.append((real_images[i], torch.tensor([1, 0], dtype = torch.float32)))
            self.images.append((fake_images[i], torch.tensor([0, 1], dtype = torch.float32)))


    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = Image.open(self.images[idx][0]).convert("RGB")
        image = self.transform(image) # Apply transform if provided
        return image, self.images[idx][1]


eval_dataset = ImageDataset(os.path.join(path, 'real_and_fake_face'))
eval_loader = DataLoader(eval_dataset, batch_size=256, shuffle=True)

In [11]:

clip_processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = AutoModel.from_pretrained("openai/clip-vit-base-patch32")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# --- Model Loading and Text Prompts ---
clip_processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = AutoModel.from_pretrained("openai/clip-vit-base-patch32")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
text = ["a photo of a real human", "an AI generated photo of a human"]

# --- Evaluation Loop ---
model.eval()  # Set the model to evaluation mode
all_predicted_labels = []
all_true_labels = []
num_of_correct = 0
total = 0

with torch.no_grad():  # Disable gradient calculations during evaluation

    for images, labels_batch in eval_loader:
        images, labels_batch = images.to(device), labels_batch.to(device)
        for image, label in zip(images, labels_batch):

            inputs = clip_processor(text=text, images=[image.cpu()], return_tensors="pt", padding=True).to(device)
            outputs = model(**inputs)
            logits_per_image = outputs.logits_per_image
            probs = logits_per_image.softmax(dim=1)
            predicted_class = probs.argmax(dim=1).item()
            total += 1
            num_of_correct += (predicted_class == label.argmax(dim=0).item())
            all_predicted_labels.append(predicted_class)
            all_true_labels.append(label.argmax(dim=0).item())

# --- Calculate and Print Metrics ---
precision = precision_score(all_true_labels, all_predicted_labels)
recall = recall_score(all_true_labels, all_predicted_labels)
f1 = f1_score(all_true_labels, all_predicted_labels)

print(f"Evaluation:")
print(f'Accuracy: {num_of_correct / total * 100}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')


# Evaluation:
# Accuracy: 50.0
# Precision: 0.50
# Recall: 1.00
# F1 Score: 0.67




Evaluation:
Accuracy: 50.0
Precision: 0.50
Recall: 1.00
F1 Score: 0.67
