<a href="https://colab.research.google.com/github/myy04/Real-Fake-Image-Classifier/blob/main/CLIP%2BRESNET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Set up the environment and download the data

In [1]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import AutoModel, AutoProcessor
import os
import torchvision
import torchvision.transforms as transforms
from torch import Tensor

import warnings
warnings.filterwarnings("ignore")

Download the CIFAKE dataset (real and AI-generated images) from Kaggle

In [2]:
import kagglehub

# Fetch the CIFAKE dataset through kagglehub; the call returns the local
# directory that holds the downloaded files.
path = kagglehub.dataset_download(
    "birdy654/cifake-real-and-ai-generated-synthetic-images"
)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/birdy654/cifake-real-and-ai-generated-synthetic-images?dataset_version_number=3...


100%|██████████| 105M/105M [00:00<00:00, 118MB/s]  

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3


Custom Dataset Class

In [3]:
class ImageDataset(Dataset):
    """Balanced real-vs-fake image dataset read from a directory tree.

    ``path`` must contain two sub-directories named ``REAL`` and ``FAKE``.
    The files of each directory are sorted by name and paired index by index,
    so the dataset holds the same number of samples from each class; surplus
    files of the larger class are ignored. Samples alternate REAL, FAKE,
    REAL, FAKE, ...

    Each item is a ``(pixel_values, label)`` tuple: ``pixel_values`` is the
    ``'pixel_values'`` entry produced by ``image_processor`` with dimension 0
    squeezed away, and ``label`` is a one-hot float32 tensor (``[1, 0]`` for
    REAL, ``[0, 1]`` for FAKE).

    Args:
        path: directory containing the ``REAL`` and ``FAKE`` sub-directories.
        image_processor: callable accepting ``images=...`` and
            ``return_tensors="pt"`` and returning a mapping with a
            ``'pixel_values'`` tensor.
    """

    def __init__(self, path, image_processor):
        # Both attribute names are kept so code referring to either still works.
        self.input_processor = image_processor
        self.image_processor = image_processor

        real_images = self._list_files(os.path.join(path, 'REAL'))
        fake_images = self._list_files(os.path.join(path, 'FAKE'))

        self.images = []
        # zip() stops at the shorter list, which keeps the two classes balanced.
        for real_path, fake_path in zip(real_images, fake_images):
            self.images.append((real_path, torch.tensor([1, 0], dtype = torch.float32)))
            self.images.append((fake_path, torch.tensor([0, 1], dtype = torch.float32)))

    @staticmethod
    def _list_files(directory):
        """Return the sorted paths of the regular files directly inside ``directory``."""
        # os.listdir/os.scandir yield entries in arbitrary order; sorting makes
        # the REAL/FAKE pairing (and the subset kept) reproducible across runs.
        # Non-file entries (e.g. checkpoint folders) are skipped because they
        # cannot be opened as images.
        with os.scandir(directory) as entries:
            return sorted(entry.path for entry in entries if entry.is_file())

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path, label = self.images[idx]

        # The context manager closes the file handle; convert() loads the
        # pixels first and returns an independent 3-channel image.
        with Image.open(image_path) as image:
            image = image.convert("RGB")

        # Only pixel_values is returned, so no text is passed to the processor:
        # tokenising prompts for every sample would be wasted work.
        image_processed = self.image_processor(images = image, return_tensors = "pt")

        return image_processed['pixel_values'].squeeze(0), label


# Mini-batch size used by both loaders, and the number of training epochs.
BATCHSIZE = 128
NEPOCH = 30

# CLIP's processor is used here as the image preprocessor for the datasets.
clip_processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Each split directory is expected to contain REAL/ and FAKE/ sub-folders.
train_dataset = ImageDataset(os.path.join(path, 'train'), clip_processor)
eval_dataset = ImageDataset(os.path.join(path, 'test'), clip_processor)

train_dataloader = DataLoader(train_dataset, batch_size = BATCHSIZE, shuffle = True)
# Evaluation accuracy does not depend on sample order, so the evaluation
# loader iterates in a fixed order instead of reshuffling on every pass.
eval_dataloader = DataLoader(eval_dataset, batch_size = BATCHSIZE, shuffle = False)


preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

ResNet Initialization

In [4]:
# Randomly initialised ResNet-18. Calling the constructor with no arguments
# requests no pretrained weights and avoids the deprecated `pretrained` flag.
resnet_model = torchvision.models.resnet18()
# Swap the final fully connected layer for a 2-output (real / fake) layer.
resnet_model.fc = nn.Linear(resnet_model.fc.in_features, 2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

resnet_model = resnet_model.to(device)

# The labels produced by ImageDataset are one-hot float vectors, which
# CrossEntropyLoss accepts as class-probability targets.
criterion = nn.CrossEntropyLoss()
# The learning rate is spelled out: PyTorch releases where `lr` is a required
# argument raise without it, and 1e-3 is the default of releases that have one.
optimizer = optim.SGD(resnet_model.parameters(), lr=1e-3, weight_decay=1e-3)

In [5]:
def train_step(input: Tensor, label: Tensor):
  """Run a single optimisation step of ``resnet_model`` on one batch.

  The batch is moved to ``device``, the loss from ``criterion`` is
  back-propagated, the gradient norm is clipped to 1.0, and ``optimizer``
  applies the update.

  Returns:
      ``(loss, output)``: the batch loss and the model output for the batch.
  """
  resnet_model.train()

  batch, target = input.to(device), label.to(device)

  logits = resnet_model(batch)
  batch_loss = criterion(logits, target)

  # Clear gradients left from the previous step before accumulating new ones.
  optimizer.zero_grad()
  batch_loss.backward()

  # Clip the global gradient norm before the parameter update.
  torch.nn.utils.clip_grad_norm_(resnet_model.parameters(), max_norm=1.0)
  optimizer.step()

  return batch_loss, logits

In [6]:
def test_step(input: Tensor, label: Tensor):
    """Evaluate ``resnet_model`` on one batch with gradient tracking disabled.

    Returns:
        ``(loss, output)``: the batch loss from ``criterion`` and the model
        output for the batch.
    """
    with torch.no_grad():
        resnet_model.eval()

        batch, target = input.to(device), label.to(device)

        logits = resnet_model(batch)
        batch_loss = criterion(logits, target)

    return batch_loss, logits

In [7]:
def train_epoch(dataloader):
    """Train for one pass over ``dataloader`` and print progress.

    The loss of every 100th batch (starting with the first) is printed, and
    the accuracy over all samples seen is printed once the pass is finished.
    If the dataloader yields no batches, a notice is printed instead of the
    accuracy.

    Args:
        dataloader: sized iterable of ``(input, label)`` batches. ``label`` has
            one row per sample and one column per class (e.g. one-hot); the
            arg-max of each row is taken as the target class.
    """
    correct = 0
    samples = 0

    for i, (input, label) in enumerate(dataloader):
        loss, output = train_step(input, label)

        predicted = output.argmax(dim=1)
        # Labels come off the loader on the CPU; move the class indices next
        # to the predictions before comparing them.
        target = label.argmax(dim=1).to(predicted.device)

        samples += label.size(0)
        correct += (predicted == target).sum().item()

        if i % 100 == 0:
            print(f"Loss [{i}/{len(dataloader)}]: {loss.item()}")

    if samples == 0:
        # Nothing was processed; avoid dividing by zero below.
        print("Train Accuracy: n/a (dataloader yielded no samples)")
        return

    print(f"Train Accuracy: {correct / samples * 100.00}%")

In [8]:
def test_epoch(dataloader):
    correct = 0
    samples = 0

    for i, (input, label) in enumerate(dataloader):
        loss, output = test_step(input, label)

        _, predicted = torch.max(output.data, 1)

        samples += label.size(0)
        correct += (predicted == torch.argmax(label, dim=1).to(device)).sum().item()

        if i % 100 == 0:
            print(f"Loss [{i}/{len(dataloader)}]: {loss.item()}")

    print(f"Test Accuracy: {correct / samples * 100.00}%")

In [9]:
for epoch in range(NEPOCH):
    # Header line so the loss/accuracy output that follows can be attributed
    # to a specific epoch when reading the log.
    print(f"Epoch {epoch + 1}/{NEPOCH}")

    train_epoch(train_dataloader)

    test_epoch(eval_dataloader)

# Persist the model's state dict once all epochs have finished.
torch.save(resnet_model.state_dict(), 'resnet_model_parameters.pth')

Loss [0/782]: 0.8589203357696533
Loss [100/782]: 0.6664603352546692
Loss [200/782]: 0.6512901782989502
Loss [300/782]: 0.6230288743972778
Loss [400/782]: 0.5791623592376709
Loss [500/782]: 0.5656065940856934
Loss [600/782]: 0.5728536248207092
Loss [700/782]: 0.5549020171165466
Train Accuracy: 68.511%
Loss [0/157]: 0.560021698474884
Loss [100/157]: 0.5595813989639282
Test Accuracy: 76.385%
Loss [0/782]: 0.510342001914978
Loss [100/782]: 0.5768934488296509
Loss [200/782]: 0.5290824174880981
Loss [300/782]: 0.5111572742462158
Loss [400/782]: 0.5129848718643188
Loss [500/782]: 0.4073593020439148
Loss [600/782]: 0.3942256569862366
Loss [700/782]: 0.41779255867004395
Train Accuracy: 78.36800000000001%
Loss [0/157]: 0.4261056184768677
Loss [100/157]: 0.47119981050491333
Test Accuracy: 79.945%
Loss [0/782]: 0.4443354904651642
Loss [100/782]: 0.49199047684669495
Loss [200/782]: 0.4270360469818115
Loss [300/782]: 0.4170322120189667
Loss [400/782]: 0.3947206437587738
Loss [500/782]: 0.30724564194