# Benchmark Vision Models on CIFake Dataset

This notebook benchmarks convolutional and transformer-based networks defined in `vision_models.py` on the [CIFake dataset](https://www.kaggle.com/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images).


In [None]:

!pip install -q kaggle torchvision


In [None]:

import os
from pathlib import Path
import numpy as np
import torch
from torchvision import datasets, transforms
from vision_models import MODEL_REGISTRY
from sklearn.metrics import accuracy_score
from pipelines_torch.benchmark import BenchmarkRunner

# Local directory (relative to the working directory) that the Kaggle archive
# is downloaded/unzipped into and from which the `train` split is read.
DATA_DIR = Path('cifake_data')


In [None]:

# Requires Kaggle API credentials available as environment variables
# KAGGLE_USERNAME and KAGGLE_KEY. See https://www.kaggle.com/docs/api.
if not DATA_DIR.exists():
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    !kaggle datasets download -d birdy654/cifake-real-and-ai-generated-synthetic-images -p $DATA_DIR --unzip


In [None]:

# Preprocessing applied to every image: resize to 32x32, then convert to a
# tensor.
transform = transforms.Compose(
    [transforms.Resize((32, 32)), transforms.ToTensor()]
)

# One sub-folder per class under DATA_DIR/train.
train_ds = datasets.ImageFolder(DATA_DIR / 'train', transform=transform)

# Materialise the whole training split in memory as NumPy arrays: X holds the
# stacked image tensors, y the matching class indices in dataset order.
X = torch.stack(
    [train_ds[idx][0] for idx in range(len(train_ds))]
).numpy()
y = np.array(train_ds.targets)

# Class labels as discovered by ImageFolder, and how many there are.
class_names = train_ds.classes
num_classes = len(class_names)


In [None]:

# Seed the global NumPy and PyTorch generators so that weight initialisation
# and any shuffling that draws on them is repeatable across
# Restart Kernel -> Run All.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Keys into MODEL_REGISTRY for the architectures to compare.
model_names = ['simple_cnn', 'dropout_cnn', 'residual_cnn', 'resnet50', 'vision_transformer', 'clip_classifier']

# One config per model; every model receives the number of classes found in
# the training folder.
model_configs = [
    {'name': name, 'class': MODEL_REGISTRY[name], 'params': {'num_classes': num_classes}}
    for name in model_names
]

# Single short run per model: no augmentation, accuracy only, 1 epoch on CPU,
# no k-fold.
runner = BenchmarkRunner(
    model_configs=model_configs,
    augmentations=[None],
    metrics=[accuracy_score],
    task_type='classification',
    device='cpu',
    epochs=1,
    batch_size=64,
    use_kfold=False,
)

results_df = runner.run(X, y)
results_df
