<a href="https://colab.research.google.com/github/kamaleshpantra/Synthetic-Image-Detector/blob/main/Synthetic_Image_detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports

In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import GradScaler, autocast

from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset, random_split

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


Reproducibility

In [4]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(42)

Project Paths

In [5]:
PROJECT_ROOT = "/content/drive/MyDrive/synthetic-detector"
DATA_PATH = os.path.join(PROJECT_ROOT, "data")

TRAIN_PATH = os.path.join(DATA_PATH, "train")
TEST_PATH  = os.path.join(DATA_PATH, "test")

MODEL_DIR = os.path.join(PROJECT_ROOT, "outputs/models")
os.makedirs(MODEL_DIR, exist_ok=True)

print("Train path:", TRAIN_PATH)
print("Test path:", TEST_PATH)

Train path: /content/drive/MyDrive/synthetic-detector/data/train
Test path: /content/drive/MyDrive/synthetic-detector/data/test


Load CIFAKE Dataset

In [6]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

train_full_dataset = ImageFolder(root=TRAIN_PATH, transform=transform)
test_full_dataset  = ImageFolder(root=TEST_PATH, transform=transform)

print("Classes:", train_full_dataset.classes)
print("Train size:", len(train_full_dataset))
print("Test size:", len(test_full_dataset))

Classes: ['FAKE', 'REAL']
Train size: 30002
Test size: 10000


Balanced Subset

In [46]:
targets = train_full_dataset.targets
indices = []

samples_per_class = 2000
class_counts = {0: 0, 1: 0}

for idx, label in enumerate(targets):
    if class_counts[label] < samples_per_class:
        indices.append(idx)
        class_counts[label] += 1

    if all(v == samples_per_class for v in class_counts.values()):
        break

train_subset = Subset(train_full_dataset, indices)

print("Balanced train subset:", len(train_subset))
print("Class distribution:", class_counts)

Balanced train subset: 4000
Class distribution: {0: 2000, 1: 2000}


In [47]:
targets = test_full_dataset.targets
indices = []

samples_per_class = 500
class_counts = {0: 0, 1: 0}

for idx, label in enumerate(targets):
    if class_counts[label] < samples_per_class:
        indices.append(idx)
        class_counts[label] += 1

    if all(v == samples_per_class for v in class_counts.values()):
        break

test_subset = Subset(test_full_dataset, indices)

print("Balanced test subset:", len(test_subset))
print("Class distribution:", class_counts)

Balanced test subset: 1000
Class distribution: {0: 500, 1: 500}


Train / Validation Split

In [48]:
train_size = int(0.85 * len(train_subset))
val_size = len(train_subset) - train_size

train_dataset, val_dataset = random_split(
    train_subset,
    [train_size, val_size]
)

print("Final Train:", len(train_dataset))
print("Validation:", len(val_dataset))

Final Train: 3400
Validation: 600


Data Loader

In [49]:
batch_size = 64

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
test_loader  = DataLoader(test_full_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

print("DataLoaders ready.")

DataLoaders ready.


Build ResNet18

In [50]:
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze early layers
for param in list(model.parameters())[:-10]:
    param.requires_grad = False

num_features = model.fc.in_features

model.fc = nn.Sequential(
    nn.Linear(num_features, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, 2)
)

model = model.to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
scaler = GradScaler()

print("Model ready.")

Model ready.


  scaler = GradScaler()


Training & Validation

In [51]:
def validate(model, loader):
    model.eval()
    correct = 0
    total = 0
    val_loss = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            preds = torch.argmax(outputs, dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    print(f"Validation Loss: {val_loss/len(loader):.4f}")
    print(f"Validation Accuracy: {correct/total:.4f}")


def train_model(model, train_loader, val_loader, epochs=5):
    for epoch in range(epochs):
        model.train()
        train_loss = 0

        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()

        print(f"\nEpoch [{epoch+1}/{epochs}] Train Loss: {train_loss/len(train_loader):.4f}")
        validate(model, val_loader)

        # Safety checkpoint
        if (epoch+1) % 2 == 0:
            torch.save(model.state_dict(), os.path.join(MODEL_DIR, "temp_checkpoint.pth"))

Train

In [53]:
train_model(model, train_loader, val_loader, epochs=3)

  with autocast():
100%|██████████| 54/54 [31:56<00:00, 35.50s/it]



Epoch [1/3] Train Loss: 0.4517
Validation Loss: 0.4141
Validation Accuracy: 0.8667


100%|██████████| 54/54 [00:19<00:00,  2.74it/s]



Epoch [2/3] Train Loss: 0.3692
Validation Loss: 0.3943
Validation Accuracy: 0.8933


100%|██████████| 54/54 [00:19<00:00,  2.81it/s]



Epoch [3/3] Train Loss: 0.3432
Validation Loss: 0.3893
Validation Accuracy: 0.8883


In [16]:
import torch
print("CUDA available:", torch.cuda.is_available())
print("Current device:", torch.cuda.current_device())
print("Device name:", torch.cuda.get_device_name(0))

CUDA available: True
Current device: 0
Device name: Tesla T4


Evaluation

In [None]:
def evaluate_model(model, loader):
    model.eval()

    all_preds = []
    all_labels = []
    fake_confidences = []
    real_confidences = []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)

            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())

            for i in range(len(labels)):
                if labels[i] == 1:
                    fake_confidences.append(probs[i,1].item())
                else:
                    real_confidences.append(probs[i,0].item())

    acc = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    print("Accuracy:", acc)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Average confidence (Real):", np.mean(real_confidences))
    print("Average confidence (Fake):", np.mean(fake_confidences))

    return all_labels, all_preds, fake_confidences, real_confidences

In [None]:
labels, preds, fake_conf, real_conf = evaluate_model(model, test_loader)

Confusion Matrix

In [None]:
cm = confusion_matrix(labels, preds)

plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=["Real", "Fake"],
            yticklabels=["Real", "Fake"])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

Confidence Distribution

In [None]:
plt.hist(fake_conf, bins=30, alpha=0.7, label="Fake Confidence")
plt.hist(real_conf, bins=30, alpha=0.7, label="Real Confidence")
plt.legend()
plt.title("Confidence Distribution")
plt.xlabel("Confidence")
plt.ylabel("Frequency")
plt.show()

Save Final Model

In [None]:
MODEL_PATH = os.path.join(MODEL_DIR, "resnet18_synthetic_detector.pth")
torch.save(model.state_dict(), MODEL_PATH)

print(f"Model saved at {MODEL_PATH}")