# Synthetic Image Detection
---

### Mk-0.2  :- Basic CNN (Multi-generator training)
* Used CNN with...


---

### Data-preprocessing (GenImage -> BigGAN+VQDM dataset) :-

*   Resizing all the images to 224x224 (std. for CNN based models). CNN expects fixed input shape. Avoids inconsistent pixel distributions.
*   Resizing was done locally using python + cmd.


### Requirments

In [None]:
%pip install -r requirements.txt


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import os
import time
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
import torch # used torch instead of PyTorch for felxibility and efficiency
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler, autocast
from PIL import Image


In [2]:
# ensures Reproducibility (Phirse same results on every run)
import random

torch.manual_seed(42)
np.random.seed(42)
random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)


### Data loading

In [6]:
# directories
train_dir = rf"D:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\Datasets\GenImage\BigGAN+VQDM\train"
test_dir = rf"D:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\Datasets\GenImage\BigGAN+VQDM\val"

# Check contents of the folders
print("Train Directory:", os.listdir(train_dir))
print("Test Directory:", os.listdir(test_dir))


Train Directory: ['ai', 'nature']
Test Directory: ['ai', 'nature']


In [7]:
# Data preprocesors and loaders

# images are already resized
train_transform = transforms.Compose([
    # transforms.RandomCrop(224, padding=8),                                      # it keeps aspect ratio but adds slight random offset
    transforms.RandomHorizontalFlip(p=0.5),                                     # horizontal flip with 50% probability
    # transforms.RandomRotation(15),                                              # rotate ±15 degrees
    # transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15),
    # transforms.GaussianBlur(3, sigma=(0.1, 1.0)),                               # slight blur,  helps fight generator-specific noise 
    transforms.ToTensor(),                                                      # convert to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])          # normalize to ImageNet stats
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load datasets (Safe -> safely skips broken/missing files with datasets.ImageFolder)
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=val_transform)

# Dataloaders
batch_size = 32     # 32 images per batch
num_workers = 1     # safe for my 12 cores CPU (preventing overload)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,    # number of parallel CPU threads preloading data while GPU trains
    pin_memory=True,           # pin_memory=True-> speeds up CPU→GPU transfer.
    persistent_workers=False,    # persistent_workers=True -> keeps workers alive between epochs (faster)
    prefetch_factor=1           # Loads next batches while GPU is training (works only if num_workers > 0)
)

val_loader = DataLoader(test_dataset, 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=num_workers, 
    pin_memory=True, 
    persistent_workers=False, 
    prefetch_factor=1
)

print(f"Train samples: {len(train_dataset)} | Val samples: {len(test_dataset)}")
print(f"Classes: {train_dataset.classes}")


Train samples: 647996 | Val samples: 24000
Classes: ['ai', 'nature']


### Checking loaders & resource (GPU)

In [3]:
import torch
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))


True
NVIDIA GeForce GTX 1660 Ti


In [16]:
# Check for corrupted images in dataset folders
FOLDERS = [
    rf"C:\Users\sarth\Dataset\train\ai",
    rf"C:\Users\sarth\Dataset\train\nature",
    rf"C:\Users\sarth\Dataset\val\ai",
    rf"C:\Users\sarth\Dataset\val\nature"
]

bad_files = []

for folder in FOLDERS:
    print(f"Checking folder: {folder}")
    files = [
        f for f in os.listdir(folder)
        if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))
    ]
    for f in tqdm(files, desc=os.path.basename(folder)):
        path = os.path.join(folder, f)
        try:
            with Image.open(path) as img:
                img.verify()  # Checks for corruption
        except Exception as e:
            bad_files.append((path, str(e)))

print(f"\nScan complete. {len(bad_files)} bad images found.")
if bad_files:
    print("Examples:")
    for bf in bad_files[:10]:
        print("  ", bf[0])


Checking folder: C:\Users\sarth\Dataset\train\ai


ai:   4%|▍         | 13869/323996 [00:01<00:42, 7373.63it/s]


KeyboardInterrupt: 

### Defining basic CNN

In [None]:
# SImpleCNN upgraded with explicit Grad-CAM layer and regularization
class SimpleCNN_v2(nn.Module):
    def __init__(self):
        super(SimpleCNN_v2, self).__init__()

        self.conv_block1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32), # stabilizes gradients
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Explicit Grad-CAM layer
        self.last_conv = nn.Conv2d(64, 256, 3, padding=1)
        self.bn_last   = nn.BatchNorm2d(256)
        self.relu_last = nn.ReLU()
        self.pool_last = nn.MaxPool2d(2)

        # Global average pooling and classifier
        self.gap = nn.AdaptiveAvgPool2d(1) # Reduces Overfitting and enables Grad-CAM (Regularization)

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.ReLU(),           # Adds non-linearity before FC layers
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.4),     # Regularization (Reduces overfitting)
            nn.Linear(64, 1)
        )

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.last_conv(x)
        x = self.bn_last(x)
        x = self.relu_last(x)
        x = self.pool_last(x)
        x = self.gap(x)
        x = self.fc_layers(x)
        return x


### Model Training

In [8]:
# Setup for training with checkpointing
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN_v2().to(device)

criterion = nn.BCEWithLogitsLoss()      # More stable than BCELoss with raw outputs (stable binary classification)
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)  # L2 Regularization

# Learning Rate Scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.7) # reduces LR only if validation accuracy plateaus
scaler = torch.amp.GradScaler("cuda") # for safe scaling during backprop

# Maintaing history for analysis
history = {"train_loss": [], "val_acc": [], "val_loss": [], "lr": []}

# Directory for saving checkpoints
checkpoint_dir = "checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_path = os.path.join(checkpoint_dir, "last_checkpoint.pth")

start_epoch = 0
best_acc = 0.0

# Load checkpoint if resuming
if os.path.exists(checkpoint_path):
    print("Resuming from last checkpoint...")
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint["model_state"])
    optimizer.load_state_dict(checkpoint["optimizer_state"])
    scheduler.load_state_dict(checkpoint["scheduler_state"])
    scaler.load_state_dict(checkpoint["scaler_state"])
    start_epoch = checkpoint["epoch"] + 1
    best_acc = checkpoint.get("best_acc", 0.0)
    print(f"Resumed from epoch {start_epoch} with best accuracy {best_acc:.4f}")

patience = 3  # for early stopping: stop after 3 epochs with no improvement
patience_counter = 0
epochs = 10

# Training loop
for epoch in range(start_epoch, epochs):
    model.train()
    running_loss = 0.0
    print(f"Epoch {epoch+1}/{epochs}: ")

    with tqdm(train_loader, desc="Training", unit="batch") as tepoch:
        for images, labels in tepoch:
            images, labels = images.to(device), labels.float().unsqueeze(1).to(device)

            optimizer.zero_grad()               # clear gradients for next batch
            with torch.amp.autocast("cuda"):    # Mixed precision forward
                outputs = model(images)
                loss = criterion(outputs, labels)

            # backward pass
            scaler.scale(loss).backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)    # Gradients stable is not needed as we stabalised it with BatchNorm and Mixed Precision
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            tepoch.set_postfix(loss=loss.item())


    # Validation
    model.eval()
    correct, total, val_loss = 0, 0, 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss_val = criterion(outputs, labels.unsqueeze(1).float())
            val_loss += loss_val.item()
            preds = (torch.sigmoid(outputs) > 0.5).float()
            correct += (preds == labels.unsqueeze(1)).sum().item()
            total += labels.size(0)

    val_acc = correct / total
    val_loss /= len(val_loader)
    avg_loss = running_loss / len(train_loader)

    scheduler.step(val_acc)

    if (epoch + 1) % 2 == 0 and device.type == "cuda":
        print("Clearing unused CUDA memory to avoid fragmentation...")
        torch.cuda.empty_cache()        # clear unused memory every 2 epochs to avoid fragmentation

    current_lr = optimizer.param_groups[0]['lr'] # get current learning rate

    print(f"Epoch {epoch+1} | Train Loss: {avg_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | LR: {current_lr:.6f}")

    history["train_loss"].append(avg_loss)
    history["val_acc"].append(val_acc)
    history["val_loss"].append(val_loss)
    history["lr"].append(current_lr)

    # Proactive early stopping based on validation accuracy
    if val_acc > best_acc:
        best_acc = val_acc
        patience_counter = 0  # reset counter
        best_checkpoint = {
            "epoch": epoch,
            "model_state": model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "scheduler_state": scheduler.state_dict(),
            "scaler_state": scaler.state_dict(),
            "best_acc": best_acc
        }
        torch.save(best_checkpoint, os.path.join(checkpoint_dir, "best_checkpoint.pth"))
        print(f"Best model updated! New Val Acc: {best_acc:.4f}")
    else:
        patience_counter += 1
        print(f"No improvement for {patience_counter}/{patience} epochs.")

    # Initiate early stop
    if patience_counter >= patience:
        print("\nEarly stopping initiated: no improvement for 3 epochs.")
        torch.save(model.state_dict(), os.path.join(checkpoint_dir, "final_best_model.pth"))
        print("Final best model saved before stopping.")
        break


    # Save checkpoints
    checkpoint = {
        "epoch": epoch,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict(),
        "scaler_state": scaler.state_dict(),
        "best_acc": best_acc
    }
    torch.save(checkpoint, checkpoint_path)
    print(f"Checkpoint saved at epoch {epoch+1}")

    print("Initiating next epoch in 20 seconds...")
    time.sleep(20)  # CPU cooldown before next epoch



print("\nTraining complete 🔥")


  checkpoint = torch.load(checkpoint_path, map_location=device)


Resuming from last checkpoint...
Resumed from epoch 1 with best accuracy 0.8860
Epoch 2/10: 


Training:  16%|█▌        | 3237/20250 [34:12<2:59:46,  1.58batch/s, loss=0.286]  


KeyboardInterrupt: 

In [None]:
torch.cuda.empty_cache() # Clears VRAM memory cache


### Analysis

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

all_preds, all_labels = [], []

model.eval()  # Make sure model is in evaluation mode
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        preds = (outputs > 0.5).float()
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Confusion matrix
cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(cm, display_labels=["Real", "Fake"])
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Final Model")
plt.show()

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Final validation accuracy
val_acc = (all_preds == all_labels).mean()
print(f"Final Validation Accuracy: {val_acc:.4f}")


In [None]:
import torch, numpy as np
from sklearn.metrics import classification_report, roc_auc_score

model.eval()
all_probs, all_preds, all_labels = [], [], []
val_loss = 0.0
criterion = nn.BCEWithLogitsLoss()

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.float().unsqueeze(1).to(device)
        outputs = model(images)                    # logits
        loss = criterion(outputs, labels)
        val_loss += loss.item() * images.size(0)
        probs = torch.sigmoid(outputs).cpu().numpy().ravel()
        preds = (probs > 0.5).astype(int)
        all_probs.extend(probs.tolist())
        all_preds.extend(preds.tolist())
        all_labels.extend(labels.cpu().numpy().ravel().astype(int).tolist())

val_loss = val_loss / len(val_dataset)
print(f"Val loss: {val_loss:.4f}, Val acc: {np.mean(np.array(all_preds)==np.array(all_labels)):.4f}")

# Classification report + AUC
print(classification_report(all_labels, all_preds, target_names=train_dataset.classes))
try:
    auc = roc_auc_score(all_labels, all_probs)
    print("ROC AUC:", auc)
except Exception as e:
    print("ROC AUC could not be computed:", e)


### Testing on VQDM val dataset

In [None]:
ADM_test_dir = rf"E:\Datasets\ADM\imagenet_ai_0508_adm\val"
ADM_val_dataset = datasets.ImageFolder(root=ADM_test_dir, transform=val_transform)
ADM_val_loader = DataLoader(ADM_val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN_v2().to(device)
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in ADM_val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        preds = (torch.sigmoid(outputs) > 0.5).float()
        correct += (preds == labels.unsqueeze(1)).sum().item()
        total += labels.size(0)

val_acc = correct / total
scheduler.step(val_acc)


RuntimeError: Caught RuntimeError in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "d:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\cuda-venv\Lib\site-packages\torch\utils\data\_utils\worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "d:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\cuda-venv\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 55, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "d:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\cuda-venv\Lib\site-packages\torch\utils\data\_utils\collate.py", line 398, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\cuda-venv\Lib\site-packages\torch\utils\data\_utils\collate.py", line 211, in collate
    return [
           ^
  File "d:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\cuda-venv\Lib\site-packages\torch\utils\data\_utils\collate.py", line 212, in <listcomp>
    collate(samples, collate_fn_map=collate_fn_map)
  File "d:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\cuda-venv\Lib\site-packages\torch\utils\data\_utils\collate.py", line 155, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\sarthak\RCOEM\3rd\Projects\ML\Synthetic Media detection\cuda-venv\Lib\site-packages\torch\utils\data\_utils\collate.py", line 272, in collate_tensor_fn
    return torch.stack(batch, 0, out=out)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: stack expects each tensor to be equal size, but got [3, 256, 256] at entry 0 and [3, 333, 500] at entry 16


In [None]:
print(val_acc)


In [None]:
torch.cuda.empty_cache()


In [None]:
all_preds_VQDM, all_labels_VQDM = [], []

model.eval()  # Make sure model is in evaluation mode
with torch.no_grad():
    for images, labels in ADM_val_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        preds = (outputs > 0.5).float()
        all_preds_VQDM.extend(preds.cpu().numpy())
        all_labels_VQDM.extend(labels.cpu().numpy())

# Confusion matrix
cm = confusion_matrix(all_labels_VQDM, all_preds_VQDM)
disp = ConfusionMatrixDisplay(cm, display_labels=["Real", "Fake"])
disp.plot(cmap=plt.cm.Reds)
plt.title("Confusion Matrix - VQDM val dataset")
plt.show()

all_preds_VQDM = np.array(all_preds_VQDM)
all_labels_VQDM = np.array(all_labels_VQDM)

# Final validation accuracy
val_acc = (all_preds_VQDM == all_labels_VQDM).mean()
print(f"Final VQDM Validation Accuracy: {val_acc:.4f}")


### Evaluation

In [None]:
import torch
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

def evaluate_images(model, image_paths, transform, device):
    """
    Display predictions for multiple input images with confidence scores.

    Args:
        model: Trained PyTorch model.
        image_paths: List of image file paths.
        transform: Transform pipeline (e.g., val_transform).
        device: 'cuda' or 'cpu'.
    """

    model.eval()
    plt.figure(figsize=(14, 6))

    for i, img_path in enumerate(image_paths):
        try:
            # Load and preprocess image
            img = Image.open(img_path).convert("RGB")
            img_t = transform(img).unsqueeze(0).to(device)

            # Predict
            with torch.no_grad():
                output = model(img_t)
                prob = torch.sigmoid(output).item()

            # Classification decision
            label = "Fake (AI-generated)" if prob > 0.5 else "Real (original)"
            conf = prob if prob > 0.5 else 1 - prob

            # Plotting
            plt.subplot(2, (len(image_paths) + 1) // 2, i + 1)
            plt.imshow(np.array(img))
            plt.axis("off")
            plt.title(f"{label}\nConf: {conf:.3f}", fontsize=10)

        except Exception as e:
            print(f"Error processing {img_path}: {e}")

    plt.tight_layout()
    plt.show()



In [None]:
image_paths = []

evaluate_images(model, image_paths, val_transform, device)
