In [None]:
# Root folder of the CIFAKE dataset. The trailing slash matters: the split
# paths are built from this value by plain string concatenation.
dataset_dir = "/kaggle/input/cifake-real-and-ai-generated-synthetic-images/"
print(f"Loading dataset from: {dataset_dir}")

In [None]:
import os
import random
from PIL import Image
from pathlib import Path
from timeit import default_timer as Timer
import matplotlib.pyplot as plt
import torch
from torch import nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import Subset, DataLoader, ConcatDataset, Dataset
from torchinfo import summary
import onnx
from tqdm.auto import tqdm
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay


# Record the library versions used for this run (one per line).
print(torch.__version__, torchvision.__version__, sep="\n")

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
# Split folders live directly under the dataset root; plain concatenation is
# used, so `dataset_dir` is expected to end with a path separator.
train_path = f"{dataset_dir}train"
test_path = f"{dataset_dir}test"
(train_path, test_path)

In [None]:
# Per-channel statistics shared by both pipelines (these values match the
# commonly used ImageNet mean / std).
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: half of the images get a light Gaussian blur as
# augmentation, then everything is resized to 32x32, converted to a tensor and
# standardised.
_light_blur = transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 0.3))
train_transform = transforms.Compose(
    [
        transforms.RandomApply([_light_blur], p=0.5),
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_norm_mean, std=_norm_std),
    ]
)

# Evaluation pipeline: identical preprocessing, but without the random blur.
test_transform = transforms.Compose(
    [
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_norm_mean, std=_norm_std),
    ]
)

In [None]:
# ImageFolder derives each image's label from the name of its parent folder.
# No target transform is applied, so labels stay as integer class indices.
train_data = datasets.ImageFolder(train_path, transform=train_transform)
test_data = datasets.ImageFolder(test_path, transform=test_transform)
(len(train_data), len(test_data))

In [None]:
# Take the label order from the dataset itself so the names can never drift
# from the integer targets the model is trained on.
class_names = train_data.classes

# randrange excludes the upper bound, so the index is always valid regardless
# of how many images the training split contains.
random_idx = random.randrange(len(train_data))

# Fetch the sample once: every access re-runs the (random) training transform,
# so repeated indexing could show one augmentation and describe another.
sample_image, sample_label = train_data[random_idx]
sample_image = sample_image.permute(1, 2, 0)  # CHW -> HWC, as matplotlib expects

# NOTE: the tensor is normalised, so matplotlib clips values outside [0, 1] and
# the colours look darker / more saturated than the source image.
plt.imshow(sample_image)
plt.title(f"Image class: {class_names[sample_label]} & Image shape : {sample_image.shape}")
plt.axis(False);

In [None]:
BATCH_SIZE = 32

# Only the training loader shuffles; the test loader keeps the dataset order.
train_dataloaders = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloaders = DataLoader(dataset=test_data, batch_size=BATCH_SIZE)
(train_dataloaders, test_dataloaders)

In [None]:
rows, columns = 4, 8

# Pull only the first batch. Looping over the whole loader (without a break)
# would decode every training image just to draw a single grid.
img, label = next(iter(train_dataloaders))

fig = plt.figure(figsize=(8, 8))
fig.suptitle("Batch Images", fontsize=32)

# Never index past the end of the batch if it holds fewer images than the grid.
for i in range(min(rows * columns, len(img))):
    ax = fig.add_subplot(rows, columns, i + 1)
    ax.imshow(img[i].permute(1, 2, 0))  # CHW -> HWC for matplotlib
    ax.set_title(class_names[int(label[i])], fontsize=12)
    ax.axis(False)

In [None]:
class CNNBlock(nn.Module):
    """Convolutional feature extractor: three Conv2d -> BatchNorm2d -> ReLU stages.

    The convolutions use 3x3, 3x3 and 5x5 kernels with no padding, so each
    stage shrinks the spatial size (by 2, 2 and 4 pixels respectively).

    Args:
        input_shape: number of channels of the input image.
        hidden_units: number of channels produced by the first two stages.
        output_shape: number of channels of the returned feature map.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.input_shape = input_shape
        self.Layer = torch.nn.Sequential(
            nn.Conv2d(input_shape, hidden_units, kernel_size = (3,3)),
            nn.BatchNorm2d(hidden_units),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, kernel_size = (3,3)),
            nn.BatchNorm2d(hidden_units),
            nn.ReLU(),
            nn.Conv2d(hidden_units, output_shape, kernel_size = (5,5)),
            nn.BatchNorm2d(output_shape),
            nn.ReLU()
        )

    def get_output_shape(self, input_height, input_width):
        """Return the (height, width) of the feature map for a given input size.

        The size is measured by pushing a dummy tensor through the layers. The
        probe runs in eval mode and without gradients so that it neither
        updates the BatchNorm running statistics nor builds an autograd graph;
        the previous train/eval mode is restored afterwards.

        Returns:
            A ``torch.Size`` holding the output height and width.
        """
        reference = next(self.parameters())
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                # Create the probe next to the weights so this also works after .to(device).
                probe = torch.zeros(1, self.input_shape, input_height, input_width,
                                    device=reference.device, dtype=reference.dtype)
                return self.Layer(probe).shape[2:]
        finally:
            self.train(was_training)

    def forward(self, x):
        """Apply the three convolutional stages to a batch of images."""
        return self.Layer(x)

class PatchEmbedding(nn.Module):
    """Split a feature map into non-overlapping patches and embed each one.

    A convolution whose kernel size and stride both equal ``patch_size``
    produces one ``embedding_dim``-sized vector per patch. Rows / columns that
    do not fill a complete patch are dropped by the convolution.

    Args:
        in_channels: number of channels of the incoming feature map.
        patch_size: side length of each square patch.
        embedding_dim: size of the vector produced for every patch.
    """

    def __init__(self,
                 in_channels: int,
                 patch_size: int,
                 embedding_dim: int) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.patch_size = patch_size
        self.embedding_dim = embedding_dim
        self.patcher = nn.Conv2d(in_channels= in_channels,
                                 out_channels= embedding_dim,
                                 stride= patch_size,
                                 kernel_size= patch_size,
                                 padding= 0)
        # Merge the two spatial axes into a single "patch index" axis.
        self.flatten = nn.Flatten(start_dim= 2,
                                  end_dim= 3)

    def forward(self, x):
        """Map (batch, channels, H, W) to (batch, num_patches, embedding_dim)."""
        x_patched = self.patcher(x)
        x_flattened = self.flatten(x_patched)
        # Put the patch axis before the embedding axis (sequence-first layout per sample).
        return x_flattened.permute(0,2,1)
    
class MultiHeadSelfAttentionBlock(nn.Module):
    """Layer-normalise a token sequence, then run multi-head self-attention on it.

    Args:
        embedding_dim: size of every token vector.
        num_heads: number of attention heads.
        att_dropout: dropout probability applied inside the attention module.
    """

    def __init__(self,
                 embedding_dim : int,
                 num_heads : int,
                 att_dropout : float):
        super().__init__()
        self.LayerNorm = nn.LayerNorm(embedding_dim)
        self.MultiHeadAttention = nn.MultiheadAttention(embedding_dim,
                                                        num_heads,
                                                        dropout=att_dropout,
                                                        batch_first=True)

    def forward(self, x):
        """Return the attention output; the residual connection is left to the caller."""
        normed = self.LayerNorm(x)
        # Query, key and value are all the same normalised sequence. With
        # need_weights=False only the first element of the result is used.
        return self.MultiHeadAttention(normed, normed, normed, need_weights=False)[0]
    
class MultiLayerPreceptronBlock(nn.Module):
    """Layer-normalise a token sequence, then apply a two-layer feed-forward net.

    The network expands each token to ``mlp_size`` features, applies GELU and
    dropout, projects back to ``embedding_dim`` and applies dropout again.

    Args:
        embedding_dim: size of every incoming and outgoing token vector.
        mlp_size: width of the hidden layer.
        dropout: dropout probability used after both linear layers.
    """

    def __init__(self,
                 embedding_dim: int,
                 mlp_size: int,
                 dropout: float):
        super().__init__()
        self.LayerNorm = nn.LayerNorm(embedding_dim)

        expand = nn.Linear(embedding_dim, mlp_size)
        project = nn.Linear(mlp_size, embedding_dim)
        self.MLP = nn.Sequential(expand,
                                 nn.GELU(),
                                 nn.Dropout(dropout),
                                 project,
                                 nn.Dropout(dropout))

    def forward(self, x):
        """Return the feed-forward output; the residual connection is left to the caller."""
        return self.MLP(self.LayerNorm(x))
    
class TransformerEncoder(nn.Module):
    """One encoder layer built from a self-attention block and an MLP block.

    Every sub-block is wrapped in a residual connection. The layer applies
    attention, then the MLP, then the *same* attention block a second time.

    Args:
        embedding_dim: size of every token vector.
        num_heads: number of attention heads.
        mlp_size: hidden width of the MLP block.
        attn_dropout: dropout probability inside the attention block.
        mlp_dropout: dropout probability inside the MLP block.
    """

    def __init__(self,
                 embedding_dim: int,
                 num_heads: int,
                 mlp_size: int,
                 attn_dropout: float,
                 mlp_dropout: float):
        super().__init__()
        self.MSA_Block = MultiHeadSelfAttentionBlock(embedding_dim=embedding_dim,
                                                     num_heads=num_heads,
                                                     att_dropout=attn_dropout)
        self.MLP_Block = MultiLayerPreceptronBlock(embedding_dim=embedding_dim,
                                                   mlp_size=mlp_size,
                                                   dropout=mlp_dropout)

    def forward(self, x):
        """Run attention -> MLP -> attention, each with a skip connection."""
        after_attention = x + self.MSA_Block(x)
        after_mlp = after_attention + self.MLP_Block(after_attention)
        # NOTE(review): the attention block (same weights) is applied a second
        # time here. A conventional ViT encoder layer stops after the MLP, so
        # this may be a copy-paste slip - confirm it is intentional.
        return after_mlp + self.MSA_Block(after_mlp)
    
class ViTBlock(nn.Module):
    """Vision-Transformer body: patch embedding, class token, positions, encoder stack.

    The block returns the full encoded token sequence; it contains no
    classification head.

    Args:
        image_size: side length of the (square) input feature map.
        in_channels: number of channels of the input feature map.
        patch_size: side length of each square patch.
        num_transformer_layers: how many ``TransformerEncoder`` layers to stack.
        embedding_dim: size of every token vector.
        mlp_size: hidden width of the MLP in each encoder layer.
        num_heads: number of attention heads in each encoder layer.
        attn_dropout: dropout probability inside attention.
        mlp_dropout: dropout probability inside the MLPs.
        embedding_dropout: dropout probability applied to the embedded tokens.
        num_classes: accepted for interface compatibility; not used by this block.
    """

    def __init__(self,
                 image_size: int,
                 in_channels: int,
                 patch_size: int,
                 num_transformer_layers: int,
                 embedding_dim: int,
                 mlp_size: int,
                 num_heads: int,
                 attn_dropout: float,
                 mlp_dropout: float,
                 embedding_dropout: float,
                 num_classes: int = 2):
        super().__init__()

        # Patches per side, squared; any remainder pixels are ignored.
        patches_per_side = image_size // patch_size
        self.num_patches = patches_per_side ** 2

        # Learnable class token that is prepended to every sequence.
        self.class_embedding = nn.Parameter(torch.randn(1, 1, embedding_dim),
                                            requires_grad=True)

        # One learnable position vector per token (all patches + the class token).
        self.position_embedding = nn.Parameter(
            torch.randn(1, self.num_patches + 1, embedding_dim),
            requires_grad=True)

        self.patch_embedding = PatchEmbedding(in_channels=in_channels,
                                              patch_size=patch_size,
                                              embedding_dim=embedding_dim)

        self.embedding_dropout = nn.Dropout(p=embedding_dropout)

        encoder_layers = (
            TransformerEncoder(embedding_dim=embedding_dim,
                               num_heads=num_heads,
                               mlp_size=mlp_size,
                               attn_dropout=attn_dropout,
                               mlp_dropout=mlp_dropout)
            for _ in range(num_transformer_layers)
        )
        self.transformerencoder = nn.Sequential(*encoder_layers)

    def forward(self, x):
        """Encode a feature map into a (batch, num_patches + 1, embedding_dim) sequence."""
        # One copy of the class token per sample (expand does not allocate).
        cls_tokens = self.class_embedding.expand(x.shape[0], -1, -1)

        tokens = torch.cat((cls_tokens, self.patch_embedding(x)), dim=1)
        tokens = self.embedding_dropout(self.position_embedding + tokens)

        return self.transformerencoder(tokens)
    
class AttentionMechBlock(nn.Module):
    """Single-head scaled dot-product attention that pools a sequence to one vector.

    Tokens are projected to queries, keys and values of width ``units``; the
    attended values are averaged over the sequence axis and layer-normalised.

    Args:
        dim: size of every incoming token vector.
        units: width of the query / key / value projections and of the output.
    """

    def __init__(self, dim, units=128):
        super().__init__()
        self.query = nn.Linear(dim, units)
        self.key = nn.Linear(dim, units)
        self.value = nn.Linear(dim, units)
        self.LayerNorm = nn.LayerNorm(normalized_shape= units)

    def forward(self, x):
        """Map (batch, tokens, dim) to a pooled (batch, units) representation."""
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        # Scale by the square root of the key width (the dimension the dot
        # product sums over), not by the width of the un-projected input.
        scale = K.size(-1) ** 0.5
        attn = torch.softmax(Q @ K.transpose(1,2) / scale, dim=-1)
        # Average the attended tokens into a single vector per sample.
        return self.LayerNorm((attn @ V).mean(dim=1))

class HybridModel(nn.Module):
    """CNN feature extractor -> ViT encoder -> attention pooling -> linear classifier.

    Args:
        image_size: side length of the (square) RGB input images.
        in_channels: channels fed to the ViT patch embedding. The ViT consumes
            the CNN feature map, so this must equal ``output_shape``; the CNN
            itself is hard-wired to 3 input channels.
        hidden_units: channels of the CNN's intermediate stages.
        output_shape: channels of the CNN's output feature map.
        patch_size: side length of each ViT patch.
        num_transformer_layers: number of stacked encoder layers.
        embedding_dim: size of every ViT token vector.
        mlp_size: hidden width of the encoder MLPs.
        num_heads: number of attention heads per encoder layer.
        attn_dropout: dropout probability inside encoder attention.
        mlp_dropout: dropout probability inside the encoder MLPs and before
            the final linear layer.
        embedding_dropout: dropout probability applied to the embedded tokens.
        units: width of the attention-pooling projections.
        num_classes: number of output logits.
    """

    def __init__(self,
                 image_size: int,
                 in_channels: int,
                 hidden_units: int,
                 output_shape: int,
                 patch_size: int,
                 num_transformer_layers: int,
                 embedding_dim: int,
                 mlp_size: int,
                 num_heads: int,
                 attn_dropout: float,
                 mlp_dropout: float,
                 embedding_dropout: float,
                 units: int = 128,
                 num_classes: int = 2):
        super().__init__()

        self.CNNBlock = CNNBlock(input_shape=3,
                                 hidden_units=hidden_units,
                                 output_shape=output_shape)

        # The unpadded convolutions shrink the image, so the ViT is sized from
        # the CNN's actual output rather than from the raw image size.
        cnn_out = self.CNNBlock.get_output_shape(image_size, image_size)
        self.cnn_output_height, self.cnn_output_width = cnn_out

        vit_settings = dict(image_size=self.cnn_output_height,
                            in_channels=in_channels,
                            patch_size=patch_size,
                            num_transformer_layers=num_transformer_layers,
                            embedding_dim=embedding_dim,
                            mlp_size=mlp_size,
                            num_heads=num_heads,
                            attn_dropout=attn_dropout,
                            mlp_dropout=mlp_dropout,
                            embedding_dropout=embedding_dropout,
                            num_classes=num_classes)
        self.ViTBlock = ViTBlock(**vit_settings)

        self.AttentionMechBlock = AttentionMechBlock(dim=embedding_dim,
                                                     units=units)

        self.classifier = torch.nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=mlp_dropout),
            nn.Linear(in_features=units, out_features=num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for a batch of images."""
        stages = (self.CNNBlock, self.ViTBlock, self.AttentionMechBlock, self.classifier)
        for stage in stages:
            x = stage(x)
        return x

In [None]:
# Hyper-parameters of the hybrid network, gathered in one place so they are
# easy to tweak. `in_channels` (what the ViT receives) is kept equal to
# `output_shape` (what the CNN emits).
model_config = {
    "image_size": 32,
    "in_channels": 64,
    "hidden_units": 32,
    "output_shape": 64,
    "patch_size": 5,
    "num_transformer_layers": 5,
    "embedding_dim": 256,
    "mlp_size": 2048,
    "num_heads": 128,
    "attn_dropout": 0.1,
    "mlp_dropout": 0,
    "embedding_dropout": 0,
    "units": 128,
    "num_classes": 2,
}

Hybrid_Model = HybridModel(**model_config).to(device)
Hybrid_Model

In [None]:
import torchinfo
# Layer-by-layer summary for one batch of 32 RGB images of 32x32 pixels.
summary(
    Hybrid_Model,
    input_size=(32, 3, 32, 32),
    col_names=["input_size", "output_size", "num_params", "trainable"],
)

In [None]:
optimizer = torch.optim.Adam(Hybrid_Model.parameters(), lr = 0.0001)

# Halve the learning rate when the monitored loss has not improved for 5
# consecutive epochs. The `verbose` flag is intentionally not passed: it is
# deprecated in recent PyTorch releases (and rejected by newer ones), and the
# training loop already prints the current learning rate every epoch.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer = optimizer,
    mode = "min",
    factor = 0.5,
    patience = 5
)
lrs = []  # history of learning rates, one entry per epoch
loss_func = torch.nn.CrossEntropyLoss()

In [None]:
import os
from tqdm import tqdm
import torch

# Create directories for saving models
checkpoint_dir = "/kaggle/working/checkpoints"
weights_dir = "/kaggle/working/weights"
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(weights_dir, exist_ok=True)

epochs = 10

# Per-epoch history, later used for plotting and stored in every checkpoint.
results = {
    "train_loss": [],
    "train_accuracy": [],
    "test_loss": [],
    "test_accuracy": []
}


def _run_epoch(model, dataloader, loss_fn, device, optimizer=None):
    """Run one full pass over ``dataloader``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and the pass
    runs under ``torch.inference_mode``.

    Returns:
        (mean loss per batch, mean per-batch accuracy,
         list of true labels, list of predicted labels)
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum, acc_sum = 0, 0
    actual, predicted = [], []

    with torch.inference_mode(mode=not is_training):
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            loss = loss_fn(logits, y)
            loss_sum += loss.item()

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # argmax of the logits equals argmax of their softmax, so the
            # softmax is skipped.
            y_class_pred = torch.argmax(logits, dim=1)
            acc_sum += (y_class_pred == y).sum().item() / len(logits)
            actual.extend(y.cpu().numpy())
            predicted.extend(y_class_pred.cpu().numpy())

    num_batches = len(dataloader)
    return loss_sum / num_batches, acc_sum / num_batches, actual, predicted


for epoch in tqdm(range(epochs)):
    # The label / prediction lists are overwritten every epoch, so after the
    # loop they describe the final epoch only.
    train_loss, train_acc, y_train_actual, y_train_predicted = _run_epoch(
        Hybrid_Model, train_dataloaders, loss_func, device, optimizer=optimizer
    )
    test_loss, test_acc, y_test_actual, y_test_predicted = _run_epoch(
        Hybrid_Model, test_dataloaders, loss_func, device
    )

    # Store results
    results["train_loss"].append(train_loss)
    results["train_accuracy"].append(train_acc)
    results["test_loss"].append(test_loss)
    results["test_accuracy"].append(test_acc)

    # Get current learning rate (before the scheduler may lower it), record it
    # and step the scheduler on the test loss
    curr_lr = optimizer.param_groups[0]["lr"]
    lrs.append(curr_lr)
    scheduler.step(test_loss)

    print(f"Epoch {epoch + 1}/{epochs} | LR {curr_lr:.6f} | "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
          f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.4f}")

    # Save checkpoint every 5 epochs
    if (epoch + 1) % 5 == 0:
        checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_epoch_{epoch+1}.pth")
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': Hybrid_Model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_loss': train_loss,
            'test_loss': test_loss,
            'train_accuracy': train_acc,
            'test_accuracy': test_acc,
            'results': results
        }, checkpoint_path)
        print(f"✅ Checkpoint saved at {checkpoint_path}")

# Save final model weights
final_weights_path = os.path.join(weights_dir, "final_weights.pth")
torch.save(Hybrid_Model.state_dict(), final_weights_path)
print(f"🎯 Final model weights saved at {final_weights_path}")

print("✅ Model Training Completed")


In [None]:
# The report is a multi-line string: print it so the table renders properly
# instead of being shown as a repr with escaped newlines. These labels were
# collected during the final training epoch (model in training mode).
print(classification_report(y_train_actual, y_train_predicted, target_names=class_names))

In [None]:
# The report is a multi-line string: print it so the table renders properly
# instead of being shown as a repr with escaped newlines. These labels come
# from the evaluation pass of the final epoch.
print(classification_report(y_test_actual, y_test_predicted, target_names=class_names))

In [None]:
# Confusion matrix of the final training epoch.
cm = confusion_matrix(y_true=y_train_actual, y_pred=y_train_predicted)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot();

In [None]:
# Confusion matrix of the final evaluation pass over the test split.
cm = confusion_matrix(y_true=y_test_actual, y_pred=y_test_predicted)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot();

In [None]:
# Use a dedicated name for the x-axis values. Re-using `epochs` here would
# replace the integer epoch count from the training cell with a range object
# and break that cell if it is run again.
epoch_axis = range(len(results["train_loss"]))

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 7))

# Left panel: loss per epoch.
loss_ax.plot(epoch_axis, results["train_loss"], label="train loss")
loss_ax.plot(epoch_axis, results["test_loss"], label="test loss")
loss_ax.set_title("Loss")
loss_ax.set_xlabel("Epochs")
loss_ax.legend()

# Right panel: accuracy per epoch.
acc_ax.plot(epoch_axis, results["train_accuracy"], label="train accuracy")
acc_ax.plot(epoch_axis, results["test_accuracy"], label="test accuracy")
acc_ax.set_title("Accuracy")
acc_ax.set_xlabel("Epochs")
acc_ax.legend()

fig.tight_layout()
plt.show()


In [None]:
import os
import torch
import torch.onnx

# Switch to evaluation mode before exporting
Hybrid_Model.eval()

# Destination folder and file of the exported graph
onnx_dir = "/kaggle/working/onnx"
os.makedirs(onnx_dir, exist_ok=True)
onnx_path = os.path.join(onnx_dir, "hybrid_model.onnx")

# Example input used to trace the model: (batch_size, channels, height, width)
dummy_input = torch.randn(1, 3, 32, 32, device=device, requires_grad=False)

# Axis 0 of both the input and the output is declared dynamic so the exported
# model accepts any batch size.
dynamic_batch_axes = {tensor_name: {0: 'batch_size'} for tensor_name in ('input', 'output')}

torch.onnx.export(
    Hybrid_Model,
    dummy_input,
    onnx_path,
    export_params=True,          # embed the trained weights in the file
    opset_version=14,
    do_constant_folding=True,    # pre-compute constant sub-graphs
    input_names=['input'],
    output_names=['output'],
    dynamic_axes=dynamic_batch_axes,
)

print("✅ Model successfully exported to ONNX format")
print(f"📁 ONNX model saved at: {onnx_path}")

# Structural check of the exported file; failures are reported, not raised.
try:
    import onnx
    onnx_model = onnx.load(onnx_path)
    onnx.checker.check_model(onnx_model)
except ImportError:
    print("⚠️ Install onnx package to verify the exported model: pip install onnx")
except Exception as err:
    print(f"⚠️ ONNX model verification failed: {err}")
else:
    print("✅ ONNX model verification passed")


In [None]:
import torch
from PIL import Image
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Path to the image
image_path = "/kaggle/input/cifake-real-and-ai-generated-synthetic-images/test/FAKE/0 (7).jpg"

# Label order must match the integer targets the model was trained on.
# ImageFolder numbers the class folders alphabetically (FAKE -> 0, REAL -> 1),
# so the names are taken from the dataset instead of being typed by hand.
class_names = test_data.classes

# Set model to evaluation mode
Hybrid_Model.eval()

# Reuse the exact evaluation preprocessing (same resize and normalisation
# statistics as during training); different statistics would shift the inputs
# away from what the network has seen.
transform = test_transform

# Load and preprocess the image
def predict_single_image(image_path, model, transform, device):
    """Classify one image file with ``model``.

    Args:
        image_path: path of the image to load.
        model: callable mapping an image batch to class logits.
        transform: preprocessing applied to the loaded RGB image; its result
            must support ``unsqueeze`` / ``to`` (i.e. be a tensor).
        device: device the input batch is moved to before the forward pass.

    Returns:
        (untransformed RGB image, predicted class index,
         probability of that class, probabilities of all classes)
    """
    rgb_image = Image.open(image_path).convert('RGB')
    original_image = rgb_image.copy()

    # Add the batch axis the model expects and move the input to the device.
    batch = transform(rgb_image).unsqueeze(0).to(device)

    with torch.inference_mode():
        probabilities = torch.softmax(model(batch), dim=1)
        predicted_class = torch.argmax(probabilities, dim=1).item()
        sample_probs = probabilities[0]
        confidence = sample_probs[predicted_class].item()

    return original_image, predicted_class, confidence, sample_probs

# Make prediction
original_img, predicted_class, confidence, probs = predict_single_image(
    image_path, Hybrid_Model, transform, device
)

# Display results: the image on the left, class probabilities on the right
fig, (image_ax, prob_ax) = plt.subplots(1, 2, figsize=(10, 5))

image_ax.imshow(original_img)
image_ax.axis('off')
image_ax.set_title(f"Predicted: {class_names[predicted_class]} | Confidence: {confidence:.2%}")

probs_numpy = probs.cpu().numpy()
bars = prob_ax.bar(class_names, probs_numpy)
bars[predicted_class].set_color('green')  # highlight the winning class
prob_ax.set_ylabel('Probability')
prob_ax.set_title('Class Probabilities')
prob_ax.set_ylim([0, 1])

# Add percentage labels just above each bar
for bar_idx, bar_prob in enumerate(probs_numpy[:len(class_names)]):
    prob_ax.text(bar_idx, bar_prob + 0.02, f'{bar_prob:.2%}', ha='center', va='bottom')

fig.tight_layout()
plt.show()

# Print detailed results
separator = "=" * 50
print(separator)
print(f"Image Path: {image_path}")
print(f"Predicted Class: {class_names[predicted_class]}")
print(f"Confidence: {confidence:.4f} ({confidence:.2%})")
print(separator)
print("Class Probabilities:")
for class_idx, class_label in enumerate(class_names):
    class_prob = probs[class_idx].item()
    print(f"  {class_label}: {class_prob:.4f} ({class_prob:.2%})")
print(separator)
