In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ai-generated-images-vs-real-images/test/fake/3863.jpg
/kaggle/input/ai-generated-images-vs-real-images/test/fake/2664.png
/kaggle/input/ai-generated-images-vs-real-images/test/fake/3750.jpg
/kaggle/input/ai-generated-images-vs-real-images/test/fake/5982.jpg
/kaggle/input/ai-generated-images-vs-real-images/test/fake/2539.png
/kaggle/input/ai-generated-images-vs-real-images/test/fake/1231.png
/kaggle/input/ai-generated-images-vs-real-images/test/fake/0106.jpg
/kaggle/input/ai-generated-images-vs-real-images/test/fake/0375.jpg
/kaggle/input/ai-generated-images-vs-real-images/test/fake/3919.jpg
/kaggle/input/ai-generated-images-vs-real-images/test/fake/1017.png
/kaggle/input/ai-generated-images-vs-real-images/test/fake/3757.jpg
/kaggle/input/ai-generated-images-vs-real-images/test/fake/2437.png
/kaggle/input/ai-generated-images-vs-real-images/test/fake/4489.jpg
/kaggle/input/ai-generated-images-vs-real-images/test/fake/3138.jpg
/kaggle/input/ai-generated-images-vs-real-images

In [26]:
import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image, ImageFile



# Handle problematic images safely
ImageFile.LOAD_TRUNCATED_IMAGES = True
Image.MAX_IMAGE_PIXELS = None  # allow very large images safely

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on:", DEVICE)


Running on: cuda


In [48]:
DATASET_ROOT = "/kaggle/input/ai-generated-images-vs-real-images"

TRAIN_DIR = os.path.join(DATASET_ROOT, "train")
TEST_DIR  = os.path.join(DATASET_ROOT, "test")

print("Train folders:", os.listdir(TRAIN_DIR))
print("Test folders :", os.listdir(TEST_DIR))

Train folders: ['fake', 'real']
Test folders : ['fake', 'real']


In [49]:
IMG_SIZE = 128
VECTOR_DIM = IMG_SIZE * IMG_SIZE * 3
COMPRESSED_DIM = 512

# NOTE: Resize is handled via thumbnail() during caching
transform = transforms.ToTensor()

In [50]:
projection_matrix = torch.randn(
    VECTOR_DIM, COMPRESSED_DIM
) / (COMPRESSED_DIM ** 0.5)

projection_matrix = projection_matrix.to(DEVICE)
projection_matrix.requires_grad = False

In [53]:
def build_projected_cache(split, save_path):
    vectors = []
    labels = []
    class_map = {"real": 0, "fake": 1}

    print(f"🔄 Building cache for {split}...")

    for cls in ["real", "fake"]:
        folder = os.path.join(DATASET_ROOT, split, cls)

        for img_name in os.listdir(folder):
            if not img_name.lower().endswith((".jpg", ".jpeg", ".png")):
                continue

            path = os.path.join(folder, img_name)

            try:
                # FAST & SAFE image load
                img = Image.open(path)
                img.thumbnail((IMG_SIZE, IMG_SIZE), Image.BILINEAR)
                img = img.convert("RGB")

                vec = transform(img).view(-1).to(DEVICE)

                with torch.no_grad():
                    proj = vec @ projection_matrix

                vectors.append(proj.cpu())
                labels.append(class_map[cls])

            except Exception as e:
                # Skip problematic images safely
                continue

    X = torch.stack(vectors)
    y = torch.tensor(labels)

    torch.save((X, y), save_path)
    print(f"✅ Saved {len(y)} samples → {save_path}")
print("Loader  ✅")

Loader  ✅


In [33]:
TRAIN_CACHE = "/kaggle/working/train_proj.pt"
TEST_CACHE  = "/kaggle/working/test_proj.pt"

if not os.path.exists(TRAIN_CACHE):
    build_projected_cache("train", TRAIN_CACHE)

if not os.path.exists(TEST_CACHE):
    build_projected_cache("test", TEST_CACHE)

print("Cache files:", os.listdir("/kaggle/working"))


🔄 Building cache for train...




✅ Saved 15074 samples → /kaggle/working/train_proj.pt
🔄 Building cache for test...
✅ Saved 3766 samples → /kaggle/working/test_proj.pt
Cache files: ['test_proj.pt', '.virtual_documents', 'train_proj.pt']


In [54]:
class ProjectedDataset(Dataset):
    def __init__(self, path):
        self.X, self.y = torch.load(path)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [55]:
train_loader = DataLoader(
    ProjectedDataset(TRAIN_CACHE),
    batch_size=2048,
    shuffle=True
)

test_loader = DataLoader(
    ProjectedDataset(TEST_CACHE),
    batch_size=2048,
    shuffle=False
)

In [56]:
class FastClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(COMPRESSED_DIM, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

    def forward(self, x):
        return self.net(x)


In [61]:
model = FastClassifier().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

print("🚀 Training started")
start = time.time()

for epoch in range(3):
    model.train()
    total_loss = 0.0

    for x, y in train_loader:
        x = x.to(DEVICE)
        y = y.to(DEVICE)

        out = model(x)
        loss = criterion(out, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1} | Loss: {total_loss / len(train_loader):.4f}")

print(f"Training finished in {time.time() - start:.2f} seconds")


🚀 Training started
Epoch 1 | Loss: 0.1978
Epoch 2 | Loss: 0.1361
Epoch 3 | Loss: 0.1189
Training finished in 0.63 seconds


In [62]:
model.eval()
correct = total = 0

with torch.no_grad():
    for x, y in test_loader:
        x = x.to(DEVICE)
        y = y.to(DEVICE)

        preds = model(x).argmax(1)
        correct += (preds == y).sum().item()
        total += y.size(0)

print(f"🏆 Test Accuracy: {100 * correct / total:.2f}%")

🏆 Test Accuracy: 98.04%


In [63]:
def predict_image(img_path):
    img = Image.open(img_path).convert("RGB")
    vec = transform(img).view(-1).to(DEVICE)

    with torch.no_grad():
        proj = vec @ projection_matrix
        logits = model(proj.unsqueeze(0))
        probs = F.softmax(logits, dim=1)[0]

    label = "AI Generated" if probs[1] > probs[0] else "Real Image"
    return label, probs.max().item()

In [68]:
import os
print(os.listdir("/kaggle/working"))

['test_proj.pt', '.virtual_documents', 'train_proj.pt']


In [None]:
from PIL import Image
import torch.nn.functional as F

def predict_image(image_path):
    """
    Predict whether an image is AI-generated or real.
    """

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Load image safely (handles huge images)
    img = Image.open(image_path)
    img.thumbnail((IMG_SIZE, IMG_SIZE), Image.BILINEAR)
    img = img.convert("RGB")

    # Convert to vector
    vec = transform(img).view(-1).to(DEVICE)

    with torch.no_grad():
        projected = vec @ projection_matrix
        logits = model(projected.unsqueeze(0))
        probs = F.softmax(logits, dim=1)[0]

    confidence, pred_class = torch.max(probs, dim=0)

    label = "AI GENERATED" if pred_class.item() == 1 else "REAL IMAGE"

    return {
        "prediction": label,
        "confidence_percent": round(confidence.item() * 100, 2),
        "probabilities": {
            "real": round(probs[0].item() * 100, 2),
            "ai": round(probs[1].item() * 100, 2)
        }
    }
print("FINAL INFERENCE FUNCTION added sucessfully ✅")

In [None]:
from PIL import Image
import torch.nn.functional as F

def predict_image(image_path):
    """
    Predict whether an image is AI-generated or real.
    """

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Load image safely (handles huge images)
    img = Image.open(image_path)
    img.thumbnail((IMG_SIZE, IMG_SIZE), Image.BILINEAR)
    img = img.convert("RGB")

    # Convert to vector
    vec = transform(img).view(-1).to(DEVICE)

    with torch.no_grad():
        projected = vec @ projection_matrix
        logits = model(projected.unsqueeze(0))
        probs = F.softmax(logits, dim=1)[0]

    confidence, pred_class = torch.max(probs, dim=0)

    label = "AI GENERATED" if pred_class.item() == 1 else "REAL IMAGE"

    return {
        "prediction": label,
        "confidence_percent": round(confidence.item() * 100, 2),
        "probabilities": {
            "real": round(probs[0].item() * 100, 2),
            "ai": round(probs[1].item() * 100, 2)
        }
    }
print("Something has happened")

In [None]:
result = predict_image("/kaggle/input/test-dataset/test_images.jpg")
print(result)