In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class RealFakeDataset(Dataset):
    """Binary image dataset: real pictures (label 0) vs. generated ones (label 1).

    Expected directory layout::

        root_dir/<generator folder>/<real|fake>/<image files>

    Folder names ``real`` / ``fake`` are matched case-insensitively, a
    top-level folder called ``json_files`` is skipped, and only regular files
    ending in .png, .jpg, .jpeg or .webp are indexed.

    Attributes:
        samples: list of ``(image_path, label)`` tuples in sorted traversal order.
        transform: optional callable applied to each RGB PIL image.
    """

    # Private lookup tables so the traversal rules live in one place.
    _IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp')
    _LABELS = {"real": 0, "fake": 1}
    _SKIPPED_FOLDERS = ("json_files",)

    def __init__(self, root_dir, transform=None):
        """Index every image under ``root_dir``.

        Args:
            root_dir: dataset root containing one sub-folder per generator.
            transform: optional callable applied to each image in ``__getitem__``.

        Raises:
            OSError: if ``root_dir`` (or a sub-folder) cannot be listed.
        """
        self.samples = []
        self.transform = transform

        # os.listdir() returns entries in arbitrary order. Sorting keeps the
        # index -> sample mapping stable across machines and runs, which any
        # seeded split of this dataset depends on.
        for gen_folder in sorted(os.listdir(root_dir)):
            gen_path = os.path.join(root_dir, gen_folder)
            if not os.path.isdir(gen_path) or gen_folder.lower() in self._SKIPPED_FOLDERS:
                continue

            # Look for real and fake subfolders (case-insensitive)
            for label_name in sorted(os.listdir(gen_path)):
                label_path = os.path.join(gen_path, label_name)
                label = self._LABELS.get(label_name.lower())
                if label is None or not os.path.isdir(label_path):
                    continue

                for file in sorted(os.listdir(label_path)):
                    file_path = os.path.join(label_path, file)
                    # isfile() guards against a directory named like "x.png",
                    # which would otherwise only fail later in Image.open().
                    if file.lower().endswith(self._IMAGE_EXTENSIONS) and os.path.isfile(file_path):
                        self.samples.append((file_path, label))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and passed through ``self.transform``
        when one was supplied; otherwise the PIL image itself is returned.
        """
        img_path, label = self.samples[idx]
        # convert() returns an independent copy, so the source file can be
        # closed right away instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        # Compare against None: a valid transform object may be falsy.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Preprocessing: resize every image to 224x224, convert it to a tensor and
# normalise each channel with the mean/std that torchvision documents for its
# ImageNet-pretrained models.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Dataset root. The default is the original machine-specific location; set the
# DEEPGUARD_ROOT environment variable to run the notebook somewhere else
# without editing this cell.
root_dir = os.environ.get("DEEPGUARD_ROOT", r"D:\Python ML\hackathon\DeepGuardDB_v1")
dataset = RealFakeDataset(root_dir, transform=transform)

# Sanity check: how many images were indexed, plus the first few entries.
print(f"Total samples found: {len(dataset)}")
for path, label in dataset.samples[:5]:
    print(path, label)

# Loader over the FULL dataset. The split cell below rebinds `train_loader` to
# the training subset, so do not train from this one if a held-out test set is
# wanted.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


Total samples found: 13000
D:\Python ML\hackathon\DeepGuardDB_v1\DALLE_dataset\fake\000000000009.jpg.png 1
D:\Python ML\hackathon\DeepGuardDB_v1\DALLE_dataset\fake\000000000025.jpg.png 1
D:\Python ML\hackathon\DeepGuardDB_v1\DALLE_dataset\fake\000000000030.jpg.png 1
D:\Python ML\hackathon\DeepGuardDB_v1\DALLE_dataset\fake\000000000034.jpg.png 1
D:\Python ML\hackathon\DeepGuardDB_v1\DALLE_dataset\fake\000000000042.jpg.png 1


In [2]:
from torch.utils.data import random_split

# Define split ratios: 70% train, 15% validation, remainder (~15%) test.
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Seed the split explicitly. Without a fixed generator the three subsets change
# on every kernel restart, so reported validation/test numbers are not
# comparable between runs and test images can end up in a later training set.
SPLIT_SEED = 42
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# DataLoaders: only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")


Train: 9100, Val: 1950, Test: 1950


In [3]:
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load ResNet50 with ImageNet weights.
# `pretrained=True` is deprecated in favour of the `weights=` enum API
# (torchvision >= 0.13). IMAGENET1K_V1 is the checkpoint `pretrained=True`
# resolved to, so the starting weights are unchanged.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    # Older torchvision without the weights enum.
    model = models.resnet50(pretrained=True)

# Replace final layer
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)  # 2 classes: real(0) or fake(1)

model = model.to(device)

# Loss & optimizer: every parameter is fine-tuned (nothing is frozen).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Minahil/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100.0%


In [4]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=5):
    """Train ``model`` and print one summary line per epoch.

    Each epoch runs a full optimisation pass over ``train_loader`` followed by
    a gradient-free pass over ``val_loader``.

    Args:
        model: network to optimise; left in eval mode when the function returns.
        train_loader: iterable of ``(images, labels)`` batches used for training.
        val_loader: iterable of ``(images, labels)`` batches used for validation.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        epochs: number of passes over ``train_loader``.

    Returns:
        None. The printed "Loss" is the sum of per-batch losses for the epoch;
        an accuracy is printed as ``nan`` when its loader yielded no samples.
    """
    # Follow the model's own device instead of a notebook-level global, so the
    # function works regardless of which cells ran before it.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(epochs):
        model.train()
        running_loss, correct, total = 0, 0, 0

        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        # Guard the division: an empty loader must not abort the run.
        train_acc = 100 * correct / total if total else float("nan")

        # Validation
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(target_device), labels.to(target_device)
                outputs = model(images)
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()

        val_acc = 100 * val_correct / val_total if val_total else float("nan")
        print(f"Epoch {epoch+1}/{epochs} | Loss: {running_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")

# Fine-tune for five epochs, validating after each one.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=5,
)


Epoch 1/5 | Loss: 79.3471 | Train Acc: 87.84% | Val Acc: 92.51%
Epoch 2/5 | Loss: 30.8959 | Train Acc: 95.98% | Val Acc: 93.54%
Epoch 3/5 | Loss: 17.6496 | Train Acc: 97.67% | Val Acc: 94.21%
Epoch 4/5 | Loss: 17.2617 | Train Acc: 97.71% | Val Acc: 93.18%
Epoch 5/5 | Loss: 12.9669 | Train Acc: 98.34% | Val Acc: 93.64%


In [5]:
def evaluate(model, test_loader):
    """Print the classification accuracy of ``model`` on ``test_loader``.

    Args:
        model: network to evaluate; switched to eval mode and left there.
        test_loader: iterable of ``(images, labels)`` batches.

    Returns:
        None. Accuracy is printed as a percentage, or as ``nan`` when the
        loader yielded no samples.
    """
    model.eval()
    # Follow the model's own device instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    # Guard the division so an empty loader reports "nan" rather than raising.
    accuracy = 100 * correct / total if total else float("nan")
    print(f"Test Accuracy: {accuracy:.2f}%")

# Report accuracy on the held-out test split.
evaluate(model=model, test_loader=test_loader)

Test Accuracy: 93.90%


In [9]:
def predict_image(image_path, model):
    """Classify a single image file as ``"Real"`` or ``"Fake"``.

    The image is loaded as RGB, passed through the notebook-level ``transform``
    pipeline and fed to ``model`` as a batch of one.

    Args:
        image_path: path of the image file to classify.
        model: trained classifier whose class 0 means real; switched to eval
            mode and left there.

    Returns:
        ``"Real"`` when the highest-scoring class is 0, otherwise ``"Fake"``.
    """
    model.eval()
    # Follow the model's own device instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # convert() returns an independent copy, so the file handle can be released
    # as soon as the pixels are read.
    with Image.open(image_path) as raw_image:
        rgb_image = raw_image.convert("RGB")

    batch = transform(rgb_image).unsqueeze(0).to(target_device)
    with torch.no_grad():
        output = model(batch)
        _, predicted = torch.max(output, 1)
    return "Real" if predicted.item() == 0 else "Fake"

# Example: classify a photo stored on the local machine.
print(
    predict_image(
        image_path=r"C:\Users\Minahil\OneDrive\Pictures\Saved Pictures\IMG_20230427_151113_888.jpg",
        model=model,
    )
)

Real


In [10]:
# Example: classify a second local image file.
print(
    predict_image(
        image_path=r"D:\minahil\ChatGPT Image Aug 6, 2025, 02_41_53 PM.png",
        model=model,
    )
)

Fake


In [12]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.48.1-py3-none-any.whl.metadata (9.5 kB)
Collecting altair!=5.4.0,!=5.4.1,<6,>=4.0 (from streamlit)
  Downloading altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.5.0 (from streamlit)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<7,>=4.0 (from streamlit)
  Downloading cachetools-6.1.0-py3-none-any.whl.metadata (5.4 kB)
Collecting click<9,>=7.0 (from streamlit)
  Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting pandas<3,>=1.4.0 (from streamlit)
  Downloading pandas-2.3.1-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting protobuf<7,>=3.20 (from streamlit)
  Downloading protobuf-6.31.1-cp310-abi3-win_amd64.whl.metadata (593 bytes)
Collecting pyarrow>=7.0 (from streamlit)
  Downloading pyarrow-21.0.0-cp312-cp312-win_amd64.whl.metadata (3.4 kB)
Collecting requests<3,>=2.27 (from streamlit)
  Downloading requests-2.32.4-py3-none-any.whl.metadata (4.9 kB)
Collec