<a href="https://colab.research.google.com/github/partho2001/forgery_detection/blob/main/forgery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import zipfile
import os


zip_path = '/content/drive/MyDrive/forgery.zip'
extract_path = '/content/CASIA'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)


os.listdir(extract_path)


['CASIA2']

In [57]:
rm -rf `find -type d -name .ipynb_checkpoints`

In [58]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

# Define a simple CNN model
class ForgeryDetectionModel(nn.Module):
    def __init__(self):
        super(ForgeryDetectionModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)  # Adjusted input size based on resized images
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define data transformations
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# Load CASIA 2.0 dataset, excluding non-image files and Thumbs.db
root_folder = '/content/CASIA/CASIA2'

# Filter out non-image files and Thumbs.db
dataset = datasets.ImageFolder(root=root_folder, transform=transform, is_valid_file=lambda fname: fname.lower().endswith(('.png', '.jpeg', '.jpg', '.bmp', '.ppm', '.pgm', '.tif', '.tiff', '.webp')) and 'Thumbs.db' not in fname)

# Split the dataset into training and testing sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create DataLoader for training and testing
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Initialize the model, loss function, and optimizer
model = ForgeryDetectionModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {accuracy * 100:.2f}%')


Test Accuracy: 99.88%


In [51]:
import os

# Print the classes in the dataset folder
dataset_folder = '/content/CASIA2'
classes = [d for d in os.listdir(dataset_folder) if os.path.isdir(os.path.join(dataset_folder, d))]
print("Classes:", classes)

# Print the number of images in each class
for class_name in classes:
    class_path = os.path.join(dataset_folder, class_name)
    num_images = len([f for f in os.listdir(class_path) if f.endswith('.jpg') or f.endswith('.png')])
    print(f"Class: {class_name}, Num Images: {num_images}")

Classes: ['CASIA 2 Groundtruth', 'Au', '.ipynb_checkpoints']
Class: CASIA 2 Groundtruth, Num Images: 5123
Class: Au, Num Images: 7437
Class: .ipynb_checkpoints, Num Images: 0
