In [None]:
import os
import shutil
import subprocess

from google.colab import files

# Define the path for the kaggle.json file
kaggle_dir = os.path.expanduser('~/.kaggle')
kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')

# Ensure the .kaggle directory exists
os.makedirs(kaggle_dir, exist_ok=True)


def kaggle_api_works():
    """Return True when a small Kaggle CLI query exits with status 0.

    A `!kaggle ...` shell line does not raise a Python exception when the
    command fails, so a surrounding try/except can never notice a bad key.
    Running the CLI through `subprocess.run` exposes the exit status instead.
    The former `--max-size 10m` filter is dropped: the CLI rejects it with
    "invalid int value", which made the probe fail for every key.
    """
    try:
        probe = subprocess.run(
            ['kaggle', 'datasets', 'list', '-s', 'cats'],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except FileNotFoundError:
        # The executable itself is missing; no key can fix that.
        print("The 'kaggle' command was not found on PATH.")
        return False

    if probe.returncode != 0:
        # Show the tail of the CLI output so the user can see why it failed.
        tail = probe.stdout.strip().splitlines()[-5:]
        if tail:
            print("\n".join(tail))
        return False
    return True


# Check if the API key is already configured and working
api_works = False
if os.path.exists(kaggle_json_path):
    if kaggle_api_works():
        print("‚úÖ Kaggle API is already configured and working.")
        api_works = True
    else:
        print("Kaggle.json found, but API is not working. Please re-upload.")
        os.remove(kaggle_json_path)  # Remove the old, invalid file

if not api_works:
    print("üîë Please upload your kaggle.json file.")
    # Prompt user to upload the kaggle.json file (saved into the working directory)
    uploaded = files.upload()

    # Fail loudly if the upload was cancelled or the file has another name;
    # otherwise the move below would silently do nothing useful.
    if not os.path.exists('kaggle.json'):
        raise FileNotFoundError(
            "Expected a file named 'kaggle.json' in the working directory after upload."
        )

    # Move the file to the correct location and restrict it to the owner
    shutil.move('kaggle.json', kaggle_json_path)
    os.chmod(kaggle_json_path, 0o600)

    # Verify that the API works after the upload
    print("\nVerifying the API key...")
    if kaggle_api_works():
        api_works = True
        print("‚úÖ Kaggle API configured successfully!")
    else:
        print("‚ùå Error: The uploaded API key is not working. Please generate a new key from Kaggle and try again.")

üîë Please upload your kaggle.json file.


Saving kaggle.json to kaggle.json

Verifying the API key...
usage: kaggle datasets list [-h] [--sort-by SORT_BY] [--size SIZE]
                            [--file-type FILE_TYPE] [--license LICENSE_NAME]
                            [--tags TAG_IDS] [-s SEARCH] [-m] [--user USER]
                            [-p PAGE] [-v] [--max-size MAX_SIZE]
                            [--min-size MIN_SIZE]
kaggle datasets list: error: argument --max-size: invalid int value: '10m'
‚úÖ Kaggle API configured successfully!


In [None]:
# Kaggle API command to download the dataset
!kaggle datasets download -d manjilkarki/deepfake-and-real-images

# --- Selectively unzip only the necessary folders ---
print("\nUnzipping the 'train' folder...")
# The quotes are important to handle the path correctly
!unzip -q -o deepfake-and-real-images.zip 'Dataset/Train/*' -d .

print("Unzipping the 'validation' folder...")
!unzip -q -o deepfake-and-real-images.zip 'Dataset/Validation/*' -d .

# We leave 'Dataset/test/*' zipped to save space

print("\n‚úÖ Necessary folders unzipped successfully.")
# List the contents to verify that only the needed folders were created
!ls -R Dataset/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
fake_12600.jpg	fake_16137.jpg	fake_1997.jpg	fake_5532.jpg  fake_9069.jpg
fake_12601.jpg	fake_16138.jpg	fake_1998.jpg	fake_5533.jpg  fake_906.jpg
fake_12602.jpg	fake_16139.jpg	fake_1999.jpg	fake_5534.jpg  fake_9070.jpg
fake_12603.jpg	fake_1613.jpg	fake_199.jpg	fake_5535.jpg  fake_9071.jpg
fake_12604.jpg	fake_16140.jpg	fake_19.jpg	fake_5536.jpg  fake_9072.jpg
fake_12605.jpg	fake_16141.jpg	fake_1.jpg	fake_5537.jpg  fake_9073.jpg
fake_12606.jpg	fake_16142.jpg	fake_2000.jpg	fake_5538.jpg  fake_9074.jpg
fake_12607.jpg	fake_16143.jpg	fake_2001.jpg	fake_5539.jpg  fake_9075.jpg
fake_12608.jpg	fake_16144.jpg	fake_2002.jpg	fake_553.jpg   fake_9076.jpg
fake_12609.jpg	fake_16145.jpg	fake_2003.jpg	fake_5540.jpg  fake_9077.jpg
fake_1260.jpg	fake_16146.jpg	fake_2004.jpg	fake_5541.jpg  fake_9078.jpg
fake_12610.jpg	fake_16147.jpg	fake_2005.jpg	fake_5542.jpg  fake_9079.jpg
fake_12611.jpg	fake_16148.jpg	fake_2006.jpg	fake_5543.jpg  fake_907.

In [None]:
# Install the PyTorch Image Models library (quietly)
!pip install -q timm

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import timm

# Pick the compute device: CUDA when PyTorch reports one, CPU otherwise
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")

Using device: cuda


In [None]:
class HybridModel(nn.Module):
    """Classifier that fuses the outputs of two backbones.

    Both backbones receive the same input; their outputs are concatenated
    along dim 1 and passed through a small MLP head.

    Args:
        effnet_backbone: module exposing a `num_features` attribute; stored as
            `self.effnet`. Its output is assumed to be (batch, num_features)
            so the concatenation width matches the head — TODO confirm.
        swin_backbone: module exposing a `num_features` attribute; stored as
            `self.swin`. Same output assumption as above.
        num_classes: output width of the final linear layer (default 2).
    """

    def __init__(self, effnet_backbone, swin_backbone, num_classes=2):
        super().__init__()
        self.effnet = effnet_backbone
        self.swin = swin_backbone

        # Width of the fused vector is the sum of both backbone widths
        effnet_dim = self.effnet.num_features
        swin_dim = self.swin.num_features
        fused_dim = effnet_dim + swin_dim

        print(f"EfficientNet feature size: {effnet_dim}")
        print(f"Swin Transformer feature size: {swin_dim}")
        print(f"Concatenated feature size: {fused_dim}")

        # Head layers are listed in order; their positions define the
        # `classifier.<index>` keys of the state dict.
        head_layers = [
            nn.Linear(fused_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the head's output for the concatenated backbone outputs of `x`."""
        fused = torch.cat([self.effnet(x), self.swin(x)], dim=1)
        return self.classifier(fused)

In [None]:
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over `dataloader` and print loss/accuracy.

    Args:
        model: module called as `model(inputs)`; predictions are the arg-max
            of its output along dim 1.
        dataloader: iterable yielding `(inputs, labels)` batches.
        criterion: callable `criterion(outputs, labels)` returning a scalar
            loss tensor.
        optimizer: optimizer whose `zero_grad()` and `step()` are called once
            per batch.
        device: device both tensors of each batch are moved to.

    Returns:
        0-dim float64 tensor holding the fraction of correct predictions over
        the epoch (previously nothing was returned; `validate` already
        returned its accuracy).

    Raises:
        ValueError: if `dataloader` yields no samples.
    """
    model.train()
    running_loss = 0.0
    # Start from a tensor so the accumulator has one type for the whole loop
    # and `.double()` below is always valid.
    correct_predictions = torch.zeros((), dtype=torch.long, device=device)
    total_samples = 0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        # Weight the batch loss by batch size so the epoch figure is per-sample
        running_loss += loss.item() * batch_size
        preds = outputs.argmax(dim=1)
        correct_predictions += (preds == labels).sum()
        total_samples += batch_size

    if total_samples == 0:
        raise ValueError("train_one_epoch received a dataloader with no samples")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions.double() / total_samples
    print(f"Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
    return epoch_acc

def validate(model, dataloader, criterion, device, split_name="Validation"):
    """Evaluate `model` on `dataloader` without gradients and print loss/accuracy.

    Args:
        model: module called as `model(inputs)`; predictions are the arg-max
            of its output along dim 1. It is switched to eval mode.
        dataloader: iterable yielding `(inputs, labels)` batches.
        criterion: callable `criterion(outputs, labels)` returning a scalar
            loss tensor.
        device: device both tensors of each batch are moved to.
        split_name: label used in the printed line; the default keeps the
            original "Validation Loss: ..." output, pass e.g. "Test" when
            scoring another split.

    Returns:
        0-dim float64 tensor holding the fraction of correct predictions.

    Raises:
        ValueError: if `dataloader` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    # Start from a tensor so the accumulator has one type for the whole loop
    # and `.double()` below is always valid.
    correct_predictions = torch.zeros((), dtype=torch.long, device=device)
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_size = inputs.size(0)
            # Weight the batch loss by batch size so the figure is per-sample
            running_loss += loss.item() * batch_size
            preds = outputs.argmax(dim=1)
            correct_predictions += (preds == labels).sum()
            total_samples += batch_size

    if total_samples == 0:
        raise ValueError("validate received a dataloader with no samples")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions.double() / total_samples
    print(f"{split_name} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
    return epoch_acc

In [None]:
# Mount Google Drive at /content/drive; later cells read and write the
# trained checkpoint under '/content/drive/My Drive/MyModels'.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# --- IMPORTANT: Upload your model files to the Colab sidebar FIRST ---
EFFNET_PATH = 'deepfake_detection_model.pth'
SWIN_PATH = 'best_swin_model.pth'

# Name the file(s) that are actually missing instead of a generic message
missing_files = [path for path in (EFFNET_PATH, SWIN_PATH) if not os.path.exists(path)]
if missing_files:
    raise FileNotFoundError(
        f"Error: Make sure model files are uploaded. Missing: {', '.join(missing_files)}"
    )


def load_backbone(arch, checkpoint_path):
    """Create a headless timm model and load `checkpoint_path` into it.

    The checkpoint is loaded with `strict=False`, as before, but the result of
    `load_state_dict` is now inspected: with `strict=False` a checkpoint whose
    keys do not match at all loads nothing and would otherwise leave a
    randomly initialised backbone without any warning.

    Args:
        arch: timm architecture name passed to `timm.create_model`.
        checkpoint_path: path of a file readable by `torch.load`; expected to
            hold a state dict.

    Returns:
        The created model with the matching checkpoint tensors loaded.

    Raises:
        RuntimeError: if none of the model's keys were found in the checkpoint.
    """
    backbone = timm.create_model(arch, pretrained=False, num_classes=0)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(checkpoint_path, map_location=DEVICE)
    report = backbone.load_state_dict(state_dict, strict=False)

    expected_count = len(backbone.state_dict())
    missing_count = len(report.missing_keys)
    if expected_count and missing_count == expected_count:
        raise RuntimeError(
            f"No weights from '{checkpoint_path}' matched '{arch}'. "
            "Check that the file holds a plain state dict for this architecture."
        )
    if missing_count:
        print(
            f"Warning: {missing_count}/{expected_count} '{arch}' tensors were not in "
            f"'{checkpoint_path}' and keep their initial values, e.g. {report.missing_keys[:3]}"
        )
    if report.unexpected_keys:
        # Presumably the old classification head, which num_classes=0 removes
        # from the model — verify against the listed keys.
        print(
            f"Note: ignored {len(report.unexpected_keys)} checkpoint tensors not used by "
            f"'{arch}', e.g. {report.unexpected_keys[:3]}"
        )
    return backbone


# --- Load Backbones ---
effnet_backbone = load_backbone('efficientnet_b0', EFFNET_PATH)
swin_backbone = load_backbone('swin_tiny_patch4_window7_224', SWIN_PATH)

# --- Create the Hybrid Model ---
hybrid_model = HybridModel(effnet_backbone, swin_backbone, num_classes=2).to(DEVICE)

# --- Prepare DataLoaders ---
BATCH_SIZE = 32
TRAIN_DIR = 'Dataset/Train'
VAL_DIR = 'Dataset/Validation'
# One deterministic transform is shared by all splits: resize, tensor, normalise
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=transform)
val_dataset = datasets.ImageFolder(root=VAL_DIR, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
criterion = nn.CrossEntropyLoss()

print("‚úÖ Models loaded and DataLoaders are ready.")

EfficientNet feature size: 1280
Swin Transformer feature size: 768
Concatenated feature size: 2048
‚úÖ Models loaded and DataLoaders are ready.


In [None]:
NUM_EPOCHS = 3
# The banner reads the epoch count from NUM_EPOCHS so the two cannot drift apart
print(f"\n--- Training Classifier Head for {NUM_EPOCHS} Epochs & Saving the Best Version ---")
best_val_acc = 0.0

# --- Define where to save the model in your Google Drive ---
save_dir = '/content/drive/My Drive/MyModels'
best_model_path = os.path.join(save_dir, 'best_hybrid_model_head_only.pth')
os.makedirs(save_dir, exist_ok=True)

# Freeze backbones, unfreeze classifier
hybrid_model.effnet.requires_grad_(False)
hybrid_model.swin.requires_grad_(False)
hybrid_model.classifier.requires_grad_(True)
# NOTE(review): train_one_epoch calls model.train() on the whole model, so any
# BatchNorm/Dropout layers inside the frozen backbones presumably still run in
# training mode here — confirm whether that is intended.

# Only the head's parameters are handed to the optimizer
optimizer = optim.AdamW(hybrid_model.classifier.parameters(), lr=1e-3)

for epoch in range(NUM_EPOCHS):
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")
    train_one_epoch(hybrid_model, train_loader, criterion, optimizer, DEVICE)
    # Keep the tracked accuracy as a plain float rather than a device tensor
    val_acc = float(validate(hybrid_model, val_loader, criterion, DEVICE))

    # --- Logic to save the best model ---
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # The full model state dict (backbones + head) is written
        torch.save(hybrid_model.state_dict(), best_model_path)
        print(f"‚úÖ New best model saved to Drive with accuracy: {best_val_acc:.4f}")

print("\nTraining complete!")
if best_val_acc > 0.0:
    print(f"The best model was saved to: {best_model_path}")
else:
    # Nothing was written because no epoch beat the initial 0.0
    print("No checkpoint was saved: validation accuracy never exceeded 0.")


--- Training Classifier Head for 3 Epochs & Saving the Best Version ---

Epoch 1/3
Train Loss: 0.0306 Acc: 0.9881
Validation Loss: 0.0514 Acc: 0.9831
‚úÖ New best model saved to Drive with accuracy: 0.9831

Epoch 2/3
Train Loss: 0.0267 Acc: 0.9896
Validation Loss: 0.0544 Acc: 0.9849
‚úÖ New best model saved to Drive with accuracy: 0.9849

Epoch 3/3
Train Loss: 0.0247 Acc: 0.9902
Validation Loss: 0.0656 Acc: 0.9832

Training complete!
The best model was saved to: /content/drive/My Drive/MyModels/best_hybrid_model_head_only.pth


In [None]:
print("\n--- Evaluating the best model on the Test Set ---")

# --- 1. Define the path to the saved model in your Drive ---
saved_model_path = '/content/drive/My Drive/MyModels/best_hybrid_model_head_only.pth'

if not os.path.exists(saved_model_path):
    raise FileNotFoundError("Could not find the saved model in Google Drive. Did the previous cell run correctly?")

# --- 2. Unzip the Test folder ---
TEST_DIR = 'Dataset/Test'
if not os.path.exists(TEST_DIR):
    print("Unzipping the 'test' folder...")
    !unzip -q -o deepfake-and-real-images.zip 'Dataset/Test/*' -d .

# --- 3. Create the Test DataLoader ---
test_dataset = datasets.ImageFolder(root=TEST_DIR, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# --- 4. Re-create the model architecture ---
effnet_backbone_test = timm.create_model('efficientnet_b0', pretrained=False, num_classes=0)
swin_backbone_test = timm.create_model('swin_tiny_patch4_window7_224', pretrained=False, num_classes=0)
test_model = HybridModel(effnet_backbone_test, swin_backbone_test, num_classes=2).to(DEVICE)

# --- 5. Load the saved weights from Google Drive ---
test_model.load_state_dict(torch.load(saved_model_path, map_location=DEVICE))
print(f"Loaded best model from '{saved_model_path}'")

# --- 6. Run the evaluation ---
print("\nCalculating test accuracy...")
test_acc = validate(test_model, test_loader, criterion, DEVICE)

print(f"\nüéØ Final Test Accuracy: {test_acc:.4f}")


--- Evaluating the best model on the Test Set ---
EfficientNet feature size: 1280
Swin Transformer feature size: 768
Concatenated feature size: 2048
Loaded best model from '/content/drive/My Drive/MyModels/best_hybrid_model_head_only.pth'

Calculating test accuracy...
Validation Loss: 0.5215 Acc: 0.9045

üéØ Final Test Accuracy: 0.9045
