In [2]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import torch
from transformers import ViTForImageClassification, ViTFeatureExtractor, AdamW
from torch.optim import Adam
from tqdm.auto import tqdm
from PIL import Image
from torchvision import transforms
import os
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, Dataset
from sklearn.model_selection import KFold

In [3]:
# Load the pretrained ViT model and processor
model_name = "google/vit-base-patch16-224-in21k"  # Pretrained ViT model
model = ViTForImageClassification.from_pretrained(model_name, num_labels=2)  # 2 classes (original, fake)

# Processor: This will handle input transformation and resizing for ViT
processor = ViTFeatureExtractor.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



In [4]:

# Custom Dataset Class
class DeepfakeDataset(Dataset):
    def __init__(self, image_dir, label, processor):
        self.image_dir = image_dir
        self.label = label
        self.processor = processor
        self.image_files = [
            f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png', '.jpeg'))
        ]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        filepath = os.path.join(self.image_dir, self.image_files[idx])
        image = Image.open(filepath).convert("RGB")

        # Use the processor to handle the transformation (resize, normalization, etc.)
        encoding = self.processor(images=image, return_tensors="pt")
        label = torch.tensor(self.label)

        # Return the processed image and label
        return encoding["pixel_values"].squeeze(0), label

# Combine datasets into a single Dataset
class CombinedDataset(Dataset):
    def __init__(self, datasets):
        self.datasets = datasets

    def __len__(self):
        return sum(len(dataset) for dataset in self.datasets)

    def __getitem__(self, idx):
        for dataset in self.datasets:
            if idx < len(dataset):
                return dataset[idx]
            idx -= len(dataset)


train_original_path = "/content/drive/MyDrive/DEEPFAKE AUDIO/k47yd3m28w-4/Training_original/Training_original/Training_original"
train_fake_path = "/content/drive/MyDrive/DEEPFAKE AUDIO/k47yd3m28w-4/Training_fake/Training_fake/Training_fake"
val_original_path = "/content/drive/MyDrive/DEEPFAKE AUDIO/k47yd3m28w-4/Validation_original/Validation_original/Validation_original"
val_fake_path = "/content/drive/MyDrive/DEEPFAKE AUDIO/k47yd3m28w-4/Validation_fake/Validation_fake/Validation_fake"

# Create individual datasets
train_original_dataset = DeepfakeDataset(train_original_path, label=0, processor=processor)
train_fake_dataset = DeepfakeDataset(train_fake_path, label=1, processor=processor)
val_original_dataset = DeepfakeDataset(val_original_path, label=0, processor=processor)
val_fake_dataset = DeepfakeDataset(val_fake_path, label=1, processor=processor)

# Combine datasets
train_dataset = CombinedDataset([train_original_dataset, train_fake_dataset])
val_dataset = CombinedDataset([val_original_dataset, val_fake_dataset])



In [7]:
criterion=nn.CrossEntropyLoss()
optimizer=optim.AdamW(model.parameters(), lr=5e-5)

def train_and_evaluate(train_dataset, k=5):
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model.to(device)

    for fold, (train_indices, val_indices) in enumerate(kf.split(range(len(train_dataset)))):
        print(f"Fold {fold + 1}/{k}")

        # Split dataset
        train_subset = Subset(train_dataset, train_indices)
        val_subset = Subset(train_dataset, val_indices)

        train_loader = DataLoader(train_subset, batch_size=16, shuffle=True, num_workers=2)
        val_loader = DataLoader(val_subset, batch_size=16, shuffle=False, num_workers=2)

        for epoch in range(5):
            model.train()
            train_loss = 0.0

            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)

                # Forward pass
                outputs = model(pixel_values=images).logits
                loss = criterion(outputs, labels)

                # Backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_loss += loss.item()

            print(f"Epoch {epoch + 1}, Training Loss: {train_loss / len(train_loader)}")

            # Validation
            model.eval()
            val_loss = 0.0
            correct = 0
            total = 0

            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(pixel_values=images).logits
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()

                    # Accuracy
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            val_accuracy = 100 * correct / total
            print(f"Validation Loss: {val_loss / len(val_loader)}, Accuracy: {val_accuracy}%")


In [8]:
train_and_evaluate(train_dataset, k=5)


Fold 1/5
Epoch 1, Training Loss: 0.4418938216811793
Validation Loss: 0.2664209430894026, Accuracy: 90.82125603864735%
Epoch 2, Training Loss: 0.20349348855659294
Validation Loss: 0.2478113189124717, Accuracy: 90.45893719806763%
Epoch 3, Training Loss: 0.1756137048708644
Validation Loss: 0.24777071691977864, Accuracy: 91.66666666666667%
Epoch 4, Training Loss: 0.14086205009281058
Validation Loss: 0.21129064137438455, Accuracy: 93.71980676328502%
Epoch 5, Training Loss: 0.1351396611077342
Validation Loss: 0.17943106267529613, Accuracy: 94.56521739130434%
Fold 2/5
Epoch 1, Training Loss: 0.14920686289290155
Validation Loss: 0.15475243600443578, Accuracy: 95.04830917874396%
Epoch 2, Training Loss: 0.13775476096152972
Validation Loss: 0.11199492806246361, Accuracy: 96.25603864734299%
Epoch 3, Training Loss: 0.12966195399904454
Validation Loss: 0.11822826662459053, Accuracy: 96.13526570048309%
Epoch 4, Training Loss: 0.10557525206114286
Validation Loss: 0.10263611536580496, Accuracy: 96.7391

In [9]:
save_path = "/content/drive/My Drive/Models/cross_validation_vit"
import os

# Create the directory if it doesn't exist
os.makedirs(save_path, exist_ok=True)

# Save the model and processor
model.save_pretrained(save_path)
processor.save_pretrained(save_path)


['/content/drive/My Drive/Models/cross_validation_vit/preprocessor_config.json']

In [3]:
from transformers import ViTForImageClassification, ViTImageProcessor, AutoProcessor
from PIL import Image
import os
import torch

# Paths to external test datasets
fake_dir = "/content/drive/MyDrive/DEEPFAKE AUDIO/k47yd3m28w-4/External_test1/External_test1/External_test1/FAKE"
original_dir = "/content/drive/MyDrive/DEEPFAKE AUDIO/k47yd3m28w-4/External_test1/External_test1/External_test1/ORIGINAL"


load_path = "/content/drive/My Drive/Models/cross_validation_vit"

# Load the model and processor
model = ViTForImageClassification.from_pretrained(load_path)
processor = AutoProcessor.from_pretrained(load_path)

# Define a function to preprocess images and make predictions
def predict_images(image_dir, label, processor, model):
    predictions = []
    true_labels = []
    model.eval()  # Set model to evaluation mode

    for filename in os.listdir(image_dir):
        if filename.endswith(('.jpg', '.png', '.jpeg')):  # Check for image files
            filepath = os.path.join(image_dir, filename)

            # Load and preprocess the image
            image = Image.open(filepath).convert("RGB")
            inputs = processor(images=image, return_tensors="pt")

            # Run inference
            with torch.no_grad():
                outputs = model(**inputs)
                predicted_label = torch.argmax(outputs.logits, dim=-1).item()

            # Store the prediction and true label
            predictions.append(predicted_label)
            true_labels.append(label)

    return predictions, true_labels

# Predict on fake images
fake_predictions, fake_labels = predict_images(fake_dir, label=1, processor=processor, model=model)

# Predict on original images
original_predictions, original_labels = predict_images(original_dir, label=0, processor=processor, model=model)

# Combine results
all_predictions = fake_predictions + original_predictions
all_true_labels = fake_labels + original_labels

# Calculate accuracy
correct = sum([1 for pred, true in zip(all_predictions, all_true_labels) if pred == true])
total = len(all_true_labels)
accuracy = correct / total

print(f"Accuracy on the external test dataset 1: {accuracy * 100:.2f}%")




Accuracy on the external test dataset 1: 99.34%


In [4]:

fake_dir2="/content/drive/MyDrive/DEEPFAKE AUDIO/k47yd3m28w-4/External_test2/External_test2/External_test2/Fake"
original_dir2="/content/drive/MyDrive/DEEPFAKE AUDIO/k47yd3m28w-4/External_test2/External_test2/External_test2/Original"


fake_predictions2, fake_labels2 = predict_images(fake_dir2, label=1, processor=processor, model=model)
original_predictions2, original_labels2 = predict_images(original_dir2, label=0, processor=processor, model=model)

all_predictions2 = fake_predictions2 + original_predictions2
all_true_labels2 = fake_labels2 + original_labels2

correct2=sum([1 for pred, true in zip(all_predictions2, all_true_labels2) if pred == true])
total2 = len(all_true_labels2)
accuracy2 = correct2 / total2

print(f"Accuracy on the external test dataset 2: {accuracy2 * 100:.2f}%")

Accuracy on the external test dataset 2: 100.00%
