## Imports

In [1]:
!pip install pyav

Collecting pyav
  Downloading pyav-14.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Downloading pyav-14.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.8/34.8 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyav
Successfully installed pyav-14.1.0


In [2]:
import itertools
import torch
import os
import random
import numpy as np
import torchvision
from torch.utils.data import DataLoader, IterableDataset, get_worker_info
from torchvision.datasets.folder import make_dataset
from torchvision import transforms as t
import torch.nn as nn
import torchvision
from torchvision.models import ResNet50_Weights
from tqdm import tqdm
import torch.optim as optim
from torchvision.transforms import v2
from torch.jit import script, trace, trace_module
import time
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

## Utils Class

In [3]:
class TrainingUtilities:
    @staticmethod
    def save_model(model, model_descriptor, save_folder, verbose=0):
        torch.save(model.state_dict(), save_folder + f"/{model_descriptor}b1_model.pth")
        if verbose > 0:
            print(f"Saved model to {save_folder}/b1_model.pth")

    @staticmethod
    def save_checkpoint(epoch, model_state_dict, optimizer_state_dict, scheduler_state_dict=None, save_folder='', verbose=0):
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model_state_dict,
            'optimizer_state_dict': optimizer_state_dict,
            'scheduler_state_dict': scheduler_state_dict
        }
        torch.save(checkpoint, save_folder + f'/checkpoint-epoch{epoch}.pth')
        if verbose > 0:
            print(f'Saved checkpoint to {save_folder}/checkpoint-epoch{epoch}.pth')

    @staticmethod
    def load_checkpoint(model, optimizer, checkpoint_path, scheduled, verbose=0):
        checkpoint = torch.load(checkpoint_path)

        if verbose > 0:
            print(f"Loading checkpoint from {checkpoint_path}")

        epoch = checkpoint['epoch']
        model_state_dict = checkpoint['model_state_dict']
        optimizer_state_dict = checkpoint['optimizer_state_dict']
        scheduler_state_dict = checkpoint['scheduler_state_dict']
        model = model.load_state_dict(model_state_dict)
        if scheduled:
            optimizer.load_state_dict(optimizer_state_dict, scheduler_state_dict)
        else:
            optimizer = optimizer.load_state_dict(optimizer_state_dict)
        return epoch, model, optimizer

## Dataset

In [4]:
def get_samples(root, extensions=(".mp4", ".avi")):
    samples = []

    # Define class labels
    class_to_idx = {
        "DFD_original sequences": 0,  # Real videos
        "DFD_manipulated_sequences": 1  # Deepfake videos
    }

    for class_name, label in class_to_idx.items():
        class_dir = os.path.join(root, class_name)
        if class_name == 'DFD_manipulated_sequences':
            class_dir = os.path.join(class_dir, class_name)
        print(class_dir)
        if not os.path.exists(class_dir):
            continue

        # Get all video files in the directory
        for filename in os.listdir(class_dir):
            if filename.endswith(extensions):
                file_path = os.path.join(class_dir, filename)
                samples.append((file_path, label))

    return samples

def set_seed(seed=None, seed_torch=True):
  """
  Function that controls randomness. NumPy and random modules must be imported.

  Args:
    seed : Integer
      A non-negative integer that defines the random state. Default is `None`.
    seed_torch : Boolean
      If `True` sets the random seed for pytorch tensors, so pytorch module
      must be imported. Default is `True`.

  Returns:
    Nothing.
  """
  if seed is None:
    seed = np.random.choice(2 ** 32)
  random.seed(seed)
  np.random.seed(seed)
  if seed_torch:
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
  print(f'Random seed {seed} has been set.')

def get_datasets(root, splits, epoch_size=None, frame_transform=None, video_transform=None, clip_len=16, seed=2024):
    train_split = splits[0]
    val_split = splits[1]
    test_split = splits[2]

    samples = get_samples(root)
    print(samples[0])
    print(samples[-1])
    
    set_seed(seed, seed_torch=True)
    random.shuffle(samples)

    start, end = train_split
    train_samples = samples[start:end]
    start, end = val_split
    val_samples = samples[start:end]
    start, end = test_split
    test_samples = samples[start:end]

    train_dataset = VideosDataset(train_samples, frame_transform=frame_transform, video_transform=video_transform, clip_len=clip_len)
    val_dataset = VideosDataset(val_samples, frame_transform=frame_transform, video_transform=video_transform, clip_len=clip_len)
    test_dataset = VideosDataset(test_samples, frame_transform=frame_transform, video_transform=video_transform, clip_len=clip_len)

    return train_dataset, val_dataset, test_dataset



class VideosDataset(torch.utils.data.IterableDataset):
    def __init__(self, samples, epoch_size=None, frame_transform=None, video_transform=None, clip_len=16):
        super(VideosDataset).__init__()
        self.samples = samples

        # Allow for temporal jittering
        if epoch_size is None:
            epoch_size = len(self.samples)
        self.epoch_size = epoch_size

        self.clip_len = clip_len
        if frame_transform is None:
            self.frame_transform = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True), v2.Resize(255),
                                 v2.CenterCrop(224)])
        else:
            self.frame_transform = frame_transform

        self.video_transform = video_transform

    def __len__(self):
        return self.epoch_size

    def __pad_video(self, video_frames):
        """Prepad video frames to match clip length."""
        n = len(video_frames)
        if n == self.clip_len:
            return video_frames

        # Create zero frames
        pad_tensor = torch.zeros_like(video_frames[0])
        pad_frames = [pad_tensor] * (self.clip_len - n)  # List of zero frames

        return pad_frames + video_frames  # Prepadding at the beginning

    def __iter__(self):
        for i in range(self.epoch_size):
            # Get random sample
            path, target = random.choice(self.samples)
            # Get video object
            vid = torchvision.io.VideoReader(path, "video")
            metadata = vid.get_metadata()
            video_frames = []  # video frame buffer

            for frame in itertools.islice(vid, self.clip_len):
                video_frames.append(self.frame_transform(frame['data']))
            video_frames = self.__pad_video(video_frames)
            # Stack it into a tensor
            video = torch.stack(video_frames, 0)
            if self.video_transform:
                video = self.video_transform(video)
            output = {
                'video': video,
                'target': target
            }
            yield output


## Model

In [5]:
class CNN_LSTM(nn.Module):
    def __init__(self, hidden_dim=512, num_layers=3, num_classes=2):
        super(CNN_LSTM, self).__init__()
        cnn = torchvision.models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
        self.feature_extractor = nn.Sequential(*list(cnn.children())[:-1])
        self.pool = nn.AdaptiveAvgPool2d((1, 1))  # Global Average Pooling to (B, 2048, 1, 1)

        self.lstm = nn.LSTM(input_size=2048, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        batch_size, frames, C, H, W = x.shape  # (Batch, Frames, C, H, W)

        x = x.view(batch_size * frames, C, H, W)  # (Batch × Frames, C, H, W)
        features = self.feature_extractor(x)
        features = self.pool(features).squeeze(-1).squeeze(-1)  # (Batch × Frames, 2048)

        features = features.view(batch_size, frames, -1)  # (Batch, Frames, Feature_Dim)
        lstm_out, _ = self.lstm(features)
        out = self.fc(lstm_out[:, -1, :])
        return out

## Loss

In [6]:
class Loss(nn.Module):
    def __init__(self, class_real=364, class_fake=3068):
        super(Loss, self).__init__()
        num_samples = class_real + class_fake
        self.real_weight = num_samples / (2 * class_real)
        self.fake_weight = num_samples / (2 * class_fake)
        weights = torch.tensor([self.real_weight, self.fake_weight], dtype=torch.float)
        self.loss = nn.CrossEntropyLoss(weight=weights)

    def forward(self, outputs, target):
        return self.loss(outputs, target)

## Trainer

In [7]:
class ModelTrainer:
    def __init__(self, model, optimizer, scheduled, criterion, epochs, dataloaders, device, save_folder,
                 is_continue=False, checkpoint=None):
        self.model = model
        self.optimizer = optimizer
        self.scheduled = scheduled
        self.criterion = criterion
        self.epochs = epochs
        self.dataloaders = dataloaders
        self.DEVICE = device
        self.save_folder = save_folder
        self.is_continue = is_continue
        self.checkpoint = checkpoint
        self.scaler = torch.amp.GradScaler()

    def train_model(self, verbose=0):
        model, optimizer, criterion, epochs, dataloaders = self.model, self.optimizer, self.criterion, self.epochs, self.dataloaders
        device = self.DEVICE
        scaler = self.scaler
        training_epoch = 0
        epoch = 0
        if self.is_continue:

            if verbose > 0:
                print(f"Continuing from checkpoint {self.checkpoint}")

            epoch, model, optimizer = TrainingUtilities.load_checkpoint(model, optimizer, self.checkpoint, self.scheduled, verbose)
        
        train_losses = []
        val_losses = []
        test_losses = []

        val_accuracies = []
        train_accuracies = []
        test_accuracies = []
        
        for training_epoch in range(epoch, epochs):
            print(f"\nTraining epoch {training_epoch+1}")
           

            avg_train_loss = 0
            avg_val_loss = 0
            val_accuracy = 0
            train_accuracy = 0
            train_time = 0
            val_time = 0
            val_f1 = 0
            for phase in ['train', 'val']:
                if phase == 'train':
                    start_time = time.time()
                    # print("Training phase.....")
                    train_loader = dataloaders['train']
                    model.train()
                    train_loss = 0
                    correct_train = 0
                    total_train = 0
                    for batch in train_loader:
                        video = batch['video'].to(device)  # (Batch, Frames, C, H, W)
                        target = batch['target'].to(device)

                        optimizer.zero_grad()
                        with torch.amp.autocast('cuda'):
                            outputs = model(video)
                            loss = criterion(outputs, target)

                        scaler.scale(loss).backward()
                        scaler.step(optimizer)
                        scaler.update()

                        train_loss += loss.item()
                        _, predicted = torch.max(outputs, 1)
                        correct_train += (predicted == target).sum().item()
                        total_train += target.size(0)

                    avg_train_loss = train_loss / len(train_loader)
                    train_accuracy = correct_train / total_train
                    end_time = time.time()
                    train_time = end_time - start_time
                    formatted_time = time.strftime("%Mmins %Ssecs", time.gmtime(train_time))
                    print(f"Training completed in {formatted_time}")
                else:
                    # print("Validation phase.....")
                    val_loader = dataloaders['val']
                    start_time = time.time()
                    avg_val_loss, val_accuracy, val_f1 = self.evaluate(val_loader, training_epoch)
                    end_time = time.time()
                    val_time = end_time - start_time
                    formatted_time = time.strftime("%Mmins %Ssecs", time.gmtime(val_time))
                    print(f"Validation completed in {formatted_time}")
            train_losses.append(avg_train_loss)
            val_losses.append(avg_val_loss)
            train_accuracies.append(train_accuracy)
            val_accuracies.append(val_accuracy)
            print(
                f"Epoch [{training_epoch + 1}/{epochs}], Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.4f}, "
                f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}, Val F1: {val_f1:.4f}"
            )
            start_time = time.time()
            avg_test_loss, test_accuracy = self.test(dataloaders['test'], training_epoch+1)
            end_time = time.time()
            test_time = end_time - start_time
            formatted_time = time.strftime("%Mmins %Ssecs", time.gmtime(test_time))
            print(f"Testing completed in {formatted_time}")
            test_losses.append(avg_test_loss)
            test_accuracies.append(test_accuracy)

        return train_losses, val_losses, test_losses, train_accuracies, val_accuracies, test_accuracies

    def test(self, test_loader, epoch):
        model, criterion, device = self.model, self.criterion, self.DEVICE
        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0
        y_true, y_pred = [], []

        with torch.no_grad():
            with torch.amp.autocast('cuda'):
                for batch in test_loader:
                    video = batch['video'].to(device)
                    target = batch['target'].to(device)

                    outputs = model(video)
                    loss = criterion(outputs, target)
                    test_loss += loss.item()

                    _, predicted = torch.max(outputs, 1)
                    correct_test += (predicted == target).sum().item()
                    total_test += target.size(0)

                    # Store predictions and true labels for F1-score
                    y_true.extend(target.cpu().numpy())
                    y_pred.extend(predicted.cpu().numpy())

        avg_test_loss = test_loss / len(test_loader)
        test_accuracy = correct_test / total_test
        test_f1 = f1_score(y_true, y_pred, average='weighted')

        print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}, Test F1 Score: {test_f1:.4f}")
        TrainingUtilities.save_model(model, f'model_epoch{epoch}-acc{test_accuracy:.2f}', self.save_folder)
        return avg_test_loss, test_accuracy

    def evaluate(self, val_loader, epoch, verbose=0):
        model, criterion, device = self.model, self.criterion, self.DEVICE
        model.eval()
        val_loss = 0
        correct_val = 0
        total_val = 0
        y_true, y_pred = [], []

        with torch.no_grad():
            with torch.amp.autocast('cuda'):
                for batch in val_loader:
                    video = batch['video'].to(device)
                    target = batch['target'].to(device)

                    outputs = model(video)
                    loss = criterion(outputs, target)
                    val_loss += loss.item()

                    _, predicted = torch.max(outputs, 1)
                    correct_val += (predicted == target).sum().item()
                    total_val += target.size(0)

                    # Store predictions and true labels for F1-score
                    y_true.extend(target.cpu().numpy())
                    y_pred.extend(predicted.cpu().numpy())

        val_accuracy = correct_val / total_val
        avg_val_loss = val_loss / len(val_loader)
        val_f1 = f1_score(y_true, y_pred, average='weighted')

        return avg_val_loss, val_accuracy, val_f1

## Code

In [8]:
dataset_root = "/kaggle/input/deep-fake-detection-dfd-entire-original-dataset"
dataset_size = len(get_samples(dataset_root))
dataset_size

/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences
/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_manipulated_sequences/DFD_manipulated_sequences


3431

In [9]:
# Define split sizes
train_size = int(0.7 * dataset_size)  # 70% for training
val_size = int(0.15 * dataset_size)   # 15% for validation
test_size = dataset_size - train_size - val_size  # 15% for testing

print(f'train: {train_size}, test: {val_size}, val: {test_size}')

train: 2401, test: 514, val: 516


In [10]:
# Split dataset
train_split = (0,train_size)
val_split = (train_size, val_size + train_size)
test_split = (val_size + train_size, val_size + train_size + test_size)

splits = [train_split, val_split, test_split]
frames= 10
train_dataset, val_dataset, test_dataset = get_datasets(root=dataset_root,splits=splits, clip_len=frames)
# train_dataset = RandomDataset(root=dataset_root, clip_len=frames, split=train_split)
# val_dataset = RandomDataset(root=dataset_root, clip_len=frames, split=val_split)
# test_dataset = RandomDataset(root=dataset_root, clip_len=frames, split=test_split)

batch_size = 30
print(f'train: {train_dataset.__len__()}, test: {val_dataset.__len__()}, val: {test_dataset.__len__()}')# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size,drop_last=True)

dataloaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}

/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences
/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_manipulated_sequences/DFD_manipulated_sequences
('/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences/26__walking_down_street_outside_angry.mp4', 0)
('/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_manipulated_sequences/DFD_manipulated_sequences/06_02__exit_phone_room__3J3BHSHI.mp4', 1)
Random seed 2024 has been set.
train: 2401, test: 514, val: 516


In [11]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [12]:
# Initialize model, loss, and optimizer
model = CNN_LSTM().to(device)
criterion = Loss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 206MB/s]


In [13]:
save_folder = '/kaggle/working/'
trainer = ModelTrainer(model=model, optimizer=optimizer, criterion=criterion, epochs=10, dataloaders=dataloaders, device=device, save_folder=save_folder, scheduled=False)

In [14]:
train_losses, val_losses, test_losses, train_accuracies, val_accuracies, test_accuracies = trainer.train_model(verbose=1)


Training epoch 1
Training completed in 11mins 35secs
Validation completed in 02mins 11secs
Epoch [1/10], Train Loss: 0.8080, Train Acc: 0.5579, Val Loss: 1.0230, Val Acc: 0.6706, Val F1: 0.7325
Test Loss: 1.0325, Test Accuracy: 0.7000, Test F1 Score: 0.7766
Testing completed in 02mins 13secs

Training epoch 2
Training completed in 11mins 23secs
Validation completed in 02mins 04secs
Epoch [2/10], Train Loss: 0.7628, Train Acc: 0.5696, Val Loss: 0.9622, Val Acc: 0.5196, Val F1: 0.6057
Test Loss: 1.0027, Test Accuracy: 0.5667, Test F1 Score: 0.6671
Testing completed in 02mins 09secs

Training epoch 3
Training completed in 10mins 49secs
Validation completed in 02mins 01secs
Epoch [3/10], Train Loss: 0.7562, Train Acc: 0.6554, Val Loss: 0.7001, Val Acc: 0.8980, Val F1: 0.8498
Test Loss: 0.8320, Test Accuracy: 0.9314, Test F1 Score: 0.8983
Testing completed in 02mins 04secs

Training epoch 4
Training completed in 10mins 59secs
Validation completed in 02mins 05secs
Epoch [4/10], Train Loss: 

In [15]:
class Plotter:
    def __init__(self, save_dir="plots"):
        self.save_dir = save_dir
        os.makedirs(save_dir, exist_ok=True)

    def plot_loss(self, train_loss, val_loss, test_loss=None, filename="loss_plot.png"):
        plt.figure(figsize=(8, 6))
        plt.plot(train_loss, label="Train Loss", marker='o')
        plt.plot(val_loss, label="Validation Loss", marker='o')
        if test_loss is not None:
            plt.plot(test_loss, label="Test Loss", marker='o')
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title("Training, Validation, and Test Loss")
        plt.legend()
        plt.grid()
        save_path = os.path.join(self.save_dir, filename)
        plt.savefig(save_path)
        plt.close()
        print(f"Loss plot saved to {save_path}")

    def plot_accuracy(self, train_acc, val_acc, test_acc=None, filename="accuracy_plot.png"):
        plt.figure(figsize=(8, 6))
        plt.plot(train_acc, label="Train Accuracy", marker='o')
        plt.plot(val_acc, label="Validation Accuracy", marker='o')
        if test_acc is not None:
            plt.plot(test_acc, label="Test Accuracy", marker='o')
        plt.xlabel("Epochs")
        plt.ylabel("Accuracy")
        plt.title("Training, Validation, and Test Accuracy")
        plt.legend()
        plt.grid()
        save_path = os.path.join(self.save_dir, filename)
        plt.savefig(save_path)
        plt.close()
        print(f"Accuracy plot saved to {save_path}")

In [16]:
import matplotlib.pyplot as plt
plot = Plotter(save_dir='/kaggle/working/')
plot.plot_loss(train_loss=train_losses, val_loss=val_losses, test_loss=test_losses)
plot.plot_accuracy(train_acc=train_accuracies, val_acc=val_accuracies, test_acc=test_accuracies)

Loss plot saved to /kaggle/working/loss_plot.png
Accuracy plot saved to /kaggle/working/accuracy_plot.png
