# Baseline Model Training

Set the Colab runtime type to GPU (Runtime → Change runtime type) before running this notebook.

In [None]:
# Mount Google Drive so the notebook can read and write files under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip3 install face_recognition

Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566166 sha256=2ec4a55cdd7aa4dab681252da0c2b99dfef4b219b9156b9967034444579193b2
  Stored in directory: /root/.cache/pip/wheels/8f/47/c8/f44c5aebb7507f7c8a2c0bd23151d732d0f0bd6884ad4ac635
Successfully built face-recognition-models
Installing collected packages: face-recogn

In [None]:
!pip install torch torchvision timm pandas scikit-learn matplotlib seaborn opencv-python

In [None]:
# Important imports
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torch import nn
from sklearn.metrics import f1_score, roc_curve, auc, confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
import cv2
import sys

Defining the model and preparing the datasets

In [None]:
# Model Definition
class Model(nn.Module):
    """ResNeXt-50 frame encoder followed by an LSTM over the frame sequence.

    Args:
        num_classes: Number of output logits.
        latent_dim: Size of the per-frame feature vector fed to the LSTM. It
            has to match the channel count of the CNN's last feature map
            (2048 is the default used here).
        lstm_layers: Number of stacked LSTM layers.
        hidden_dim: Hidden size of the LSTM.
        bidirectional: Whether the LSTM runs in both directions.

    Note:
        Parameter names and shapes are unchanged for the default arguments, so
        existing state dicts still load. The LSTM now consumes the input as
        (batch, seq, feature); checkpoints trained with the previous layout
        were optimised for a different recurrence axis and may need retraining.
    """

    def __init__(self, num_classes, latent_dim=2048, lstm_layers=1, hidden_dim=2048, bidirectional=False):
        super().__init__()
        base_model = models.resnext50_32x4d(weights=None)
        # Drop the last two children of the backbone (its pooling and
        # classification head) so the CNN returns a spatial feature map.
        self.cnn = nn.Sequential(*list(base_model.children())[:-2])
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # batch_first=True: forward() feeds tensors shaped (batch, seq, feature).
        # Without it the LSTM would treat the batch axis as time and mix
        # information between unrelated videos.
        self.lstm = nn.LSTM(
            latent_dim,
            hidden_dim,
            lstm_layers,
            bidirectional=bidirectional,
            batch_first=True,
        )
        self.dp = nn.Dropout(0.4)
        # A bidirectional LSTM concatenates both directions on the feature axis.
        lstm_out_dim = hidden_dim * (2 if bidirectional else 1)
        self.linear = nn.Linear(lstm_out_dim, num_classes)

    def forward(self, x):
        """Classify a batch of frame sequences.

        Args:
            x: Tensor of shape (batch, seq_len, channels, height, width).

        Returns:
            Tuple ``(fmap, logits)`` where ``fmap`` is the CNN feature map of
            every frame (first dimension is batch * seq_len) and ``logits`` has
            shape (batch, num_classes).
        """
        batch_size, seq_length, c, h, w = x.shape
        # Fold the time axis into the batch axis so the CNN sees single frames.
        x = x.reshape(batch_size * seq_length, c, h, w)
        fmap = self.cnn(x)
        x = self.avgpool(fmap)
        # Infer the feature size instead of hard-coding 2048.
        x = x.view(batch_size, seq_length, -1)
        x_lstm, _ = self.lstm(x)
        # Average the LSTM outputs over the time axis.
        x = torch.mean(x_lstm, dim=1)
        # Dropout regularises the features; applying it to the logits would
        # randomly zero class scores and corrupt the training loss.
        return fmap, self.linear(self.dp(x))

# Dataset with padding and skip zero frames
class video_dataset(Dataset):
    """Dataset yielding a fixed-length stack of transformed frames per video.

    Args:
        video_names: Sequence of video file paths.
        labels: DataFrame with a "file" column (video base names) and a
            "label" column.
        sequence_length: Number of frames returned per video. Longer videos are
            truncated to their first frames, shorter ones are padded by
            repeating their last frame.
        transform: Callable applied to every decoded frame. It must return a
            3-D tensor; ``__getitem__`` calls it unconditionally, so it is
            effectively required despite the ``None`` default.

    NOTE(review): OpenCV normally decodes frames in BGR order, while the
    transforms used with this class may assume RGB — confirm whether the
    channel order matters for this project.
    """

    def __init__(self, video_names, labels, sequence_length=20, transform=None):
        self.video_names = video_names
        self.labels = labels
        self.count = sequence_length
        self.transform = transform

    def __len__(self):
        return len(self.video_names)

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for the video at ``idx``.

        A video that yields no frames is skipped in favour of the next one
        (wrapping around), so the returned sample and label then belong to that
        later video.

        Raises:
            IndexError: If ``idx`` is out of range.
            RuntimeError: If no video in the dataset yields a frame.
        """
        total = len(self.video_names)
        # Index directly first so an out-of-range idx still raises IndexError,
        # which plain iteration over the dataset relies on to terminate.
        video_path = self.video_names[idx]
        frames = self._read_frames(video_path)
        attempts = 1
        while not frames:
            # Bounded search instead of unbounded recursion.
            if attempts >= total:
                raise RuntimeError("No readable frames found in any video of the dataset")
            idx = (idx + 1) % total
            video_path = self.video_names[idx]
            frames = self._read_frames(video_path)
            attempts += 1

        file_name = os.path.basename(video_path)
        label = self.labels.loc[self.labels["file"] == file_name, "label"].values[0]

        clip = torch.stack(frames)

        # Pad short videos by repeating the last frame.
        missing = self.count - clip.shape[0]
        if missing > 0:
            padding = clip[-1].unsqueeze(0).repeat(missing, 1, 1, 1)
            clip = torch.cat([clip, padding], dim=0)

        return clip, label

    def _read_frames(self, video_path):
        """Return up to ``self.count`` transformed frames from ``video_path``."""
        frames = []
        for frame in self.frame_extract(video_path):
            frames.append(self.transform(frame))
            if len(frames) == self.count:
                break
        return frames

    @staticmethod
    def frame_extract(path):
        """Yield the frames of the video at ``path`` one by one."""
        vidObj = cv2.VideoCapture(path)
        try:
            while True:
                success, image = vidObj.read()
                if not success:
                    break
                yield image
        finally:
            # Runs on exhaustion and when the consumer stops early and the
            # generator is closed, so the capture handle is always released.
            vidObj.release()

In [None]:
# Helper functions
def train_epoch(epoch, num_epochs, data_loader, model, criterion, optimizer):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        epoch: Index of the current epoch (only used in the progress line).
        num_epochs: Total number of epochs (only used in the progress line).
        data_loader: Iterable of ``(inputs, targets)`` batches.
        model: Module whose forward pass returns ``(feature_map, logits)``.
        criterion: Loss taking ``(logits, targets)``.
        optimizer: Optimizer updating the parameters of ``model``.

    Returns:
        Tuple ``(average_loss, average_accuracy_percent)``, both weighted by
        batch size.
    """
    model.train()
    losses = AverageMeter()
    accuracies = AverageMeter()

    # Resolve the device once, from the model itself, so the batches always
    # land where the weights are (even if the model was left on the CPU).
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for i, (inputs, targets) in enumerate(data_loader):
        inputs, targets = inputs.to(device), targets.long().to(device)
        _, outputs = model(inputs)
        loss = criterion(outputs, targets)
        acc = calculate_accuracy(outputs, targets)
        losses.update(loss.item(), inputs.size(0))
        accuracies.update(acc, inputs.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # "\r" rewrites the same console line for every batch.
        sys.stdout.write(
            f"\r[Epoch {epoch}/{num_epochs}] [Batch {i}/{len(data_loader)}] [Train Loss: {losses.avg:.4f}, Train Acc: {accuracies.avg:.2f}%]"
        )
    print()
    return losses.avg, accuracies.avg

def test(epoch, model, data_loader, criterion):
    """Evaluate ``model`` on ``data_loader`` without computing gradients.

    Args:
        epoch: Unused; kept so existing callers keep working.
        model: Module whose forward pass returns ``(feature_map, logits)``.
        data_loader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss taking ``(logits, targets)``.

    Returns:
        Tuple ``(true, pred, probs_list, average_loss, average_accuracy)``:
        target labels, arg-max predictions and softmax probabilities of class
        index 1 as Python lists, followed by the batch-size-weighted average
        loss and accuracy (in percent).
    """
    model.eval()
    losses = AverageMeter()
    accuracies = AverageMeter()
    pred, true, probs_list = [], [], []

    # Resolve the device once, from the model itself, so the batches always
    # land where the weights are (even if the model was left on the CPU).
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            inputs, targets = inputs.to(device), targets.long().to(device)
            _, outputs = model(inputs)
            # Column 1 of the softmax is the probability of class index 1.
            probs = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)
            _, p = torch.max(outputs, 1)
            true += targets.cpu().numpy().tolist()
            pred += p.cpu().numpy().tolist()
            probs_list.extend(probs.tolist())
            losses.update(loss.item(), inputs.size(0))
            accuracies.update(acc, inputs.size(0))
    return true, pred, probs_list, losses.avg, accuracies.avg


def calculate_accuracy(outputs, targets):
    """Return the percentage of rows whose arg-max over dim 1 equals the target.

    Args:
        outputs: Score tensor with one row per sample.
        targets: Tensor of class indices, one per sample.

    Returns:
        Accuracy in percent as a Python float.
    """
    predicted = outputs.max(dim=1).indices
    hits = (predicted == targets).sum().item()
    batch = targets.size(0)
    return 100 * hits / batch


class AverageMeter():
    """Keeps the most recent value together with a weighted running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        for field in ("val", "avg", "sum", "count"):
            setattr(self, field, 0)

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.sum = self.sum + val * n
        self.count = self.count + n
        self.avg = self.sum / self.count


def save_checkpoint(state, filename):
    """Serialize ``state`` to ``filename`` with ``torch.save``.

    Missing parent directories are created first. A bare file name (no
    directory component) is written to the current working directory.

    Args:
        state: Object to serialize (here a dict of training state).
        filename: Destination path of the checkpoint file.
    """
    parent = os.path.dirname(filename)
    # os.makedirs("") raises FileNotFoundError, so only create a real parent.
    if parent:
        os.makedirs(parent, exist_ok=True)
    torch.save(state, filename)
    print(f"\nCheckpoint saved: {filename}")


def read_list(txt_path):
    """Read a text file with one entry per line.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped so they do not turn into empty entries (which would later be
    treated as video paths).

    Args:
        txt_path: Path of the text file to read.

    Returns:
        List of the non-empty, stripped lines in file order.
    """
    with open(txt_path, 'r') as f:
        stripped = (line.strip() for line in f)
        return [entry for entry in stripped if entry]


def assign_label(path):
    """Return 1 if the lower-cased path contains a fake marker, otherwise 0.

    NOTE(review): the markers are matched against the whole path, so a marker
    appearing in any parent directory name also yields 1 — confirm the paths in
    the split files cannot trigger this by accident.
    """
    lowered = path.lower()
    markers = ("fake", "deepfake", "manipulated")
    return 1 if any(marker in lowered for marker in markers) else 0

Training starts here. You can stop early by interrupting the cell if you notice the validation accuracy decreasing; a checkpoint is saved after every epoch.

In [None]:
# Parameters
SEQ_LEN = 20
BATCH_SIZE = 4
NUM_EPOCHS = 50 # update to the max epoch you want
LR = 1e-5
IM_SIZE = 112
SEED = 42

# Seed the RNGs so weight initialisation and shuffling are repeatable.
torch.manual_seed(SEED)
np.random.seed(SEED)

# Root folder of the project on Google Drive; every path below derives from it.
PROJECT_DIR = "/content/drive/MyDrive/deepfake_detection_project"

# Dataset splits
train_files = read_list(f"{PROJECT_DIR}/Dataset_split/baseline_splits/train.txt")
val_files = read_list(f"{PROJECT_DIR}/Dataset_split/baseline_splits/val.txt")


train_labels = pd.DataFrame({"file":[os.path.basename(p) for p in train_files],
                             "label":[assign_label(p) for p in train_files]})
val_labels = pd.DataFrame({"file":[os.path.basename(p) for p in val_files],
                           "label":[assign_label(p) for p in val_files]})

# Transforms
train_transform = transforms.Compose([transforms.ToPILImage(),
                                      transforms.Resize((IM_SIZE, IM_SIZE)),
                                      transforms.ToTensor()])
val_transform = train_transform

# Dataset and loaders
train_dataset = video_dataset(train_files, train_labels, SEQ_LEN, transform=train_transform)
val_dataset   = video_dataset(val_files, val_labels, SEQ_LEN, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# Model, optimizer, criterion
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model(2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss().to(device)

# First epoch to run; the resume block below overrides it when uncommented.
start_epoch = 0

# Uncomment the following code if we want to load from a checkpoint

# checkpoint_path = f"{PROJECT_DIR}/Baseline_model/models/model_epoch_39.pt"
# checkpoint = torch.load(checkpoint_path, map_location=device)
# model.load_state_dict(checkpoint["model_state"])
# optimizer.load_state_dict(checkpoint["optimizer_state"])
# start_epoch = checkpoint["epoch"] + 1  # continue from next epoch

# print(f"Resuming training from epoch {start_epoch}")


# Training loop
train_loss_avg, train_acc_list = [], []
val_loss_avg, val_acc_list = [], []

# Change this to the path you want to save your validation accuracy
val_acc_file = f"{PROJECT_DIR}/Baseline_model/valid_accs/val_accuracy.txt"
# Create the folder up front so the first epoch is not lost to a missing directory.
os.makedirs(os.path.dirname(val_acc_file), exist_ok=True)

# Change this to the folder you want to save the model checkpoint at each epoch
checkpoint_dir = f"{PROJECT_DIR}/Baseline_model/new_models"

# Epochs are numbered start_epoch..NUM_EPOCHS inclusive.
for epoch in range(start_epoch, NUM_EPOCHS + 1):
    train_loss, train_acc = train_epoch(epoch, NUM_EPOCHS, train_loader, model, criterion, optimizer)
    train_loss_avg.append(train_loss)
    train_acc_list.append(train_acc)

    y_true, y_pred, y_probs, val_loss, val_acc = test(epoch, model, val_loader, criterion)
    val_loss_avg.append(val_loss)
    val_acc_list.append(val_acc)

    # Print and save validation accuracy
    print(f"Epoch {epoch} Validation Accuracy: {val_acc:.2f}%")
    with open(val_acc_file, "a") as f:
        f.write(f"Epoch {epoch}: {val_acc:.2f}%\n")

    # Save model checkpoint
    save_checkpoint({
        "epoch": epoch,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_loss": val_loss,
        "val_acc": val_acc
    }, filename=f"{checkpoint_dir}/model_epoch_{epoch}.pt")


[Epoch 0/50] [Batch 244/245] [Train Loss: 0.7116, Train Acc: 52.24%]
Epoch 0 Validation Accuracy: 53.93%

Checkpoint saved: /content/drive/MyDrive/deepfake_detection_project/Baseline_model/new_models/model_epoch_0.pt
[Epoch 1/50] [Batch 244/245] [Train Loss: 0.6829, Train Acc: 57.45%]
Epoch 1 Validation Accuracy: 61.26%

Checkpoint saved: /content/drive/MyDrive/deepfake_detection_project/Baseline_model/new_models/model_epoch_1.pt
[Epoch 2/50] [Batch 244/245] [Train Loss: 0.7042, Train Acc: 50.61%]
Epoch 2 Validation Accuracy: 63.35%

Checkpoint saved: /content/drive/MyDrive/deepfake_detection_project/Baseline_model/new_models/model_epoch_2.pt
[Epoch 3/50] [Batch 244/245] [Train Loss: 0.6870, Train Acc: 54.39%]
Epoch 3 Validation Accuracy: 66.49%

Checkpoint saved: /content/drive/MyDrive/deepfake_detection_project/Baseline_model/new_models/model_epoch_3.pt
[Epoch 4/50] [Batch 244/245] [Train Loss: 0.6861, Train Acc: 55.71%]
Epoch 4 Validation Accuracy: 65.97%

Checkpoint saved: /content

KeyboardInterrupt: 