# Model Training with Heart Rate extracted from rPPG along with other spatial features

In [1]:
print("Hello")

Hello


In [3]:
# imports

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
import os
from glob import glob
import albumentations as A
from albumentations.pytorch import ToTensorV2
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import train_test_split
import json
import pandas as pd

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Running on", device)

Running on cuda


In [5]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 2050'

In [6]:
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())

2.7.1+cu118
11.8
True


In [7]:
# Real and Fake Video list json path 
 
fake_path = "D:/Desktop/FinalYearProject/DataSets/celeb_df_face_cropped/valid_fake_videos.json"
real_path = "D:/Desktop/FinalYearProject/DataSets/celeb_df_face_cropped/valid_real_videos.json"

In [8]:
# Load lists from JSON
valid_real_videos = []
valid_fake_videos = []

with open(real_path, 'r') as f:
    valid_real_videos = json.load(f)

with open(fake_path, 'r') as f:
    valid_fake_videos = json.load(f)

# Paths to real and fake video folders on Google Drive
celeb_df_real_path = 'D:/Desktop/FinalYearProject/DataSets/celeb_df_face_cropped/real_face_only224/real_face_only224'
celeb_df_fake_path = 'D:/Desktop/FinalYearProject/DataSets/celeb_df_face_cropped/fake_face_only224/fake_face_only224'

# Reconstruct full paths
valid_real_videos_path = [os.path.normpath(os.path.join(celeb_df_real_path, name)) for name in valid_real_videos]
valid_fake_videos_path = [os.path.normpath(os.path.join(celeb_df_fake_path, name)) for name in valid_fake_videos]

print(f"Total real videos for training: {len(valid_real_videos_path)}")
print(f"Total fake videos for training: {len(valid_fake_videos_path)}")

Total real videos for training: 585
Total fake videos for training: 5634


In [9]:
print(valid_real_videos_path[0])
print(valid_fake_videos_path[0])

D:\Desktop\FinalYearProject\DataSets\celeb_df_face_cropped\real_face_only224\real_face_only224\id0_0000.mp4
D:\Desktop\FinalYearProject\DataSets\celeb_df_face_cropped\fake_face_only224\fake_face_only224\id0_id16_0000.mp4


In [10]:
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)

In [11]:
# hr values path

real_hr_csv = "D:/Desktop/FinalYearProject/DataSets/celeb_df_hr/real_heart_rate.csv"
fake_hr_csv = "D:/Desktop/FinalYearProject/DataSets/celeb_df_hr/fake_heart_rate.csv"

In [12]:
# hr mapping function from csv

def load_hr_from_csv(real_hr_csv, fake_hr_csv):
    hr_map = {}
    for path in ([real_hr_csv, fake_hr_csv]):
        df = pd.read_csv(path)
        df.columns = [c.strip().lower() for c in df.columns]
        for _, row in df.iterrows():
            video = str(row["video"]).strip()
            hr_map[video] = {
                'hr_time' : float(row.get("hr_time", 0)),
                'hr_freq' : float(row.get("hr_freq", 0))
            }
    
    return hr_map

In [13]:
hr_map = load_hr_from_csv(real_hr_csv, fake_hr_csv)
len(hr_map)
# hr_map

6219

In [14]:
# Dataset With HR features


class DeepfakeDataset(Dataset):

    def __init__(self, video_paths, labels, hr_map, transform=None, num_frames=16):
        self.video_paths = video_paths
        self.labels = labels
        self.hr_map = hr_map
        self.transform = transform
        self.num_frames = num_frames

    def read_frame(self, video_path):
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # if unreadable, return zeros
        if total_frames is None or total_frames <= 0:
            cap.release()
            return torch.zeros((self.num_frames, 3, 224, 224), dtype=torch.float32)

        frame_indices = np.linspace(0, total_frames - 1, self.num_frames).astype(int)
        frames = []

        for index in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, index)
            ret, frame = cap.read()
            if not ret:
                continue
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if self.transform:
                frame = self.transform(image=frame)["image"]
            frames.append(frame)

        cap.release()

        if len(frames) < self.num_frames:
            pad = self.num_frames - len(frames)
            filler = torch.zeros(3, 224, 224, dtype=torch.float32)
            frames.extend([filler] * pad)

        return torch.stack(frames)

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, index):
        try:
            video_path = self.video_paths[index]
            video_tensor = self.read_frame(video_path)

            video_name = os.path.basename(video_path)
            hr = self.hr_map.get(video_name, {"hr_time": 0.0, "hr_freq": 0.0})

            hr_time  = float(hr.get("hr_time", 0.0))
            hr_freq  = float(hr.get("hr_freq", 0.0))

            # replace NaN/inf with 60
            if not np.isfinite(hr_time): hr_time = 60.0
            if not np.isfinite(hr_freq): hr_freq = 60.0

            # simple scaling to ~[0,1]; tweak if you know your ranges
            hr_time /= 200.0
            hr_freq /= 200.0

            hr_tensor = torch.tensor([hr_time, hr_freq], dtype=torch.float32)

            label = torch.tensor(self.labels[index], dtype=torch.float32)

            return video_tensor, hr_tensor, label
        except Exception as e:
            print(f"Failed loading video: {self.video_paths[index]}, Error: {e}")
            return self.__getitem__((index + 1) % len(self))

In [15]:
# Transforms

# transform = A.Compose([
#     A.Resize(224, 224),
#     A.Normalize(),
#     ToTensorV2(),
# ])

transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

In [24]:
# Dataset Loading

# Combine video paths and labels
video_paths = valid_real_videos_path + valid_fake_videos_path
labels = [0] * len(valid_real_videos_path) + [1] * len(valid_fake_videos_path)

# train_test_split
train_path, test_path, train_labels, test_labels = train_test_split(
    video_paths, labels, test_size=0.2, stratify=labels, random_state=10
)

# Create dataset and dataloaders
train_dataset = DeepfakeDataset(
    train_path, train_labels, hr_map=hr_map, transform=transform, num_frames=16
)
test_dataset = DeepfakeDataset(
    test_path, test_labels, hr_map=hr_map, transform=transform, num_frames=16
)

train_dataloader = DataLoader(
    train_dataset, batch_size=8, shuffle=True, num_workers=0, pin_memory=True
)
test_dataloader = DataLoader(
    test_dataset, batch_size=8, shuffle=False, num_workers=0, pin_memory=True
)

In [25]:
train_dataloader.__len__()

622

In [17]:
# train_dataset.__getitem__(10)

In [26]:
# Deepfake Model with HR 

class TemporalAttention(nn.Module):
    def __init__(self, feature_dim):
        super().__init__()
        self.attention = nn.Linear(feature_dim, 1)

    def forward(self, x):
        # x: (batch, time, features)
        weights = F.softmax(self.attention(x), dim=1)
        return torch.sum(weights * x, dim=1)
    

class DeepfakeDetectorHR(nn.Module):
    def __init__(self):
        super().__init__()
        self.feature_extractor = EfficientNet.from_pretrained('efficientnet-b0')
        self.feature_extractor._fc = nn.Identity()

        self.lstm = nn.LSTM(input_size=1280, hidden_size=256, num_layers=1, batch_first=True, bidirectional=True)
        self.attention = TemporalAttention(512) 

        # HR branch
        self.hr_fc = nn.Sequential(
            nn.Linear(2,32),
            nn.ReLU(),
            nn.Dropout(0.2)
        )   

        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(512 + 32,  128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128,1)
        )

    def forward(self, x, hr):
        B, T, C, H, W = x.shape
        x = x.view(B*T, C, H, W)

        with torch.no_grad():
            feats = self.feature_extractor(x)
        feats = feats.view(B, T, -1)

        lstm_out, _ = self.lstm(feats)
        attn_out = self.attention(lstm_out) # (B, 512)

        hr_feat = self.hr_fc(hr)    # (B, 32)
        fused = torch.cat([attn_out, hr_feat], dim=1)

        return self.classifier(fused).squeeze(1)
    


In [27]:
# Train and Validate

def train_one_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    correct = 0
    first_batch = True
    
    loop = tqdm(dataloader, desc="Training ", leave=True)
    for inputs, hr, labels in loop:
        inputs, hr, labels = inputs.to(device), hr.to(device), labels.to(device)

        optimizer.zero_grad(set_to_none=True)
        outputs = model(inputs, hr)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        preds = (torch.sigmoid(outputs) > 0.5).float()
        correct += (preds == labels).sum().item()
        running_loss += loss.item() * inputs.size(0)

        if first_batch:
            with torch.no_grad():
                print(f"[sanity] batch loss={loss.item():.4f}, "
                      f"inputs={inputs.dtype}/{inputs.min().item():.3f}..{inputs.max().item():.3f}, "
                      f"hr={hr[0].tolist()}")
            first_batch = False

    return running_loss / len(dataloader.dataset),  correct / len(dataloader.dataset)

def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0

    with torch.no_grad():
        loop = tqdm(dataloader, desc="Validation", leave=True)
        for inputs, hr, labels in loop:
            inputs, hr, labels = inputs.to(device), hr.to(device), labels.to(device)

            outputs = model(inputs, hr)
            loss = criterion(outputs, labels)

            preds = (torch.sigmoid(outputs) > 0.5).float()
            correct += (preds == labels).sum().item()
            running_loss += loss.item() * inputs.size(0)

        return running_loss / len(dataloader.dataset), correct / len(dataloader.dataset)

In [28]:
# START TRAINING
model = DeepfakeDetectorHR().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCEWithLogitsLoss()

Loaded pretrained weights for efficientnet-b0


In [20]:
# model

In [29]:
# Prepare save path
model_dir = 'D:/Desktop/FinalYearProject/models/rPPG_model'
os.makedirs(model_dir, exist_ok=True)
save_path = os.path.join(model_dir, 'best_model_celeb_df.pth')


In [30]:
# Training loop
EPOCHS = 5
best_val_acc = 0.0

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")

    train_loss, train_acc = train_one_epoch(model, train_dataloader, optimizer, criterion, device)
    val_loss, val_acc     = validate(model, test_dataloader, criterion, device)

    print(f"Train Loss {train_loss:.4f} | Train Acc {train_acc:.4f} | "
          f"Val Loss {val_loss:.4f} | Val Acc {val_acc:.4f}")

    # Save best model checkpoint
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_accuracy': best_val_acc
        }, save_path)
        print(f"💾 Best model saved to {save_path} with Val Acc: {best_val_acc:.4f}")



Epoch 1/5


Training :   0%|          | 1/622 [00:01<14:38,  1.42s/it]

[sanity] batch loss=0.6881, inputs=torch.float32/-2.118..2.249, hr=[0.6207000017166138, 0.47679999470710754]


Training : 100%|██████████| 622/622 [09:13<00:00,  1.12it/s]
Validation: 100%|██████████| 156/156 [02:11<00:00,  1.19it/s]


Train Loss 0.2975 | Train Acc 0.9079 | Val Loss 0.2295 | Val Acc 0.9228
💾 Best model saved to D:/Desktop/FinalYearProject/models/rPPG_model\best_model_celeb_df.pth with Val Acc: 0.9228

Epoch 2/5


Training :   0%|          | 1/622 [00:00<09:48,  1.06it/s]

[sanity] batch loss=0.0851, inputs=torch.float32/-2.118..2.518, hr=[0.6000000238418579, 0.35760000348091125]


Training : 100%|██████████| 622/622 [08:57<00:00,  1.16it/s]
Validation: 100%|██████████| 156/156 [02:09<00:00,  1.21it/s]


Train Loss 0.2348 | Train Acc 0.9242 | Val Loss 0.2210 | Val Acc 0.9349
💾 Best model saved to D:/Desktop/FinalYearProject/models/rPPG_model\best_model_celeb_df.pth with Val Acc: 0.9349

Epoch 3/5


Training :   0%|          | 1/622 [00:00<08:11,  1.26it/s]

[sanity] batch loss=0.0615, inputs=torch.float32/-2.118..2.431, hr=[0.692300021648407, 0.5664499998092651]


Training : 100%|██████████| 622/622 [08:54<00:00,  1.16it/s]
Validation: 100%|██████████| 156/156 [02:10<00:00,  1.19it/s]


Train Loss 0.2120 | Train Acc 0.9290 | Val Loss 0.2117 | Val Acc 0.9317

Epoch 4/5


Training :   0%|          | 1/622 [00:00<09:04,  1.14it/s]

[sanity] batch loss=0.1213, inputs=torch.float32/-2.118..2.198, hr=[0.6428499817848206, 0.2980000078678131]


Training : 100%|██████████| 622/622 [08:06<00:00,  1.28it/s]
Validation: 100%|██████████| 156/156 [01:47<00:00,  1.44it/s]


Train Loss 0.1973 | Train Acc 0.9325 | Val Loss 0.1806 | Val Acc 0.9349

Epoch 5/5


Training :   0%|          | 1/622 [00:00<09:09,  1.13it/s]

[sanity] batch loss=0.0333, inputs=torch.float32/-2.118..2.379, hr=[0.75, 0.2980000078678131]


Training : 100%|██████████| 622/622 [07:40<00:00,  1.35it/s]
Validation: 100%|██████████| 156/156 [01:46<00:00,  1.47it/s]

Train Loss 0.1920 | Train Acc 0.9357 | Val Loss 0.2092 | Val Acc 0.9357
💾 Best model saved to D:/Desktop/FinalYearProject/models/rPPG_model\best_model_celeb_df.pth with Val Acc: 0.9357



