In [1]:
# @title 1. Setup & Imports
# Install dependencies once (outside the notebook):
#   pip install -r requirements.txt

import os
import warnings

import cv2
import librosa
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, classification_report
from torch.utils.data import Dataset, DataLoader
from torch_geometric.nn import GCNConv
from tqdm import tqdm

# Hardware Configuration
# Use the first CUDA device when one is visible, otherwise run on CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Hardware Detected: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

# CONFIGURATION
# -----------------------------
TRAIN_ROOT = "DOLOS_Train"
TEST_ROOT = "RLT_Test"
FRAME_LIMIT = 60        # Number of frames per clip (Fixed size)
BATCH_SIZE = 16         # Batch size (tune to your GPU/CPU)
LEARNING_RATE = 0.001   # Adam Optimizer
EPOCHS = 30             # Training Epochs
HIDDEN_DIM = 64         # Model Hidden Dimension
NUM_CONFOUNDERS = 32    # Size of Causal Dictionary
SEED = 42               # Global RNG seed (weight init, shuffling)
# -----------------------------

# Reproducibility: seed every RNG the notebook draws from so that
# "Restart Kernel -> Run All" gives the same initial weights and batch order.
torch.manual_seed(SEED)
np.random.seed(SEED)

  from .autonotebook import tqdm as notebook_tqdm


Hardware Detected: CPU


In [2]:
# @title 2. Graph Definition (68-Point Skeleton)
# Mapping MediaPipe's 468 points to the standard 68-point Dlib format
# MediaPipe FaceMesh index for each of the 68 dlib-style landmarks, in dlib order.
# The region boundaries below must stay in sync with the chains used by
# get_anatomical_edges() (0-16, 17-21, 22-26, 27-30, 31-35, 36-41, 42-47, 48-59, 60-67).
# NOTE(review): MediaPipe publishes no official dlib mapping; this is a commonly
# used approximate correspondence -- verify visually on a sample frame.
MP_TO_DLIB_68 = [
    # Jaw (0-16)
    162, 234, 93, 58, 172, 136, 149, 148, 152, 377, 378, 365, 397, 288, 323, 454, 389,
    # Eyebrows (17-26)
    71, 63, 105, 66, 107, 336, 296, 334, 293, 300,
    # Nose bridge (27-30) + nose base (31-35)
    168, 197, 5, 4, 75, 97, 2, 326, 305,
    # Eyes (36-41, 42-47)
    33, 160, 158, 133, 153, 144, 362, 385, 387, 263, 373, 380,
    # Outer mouth contour (48-59)
    61, 39, 37, 0, 267, 269, 291, 405, 314, 17, 84, 181,
    # Inner mouth contour (60-67)
    78, 82, 13, 312, 308, 317, 14, 87,
]
# Guard against the table silently drifting away from 68 unique entries:
# downstream code hard-codes 68 nodes and slices by dlib index ranges.
assert len(MP_TO_DLIB_68) == 68, f"expected 68 landmarks, got {len(MP_TO_DLIB_68)}"
assert len(set(MP_TO_DLIB_68)) == 68, "duplicate MediaPipe indices in MP_TO_DLIB_68"
SELECTED_LANDMARKS = MP_TO_DLIB_68[:68]

def get_anatomical_edges():
    """Build the bidirectional edge list of the 68-node facial graph.

    Within each region (jaw, brows, nose, eyes, mouth) consecutive landmark
    indices are linked as an open chain, and every link is emitted in both
    directions so the graph is undirected.

    Returns:
        A long tensor of shape (2, num_edges) placed on the module-level `device`.
    """
    # Half-open [start, stop) index span of every region chain.
    region_bounds = [
        (0, 17), (17, 22), (22, 27), (27, 31),
        (31, 36), (36, 42), (42, 48), (48, 60), (60, 68),
    ]
    pairs = []
    for start, stop in region_bounds:
        for src in range(start, stop - 1):
            dst = src + 1
            pairs += [[src, dst], [dst, src]]  # forward + reverse direction
    edge_tensor = torch.tensor(pairs, dtype=torch.long)
    return edge_tensor.t().contiguous().to(device)

# Single-graph topology shared by every frame; built once and reused by the model calls below.
STATIC_EDGE_INDEX = get_anatomical_edges()
print("Graph Topology Built: 68 Nodes with Anatomical Connections.")

Graph Topology Built: 68 Nodes with Anatomical Connections.


In [5]:
# @title 3. Multimodal Preprocessing (Face + Audio)
# This notebook expects MediaPipe FaceMesh via `mp.solutions`.
# If you're on an unsupported Python version (e.g., 3.14), the `mediapipe` wheel may not include `solutions`.
# In that case we fall back to audio-only features (face landmarks are zeroed) so the rest of the pipeline can run.

# Probe once for the FaceMesh API; DeceptionDataset.process_video branches on the flag.
try:
    mp_face_mesh = mp.solutions.face_mesh
except AttributeError:
    # `mp.solutions` is missing in this environment: degrade to audio-only features.
    mp_face_mesh = None
    ENABLE_FACE_LANDMARKS = False
    print(
        "WARNING: `mediapipe` does not expose `mp.solutions` in this environment. "
        "Falling back to audio-only features (face landmarks = 0). "
        "For full face landmarks, use Python 3.10-3.12 and reinstall mediapipe."
    )
else:
    ENABLE_FACE_LANDMARKS = True


class DeceptionDataset(Dataset):
    """Video-clip dataset yielding fused face-landmark + MFCC node features.

    Expects ``root_dir/truth/`` and ``root_dir/lie/`` sub-folders containing
    video files (.mp4 / .avi / .mov). Each item is a pair ``(x, y)`` where
    ``x`` is a float32 tensor of shape ``(frame_limit, 68, 16)`` -- per node,
    3 landmark coordinates followed by 13 MFCC coefficients -- and ``y`` is the
    class label (0 = truth, 1 = lie).

    Videos are decoded on every ``__getitem__`` call; nothing is cached.
    """

    VIDEO_EXTENSIONS = (".mp4", ".avi", ".mov")

    def __init__(self, root_dir: str, frame_limit: int = 60):
        """Index all video files under ``root_dir``.

        Args:
            root_dir: Folder containing the ``truth`` and/or ``lie`` sub-folders.
            frame_limit: Fixed number of frames per returned clip.

        Raises:
            FileNotFoundError: If ``root_dir`` does not exist.
        """
        self.samples: list[str] = []
        self.labels: list[int] = []
        self.frame_limit = frame_limit

        classes = {"truth": 0, "lie": 1}

        if not os.path.exists(root_dir):
            raise FileNotFoundError(f"Directory not found: {root_dir}")

        print(f"Scanning {root_dir}...")
        for class_name, label in classes.items():
            class_path = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_path):
                continue

            # os.listdir order is arbitrary; sort so sample indices (and thus
            # the unshuffled test order) are reproducible across runs/machines.
            for f in tqdm(sorted(os.listdir(class_path)), desc=f"Loading {class_name}"):
                if f.lower().endswith(self.VIDEO_EXTENSIONS):
                    self.samples.append(os.path.join(class_path, f))
                    self.labels.append(label)

    def _extract_mfcc(self, video_path: str) -> np.ndarray:
        """Return a non-empty ``(time, 13)`` float32 MFCC matrix for the first 3 s of audio.

        Best-effort: if the audio cannot be read, or decoding yields zero MFCC
        frames, a ``(100, 13)`` all-zero matrix is returned instead so callers
        can always index at least one row.
        """
        fallback = np.zeros((100, 13), dtype=np.float32)
        try:
            y, sr = librosa.load(video_path, sr=16000, duration=3.0)
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T.astype(np.float32)  # (Time, 13)
        except Exception as exc:
            # Keep the pipeline running, but do not hide the failure.
            warnings.warn(f"Audio extraction failed for {video_path} ({exc!r}); using zero MFCCs.")
            return fallback
        if mfcc.shape[0] == 0:
            # An empty matrix would make the row lookup index -1 into nothing.
            return fallback
        return mfcc

    def _audio_index(self, frame_idx: int, num_audio_frames: int) -> int:
        """Map a video frame index onto an MFCC row, clamped to the last row."""
        return min(int(frame_idx * (num_audio_frames / self.frame_limit)), num_audio_frames - 1)

    @staticmethod
    def _fuse(face_geo: np.ndarray, audio_row: np.ndarray) -> np.ndarray:
        """Concatenate (68, 3) landmarks with one MFCC row broadcast to all 68 nodes -> (68, 16)."""
        return np.concatenate((face_geo, np.tile(audio_row, (68, 1))), axis=1)

    def process_video(self, video_path: str) -> torch.Tensor:
        """Decode one clip into a ``(frame_limit, 68, 16)`` float32 tensor.

        With face landmarks disabled, the landmark columns are zero and only
        the MFCC columns carry signal. With landmarks enabled, frames in which
        no face is detected are entirely zero (audio columns included), and
        clips shorter than ``frame_limit`` are zero-padded at the end.
        """
        mfcc = self._extract_mfcc(video_path)
        num_audio_frames = len(mfcc)

        # If face landmarks aren't available, build an audio-only tensor (face xyz = 0)
        if not ENABLE_FACE_LANDMARKS:
            blank_face = np.zeros((68, 3), dtype=np.float32)
            node_feats = [
                self._fuse(blank_face, mfcc[self._audio_index(frame_idx, num_audio_frames)])
                for frame_idx in range(self.frame_limit)
            ]
            return torch.from_numpy(np.array(node_feats, dtype=np.float32))

        # Video extraction (landmarks)
        node_feats = []
        cap = cv2.VideoCapture(video_path)
        try:
            with mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1) as face_mesh:
                while cap.isOpened() and len(node_feats) < self.frame_limit:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                    if not results.multi_face_landmarks:
                        node_feats.append(np.zeros((68, 16), dtype=np.float32))
                        continue

                    lm = results.multi_face_landmarks[0].landmark
                    face_geo = np.array(
                        [[lm[i].x, lm[i].y, lm[i].z] for i in SELECTED_LANDMARKS],
                        dtype=np.float32,
                    )
                    # len(node_feats) equals the current frame index because
                    # every decoded frame appends exactly one entry.
                    audio_row = mfcc[self._audio_index(len(node_feats), num_audio_frames)]
                    node_feats.append(self._fuse(face_geo, audio_row))
        finally:
            # Release the decoder even if FaceMesh or OpenCV raises mid-clip.
            cap.release()

        # Padding (truncation is already enforced by the loop condition)
        if len(node_feats) == 0:
            return torch.zeros(self.frame_limit, 68, 16, dtype=torch.float32)

        tensor = np.array(node_feats, dtype=np.float32)
        if len(tensor) < self.frame_limit:
            pad = np.zeros((self.frame_limit - len(tensor), 68, 16), dtype=np.float32)
            tensor = np.concatenate((tensor, pad), axis=0)

        return torch.from_numpy(tensor)

    def __len__(self) -> int:
        """Number of indexed video files."""
        return len(self.samples)

    def __getitem__(self, idx: int):
        """Return ``(features, label)`` for sample ``idx``; label is a 0-dim long tensor."""
        x = self.process_video(self.samples[idx])
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y


# Build both splits, fail fast if either is empty, then wrap them in loaders.
print("--- Initializing Training Set (DOLOS) ---")
train_dataset = DeceptionDataset(root_dir=TRAIN_ROOT, frame_limit=FRAME_LIMIT)

print("\n--- Initializing Test Set (Real-Life Trial) ---")
test_dataset = DeceptionDataset(root_dir=TEST_ROOT, frame_limit=FRAME_LIMIT)

# An empty split means the truth/lie folder layout is wrong; stop before training.
if not len(train_dataset):
    raise RuntimeError(f"No training samples found under {TRAIN_ROOT}. Check folder structure.")
if not len(test_dataset):
    raise RuntimeError(f"No test samples found under {TEST_ROOT}. Check folder structure.")

# Only the training split is shuffled; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=0, shuffle=False)

print(f"\nData Loaded: {len(train_dataset)} Train Samples, {len(test_dataset)} Test Samples")
print(f"Face landmarks enabled: {ENABLE_FACE_LANDMARKS}")

AttributeError: module 'mediapipe' has no attribute 'solutions'

In [3]:
# @title 4. C-GNN-D Architecture
class CausalInterventionLayer(nn.Module):
    """Subtract an attention-weighted mix of learnable confounder vectors from the input.

    Args:
        in_dim: Size of the last dimension of the input features.
        num_confounders: Number of rows in the learnable confounder dictionary.
    """

    def __init__(self, in_dim, num_confounders=32):
        super().__init__()
        # Learnable dictionary: one `in_dim`-sized vector per confounder.
        self.confounder_mem = nn.Parameter(torch.randn(num_confounders, in_dim))
        # Scores each input against every dictionary entry.
        self.attention = nn.Linear(in_dim, num_confounders)

    def forward(self, x):
        """Return ``x`` minus its expected confounder; output has the same shape as ``x``."""
        # Softmax over the confounder axis gives a distribution over dictionary entries.
        weights = self.attention(x).softmax(dim=-1)
        # Weighted sum of dictionary rows = expected confounder, removed from x.
        return x - weights @ self.confounder_mem


def make_batched_edge_index(edge_index: torch.Tensor, batch_size: int, num_nodes: int) -> torch.Tensor:
    """Tile one graph's ``edge_index`` into the edge list of ``batch_size`` disjoint copies.

    Copy ``k`` has its node ids shifted by ``k * num_nodes``; edges are laid
    out graph by graph (all edges of copy 0, then copy 1, ...).

    Args:
        edge_index: Edge list of a single graph, shape ``(2, E)``.
        batch_size: Number of disjoint copies to produce.
        num_nodes: Number of nodes in one graph (the id shift between copies).

    Returns:
        Long tensor of shape ``(2, batch_size * E)``. For ``batch_size == 1``
        the input tensor is returned unchanged (no dtype conversion).
    """
    if batch_size == 1:
        return edge_index

    base = edge_index.to(torch.long)
    shifts = torch.arange(batch_size, device=base.device, dtype=base.dtype) * num_nodes
    # (rows, 1, E) + (1, B, 1) -> (rows, B, E), then flatten graph-major.
    shifted = base.unsqueeze(1) + shifts.view(1, batch_size, 1)
    return shifted.reshape(base.size(0), batch_size * base.size(1))


class CGNND(nn.Module):
    """Graph-conv (per frame) -> LSTM (over time) -> causal intervention -> 2-way classifier.

    Args:
        node_feats: Feature size of every graph node.
        hidden_dim: Width of the GCN output, the LSTM state and the causal layer.
        num_confounders: Size of the causal layer's confounder dictionary.
            ``None`` (default) uses the module-level ``NUM_CONFOUNDERS``.
    """

    def __init__(self, node_feats=16, hidden_dim=64, num_confounders=None):
        super().__init__()
        if num_confounders is None:
            # Backward-compatible default: fall back to the notebook-wide setting.
            num_confounders = NUM_CONFOUNDERS
        # Spatial: Graph Conv
        self.gcn = GCNConv(node_feats, hidden_dim)
        # Temporal: LSTM
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        # Causal: Intervention
        self.causal = CausalInterventionLayer(hidden_dim, num_confounders)
        # Classifier
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 2),
        )

    def forward(self, x, edge_index):
        """Classify a batch of clips.

        Args:
            x: Tensor of shape (Batch, Time, Nodes, Feats).
            edge_index: Edge list of ONE graph, shape (2, E); it is replicated
                internally for every frame of every clip.

        Returns:
            ``(logits, embedding, causal_embedding)`` with shapes
            (B, 2), (B, hidden_dim) and (B, hidden_dim).
        """
        b, t, n, f_dim = x.shape

        # 1. Spatial pass. Treat all B*T frames as one batch of disjoint graphs
        # so the GCN runs once instead of once per time step. Graphs share no
        # edges, so each frame is convolved independently of the others.
        frame_edge_index = make_batched_edge_index(edge_index, b * t, n)
        node_out = F.relu(self.gcn(x.reshape(b * t * n, f_dim), frame_edge_index))
        # Mean-pool the nodes of each frame -> (B, T, H).
        spatial_seq = node_out.reshape(b, t, n, node_out.size(-1)).mean(dim=2)

        # 2. Temporal pass: final hidden state of the last LSTM layer.
        _, (hn, _) = self.lstm(spatial_seq)
        embedding = hn[-1]  # (B, H)

        # 3. Causal intervention
        causal_embedding = self.causal(embedding)

        # 4. Classify
        logits = self.fc(causal_embedding)

        return logits, embedding, causal_embedding


# Instantiate the network on the selected device together with its loss and optimizer.
model = CGNND(hidden_dim=HIDDEN_DIM).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
print("✅ Model Initialized.")

✅ Model Initialized.


In [4]:
# Smoke test (no dataset required): push random input through the model
# and report the shape of each of its three outputs.
with torch.no_grad():
    dummy_x = torch.randn(4, FRAME_LIMIT, 68, 16, device=device)
    dummy_logits, dummy_emb, dummy_causal = model(dummy_x, STATIC_EDGE_INDEX)

print(
    "Shapes:",
    dict(
        logits=tuple(dummy_logits.shape),
        embedding=tuple(dummy_emb.shape),
        causal_embedding=tuple(dummy_causal.shape),
    ),
)

Shapes: {'logits': (4, 2), 'embedding': (4, 64), 'causal_embedding': (4, 64)}


In [None]:
# @title 5. Training Loop
# Per-epoch metrics, reset every time this cell is run.
train_acc_history = []
train_loss_history = []

print(f"Starting Training for {EPOCHS} Epochs...")

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0.0  # sum of per-sample losses over the epoch
    correct = 0
    total = 0

    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()

        # Forward
        logits, _, _ = model(batch_x, STATIC_EDGE_INDEX)

        loss = criterion(logits, batch_y)
        loss.backward()
        optimizer.step()

        # `loss` is the batch MEAN; weight it by the batch size so a short
        # final batch does not count as much as a full one in the epoch average.
        n_batch = batch_y.size(0)
        total_loss += loss.item() * n_batch
        preds = torch.argmax(logits, dim=1)
        correct += (preds == batch_y).sum().item()
        total += n_batch

    avg_loss = total_loss / total
    acc = correct / total
    train_loss_history.append(avg_loss)
    train_acc_history.append(acc)

    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {avg_loss:.4f} | Acc: {acc*100:.2f}%")

# Plot Learning Curve
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(train_loss_history, color='red')
ax_loss.set(title="Training Loss", xlabel="Epoch", ylabel="Loss")
ax_acc.plot(train_acc_history, color='blue')
ax_acc.set(title="Training Accuracy", xlabel="Epoch", ylabel="Accuracy")
fig.tight_layout()
plt.show()

In [None]:
# @title 6. Experimental Results

# --- A. Cross-Corpus Evaluation ---
# Fixed class ids/names so metrics keep a 2x2 layout even when the test set
# or the predictions contain only one of the two classes.
CLASS_IDS = [0, 1]
CLASS_NAMES = ['Truth', 'Lie']

model.eval()
all_preds = []
all_labels = []

print("Running Cross-Corpus Evaluation (Test Set)...")
with torch.no_grad():
    for batch_x, batch_y in tqdm(test_loader):
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        logits, _, _ = model(batch_x, STATIC_EDGE_INDEX)
        preds = torch.argmax(logits, dim=1)

        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(batch_y.cpu().tolist())

# --- B. Confusion Matrix ---
# Without `labels=`, sklearn infers the classes from the data; a single-class
# result would give a 1x1 matrix that no longer matches the tick labels.
cm = confusion_matrix(all_labels, all_preds, labels=CLASS_IDS)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Cross-Corpus Confusion Matrix (DOLOS -> RLT)')
plt.show()

print("\nClassification Report:")
# `labels=` keeps `target_names` valid for single-class results;
# `zero_division=0` reports 0 for a never-predicted class without warnings.
print(classification_report(
    all_labels, all_preds, labels=CLASS_IDS, target_names=CLASS_NAMES, zero_division=0
))

# --- C. Causal Explainability Plot ---
# Pick a "lie" sample if present; otherwise fall back to index 0.
try:
    sample_idx = next(i for i, y in enumerate(test_dataset.labels) if y == 1)
except StopIteration:
    sample_idx = 0

sample_x, sample_y = test_dataset[sample_idx]
sample_x = sample_x.unsqueeze(0).to(device)

model.eval()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    _, orig_emb, causal_emb = model(sample_x, STATIC_EDGE_INDEX)
orig = orig_emb.cpu().numpy().flatten()
caus = causal_emb.cpu().numpy().flatten()

# Side-by-side bars: embedding before vs. after the causal layer, per feature.
plt.figure(figsize=(12, 5))
plt.bar(np.arange(len(orig)) - 0.2, orig, width=0.4, label='Original (Confounded)', color='gray', alpha=0.7)
plt.bar(np.arange(len(caus)) + 0.2, caus, width=0.4, label='Causal (Intervened)', color='#d62728')
plt.legend()
plt.title("Feature Disentanglement: Effect of Causal Layer")
plt.xlabel("Feature Index")
plt.ylabel("Activation Magnitude")
plt.show()

print("All results generated successfully.")