In [37]:
from SEMPIDataLoader import InterPersenSEMPIDataset, DataSetLoader, DataLoader
from SEMPIDataLoader import create_dataloaders
from SEMPIDataLoader import DATA_PATH

import os
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm

# OpenFace only

In [11]:
# load the dataset and dataloaders with pickle
with open(os.path.join(DATA_PATH, 'dataset.pkl'), 'rb') as f:
    dataset: InterPersenSEMPIDataset = pickle.load(f)

train_loader, val_loader = create_dataloaders(dataset, batch_size=32)
print(len(train_loader), len(val_loader))
print(len(dataset))
print("Data loaded successfully!")
print(f"Train size: {len(train_loader.dataset)}")
print(f"Val size: {len(val_loader.dataset)}")

for i, data in enumerate(train_loader):
    print(f"Batch {i}")
    if i == 2:
        break
    print(data['features'].shape)
    print(data['pids'])
    print(data['score'])


382 96
15256
Data loaded successfully!
Train size: 12204
Val size: 3052
Batch 0
torch.Size([32, 2, 329, 64])
tensor([[7, 2],
        [4, 5],
        [4, 1],
        [8, 4],
        [8, 6],
        [3, 4],
        [4, 2],
        [4, 5],
        [2, 4],
        [4, 6],
        [2, 1],
        [6, 4],
        [2, 5],
        [1, 2],
        [5, 7],
        [1, 5],
        [3, 1],
        [6, 2],
        [5, 7],
        [1, 4],
        [4, 3],
        [1, 4],
        [4, 5],
        [4, 1],
        [1, 8],
        [1, 5],
        [8, 7],
        [5, 1],
        [3, 6],
        [8, 4],
        [1, 5],
        [5, 4]], dtype=torch.int32)
tensor([ 1.6667e-02, -1.6667e-02,  1.5000e-01,  5.0000e-02, -1.3333e-01,
         8.3333e-02, -1.3878e-17,  1.8333e-01, -1.8333e-01, -3.6667e-01,
         1.8333e-01,  1.8333e-01, -1.0000e-01,  1.6667e-01, -2.7756e-17,
         1.6667e-02, -5.0000e-02,  5.0000e-02, -6.6667e-02, -1.6667e-02,
         5.0000e-02, -6.6667e-02, -6.6667e-02,  1.6667e-01, -2.7756

In [35]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CrossAttention(nn.Module):
    def __init__(self, feature_dim, num_heads):
        super(CrossAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=feature_dim, num_heads=num_heads, batch_first=True)
    
    def forward(self, main_input, attending_input):
        """
        main_input: (batch_size, num_features, num_frames)
        attending_input: (batch_size, num_features, num_frames)
        """
        main_input = main_input.permute(0, 2, 1)  # (batch_size, num_frames, num_features)
        attending_input = attending_input.permute(0, 2, 1)  # (batch_size, num_frames, num_features)
        attended_output, attention_weights = self.attention(main_input, attending_input, attending_input)
        return attended_output.permute(0, 2, 1)  # (batch_size, num_features, num_frames)


class EngagementPredictor(nn.Module):
    def __init__(self, num_features, num_frames, hidden_dim=128, num_heads=4):
        super(EngagementPredictor, self).__init__()
        self.cross_attention = CrossAttention(feature_dim=num_features, num_heads=num_heads)
        self.mlp = nn.Sequential(
            nn.Linear(num_features * num_frames, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Tanh()
        )
    
    def forward(self, x):
        main_input, attending_input = x[:, 0, :, :], x[:, 1, :, :]
        attended_output = self.cross_attention(main_input, attending_input)
        flattened = attended_output.contiguous().reshape(attended_output.size(0), -1) # Flatten (batch_size, num_features * num_frames)
        return self.mlp(flattened).squeeze(-1)  # Output a single value per sample


def train_model(model, train_loader, val_loader, num_epochs=10, lr=1e-4, device='cuda'):
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    
    for epoch in tqdm(range(num_epochs)):
        model.train()
        total_loss = 0
        for batch in train_loader:
            feat = batch['features'].to(device)
            optimizer.zero_grad()
            predictions = model(feat)
            loss = criterion(predictions, batch['score'].to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        print(f"Epoch {epoch+1}: Training Loss = {total_loss / len(train_loader)}")
        
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                feat = batch['features'].to(device)
                predictions = model(feat)
                loss = criterion(predictions, batch['score'].to(device))
                val_loss += loss.item()
        
        print(f"Validation Loss = {val_loss / len(val_loader)}")
    
    return model

In [38]:
num_features = 329
num_frames = 64

model = train_model(EngagementPredictor(num_features, num_frames, num_heads=7), # it must be divisor of 329
            train_loader, val_loader, num_epochs=10, lr=1e-4, device='cpu')
print("Training completed successfully")

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1: Training Loss = 1.0101529877847402


 10%|█         | 1/10 [00:10<01:34, 10.50s/it]

Validation Loss = 1.0103347015877564
Epoch 2: Training Loss = 1.0099812193690794


 20%|██        | 2/10 [00:20<01:20, 10.11s/it]

Validation Loss = 1.0103347015877564
Epoch 3: Training Loss = 1.0098998749443373


 30%|███       | 3/10 [00:30<01:09,  9.99s/it]

Validation Loss = 1.0103347015877564
Epoch 4: Training Loss = 1.010009327328018


 40%|████      | 4/10 [00:40<00:59,  9.98s/it]

Validation Loss = 1.0103347015877564
Epoch 5: Training Loss = 1.0101095759431729


 50%|█████     | 5/10 [00:49<00:49,  9.91s/it]

Validation Loss = 1.0103347015877564
Epoch 6: Training Loss = 1.009867258408931


 60%|██████    | 6/10 [00:59<00:39,  9.94s/it]

Validation Loss = 1.0103347015877564
Epoch 7: Training Loss = 1.010077150860382


 70%|███████   | 7/10 [01:09<00:29,  9.88s/it]

Validation Loss = 1.0103347015877564
Epoch 8: Training Loss = 1.0099845151002493


 80%|████████  | 8/10 [01:19<00:19,  9.83s/it]

Validation Loss = 1.0103347015877564
Epoch 9: Training Loss = 1.0099701670764005


 90%|█████████ | 9/10 [01:29<00:09,  9.89s/it]

Validation Loss = 1.0103347015877564
Epoch 10: Training Loss = 1.0101879359227826


100%|██████████| 10/10 [01:39<00:00,  9.94s/it]

Validation Loss = 1.0103347015877564
Training completed successfully





In [39]:
# see models's prediction on a batch
for i, data in enumerate(val_loader):
    print(f"Batch {i}")
    if i == 2:
        break
    print(data['features'].shape)
    print(data['pids'])
    print(data['score'])
    print(model(data['features']).detach().numpy())
    print("")

Batch 0
torch.Size([32, 2, 329, 64])
tensor([[1, 4],
        [3, 5],
        [1, 5],
        [8, 1],
        [4, 1],
        [1, 4],
        [5, 2],
        [4, 3],
        [7, 2],
        [2, 4],
        [2, 6],
        [5, 6],
        [6, 5],
        [6, 1],
        [5, 1],
        [5, 3],
        [2, 4],
        [4, 1],
        [6, 7],
        [3, 6],
        [6, 5],
        [3, 5],
        [4, 1],
        [4, 3],
        [4, 2],
        [4, 1],
        [2, 4],
        [2, 1],
        [2, 1],
        [3, 4],
        [5, 3],
        [6, 4]], dtype=torch.int32)
tensor([ 8.3333e-02, -2.6667e-01,  3.3333e-02,  5.0000e-02, -3.6667e-01,
        -1.0000e-01,  3.3333e-02,  8.3333e-02, -8.3333e-02, -2.7756e-17,
        -3.8333e-01, -1.3333e-01,  1.6667e-01,  2.0000e-01,  6.6667e-02,
         3.3333e-02, -2.6667e-01,  1.1667e-01,  1.6667e-01, -2.7756e-17,
         2.0000e-01,  1.8333e-01, -1.6667e-01, -3.3333e-02,  5.0000e-02,
         5.0000e-02, -2.8333e-01,  5.0000e-02, -2.7756e-17, -1.000

In [40]:
# Save the model
torch.save(model.state_dict(), 'engagement_predictor_xatn.pth')
print("Model saved successfully")

Model saved successfully


In [41]:
# Load the model
model = EngagementPredictor(num_features, num_frames, num_heads=7)
model.load_state_dict(torch.load('engagement_predictor_xatn.pth'))
model.eval()
print("Model loaded successfully")

Model loaded successfully


In [42]:
model

EngagementPredictor(
  (cross_attention): CrossAttention(
    (attention): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=329, out_features=329, bias=True)
    )
  )
  (mlp): Sequential(
    (0): Linear(in_features=21056, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=1, bias=True)
    (3): Tanh()
  )
)

# Listener (Video) + Speaker (Audio)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CrossAttentionEngagementModel(nn.Module):
    def __init__(self, listener_dim, speaker_dim, proj_dim=128):
        super().__init__()
        self.listener_proj = nn.Linear(listener_dim, proj_dim)
        self.speaker_proj = nn.Linear(speaker_dim, proj_dim)

        self.query_proj = nn.Linear(proj_dim, proj_dim)
        self.key_proj = nn.Linear(proj_dim, proj_dim)
        self.value_proj = nn.Linear(proj_dim, proj_dim)

        self.attn = nn.MultiheadAttention(embed_dim=proj_dim, num_heads=4, batch_first=True)

        self.pool = nn.AdaptiveAvgPool1d(1)
        self.out = nn.Sequential(
            nn.Linear(proj_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Tanh()
        )

    def forward(self, listener_feat, speaker_feat):
        listener_feat = listener_feat.permute(0, 2, 1)
        speaker_feat = speaker_feat.permute(0, 2, 1)

        listener_proj = self.listener_proj(listener_feat)
        speaker_proj = self.speaker_proj(speaker_feat)

        query = self.query_proj(listener_proj)
        key = self.key_proj(speaker_proj)
        value = self.value_proj(speaker_proj)

        attended, _ = self.attn(query, key, value)

        attended = attended.permute(0, 2, 1)
        pooled = self.pool(attended).squeeze(-1)

        return self.out(pooled).squeeze(-1)


In [33]:
# pwd

In [32]:
import torch
from torch.utils.data import DataLoader, random_split
from torch import nn, optim
from tqdm import tqdm
from SEMPIDataLoader import ListenerSpeakerFeatureDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = ListenerSpeakerFeatureDataset(
    csv_path="AudioVideo_Feature_Paths.csv",
    frame_length=64,
    root_dir="./",
)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, drop_last=False)

sample = dataset[0]
speaker_feat, listener_feat = sample["features"]
listener_dim, speaker_dim = listener_feat.shape[0], speaker_feat.shape[0]
print(f"Listener feature shape: {listener_feat.shape}")
print(f"Speaker feature shape: {speaker_feat.shape}")

model = CrossAttentionEngagementModel(listener_dim, speaker_dim).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

Listener feature shape: torch.Size([329, 64])
Speaker feature shape: torch.Size([329, 64])


In [None]:
# sys.path.append(os.path.abspath("code"))
from metr import compute_ccc_batched , compute_pearson_correlation_batched


In [None]:
from tqdm import tqdm
import torch
import torch.nn as nn

def train_model(model, train_loader, val_loader, num_epochs=10, lr=1e-4):
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    threshold = 0.1

    for epoch in range(1, num_epochs + 1):
        model.train()
        total_loss = 0
        total_correct = 0
        total_preds = []
        total_targets = []

        for batch in tqdm(train_loader, desc=f"Epoch {epoch} [Train]", leave=False):
            speaker, listener = batch["features"]
            target = batch["score"].to(device)
            speaker = speaker.to(device)
            listener = listener.to(device)

            optimizer.zero_grad()
            output = model(listener, speaker)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * target.size(0)
            total_correct += (torch.abs(output - target) < threshold).sum().item()
            total_preds.append(output.detach().cpu())
            total_targets.append(target.detach().cpu())

        train_loss = total_loss / len(train_loader.dataset)
        train_acc = total_correct / len(train_loader.dataset)


        print(f"Epoch {epoch} | Train Loss: {train_loss:.4f} | Acc: {train_acc:.4f}")

        model.eval()
        val_loss = 0
        val_correct = 0
        val_preds = []
        val_targets = []

        with torch.no_grad():
            for batch in tqdm(val_loader, desc=f"Epoch {epoch} [Val]", leave=False):
                speaker, listener = batch["features"]
                target = batch["score"].to(device)
                speaker = speaker.to(device)
                listener = listener.to(device)

                output = model(listener, speaker)
                loss = criterion(output, target)

                val_loss += loss.item() * target.size(0)
                val_preds.append(output.cpu())
                val_targets.append(target.cpu())

        val_loss /= len(val_loader.dataset)
        val_ccc = compute_ccc_batched(
            np.concatenate(val_preds),
            np.concatenate(val_targets)
            
        )
        val_pcc = compute_pearson_correlation_batched(
             np.concatenate(val_preds),
            np.concatenate(val_targets)
           
        )
        val_preds = torch.cat(val_preds).numpy()
        val_targets = torch.cat(val_targets).numpy()
        # if epoch == 5:
        #     print("Val targets and preds:")
        #     print(val_targets , val_preds)
        
        print(f"| Val Loss: {val_loss:.4f} | Val CCC: {val_ccc:.4f} | Val PCC: {val_pcc:.4f}")

    return model


In [34]:
trained_model = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=30,
    lr=1e-4
)


                                                                

Epoch 1 | Train Loss: 0.1091 | Acc: 0.3229


                                                              

| Val Loss: 0.0272 | Val CCC: 0.0932 | Val PCC: 0.0962


                                                                

Epoch 2 | Train Loss: 0.0244 | Acc: 0.5191


                                                              

| Val Loss: 0.0317 | Val CCC: 0.0781 | Val PCC: 0.0909


                                                                

Epoch 3 | Train Loss: 0.0248 | Acc: 0.5092


                                                              

| Val Loss: 0.0271 | Val CCC: 0.0774 | Val PCC: 0.0845


                                                                

Epoch 4 | Train Loss: 0.0228 | Acc: 0.5220


                                                              

| Val Loss: 0.0362 | Val CCC: 0.0000 | Val PCC: 0.0001


                                                                

Epoch 5 | Train Loss: 0.0219 | Acc: 0.5442


                                                              

| Val Loss: 0.0269 | Val CCC: 0.0885 | Val PCC: 0.0917


                                                                

Epoch 6 | Train Loss: 0.0239 | Acc: 0.5177


                                                              

| Val Loss: 0.0264 | Val CCC: 0.0457 | Val PCC: 0.0508


                                                                

Epoch 7 | Train Loss: 0.0227 | Acc: 0.5329


                                                              

| Val Loss: 0.0262 | Val CCC: 0.0696 | Val PCC: 0.0734


                                                                

Epoch 8 | Train Loss: 0.0218 | Acc: 0.5305


                                                              

| Val Loss: 0.0266 | Val CCC: 0.0738 | Val PCC: 0.0787


                                                                

Epoch 9 | Train Loss: 0.0214 | Acc: 0.5277


                                                              

| Val Loss: 0.0260 | Val CCC: 0.0515 | Val PCC: 0.0552


                                                                 

Epoch 10 | Train Loss: 0.0219 | Acc: 0.5357


                                                               

| Val Loss: 0.0291 | Val CCC: 0.0396 | Val PCC: 0.0451


                                                                 

Epoch 11 | Train Loss: 0.0208 | Acc: 0.5551


                                                               

| Val Loss: 0.0376 | Val CCC: 0.0465 | Val PCC: 0.0621


                                                                 

Epoch 12 | Train Loss: 0.0217 | Acc: 0.5461


                                                               

| Val Loss: 0.0266 | Val CCC: 0.0503 | Val PCC: 0.0537


                                                                 

Epoch 13 | Train Loss: 0.0209 | Acc: 0.5418


                                                               

| Val Loss: 0.0310 | Val CCC: 0.0535 | Val PCC: 0.0606


                                                                 

Epoch 14 | Train Loss: 0.0216 | Acc: 0.5310


                                                               

| Val Loss: 0.0292 | Val CCC: 0.0348 | Val PCC: 0.0400


                                                                 

Epoch 15 | Train Loss: 0.0213 | Acc: 0.5418


                                                               

| Val Loss: 0.0269 | Val CCC: 0.0338 | Val PCC: 0.0387


                                                                 

Epoch 16 | Train Loss: 0.0207 | Acc: 0.5504


                                                               

| Val Loss: 0.0278 | Val CCC: 0.0506 | Val PCC: 0.0559


                                                                 

Epoch 17 | Train Loss: 0.0210 | Acc: 0.5262


                                                               

| Val Loss: 0.0273 | Val CCC: 0.0524 | Val PCC: 0.0612


                                                                 

Epoch 18 | Train Loss: 0.0216 | Acc: 0.5281


                                                               

| Val Loss: 0.0264 | Val CCC: 0.0438 | Val PCC: 0.0478


                                                                 

Epoch 19 | Train Loss: 0.0207 | Acc: 0.5513


                                                               

| Val Loss: 0.0280 | Val CCC: 0.0418 | Val PCC: 0.0456


                                                                 

Epoch 20 | Train Loss: 0.0216 | Acc: 0.5338


                                                               

| Val Loss: 0.0261 | Val CCC: 0.0426 | Val PCC: 0.0457


                                                                 

Epoch 21 | Train Loss: 0.0207 | Acc: 0.5409


                                                               

| Val Loss: 0.0343 | Val CCC: 0.0527 | Val PCC: 0.0650


                                                                 

Epoch 22 | Train Loss: 0.0212 | Acc: 0.5414


                                                               

| Val Loss: 0.0270 | Val CCC: 0.0646 | Val PCC: 0.0729


                                                                 

Epoch 23 | Train Loss: 0.0213 | Acc: 0.5357


                                                               

| Val Loss: 0.0262 | Val CCC: 0.0724 | Val PCC: 0.0774


                                                                 

Epoch 24 | Train Loss: 0.0206 | Acc: 0.5499


                                                               

| Val Loss: 0.0264 | Val CCC: 0.0437 | Val PCC: 0.0474


                                                                 

Epoch 25 | Train Loss: 0.0203 | Acc: 0.5499


                                                               

| Val Loss: 0.0284 | Val CCC: 0.0459 | Val PCC: 0.0520


                                                                 

Epoch 26 | Train Loss: 0.0207 | Acc: 0.5452


                                                               

| Val Loss: 0.0287 | Val CCC: 0.0499 | Val PCC: 0.0558


                                                                 

Epoch 27 | Train Loss: 0.0205 | Acc: 0.5513


                                                               

| Val Loss: 0.0260 | Val CCC: 0.0447 | Val PCC: 0.0480


                                                                 

Epoch 28 | Train Loss: 0.0199 | Acc: 0.5527


                                                               

| Val Loss: 0.0263 | Val CCC: 0.0429 | Val PCC: 0.0483


                                                                 

Epoch 29 | Train Loss: 0.0199 | Acc: 0.5712


                                                               

| Val Loss: 0.0279 | Val CCC: 0.0568 | Val PCC: 0.0682


                                                                 

Epoch 30 | Train Loss: 0.0212 | Acc: 0.5357


                                                               

| Val Loss: 0.0263 | Val CCC: 0.0571 | Val PCC: 0.0626


