In [14]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


# Dataset

In [7]:
class EmotionSequenceDataset(Dataset):
    def __init__(self, df):
        self.video_ids = df['video'].unique().tolist()
        df['deceptive'] = df['deceptive'].astype(int)
        self.data = []

        for vid in self.video_ids:
            subset = df[df['video'] == vid]
            X = subset.drop(columns=['video', 'frame_idx', 'deceptive']).values.astype(np.float32)
            y = subset['deceptive'].iloc[0]
            self.data.append((X, y))

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        X, y = self.data[idx]
        features = torch.tensor(X, dtype=torch.float32)
        label = torch.tensor(y, dtype=torch.float32)
        return features, label
    
def collate_fn(batch):
    sequences, labels = zip(*batch)
    lengths = [len(seq) for seq in sequences]
    padded_sequences = nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    return padded_sequences, torch.tensor(lengths), torch.tensor(labels)

# BiLSTM with attention

In [11]:
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.attn = nn.Linear(hidden_dim * 2, 1)

    def forward(self, lstm_out, mask):
        scores = self.attn(lstm_out).squeeze(-1)
        scores = scores.masked_fill(~mask, float('-inf'))
        weights = torch.softmax(scores, dim=1).unsqueeze(-1)
        context = torch.sum(weights * lstm_out, dim=1)
        return context, weights

class BiLSTMAttention(nn.Module):
    def __init__(self, input_dim=7, hidden_dim=64, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers,
                            bidirectional=True, batch_first=True)
        self.attn = Attention(hidden_dim)
        self.fc = nn.Linear(hidden_dim * 2, 1)

    def forward(self, x, lengths):
        packed = nn.utils.rnn.pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        out, _ = self.lstm(packed)
        out, _ = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)

        mask = torch.arange(out.size(1), device=out.device)[None, :] < lengths[:, None].to(out.device)
        context, weights = self.attn(out, mask)
        logits = self.fc(context).squeeze(1)
        return logits, weights

In [None]:
df = pd.read_csv("processed_data/silesian_deception_dataset/emotions.csv")
videos = df['video'].unique().tolist()
train_videos, test_videos = train_test_split(videos, test_size=0.2, random_state=42)
train_df = df[df['video'].isin(train_videos)]
test_df = df[df['video'].isin(test_videos)]

train_dataset = EmotionSequenceDataset(train_df)
test_dataset = EmotionSequenceDataset(test_df)

train_loader = DataLoader(train_dataset, batch_size=8, collate_fn=collate_fn, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, collate_fn=collate_fn, shuffle=False)

model = BiLSTMAttention().to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

for epoch in range(10):
    model.train()
    total_loss = 0
    for X, lengths, y in train_loader:
        X = X.to(device)
        y = y.to(device)
        opt.zero_grad()
        logits, _ = model(X, lengths)
        loss = criterion(logits, y)
        loss.backward()
        opt.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, loss={total_loss/len(train_loader):.4f}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['deceptive'] = df['deceptive'].astype(int)


['person1', 'person10', 'person11', 'person12', 'person13', 'person14', 'person15', 'person16', 'person17', 'person18', 'person19', 'person2', 'person20', 'person22', 'person23', 'person24', 'person26', 'person28', 'person29', 'person3', 'person30', 'person31', 'person35', 'person36', 'person37', 'person39', 'person4', 'person40', 'person41', 'person42', 'person43', 'person44', 'person45', 'person46', 'person47', 'person48', 'person49', 'person7', 'person9']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['deceptive'] = df['deceptive'].astype(int)


Epoch 1, loss=0.7037
Epoch 2, loss=0.6348
Epoch 3, loss=0.5507
Epoch 4, loss=0.4200
Epoch 5, loss=0.1829
Epoch 6, loss=0.0295
Epoch 7, loss=0.0095
Epoch 8, loss=0.0051
Epoch 9, loss=0.0033
Epoch 10, loss=0.0024


In [19]:
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for X, lengths, y in test_loader:
        X = X.to(device)
        y = y.to(device)

        logits, _ = model(X, lengths)
        preds = torch.sigmoid(logits)
        preds = (preds > 0.5).float()

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(y.cpu().numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

acc = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)
cm = confusion_matrix(all_labels, all_preds)

print(f"✅ Test accuracy: {acc:.4f}")
print(f"✅ F1 score: {f1:.4f}")
print("✅ Confusion matrix:")
print(cm)

✅ Test accuracy: 1.0000
✅ F1 score: 1.0000
✅ Confusion matrix:
[[10]]


