In [26]:
from SEMPIDataLoader import InterPersenSEMPIDataset, DataSetLoader, DataLoader
from SEMPIDataLoader import create_dataloaders
from SEMPIDataLoader import DATA_PATH

import torch
import os
import numpy as np
import pandas as pd
import pickle


import torch.nn as nn
import torch.optim as optim

import sklearn
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [16]:
# load the dataset and dataloaders with pickle
with open(os.path.join(DATA_PATH, 'dataset.pkl'), 'rb') as f:
    dataset: InterPersenSEMPIDataset = pickle.load(f)

train_loader, val_loader = create_dataloaders(dataset, batch_size=32)
print(len(train_loader), len(val_loader))
print(len(dataset))
print("Data loaded successfully!")
print(f"Train size: {len(train_loader.dataset)}")
print(f"Val size: {len(val_loader.dataset)}")

for i, data in enumerate(train_loader):
    print(f"Batch {i}")
    if i == 2:
        break
    print(data['features'].shape)
    print(data['pids'])
    print(data['score'])


382 96
15256
Data loaded successfully!
Train size: 12204
Val size: 3052
Batch 0
torch.Size([32, 2, 329, 64])
tensor([[6, 1],
        [4, 1],
        [6, 7],
        [6, 7],
        [4, 2],
        [7, 4],
        [2, 5],
        [5, 2],
        [1, 3],
        [5, 2],
        [7, 1],
        [7, 2],
        [8, 4],
        [4, 1],
        [5, 6],
        [5, 7],
        [3, 4],
        [5, 3],
        [5, 3],
        [3, 1],
        [3, 5],
        [2, 8],
        [4, 7],
        [3, 4],
        [1, 2],
        [6, 5],
        [5, 2],
        [4, 7],
        [4, 5],
        [1, 5],
        [4, 5],
        [5, 4]], dtype=torch.int32)
tensor([ 3.3333e-02,  1.0000e-01,  1.3333e-01, -1.8333e-01,  8.3333e-02,
        -1.6667e-02, -2.7756e-17, -3.3333e-02,  5.0000e-02,  1.6667e-02,
         1.0000e-01, -1.6667e-02, -1.0000e-01, -1.3878e-17, -3.1667e-01,
        -8.3333e-02,  1.8333e-01, -2.7756e-17, -1.3878e-17, -8.3333e-02,
         5.0000e-02,  1.1667e-01, -1.5000e-01,  1.1667e-01,  2.0000

In [17]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.3):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # Two separate RNNs for the two individuals
        self.rnn1 = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, nonlinearity='relu', dropout=dropout)
        self.rnn2 = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, nonlinearity='relu', dropout=dropout)

        # Fully connected layer after merging outputs
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        # Split the input into two separate individuals
        x1, x2 = x[:, 0, :, :], x[:, 1, :, :]  # Each becomes (batch_size, 329, 64)

        # Initialize hidden states
        h0_1 = torch.zeros(self.num_layers, x1.size(0), self.hidden_size).to(x.device)
        h0_2 = torch.zeros(self.num_layers, x2.size(0), self.hidden_size).to(x.device)

        # Process both individuals separately
        out1, _ = self.rnn1(x1, h0_1)
        out2, _ = self.rnn2(x2, h0_2)

        # Take the output from the last time step of each individual
        out1 = out1[:, -1, :]  # Shape: (batch_size, hidden_size)
        out2 = out2[:, -1, :]  # Shape: (batch_size, hidden_size)

        # Concatenate both outputs
        out = torch.cat((out1, out2), dim=1)  # Shape: (batch_size, hidden_size * 2)

        # Fully connected layer for final prediction
        out = self.fc(out)
        return out

In [18]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for batch in train_loader:
        features, scores = batch['features'].to(device), batch['score'].to(device)

        # Forward pass
        outputs = model(features)
        loss = criterion(outputs.squeeze(), scores)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


Epoch [1/10], Loss: 0.0185
Epoch [2/10], Loss: 0.0163
Epoch [3/10], Loss: 0.0233
Epoch [4/10], Loss: 0.0118
Epoch [5/10], Loss: 0.0226
Epoch [6/10], Loss: 0.0139
Epoch [7/10], Loss: 0.0225
Epoch [8/10], Loss: 0.0271
Epoch [9/10], Loss: 0.0100
Epoch [10/10], Loss: 0.0126


In [27]:
def evaluate_model(model, val_loader):
    model.eval()
    predictions_list = []
    actual_list = []

    with torch.no_grad():
        for batch in val_loader:
            features, scores = batch['features'].to(device), batch['score'].to(device)

            # Forward pass
            predictions = model(features).squeeze()

            predictions_list.extend(predictions.cpu().numpy())
            actual_list.extend(scores.cpu().numpy())

    # Convert to NumPy arrays for metric calculations
    predictions_list = np.array(predictions_list)
    actual_list = np.array(actual_list)

    # Compute evaluation metrics
    mse = mean_squared_error(actual_list, predictions_list)
    mae = mean_absolute_error(actual_list, predictions_list)
    r2 = r2_score(actual_list, predictions_list)

    print(f"Validation Metrics:\n"
          f"  - MSE  = {mse:.4f}\n"
          f"  - MAE  = {mae:.4f}\n"
          f"  - R²   = {r2:.4f}")

    return mse, mae, r2


In [29]:
print("Evaluating RNN...")
evaluate_model(model, val_loader)

Evaluating RNN...
Validation Metrics:
  - MSE  = 0.0095
  - MAE  = 0.0753
  - R²   = 0.5074


(0.009499510750174522, 0.07533258944749832, 0.5074359178543091)

In [20]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.3):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Two separate LSTMs for each individual
        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.lstm2 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)

        # Fully connected layer after merging outputs
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        # Split the input into two separate individuals
        x1, x2 = x[:, 0, :, :], x[:, 1, :, :]  # Each becomes (batch_size, 329, 64)

        # Initialize hidden and cell states
        h0_1 = torch.zeros(self.num_layers, x1.size(0), self.hidden_size).to(x.device)
        c0_1 = torch.zeros(self.num_layers, x1.size(0), self.hidden_size).to(x.device)

        h0_2 = torch.zeros(self.num_layers, x2.size(0), self.hidden_size).to(x.device)
        c0_2 = torch.zeros(self.num_layers, x2.size(0), self.hidden_size).to(x.device)

        # Process both individuals separately
        out1, _ = self.lstm1(x1, (h0_1, c0_1))
        out2, _ = self.lstm2(x2, (h0_2, c0_2))

        # Take the output from the last time step of each individual
        out1 = out1[:, -1, :]  # Shape: (batch_size, hidden_size)
        out2 = out2[:, -1, :]  # Shape: (batch_size, hidden_size)

        # Concatenate both outputs
        out = torch.cat((out1, out2), dim=1)  # Shape: (batch_size, hidden_size * 2)

        # Fully connected layer for final prediction
        out = self.fc(out)
        return out



In [21]:
# Model parameters
input_size = 64       # Features per timestep
hidden_size = 128     # Hidden units in LSTM
num_layers = 2        # LSTM layers
output_size = 1       # Predicting a single score

# Initialize model
model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)

# Define loss function (Huber loss is more robust to outliers)
criterion = nn.SmoothL1Loss()  

# Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Learning rate scheduler to reduce LR by half every 3 epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)


In [22]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    
    for batch in train_loader:
        features, scores = batch['features'].to(device), batch['score'].to(device)

        # Forward pass
        outputs = model(features).squeeze()

        # Compute loss
        loss = criterion(outputs, scores)
        total_loss += loss.item()

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Adjust learning rate
    scheduler.step()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")


Epoch [1/10], Loss: 0.0096, LR: 0.001000
Epoch [2/10], Loss: 0.0091, LR: 0.001000
Epoch [3/10], Loss: 0.0084, LR: 0.000500
Epoch [4/10], Loss: 0.0073, LR: 0.000500
Epoch [5/10], Loss: 0.0066, LR: 0.000500
Epoch [6/10], Loss: 0.0060, LR: 0.000250
Epoch [7/10], Loss: 0.0052, LR: 0.000250
Epoch [8/10], Loss: 0.0048, LR: 0.000250
Epoch [9/10], Loss: 0.0044, LR: 0.000125
Epoch [10/10], Loss: 0.0039, LR: 0.000125


In [30]:
print("\nEvaluating LSTM...")
evaluate_model(model, val_loader)


Evaluating LSTM...
Validation Metrics:
  - MSE  = 0.0095
  - MAE  = 0.0753
  - R²   = 0.5074


(0.009499510750174522, 0.07533258944749832, 0.5074359178543091)

In [32]:
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.3):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # Two separate GRUs for each individual
        self.gru1 = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.gru2 = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)

        # Fully connected layer after merging outputs
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        # Split the input into two separate individuals
        x1, x2 = x[:, 0, :, :], x[:, 1, :, :]  # Each becomes (batch_size, 329, 64)

        # Initialize hidden states
        h0_1 = torch.zeros(self.num_layers, x1.size(0), self.hidden_size).to(x.device)
        h0_2 = torch.zeros(self.num_layers, x2.size(0), self.hidden_size).to(x.device)

        # Process both individuals separately
        out1, _ = self.gru1(x1, h0_1)
        out2, _ = self.gru2(x2, h0_2)

        # Take the output from the last time step of each individual
        out1 = out1[:, -1, :]  # Shape: (batch_size, hidden_size)
        out2 = out2[:, -1, :]  # Shape: (batch_size, hidden_size)

        # Concatenate both outputs
        out = torch.cat((out1, out2), dim=1)  # Shape: (batch_size, hidden_size * 2)

        # Fully connected layer for final prediction
        out = self.fc(out)
        return out


In [33]:
# Initialize the GRU model
hidden_size = 128  
num_layers = 2  
output_size = 1  
gru_model = GRUModel(input_size=64, hidden_size=hidden_size, num_layers=num_layers, output_size=output_size).to(device)

# Define optimizer, loss, and scheduler
optimizer = torch.optim.Adam(gru_model.parameters(), lr=0.001)
criterion = nn.SmoothL1Loss()  # Huber loss
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

# Training loop (same as before)
num_epochs = 10
for epoch in range(num_epochs):
    gru_model.train()
    total_loss = 0
    
    for batch in train_loader:
        features, scores = batch['features'].to(device), batch['score'].to(device)

        # Forward pass
        outputs = gru_model(features).squeeze()

        # Compute loss
        loss = criterion(outputs, scores)
        total_loss += loss.item()

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Adjust learning rate
    scheduler.step()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")


Epoch [1/10], Loss: 0.0099, LR: 0.001000
Epoch [2/10], Loss: 0.0092, LR: 0.001000
Epoch [3/10], Loss: 0.0086, LR: 0.000500
Epoch [4/10], Loss: 0.0075, LR: 0.000500
Epoch [5/10], Loss: 0.0069, LR: 0.000500
Epoch [6/10], Loss: 0.0063, LR: 0.000250
Epoch [7/10], Loss: 0.0055, LR: 0.000250
Epoch [8/10], Loss: 0.0051, LR: 0.000250
Epoch [9/10], Loss: 0.0047, LR: 0.000125
Epoch [10/10], Loss: 0.0042, LR: 0.000125


In [34]:
print("Evaluating GRU Model...")
evaluate_model(gru_model, val_loader)


Evaluating GRU Model...
Validation Metrics:
  - MSE  = 0.0101
  - MAE  = 0.0776
  - R²   = 0.4752


(0.01012139581143856, 0.07758079469203949, 0.47519028186798096)