In [2]:
#GRU avg pool WITH GPU CPU SWITCH, sewa 5
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 5
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Directly average across the sequence length dimension
        out = torch.mean(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_AVG-5.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1376
Epoch 1/100, Validation Loss: 0.1076
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 2/100, Training Loss: 0.1205
Epoch 2/100, Validation Loss: 0.1144
Epoch 3/100, Training Loss: 0.1156
Epoch 3/100, Validation Loss: 0.1107
Epoch 4/100, Training Loss: 0.1158
Epoch 4/100, Validation Loss: 0.1080
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.1031
Epoch 5/100, Validation Loss: 0.1023
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 6/100, Training Loss: 0.1014
Epoch 6/100, Validation Loss: 0.1032
Epoch 7/100, Training Loss: 0.1004
Epoch 7/100, Validation Loss: 0.1027
Epoch 8/100, Training Loss: 0.0990
Epoch 8/100, Validation Loss: 0.1019
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 9/100, Training Loss: 0.0979
Epoch 9/100, Validation Loss: 0.1034
Epoch 10/100, Training Loss: 0.0974
Epoch 10/100, Validation Loss: 0.1028
Epoch 11/100, Training Loss: 0.0960
Epoch 11/100, Validation Loss: 0.1027
Epoch 00011: reducing lea

In [3]:
#GRU avg pool WITH GPU CPU SWITCH, sewa 10 //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 10
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Directly average across the sequence length dimension
        out = torch.mean(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_AVG-10.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1352
Epoch 1/100, Validation Loss: 0.1139
Model saved to sewa-best_GRU_AVG-10.pth
Epoch 2/100, Training Loss: 0.1136
Epoch 2/100, Validation Loss: 0.1096
Model saved to sewa-best_GRU_AVG-10.pth
Epoch 3/100, Training Loss: 0.1107
Epoch 3/100, Validation Loss: 0.1004
Model saved to sewa-best_GRU_AVG-10.pth
Epoch 4/100, Training Loss: 0.1086
Epoch 4/100, Validation Loss: 0.1054
Epoch 5/100, Training Loss: 0.1038
Epoch 5/100, Validation Loss: 0.1081
Epoch 6/100, Training Loss: 0.1020
Epoch 6/100, Validation Loss: 0.1025
Epoch 00006: reducing learning rate of group 0 to 1.0000e-03.
Epoch 7/100, Training Loss: 0.0903
Epoch 7/100, Validation Loss: 0.0972
Model saved to sewa-best_GRU_AVG-10.pth
Epoch 8/100, Training Loss: 0.0875
Epoch 8/100, Validation Loss: 0.1023
Epoch 9/100, Training Loss: 0.0854
Epoch 9/100, Validation Loss: 0.1026
Epoch 10/100, Training Loss: 0.0847
Epoch 10/100, Validation Loss: 0.1055
Epoch 00010: reducing learning rate of group 0 to 1.0000

In [1]:
#1 sec only val

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_valence = data['valence'].values
    return X, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "1sec/SEWA_features_wav2vec_1_seconds_train.csv"
dev_file = "1sec/SEWA_features_wav2vec_1_seconds_dev.csv"
test_file = "1sec/SEWA_features_wav2vec_1_seconds_test.csv"

# Load and preprocess datasets
X_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Valence - Dev MSE: 0.015723829621847926 Dev RMSE: 0.12539469534971534 Test MSE: 0.02818249009312014 Test RMSE: 0.1678764131530101


In [2]:
#2 sec ar

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_arousal = data['arousal'].values
    return X, y_arousal

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "2sec/SEWA_features_wav2vec_2_seconds_train.csv"
dev_file = "2sec/SEWA_features_wav2vec_2_seconds_dev.csv"
test_file = "2sec/SEWA_features_wav2vec_2_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train= load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev= load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test= load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)


Arousal - Dev MSE: 0.01150712431384633 Dev RMSE: 0.10727126508924154 Test MSE: 0.0337811262872568 Test RMSE: 0.18379642620915349


In [3]:
#2 sec val

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_valence = data['valence'].values
    return X, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "2sec/SEWA_features_wav2vec_2_seconds_train.csv"
dev_file = "2sec/SEWA_features_wav2vec_2_seconds_dev.csv"
test_file = "2sec/SEWA_features_wav2vec_2_seconds_test.csv"

# Load and preprocess datasets
X_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


Valence - Dev MSE: 0.014963897433684464 Dev RMSE: 0.12232701023765954 Test MSE: 0.028525227667764635 Test RMSE: 0.1688941315373765


In [4]:
#3 sec ar

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_arousal = data['arousal'].values
    return X, y_arousal

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "3sec/SEWA_features_wav2vec_3_seconds_train.csv"
dev_file = "3sec/SEWA_features_wav2vec_3_seconds_dev.csv"
test_file = "3sec/SEWA_features_wav2vec_3_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train= load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev= load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test= load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)


Arousal - Dev MSE: 0.011329502120488008 Dev RMSE: 0.106440133974399 Test MSE: 0.03427857801002751 Test RMSE: 0.18514474880489457


In [5]:
#3 sec val

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_valence = data['valence'].values
    return X, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "3sec/SEWA_features_wav2vec_3_seconds_train.csv"
dev_file = "3sec/SEWA_features_wav2vec_3_seconds_dev.csv"
test_file = "3sec/SEWA_features_wav2vec_3_seconds_test.csv"

# Load and preprocess datasets
X_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


Valence - Dev MSE: 0.014270796036210186 Dev RMSE: 0.11946043711710663 Test MSE: 0.02775016039644333 Test RMSE: 0.1665837939189864


In [6]:
#4 sec ar

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_arousal = data['arousal'].values
    return X, y_arousal

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "4sec/SEWA_features_wav2vec_4_seconds_train.csv"
dev_file = "4sec/SEWA_features_wav2vec_4_seconds_dev.csv"
test_file = "4sec/SEWA_features_wav2vec_4_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train= load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev= load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test= load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)


Arousal - Dev MSE: 0.011034525287948183 Dev RMSE: 0.10504534872115082 Test MSE: 0.033976497371320497 Test RMSE: 0.18432714767857852


In [7]:
#4 sec val

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_valence = data['valence'].values
    return X, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "4sec/SEWA_features_wav2vec_4_seconds_train.csv"
dev_file = "4sec/SEWA_features_wav2vec_4_seconds_dev.csv"
test_file = "4sec/SEWA_features_wav2vec_4_seconds_test.csv"

# Load and preprocess datasets
X_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


Valence - Dev MSE: 0.014388082103504183 Dev RMSE: 0.1199503318190666 Test MSE: 0.028316769121122524 Test RMSE: 0.1682758720706047


In [1]:
#GRU avg pool WITH GPU CPU SWITCH, sewa 15 //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 15
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Directly average across the sequence length dimension
        out = torch.mean(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_AVG-15.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)


Epoch 1/100, Training Loss: 0.1400
Epoch 1/100, Validation Loss: 0.0960
Model saved to sewa-best_GRU_AVG-15.pth
Epoch 2/100, Training Loss: 0.1131
Epoch 2/100, Validation Loss: 0.1092
Epoch 3/100, Training Loss: 0.1096
Epoch 3/100, Validation Loss: 0.1127
Epoch 4/100, Training Loss: 0.1070
Epoch 4/100, Validation Loss: 0.1250
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.0942
Epoch 5/100, Validation Loss: 0.0956
Model saved to sewa-best_GRU_AVG-15.pth
Epoch 6/100, Training Loss: 0.0891
Epoch 6/100, Validation Loss: 0.0950
Model saved to sewa-best_GRU_AVG-15.pth
Epoch 7/100, Training Loss: 0.0874
Epoch 7/100, Validation Loss: 0.0955
Epoch 8/100, Training Loss: 0.0860
Epoch 8/100, Validation Loss: 0.0956
Epoch 9/100, Training Loss: 0.0844
Epoch 9/100, Validation Loss: 0.0965
Epoch 00009: reducing learning rate of group 0 to 1.0000e-04.
Epoch 10/100, Training Loss: 0.0810
Epoch 10/100, Validation Loss: 0.0972
Epoch 11/100, Training Loss: 0.080

In [2]:
#GRU avg pool WITH GPU CPU SWITCH, sewa 20 //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 20
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Directly average across the sequence length dimension
        out = torch.mean(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_AVG-20.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1436
Epoch 1/100, Validation Loss: 0.1153
Model saved to sewa-best_GRU_AVG-20.pth
Epoch 2/100, Training Loss: 0.1109
Epoch 2/100, Validation Loss: 0.0895
Model saved to sewa-best_GRU_AVG-20.pth
Epoch 3/100, Training Loss: 0.1055
Epoch 3/100, Validation Loss: 0.1042
Epoch 4/100, Training Loss: 0.1025
Epoch 4/100, Validation Loss: 0.1000
Epoch 5/100, Training Loss: 0.0957
Epoch 5/100, Validation Loss: 0.0966
Epoch 00005: reducing learning rate of group 0 to 1.0000e-03.
Epoch 6/100, Training Loss: 0.0834
Epoch 6/100, Validation Loss: 0.0909
Epoch 7/100, Training Loss: 0.0785
Epoch 7/100, Validation Loss: 0.0923
Early stopping triggered
Test MAE Valence: 0.0964, Test RMSE Valence: 0.1261
Test MAE Arousal: 0.0669, Test RMSE Arousal: 0.0878


In [3]:
#GRU max pool with GPU SWITCH, sewa 5
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 5
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # torch.max returns both the max values and the indices, so we select the values with [0]
        out, _ = torch.max(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_MAX-5.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) # Move inputs, labels to the same device as the model
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device
            
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1378
Epoch 1/100, Validation Loss: 0.0997
Model saved to sewa-best_GRU_MAX-5.pth
Epoch 2/100, Training Loss: 0.1246
Epoch 2/100, Validation Loss: 0.1093
Epoch 3/100, Training Loss: 0.1187
Epoch 3/100, Validation Loss: 0.1075
Epoch 4/100, Training Loss: 0.1143
Epoch 4/100, Validation Loss: 0.1090
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.1050
Epoch 5/100, Validation Loss: 0.1012
Epoch 6/100, Training Loss: 0.1009
Epoch 6/100, Validation Loss: 0.1016
Early stopping triggered
Test MAE Valence: 0.0811, Test RMSE Valence: 0.1111
Test MAE Arousal: 0.0761, Test RMSE Arousal: 0.0990


In [4]:
#GRU max pool with GPU SWITCH, sewa 10  //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 10
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # torch.max returns both the max values and the indices, so we select the values with [0]
        out, _ = torch.max(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_MAX-10.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) # Move inputs, labels to the same device as the model
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device
            
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1375
Epoch 1/100, Validation Loss: 0.1063
Model saved to sewa-best_GRU_MAX-10.pth
Epoch 2/100, Training Loss: 0.1172
Epoch 2/100, Validation Loss: 0.0966
Model saved to sewa-best_GRU_MAX-10.pth
Epoch 3/100, Training Loss: 0.1145
Epoch 3/100, Validation Loss: 0.1110
Epoch 4/100, Training Loss: 0.1109
Epoch 4/100, Validation Loss: 0.1141
Epoch 5/100, Training Loss: 0.1064
Epoch 5/100, Validation Loss: 0.1034
Epoch 00005: reducing learning rate of group 0 to 1.0000e-03.
Epoch 6/100, Training Loss: 0.0963
Epoch 6/100, Validation Loss: 0.0995
Epoch 7/100, Training Loss: 0.0920
Epoch 7/100, Validation Loss: 0.1009
Early stopping triggered
Test MAE Valence: 0.0788, Test RMSE Valence: 0.1157
Test MAE Arousal: 0.0630, Test RMSE Arousal: 0.0780


In [5]:
#GRU max pool with GPU SWITCH, sewa 15  //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 15
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # torch.max returns both the max values and the indices, so we select the values with [0]
        out, _ = torch.max(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_MAX-15.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) # Move inputs, labels to the same device as the model
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device
            
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path)) 

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1387
Epoch 1/100, Validation Loss: 0.1028
Model saved to sewa-best_GRU_MAX-15.pth
Epoch 2/100, Training Loss: 0.1135
Epoch 2/100, Validation Loss: 0.0899
Model saved to sewa-best_GRU_MAX-15.pth
Epoch 3/100, Training Loss: 0.1119
Epoch 3/100, Validation Loss: 0.1177
Epoch 4/100, Training Loss: 0.1116
Epoch 4/100, Validation Loss: 0.1063
Epoch 5/100, Training Loss: 0.1048
Epoch 5/100, Validation Loss: 0.0945
Epoch 00005: reducing learning rate of group 0 to 1.0000e-03.
Epoch 6/100, Training Loss: 0.0953
Epoch 6/100, Validation Loss: 0.0947
Epoch 7/100, Training Loss: 0.0895
Epoch 7/100, Validation Loss: 0.0954
Early stopping triggered
Test MAE Valence: 0.0789, Test RMSE Valence: 0.1123
Test MAE Arousal: 0.0654, Test RMSE Arousal: 0.0834


In [6]:
#GRU max pool with GPU SWITCH, sewa 20  //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 20
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # torch.max returns both the max values and the indices, so we select the values with [0]
        out, _ = torch.max(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_MAX-20.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) # Move inputs, labels to the same device as the model
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device
            
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1418
Epoch 1/100, Validation Loss: 0.0922
Model saved to sewa-best_GRU_MAX-20.pth
Epoch 2/100, Training Loss: 0.1143
Epoch 2/100, Validation Loss: 0.1093
Epoch 3/100, Training Loss: 0.1102
Epoch 3/100, Validation Loss: 0.0998
Epoch 4/100, Training Loss: 0.1069
Epoch 4/100, Validation Loss: 0.1100
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.0917
Epoch 5/100, Validation Loss: 0.0943
Epoch 6/100, Training Loss: 0.0876
Epoch 6/100, Validation Loss: 0.0929
Early stopping triggered
Test MAE Valence: 0.0742, Test RMSE Valence: 0.1063
Test MAE Arousal: 0.0686, Test RMSE Arousal: 0.0861


In [7]:
#GRU 1d cnn SEWA 5 DONE
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')
# Hyperparameters
window_size = 5
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2, cnn_kernel_size=window_size, cnn_out_channels=hidden_size):
        #cnn kernel size = window size
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
        # 1D CNN Layer for local feature extraction
        # Adjust in_channels to match the GRU's output hidden size
        # You can choose cnn_out_channels to transform feature dimensionality if desired
        self.conv1d_layer = nn.Conv1d(in_channels=hidden_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply 1D CNN
        out = out.permute(0, 2, 1)  # Permute for Conv1d
        out = self.conv1d_layer(out)
        out = out.squeeze()  # Squeeze the singleton dimension/ should be 128, что за входные данные? какой х
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")      
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_1D-5.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.9033
Epoch 1/100, Validation Loss: 1.0495
Model saved to sewa-best_GRU_1D-5.pth
Epoch 2/100, Training Loss: 0.9860
Epoch 2/100, Validation Loss: 1.0495
Epoch 3/100, Training Loss: 0.9858
Epoch 3/100, Validation Loss: 1.0495
Epoch 4/100, Training Loss: 0.9856
Epoch 4/100, Validation Loss: 1.0495
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.9855
Epoch 5/100, Validation Loss: 1.0495
Epoch 6/100, Training Loss: 0.9856
Epoch 6/100, Validation Loss: 1.0495
Early stopping triggered
Test MAE Valence: 0.7753, Test RMSE Valence: 0.8232
Test MAE Arousal: 1.1981, Test RMSE Arousal: 1.2281


In [8]:
#GRU 1d cnn SEWA 10 TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 10
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2, cnn_kernel_size=window_size, cnn_out_channels=hidden_size):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
        # 1D CNN Layer for local feature extraction
        # Adjust in_channels to match the GRU's output hidden size
        # You can choose cnn_out_channels to transform feature dimensionality if desired
        self.conv1d_layer = nn.Conv1d(in_channels=hidden_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply 1D CNN
        out = out.permute(0, 2, 1)  # Permute for Conv1d
        out = self.conv1d_layer(out)
        out = out.squeeze()  # Squeeze the singleton dimension
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")      
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_1D-10.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.9803
Epoch 1/100, Validation Loss: 1.0453
Model saved to sewa-best_GRU_1D-10.pth
Epoch 2/100, Training Loss: 0.9872
Epoch 2/100, Validation Loss: 1.0453
Epoch 3/100, Training Loss: 0.9871
Epoch 3/100, Validation Loss: 1.0453
Epoch 4/100, Training Loss: 0.9871
Epoch 4/100, Validation Loss: 1.0453
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.9871
Epoch 5/100, Validation Loss: 1.0453
Epoch 6/100, Training Loss: 0.9872
Epoch 6/100, Validation Loss: 1.0453
Early stopping triggered
Test MAE Valence: 0.7735, Test RMSE Valence: 0.8217
Test MAE Arousal: 1.2006, Test RMSE Arousal: 1.2304


In [9]:
#GRU 1d cnn SEWA 15 TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 15
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2, cnn_kernel_size=window_size, cnn_out_channels=hidden_size):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
        # 1D CNN Layer for local feature extraction
        # Adjust in_channels to match the GRU's output hidden size
        # You can choose cnn_out_channels to transform feature dimensionality if desired
        self.conv1d_layer = nn.Conv1d(in_channels=hidden_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply 1D CNN
        out = out.permute(0, 2, 1)  # Permute for Conv1d
        out = self.conv1d_layer(out)
        out = out.squeeze()  # Squeeze the singleton dimension
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")      
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_1D-15.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.8628
Epoch 1/100, Validation Loss: 0.8044
Model saved to sewa-best_GRU_1D-15.pth
Epoch 2/100, Training Loss: 0.8678
Epoch 2/100, Validation Loss: 0.8044
Epoch 3/100, Training Loss: 0.8662
Epoch 3/100, Validation Loss: 0.8044
Epoch 4/100, Training Loss: 0.8669
Epoch 4/100, Validation Loss: 0.8044
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.8650
Epoch 5/100, Validation Loss: 0.8044
Epoch 6/100, Training Loss: 0.8675
Epoch 6/100, Validation Loss: 0.8044
Early stopping triggered
Test MAE Valence: 0.7720, Test RMSE Valence: 0.8206
Test MAE Arousal: 0.7973, Test RMSE Arousal: 0.8413


In [10]:
#GRU 1d cnn SEWA 20 TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 20
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2, cnn_kernel_size=window_size, cnn_out_channels=hidden_size):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
        # 1D CNN Layer for local feature extraction
        # Adjust in_channels to match the GRU's output hidden size
        # You can choose cnn_out_channels to transform feature dimensionality if desired
        self.conv1d_layer = nn.Conv1d(in_channels=hidden_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply 1D CNN
        out = out.permute(0, 2, 1)  # Permute for Conv1d
        out = self.conv1d_layer(out)
        out = out.squeeze()  # Squeeze the singleton dimension
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")      
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_1D-20.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 1.0897
Epoch 1/100, Validation Loss: 1.0837
Model saved to sewa-best_GRU_1D-20.pth
Epoch 2/100, Training Loss: 1.1013
Epoch 2/100, Validation Loss: 1.0837
Epoch 3/100, Training Loss: 1.1013
Epoch 3/100, Validation Loss: 1.0837
Epoch 4/100, Training Loss: 1.1013
Epoch 4/100, Validation Loss: 1.0837
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 1.1014
Epoch 5/100, Validation Loss: 1.0837
Epoch 6/100, Training Loss: 1.1014
Epoch 6/100, Validation Loss: 1.0837
Early stopping triggered
Test MAE Valence: 1.2307, Test RMSE Valence: 1.2620
Test MAE Arousal: 0.7949, Test RMSE Arousal: 0.8387
