In [3]:

#LSTM:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
import os


# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()


    def __len__(self):
        return len(self.video_windows)


    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)


        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)


        return frames_tensor, labels_tensor


    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))


        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))


        return video_windows, labels_windows


    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id

print(os.getcwd())
# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 10
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of LSTM
output_size = 2  # Output size (arousal and valence)
num_layers = 2 # Number of LSTM layers
learning_rate = 0.001
epochs = 10

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)

batch_size = 64 #before:32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# LSTM Network
class LSTMNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2, bidirectional=True):
        super(LSTMNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional #bidirectional lstm
        #self.dropout = nn.Dropout(0.5) #add dropout before fully con.layer if overfitting


        # LSTM Layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional) # GRU instead of LSTM


        # Fully connected layer
        # Multiply hidden_size by 2 if bidirectional
        fc_input_size = hidden_size * 2 if bidirectional else hidden_size
        self.fc = nn.Linear(fc_input_size, output_size)
        self.tanh = nn.Tanh()
        #self.dropout = nn.Dropout(0.5)


    def forward(self, x):
        # Forward propagate LSTM
        out, _ = self.lstm(x)  # out: tensor of shape (batch_size, seq_length, hidden_size)
        #out = self.dropout(out)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        out = self.tanh(out)


        return out


# Initialize the model
model = LSTMNetwork(input_size, hidden_size, output_size, num_layers)


# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    #Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        # Save the model if desired
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break


def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []


    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())


    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)


    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))


    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)


print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


/nfs/home/kdatbayev


  frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)
  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 1/10, Training Loss: 0.0197
Epoch 1/10, Validation Loss: 0.0105
Epoch 2/10, Training Loss: 0.0128
Epoch 2/10, Validation Loss: 0.0100
Epoch 3/10, Training Loss: 0.0118
Epoch 3/10, Validation Loss: 0.0130
Epoch 00006: reducing learning rate of group 0 to 5.0000e-04.
Epoch 4/10, Training Loss: 0.0107
Epoch 4/10, Validation Loss: 0.0108
Epoch 5/10, Training Loss: 0.0102
Epoch 5/10, Validation Loss: 0.0112
Epoch 00009: reducing learning rate of group 0 to 2.5000e-04.
Epoch 6/10, Training Loss: 0.0096
Epoch 6/10, Validation Loss: 0.0106
Epoch 00012: reducing learning rate of group 0 to 1.2500e-04.
Epoch 7/10, Training Loss: 0.0092
Epoch 7/10, Validation Loss: 0.0105
Early stopping triggered
Test MAE Valence: 0.0840, Test RMSE Valence: 0.1251
Test MAE Arousal: 0.0617, Test RMSE Arousal: 0.0779


In [7]:

def predict_on_dev(model, dev_loader):
  y_valence_true = []
  y_valence_pred = []
  y_arousal_true = []
  y_arousal_pred = []

  model.eval()
  with torch.no_grad():
      for inputs, labels in dev_loader:
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

  # Calculate metrics
  mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
  rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
  mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
  rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

  return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

In [8]:
#GRU, AVG Pooling
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, seq_length=10, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=(0 if num_layers == 1 else dropout_rate))

        # Dropout layer
        self.dropout = nn.Dropout(dropout_rate)
        
        # 1D Average Pooling
        self.avg_pool1d = nn.AvgPool1d(seq_length)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply dropout
        out = self.dropout(out)
        
        # Apply 1D average pooling
        out = out.permute(0, 2, 1)  # Change shape to (batch_size, hidden_size, seq_length) for AvgPool1d
        out = self.avg_pool1d(out)
        out = out.squeeze(2)  # Remove the last dimension

        # Decode the hidden state
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
# Hyperparameters
window_size = 5
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of GRU layers
learning_rate = 0.01
batch_size = 32 #try 64 later
epochs = 100

# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Custom RMSE Loss Function
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, predicted, actual):
        return torch.sqrt(self.mse(predicted, actual))

# Initialize the model with GRU
model = GRUNetwork(input_size, hidden_size, output_size, num_layers)
#summary(model, (window_size, input_size))

# Loss and optimizer
criterion = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0
    
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1
        
    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss:.4f}")
   
    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    #Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
    # Save the model if desired
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Test evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)

print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


RuntimeError: Given input size: (128x1x5). Calculated output size: (128x1x0). Output size is too small

In [None]:
#test test GRU avg pool old
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        # TODO: normalize arousal and valence
        self.df['arousal'] = self.df['arousal'] / 10.
        #print(df)
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[3:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id

# Load data
train_df = pd.read_csv('AFEW-VA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('AFEW-VA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('AFEW-VA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 15
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of LSTM
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        out = self.tanh(out)

        return out

# Initialize the model
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.5)
#summary(model, (window_size, input_size))

# Loss and optimizer
criterion = nn.MSELoss()  # TODO: RMSE loss
# criterion = nn.RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        # Save the model if desired
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


In [1]:
#GRU avg pool WITH GPU CPU SWITCH, sewa 5
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 5
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Directly average across the sequence length dimension
        out = torch.mean(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_AVG-5.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)


Epoch 1/100, Training Loss: 0.1013
Epoch 1/100, Validation Loss: 0.0838
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 2/100, Training Loss: 0.0869
Epoch 2/100, Validation Loss: 0.0824
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 3/100, Training Loss: 0.0852
Epoch 3/100, Validation Loss: 0.0817
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 4/100, Training Loss: 0.0834
Epoch 4/100, Validation Loss: 0.0811
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 5/100, Training Loss: 0.0837
Epoch 5/100, Validation Loss: 0.0872
Epoch 6/100, Training Loss: 0.0830
Epoch 6/100, Validation Loss: 0.0861
Epoch 7/100, Training Loss: 0.0821
Epoch 7/100, Validation Loss: 0.0816
Epoch 00007: reducing learning rate of group 0 to 1.0000e-03.
Epoch 8/100, Training Loss: 0.0778
Epoch 8/100, Validation Loss: 0.0804
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 9/100, Training Loss: 0.0763
Epoch 9/100, Validation Loss: 0.0801
Model saved to sewa-best_GRU_AVG-5.pth
Epoch 10/100, Training Loss: 0.0759
Epoch 10/100, Valida

In [2]:
#GRU avg pool WITH GPU CPU SWITCH, sewa 10 //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 10
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Directly average across the sequence length dimension
        out = torch.mean(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_AVG-10.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1177
Epoch 1/100, Validation Loss: 0.0799
Model saved to sewa-best_GRU_AVG-10.pth
Epoch 2/100, Training Loss: 0.0790
Epoch 2/100, Validation Loss: 0.0737
Model saved to sewa-best_GRU_AVG-10.pth
Epoch 3/100, Training Loss: 0.0765
Epoch 3/100, Validation Loss: 0.0740
Epoch 4/100, Training Loss: 0.0771
Epoch 4/100, Validation Loss: 0.0752
Epoch 5/100, Training Loss: 0.0761
Epoch 5/100, Validation Loss: 0.0767
Epoch 00005: reducing learning rate of group 0 to 1.0000e-03.
Epoch 6/100, Training Loss: 0.0705
Epoch 6/100, Validation Loss: 0.0738
Epoch 7/100, Training Loss: 0.0688
Epoch 7/100, Validation Loss: 0.0748
Early stopping triggered
Test MAE Valence: 0.0850, Test RMSE Valence: 0.1077
Test MAE Arousal: 0.0894, Test RMSE Arousal: 0.1159


In [3]:
#GRU avg pool WITH GPU CPU SWITCH, sewa 15 //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 15
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Directly average across the sequence length dimension
        out = torch.mean(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_AVG-15.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1270
Epoch 1/100, Validation Loss: 0.0930
Model saved to sewa-best_GRU_AVG-15.pth
Epoch 2/100, Training Loss: 0.0762
Epoch 2/100, Validation Loss: 0.0762
Model saved to sewa-best_GRU_AVG-15.pth
Epoch 3/100, Training Loss: 0.0749
Epoch 3/100, Validation Loss: 0.0756
Model saved to sewa-best_GRU_AVG-15.pth
Epoch 4/100, Training Loss: 0.0727
Epoch 4/100, Validation Loss: 0.0726
Model saved to sewa-best_GRU_AVG-15.pth
Epoch 5/100, Training Loss: 0.0725
Epoch 5/100, Validation Loss: 0.0803
Epoch 6/100, Training Loss: 0.0720
Epoch 6/100, Validation Loss: 0.0720
Model saved to sewa-best_GRU_AVG-15.pth
Epoch 7/100, Training Loss: 0.0698
Epoch 7/100, Validation Loss: 0.0763
Epoch 8/100, Training Loss: 0.0707
Epoch 8/100, Validation Loss: 0.0761
Epoch 9/100, Training Loss: 0.0697
Epoch 9/100, Validation Loss: 0.0772
Epoch 00009: reducing learning rate of group 0 to 1.0000e-03.
Epoch 10/100, Training Loss: 0.0655
Epoch 10/100, Validation Loss: 0.0713
Model saved to s

In [4]:
#GRU avg pool WITH GPU CPU SWITCH, sewa 20 //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 20
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Directly average across the sequence length dimension
        out = torch.mean(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_AVG-20.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1065
Epoch 1/100, Validation Loss: 0.0686
Model saved to sewa-best_GRU_AVG-20.pth
Epoch 2/100, Training Loss: 0.0711
Epoch 2/100, Validation Loss: 0.0740
Epoch 3/100, Training Loss: 0.0689
Epoch 3/100, Validation Loss: 0.0693
Epoch 4/100, Training Loss: 0.0655
Epoch 4/100, Validation Loss: 0.0706
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.0606
Epoch 5/100, Validation Loss: 0.0689
Epoch 6/100, Training Loss: 0.0588
Epoch 6/100, Validation Loss: 0.0688
Early stopping triggered
Test MAE Valence: 0.0805, Test RMSE Valence: 0.1050
Test MAE Arousal: 0.0887, Test RMSE Arousal: 0.1135


In [5]:
#GRU max pool with GPU SWITCH, sewa 5
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 5
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # torch.max returns both the max values and the indices, so we select the values with [0]
        out, _ = torch.max(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_MAX-5.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) # Move inputs, labels to the same device as the model
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device
            
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1131
Epoch 1/100, Validation Loss: 0.0842
Model saved to sewa-best_GRU_MAX-5.pth
Epoch 2/100, Training Loss: 0.0856
Epoch 2/100, Validation Loss: 0.0852
Epoch 3/100, Training Loss: 0.0849
Epoch 3/100, Validation Loss: 0.0806
Model saved to sewa-best_GRU_MAX-5.pth
Epoch 4/100, Training Loss: 0.0843
Epoch 4/100, Validation Loss: 0.0809
Epoch 5/100, Training Loss: 0.0830
Epoch 5/100, Validation Loss: 0.0798
Model saved to sewa-best_GRU_MAX-5.pth
Epoch 6/100, Training Loss: 0.0828
Epoch 6/100, Validation Loss: 0.0807
Epoch 7/100, Training Loss: 0.0816
Epoch 7/100, Validation Loss: 0.0801
Epoch 8/100, Training Loss: 0.0815
Epoch 8/100, Validation Loss: 0.0781
Model saved to sewa-best_GRU_MAX-5.pth
Epoch 9/100, Training Loss: 0.0827
Epoch 9/100, Validation Loss: 0.0850
Epoch 10/100, Training Loss: 0.0812
Epoch 10/100, Validation Loss: 0.0825
Epoch 11/100, Training Loss: 0.0801
Epoch 11/100, Validation Loss: 0.0801
Epoch 00011: reducing learning rate of group 0 t

In [6]:
#GRU max pool with GPU SWITCH, sewa 10  //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 10
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # torch.max returns both the max values and the indices, so we select the values with [0]
        out, _ = torch.max(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_MAX-10.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) # Move inputs, labels to the same device as the model
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device
            
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1013
Epoch 1/100, Validation Loss: 0.0789
Model saved to sewa-best_GRU_MAX-10.pth
Epoch 2/100, Training Loss: 0.0827
Epoch 2/100, Validation Loss: 0.0776
Model saved to sewa-best_GRU_MAX-10.pth
Epoch 3/100, Training Loss: 0.0796
Epoch 3/100, Validation Loss: 0.0798
Epoch 4/100, Training Loss: 0.0789
Epoch 4/100, Validation Loss: 0.0831
Epoch 5/100, Training Loss: 0.0769
Epoch 5/100, Validation Loss: 0.0838
Epoch 00005: reducing learning rate of group 0 to 1.0000e-03.
Epoch 6/100, Training Loss: 0.0716
Epoch 6/100, Validation Loss: 0.0737
Model saved to sewa-best_GRU_MAX-10.pth
Epoch 7/100, Training Loss: 0.0699
Epoch 7/100, Validation Loss: 0.0766
Epoch 8/100, Training Loss: 0.0696
Epoch 8/100, Validation Loss: 0.0761
Epoch 9/100, Training Loss: 0.0689
Epoch 9/100, Validation Loss: 0.0751
Epoch 00009: reducing learning rate of group 0 to 1.0000e-04.
Epoch 10/100, Training Loss: 0.0679
Epoch 10/100, Validation Loss: 0.0750
Epoch 11/100, Training Loss: 0.068

In [7]:
#GRU max pool with GPU SWITCH, sewa 15  //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 15
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # torch.max returns both the max values and the indices, so we select the values with [0]
        out, _ = torch.max(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_MAX-15.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) # Move inputs, labels to the same device as the model
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device
            
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path)) 

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1076
Epoch 1/100, Validation Loss: 0.0786
Model saved to sewa-best_GRU_MAX-15.pth
Epoch 2/100, Training Loss: 0.0773
Epoch 2/100, Validation Loss: 0.0804
Epoch 3/100, Training Loss: 0.0752
Epoch 3/100, Validation Loss: 0.0795
Epoch 4/100, Training Loss: 0.0724
Epoch 4/100, Validation Loss: 0.0755
Model saved to sewa-best_GRU_MAX-15.pth
Epoch 5/100, Training Loss: 0.0727
Epoch 5/100, Validation Loss: 0.0747
Model saved to sewa-best_GRU_MAX-15.pth
Epoch 6/100, Training Loss: 0.0711
Epoch 6/100, Validation Loss: 0.0721
Model saved to sewa-best_GRU_MAX-15.pth
Epoch 7/100, Training Loss: 0.0703
Epoch 7/100, Validation Loss: 0.0780
Epoch 8/100, Training Loss: 0.0700
Epoch 8/100, Validation Loss: 0.0715
Model saved to sewa-best_GRU_MAX-15.pth
Epoch 9/100, Training Loss: 0.0697
Epoch 9/100, Validation Loss: 0.0814
Epoch 10/100, Training Loss: 0.0700
Epoch 10/100, Validation Loss: 0.0735
Epoch 11/100, Training Loss: 0.0694
Epoch 11/100, Validation Loss: 0.0787
Epoc

In [8]:
#GRU max pool with GPU SWITCH, sewa 20  //TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU before converting to numpy

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 20
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)

        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # torch.max returns both the max values and the indices, so we select the values with [0]
        out, _ = torch.max(out, dim=1)
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_MAX-20.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device) # Move inputs, labels to the same device as the model
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device
            
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.1019
Epoch 1/100, Validation Loss: 0.0879
Model saved to sewa-best_GRU_MAX-20.pth
Epoch 2/100, Training Loss: 0.0755
Epoch 2/100, Validation Loss: 0.0736
Model saved to sewa-best_GRU_MAX-20.pth
Epoch 3/100, Training Loss: 0.0738
Epoch 3/100, Validation Loss: 0.0889
Epoch 4/100, Training Loss: 0.0703
Epoch 4/100, Validation Loss: 0.0690
Model saved to sewa-best_GRU_MAX-20.pth
Epoch 5/100, Training Loss: 0.0709
Epoch 5/100, Validation Loss: 0.0696
Epoch 6/100, Training Loss: 0.0684
Epoch 6/100, Validation Loss: 0.0683
Model saved to sewa-best_GRU_MAX-20.pth
Epoch 7/100, Training Loss: 0.0679
Epoch 7/100, Validation Loss: 0.0699
Epoch 8/100, Training Loss: 0.0667
Epoch 8/100, Validation Loss: 0.0814
Epoch 9/100, Training Loss: 0.0663
Epoch 9/100, Validation Loss: 0.0699
Epoch 00009: reducing learning rate of group 0 to 1.0000e-03.
Epoch 10/100, Training Loss: 0.0599
Epoch 10/100, Validation Loss: 0.0692
Epoch 11/100, Training Loss: 0.0584
Epoch 11/100, Valida

In [9]:
#GRU 1d cnn SEWA 5 DONE
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 5
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2, cnn_kernel_size=window_size, cnn_out_channels=hidden_size):
        #cnn kernel size = window size
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
        # 1D CNN Layer for local feature extraction
        # Adjust in_channels to match the GRU's output hidden size
        # You can choose cnn_out_channels to transform feature dimensionality if desired
        self.conv1d_layer = nn.Conv1d(in_channels=hidden_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply 1D CNN
        out = out.permute(0, 2, 1)  # Permute for Conv1d
        out = self.conv1d_layer(out)
        out = out.squeeze()  # Squeeze the singleton dimension/ should be 128, что за входные данные? какой х
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")      
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_1D-5.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.9138
Epoch 1/100, Validation Loss: 0.9016
Model saved to sewa-best_GRU_1D-5.pth
Epoch 2/100, Training Loss: 0.9102
Epoch 2/100, Validation Loss: 0.9014
Model saved to sewa-best_GRU_1D-5.pth
Epoch 3/100, Training Loss: 0.9102
Epoch 3/100, Validation Loss: 0.9014
Epoch 4/100, Training Loss: 0.9102
Epoch 4/100, Validation Loss: 0.9014
Epoch 5/100, Training Loss: 0.9102
Epoch 5/100, Validation Loss: 0.9014
Epoch 00005: reducing learning rate of group 0 to 1.0000e-03.
Epoch 6/100, Training Loss: 0.9102
Epoch 6/100, Validation Loss: 0.9014
Epoch 7/100, Training Loss: 0.9103
Epoch 7/100, Validation Loss: 0.9014
Early stopping triggered
Test MAE Valence: 0.9532, Test RMSE Valence: 0.9682
Test MAE Arousal: 0.9539, Test RMSE Arousal: 0.9673


In [10]:
#GRU 1d cnn SEWA 10 TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 10
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2, cnn_kernel_size=window_size, cnn_out_channels=hidden_size):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
        # 1D CNN Layer for local feature extraction
        # Adjust in_channels to match the GRU's output hidden size
        # You can choose cnn_out_channels to transform feature dimensionality if desired
        self.conv1d_layer = nn.Conv1d(in_channels=hidden_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply 1D CNN
        out = out.permute(0, 2, 1)  # Permute for Conv1d
        out = self.conv1d_layer(out)
        out = out.squeeze()  # Squeeze the singleton dimension
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")      
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_1D-10.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 1.0228
Epoch 1/100, Validation Loss: 1.0249
Model saved to sewa-best_GRU_1D-10.pth
Epoch 2/100, Training Loss: 1.0280
Epoch 2/100, Validation Loss: 1.0249
Epoch 3/100, Training Loss: 1.0280
Epoch 3/100, Validation Loss: 1.0249
Epoch 4/100, Training Loss: 1.0280
Epoch 4/100, Validation Loss: 1.0249
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 1.0280
Epoch 5/100, Validation Loss: 1.0249
Epoch 6/100, Training Loss: 1.0280
Epoch 6/100, Validation Loss: 1.0249
Early stopping triggered
Test MAE Valence: 1.0467, Test RMSE Valence: 1.0599
Test MAE Arousal: 0.9539, Test RMSE Arousal: 0.9669


In [11]:
#GRU 1d cnn SEWA 15 TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 15
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2, cnn_kernel_size=window_size, cnn_out_channels=hidden_size):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
        # 1D CNN Layer for local feature extraction
        # Adjust in_channels to match the GRU's output hidden size
        # You can choose cnn_out_channels to transform feature dimensionality if desired
        self.conv1d_layer = nn.Conv1d(in_channels=hidden_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply 1D CNN
        out = out.permute(0, 2, 1)  # Permute for Conv1d
        out = self.conv1d_layer(out)
        out = out.squeeze()  # Squeeze the singleton dimension
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")      
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_1D-15.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 1.0954
Epoch 1/100, Validation Loss: 1.1094
Model saved to sewa-best_GRU_1D-15.pth
Epoch 2/100, Training Loss: 1.1063
Epoch 2/100, Validation Loss: 1.1094
Epoch 3/100, Training Loss: 1.1062
Epoch 3/100, Validation Loss: 1.1094
Epoch 4/100, Training Loss: 1.1063
Epoch 4/100, Validation Loss: 1.1094
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 1.1062
Epoch 5/100, Validation Loss: 1.1094
Epoch 6/100, Training Loss: 1.1062
Epoch 6/100, Validation Loss: 1.1094
Early stopping triggered
Test MAE Valence: 1.0466, Test RMSE Valence: 1.0594
Test MAE Arousal: 1.0457, Test RMSE Arousal: 1.0572


In [12]:
#GRU 1d cnn SEWA 20 TODO
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau
#from torchsummary import summary

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to GPU
            outputs = model(inputs)
            outputs = outputs.to('cpu')  # Move outputs back to CPU

            # Ensure labels are on CPU before converting to numpy
            labels = labels.to('cpu')
            
            y_true.append(labels.numpy())
            y_pred.append(outputs.numpy())

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    mae_valence = mean_absolute_error(y_true[:, 0], y_pred[:, 0])
    rmse_valence = sqrt(mean_squared_error(y_true[:, 0], y_pred[:, 0]))
    mae_arousal = mean_absolute_error(y_true[:, 1], y_pred[:, 1])
    rmse_arousal = sqrt(mean_squared_error(y_true[:, 1], y_pred[:, 1]))

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


def predict_on_dev(model, dev_loader):
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    model.eval()
    with torch.no_grad():
      for inputs, labels in dev_loader:
          # Send inputs and labels to GPU
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

    # Calculate metrics
    mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

# Custom Dataset
class CustomVideoDataset(Dataset):
    def __init__(self, df, window_size=10, stride=5):
        self.df = df
        self.df['arousal'] = self.df['arousal'] 
        #print(df)
        self.df['valence'] = self.df['valence']
        self.window_size = window_size
        self.stride = stride
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        return len(self.video_windows)

    def __getitem__(self, idx):
        window_frames = self.video_windows[idx]
        embeddings = [self.df.loc[self.df['path'] == frame, self.df.columns[4:]].values for frame in window_frames]
        frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        video_frames = {}
        labels = {}
        for _, row in self.df.iterrows():
            video_id = self.extract_video_info(row['path'])
            if video_id not in video_frames:
                video_frames[video_id] = []
                labels[video_id] = []
            video_frames[video_id].append(row['path'])
            labels[video_id].append((row['arousal'], row['valence']))

        video_windows = []
        labels_windows = []
        for video_id in video_frames:
            frames = video_frames[video_id]
            label_vals = labels[video_id]
            for i in range(0, len(frames) - self.window_size + 1, self.stride):
                video_windows.append(frames[i:i + self.window_size])
                window_labels = label_vals[i:i + self.window_size]
                avg_arousal = sum([label[0] for label in window_labels]) / len(window_labels)
                avg_valence = sum([label[1] for label in window_labels]) / len(window_labels)
                labels_windows.append((avg_arousal, avg_valence))

        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id
        
# RMSELoss as a class
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))
        
# Load data
train_df = pd.read_csv('SEWA_radiant_fog_160_train.csv')
dev_df = pd.read_csv('SEWA_radiant_fog_160_dev.csv')
test_df = pd.read_csv('SEWA_radiant_fog_160_test.csv')

# Hyperparameters
window_size = 20
input_size = 256  # Number of features (embeddings) per frame
hidden_size = 128  # Number of features in hidden state of GRU
output_size = 2  # Output size (arousal and valence)
num_layers = 2  # Number of layers
learning_rate = 0.01
batch_size = 32
epochs = 100

# Create datasets and dataloaders
train_dataset = CustomVideoDataset(train_df, window_size)
dev_dataset = CustomVideoDataset(dev_df, window_size)
test_dataset = CustomVideoDataset(test_df, window_size)
 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# GRU Network
class GRUNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.2, cnn_kernel_size=window_size, cnn_out_channels=hidden_size):
        super(GRUNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU Layer
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
        # 1D CNN Layer for local feature extraction
        # Adjust in_channels to match the GRU's output hidden size
        # You can choose cnn_out_channels to transform feature dimensionality if desired
        self.conv1d_layer = nn.Conv1d(in_channels=hidden_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
        self.tanh = nn.Tanh()
        
        # Dropout layer applied to the output of the GRU layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        out, _ = self.gru(x, h0)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Apply 1D CNN
        out = out.permute(0, 2, 1)  # Permute for Conv1d
        out = self.conv1d_layer(out)
        out = out.squeeze()  # Squeeze the singleton dimension
        
        # Apply dropout to the outputs of the GRU layer
        out = self.dropout(out)
        
        # Decode the averaged output
        out = self.fc(out)
        out = self.tanh(out)

        return out
        
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")      
# Initialize the model, optimizer, and RMSELoss
model = GRUNetwork(input_size, hidden_size, output_size, num_layers, dropout_rate=0.2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = RMSELoss() 

# adding learning rate scheduler to dynamically adjust the LR
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, min_lr=1e-6, verbose=True)

# Training loop
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0
model_save_path = 'sewa-best_GRU_1D-20.pth'  # Define model save path 

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation step
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Update the learning rate scheduler
    scheduler.step(avg_val_loss)

    # Early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved to {model_save_path}")
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break
            
# Load the best model for evaluation
model.load_state_dict(torch.load(model_save_path))

# Evaluate the model on test data
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


Epoch 1/100, Training Loss: 0.9041
Epoch 1/100, Validation Loss: 0.9003
Model saved to sewa-best_GRU_1D-20.pth
Epoch 2/100, Training Loss: 0.9085
Epoch 2/100, Validation Loss: 0.9003
Epoch 3/100, Training Loss: 0.9085
Epoch 3/100, Validation Loss: 0.9003
Epoch 4/100, Training Loss: 0.9085
Epoch 4/100, Validation Loss: 0.9003
Epoch 00004: reducing learning rate of group 0 to 1.0000e-03.
Epoch 5/100, Training Loss: 0.9084
Epoch 5/100, Validation Loss: 0.9003
Epoch 6/100, Training Loss: 0.9085
Epoch 6/100, Validation Loss: 0.9003
Early stopping triggered
Test MAE Valence: 0.9533, Test RMSE Valence: 0.9670
Test MAE Arousal: 0.9539, Test RMSE Arousal: 0.9660
