In [None]:
# Colab-only step: mount Google Drive at /content/drive/ so that the CSV files
# read later from /content/drive/MyDrive/ are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class CustomVideoDataset(Dataset):
    """Sliding-window dataset over per-frame embeddings, grouped by video.

    Each item is a pair ``(frames, labels)``:

    * ``frames`` -- float32 tensor of shape ``(window_size, n_features)`` built
      from every column of ``df`` at position 3 and beyond.
    * ``labels`` -- float32 tensor ``[mean_arousal, mean_valence]`` averaged over
      the frames of the window (arousal first, valence second).

    Rows are grouped by the parent directory name of their ``path`` value and a
    window never spans two videos; a video with fewer than ``window_size``
    frames contributes no window.

    Args:
        df: DataFrame with ``path``, ``arousal`` and ``valence`` columns; all
            columns from position 3 onward are used as numeric features.
        window_size: number of consecutive frames per window (>= 1).
        stride: step between the starts of consecutive windows (>= 1).

    Raises:
        ValueError: if ``window_size`` or ``stride`` is not positive.
    """

    def __init__(self, df, window_size=10, stride=5):
        if window_size < 1 or stride < 1:
            raise ValueError("window_size and stride must be positive integers")
        self.df = df
        self.window_size = window_size
        self.stride = stride
        # Materialise the feature columns once. Looking every frame up with a
        # boolean mask over the whole frame (as __getitem__ used to do) costs
        # O(len(df)) per frame and dominates data-loading time.
        self._embeddings = df.loc[:, df.columns[3:]].to_numpy(dtype=np.float32)
        # Row positions (into self._embeddings) of each window; filled by
        # prepare_windows() in the same order as self.video_windows.
        self._window_rows = []
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        """Return the number of windows."""
        return len(self.video_windows)

    def __getitem__(self, idx):
        """Return ``(frames_tensor, labels_tensor)`` for window ``idx``."""
        rows = self._window_rows[idx]
        # Indexing with a list of positions copies the rows, so the tensor
        # returned here does not alias the cached matrix.
        frames_tensor = torch.from_numpy(self._embeddings[rows])

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        """Group rows by video and cut each video into overlapping windows.

        Returns:
            ``(video_windows, labels_windows)`` where ``video_windows[i]`` is the
            list of frame paths in window ``i`` and ``labels_windows[i]`` is the
            tuple ``(mean_arousal, mean_valence)`` over that window.
        """
        paths = self.df['path'].tolist()
        arousals = self.df['arousal'].tolist()
        valences = self.df['valence'].tolist()

        # dicts keep insertion order, so videos are processed in the order of
        # their first appearance in the frame.
        rows_by_video = {}
        for row_pos, path in enumerate(paths):
            rows_by_video.setdefault(self.extract_video_info(path), []).append(row_pos)

        video_windows = []
        labels_windows = []
        window_rows = []
        for rows in rows_by_video.values():
            for start in range(0, len(rows) - self.window_size + 1, self.stride):
                chunk = rows[start:start + self.window_size]
                video_windows.append([paths[r] for r in chunk])
                avg_arousal = sum(arousals[r] for r in chunk) / len(chunk)
                avg_valence = sum(valences[r] for r in chunk) / len(chunk)
                labels_windows.append((avg_arousal, avg_valence))
                window_rows.append(chunk)

        self._window_rows = window_rows
        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        """Return the video id: the second-to-last '/'-separated path component."""
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id

# Read the train / dev / test splits from the mounted Drive folder.
train_df, dev_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/AFEW-VA_radiant_fog_160_train.csv',
        '/content/drive/MyDrive/AFEW-VA_radiant_fog_160_dev.csv',
        '/content/drive/MyDrive/AFEW-VA_radiant_fog_160_test.csv',
    )
)

# Wrap every split in a windowed dataset (default window_size and stride).
train_dataset, dev_dataset, test_dataset = (
    CustomVideoDataset(split_df) for split_df in (train_df, dev_df, test_df)
)

# Batch the windows; only the training split is shuffled.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader, test_loader = (
    DataLoader(split_dataset, batch_size=batch_size, shuffle=False)
    for split_dataset in (dev_dataset, test_dataset)
)


In [None]:

def predict_on_dev(model, dev_loader):
    """Run ``model`` over ``dev_loader`` and return per-dimension MAE / RMSE.

    Column 0 of both the labels and the model output is arousal and column 1 is
    valence: that is the order in which the dataset builds its label tuples and
    in which the training loop splits the model output.

    Args:
        model: callable module mapping a batch of inputs to a ``(batch, 2)`` tensor.
        dev_loader: iterable yielding ``(inputs, labels)`` batches with
            ``labels`` of shape ``(batch, 2)``.

    Returns:
        Tuple ``(mae_valence, rmse_valence, mae_arousal, rmse_arousal)`` of floats.

    Raises:
        ValueError: if ``dev_loader`` yields no batch.
    """
    true_batches = []
    pred_batches = []

    model.eval()
    with torch.no_grad():
        for inputs, labels in dev_loader:
            outputs = model(inputs)
            true_batches.append(labels.detach().cpu())
            pred_batches.append(outputs.detach().cpu())

    if not true_batches:
        raise ValueError("dev_loader yielded no batches")

    # Accumulate in float64 so the metrics do not inherit float32 rounding.
    y_true = torch.cat(true_batches).numpy().astype(np.float64)
    y_pred = torch.cat(pred_batches).numpy().astype(np.float64)

    def _mae_rmse(column):
        # MAE and RMSE of one label column.
        err = y_true[:, column] - y_pred[:, column]
        return float(np.mean(np.abs(err))), float(np.sqrt(np.mean(err ** 2)))

    mae_arousal, rmse_arousal = _mae_rmse(0)
    mae_valence, rmse_valence = _mae_rmse(1)

    return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
import numpy as np

class FullyConnectedNN(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear -> tanh.

    Args:
        input_dim: number of input features after flattening one sample.
        hidden_dim: width of the hidden layer.
        output_dim: number of outputs; each is squashed to (-1, 1) by tanh.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Map ``x`` to a ``(batch, output_dim)`` tensor.

        ``x`` may be ``(batch, input_dim)`` or carry extra trailing dimensions
        such as ``(batch, window, features)``; everything after the batch
        dimension is flattened so it lines up with ``fc1``.
        """
        # Without this, a (batch, window, features) batch reaches fc1 with
        # `features` as its last dimension instead of window * features and the
        # matrix multiply fails. 1-D / 2-D inputs are passed through untouched.
        if x.dim() > 2:
            x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.tanh(x)  # bound the outputs to (-1, 1)
        return x

# Input sizing: every sample is a window of frames, each frame an embedding
# vector, and the network consumes the window as one flat vector.
window_size, embeddings_per_frame = 10, 256
input_dim = window_size * embeddings_per_frame

# Architecture and optimisation settings.
hidden_dim, output_dim = 64, 2  # two outputs: arousal and valence
learning_rate, epochs = 0.001, 100

# Build the network.
model = FullyConnectedNN(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

# Define separate loss functions for arousal and valence
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss, computed as ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: small constant added under the square root. The derivative of
            ``sqrt`` is infinite at 0, so without it a batch that is predicted
            exactly produces NaN gradients.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)

# One RMSE criterion per emotion dimension; the training loss is their sum.
criterion_arousal = RMSELoss()
criterion_valence = RMSELoss()

# Define optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Early-stopping state
patience = 10  # epochs to wait for an improvement of the dev metric
min_val_loss = float('inf')  # best (lowest) dev metric seen so far
counter = 0  # consecutive epochs without improvement
best_val_loss = float('inf')  # not read in this cell; kept so the name stays defined
best_epoch = 0
best_mae_arousal = float('inf')
best_mae_valence = float('inf')
best_rmse_arousal = float('inf')
best_rmse_valence = float('inf')
stop_training = False

# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)

        # Column 0 is arousal and column 1 is valence, for predictions and labels alike.
        predicted_arousal = outputs[:, 0]
        predicted_valence = outputs[:, 1]
        labels_arousal = labels[:, 0]
        labels_valence = labels[:, 1]

        loss_arousal = criterion_arousal(predicted_arousal, labels_arousal)
        loss_valence = criterion_valence(predicted_valence, labels_valence)
        loss = loss_arousal + loss_valence

        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Score the current weights on the dev split; the tuple is unpacked in the
    # order predict_on_dev returns it.
    model.eval()
    dev_mae_valence, dev_rmse_valence, dev_mae_arousal, dev_rmse_arousal = predict_on_dev(model, dev_loader)
    general_rmse_metric = (dev_rmse_valence + dev_rmse_arousal) / 2.

    print(f"Validation RMSE: {general_rmse_metric:.4f}")

    if general_rmse_metric < min_val_loss:
        # New best epoch: reset the patience counter, remember its metrics and
        # checkpoint the weights.
        min_val_loss = general_rmse_metric
        counter = 0
        best_rmse_arousal = dev_rmse_arousal
        best_mae_arousal = dev_mae_arousal
        best_rmse_valence = dev_rmse_valence
        best_mae_valence = dev_mae_valence
        best_epoch = epoch
        torch.save(model.state_dict(), 'best_model_SEWA.pth')  # file name says SEWA, data is AFEW
    else:
        counter += 1

    # Stop once the dev metric has not improved for `patience` epochs in a row.
    if counter >= patience:
        print(f"Early stopping at epoch {epoch + 1} due to no improvement in validation loss.")
        stop_training = True
        break

if not stop_training:
    print("Training completed without early stopping")

# Report the best epoch whether or not early stopping fired; previously these
# numbers were dropped when training ran for the full number of epochs.
print(f"Best RMSE Arousal: {best_rmse_arousal:.4f} at epoch {best_epoch + 1}")
print(f"Best MAE Arousal: {best_mae_arousal:.4f} at epoch {best_epoch + 1}")
print(f"Best RMSE Valence: {best_rmse_valence:.4f} at epoch {best_epoch + 1}")
print(f"Best MAE Valence: {best_mae_valence:.4f} at epoch {best_epoch + 1}")

def evaluate_on_test(model, test_loader):
    """Run ``model`` over ``test_loader`` and return per-dimension MAE / RMSE.

    Column 0 of both the labels and the model output is arousal and column 1 is
    valence: that is the order in which the dataset builds its label tuples and
    in which the training loop splits the model output.

    Args:
        model: callable module mapping a batch of inputs to a ``(batch, 2)`` tensor.
        test_loader: iterable yielding ``(inputs, labels)`` batches with
            ``labels`` of shape ``(batch, 2)``.

    Returns:
        Tuple ``(mae_valence, rmse_valence, mae_arousal, rmse_arousal)`` of floats.

    Raises:
        ValueError: if ``test_loader`` yields no batch.
    """
    model.eval()
    true_batches = []
    pred_batches = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            true_batches.append(labels.detach().cpu())
            pred_batches.append(outputs.detach().cpu())

    if not true_batches:
        raise ValueError("test_loader yielded no batches")

    # Accumulate in float64 so the metrics do not inherit float32 rounding.
    y_true = torch.cat(true_batches).numpy().astype(np.float64)
    y_pred = torch.cat(pred_batches).numpy().astype(np.float64)

    def _mae_rmse(column):
        # MAE and RMSE of one label column.
        err = y_true[:, column] - y_pred[:, column]
        return float(np.mean(np.abs(err))), float(np.sqrt(np.mean(err ** 2)))

    mae_arousal_test, rmse_arousal_test = _mae_rmse(0)
    mae_valence_test, rmse_valence_test = _mae_rmse(1)

    return mae_valence_test, rmse_valence_test, mae_arousal_test, rmse_arousal_test

# Load the checkpoint that the training loop writes with torch.save
# ('best_model_SEWA.pth'). The previous path pointed at a differently named
# file that nothing in this notebook creates, so a fresh run either failed or
# evaluated stale weights.
model.load_state_dict(torch.load('best_model_SEWA.pth'))
model.eval()

# Score the restored weights on the test split.
mae_valence_test, rmse_valence_test, mae_arousal_test, rmse_arousal_test = evaluate_on_test(model, test_loader)

# Print the results for the test dataset
print(f"Test MAE Valence: {mae_valence_test:.4f}, Test RMSE Valence: {rmse_valence_test:.4f}")
print(f"Test MAE Arousal: {mae_arousal_test:.4f}, Test RMSE Arousal: {rmse_arousal_test:.4f}")


Validation RMSE: 3.0264
Validation RMSE: 3.0303
Validation RMSE: 3.0270
Validation RMSE: 3.0282
Validation RMSE: 3.0289


KeyboardInterrupt: 

In [None]:

#LSTM:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.optim.lr_scheduler import ReduceLROnPlateau


# Custom Dataset
class CustomVideoDataset(Dataset):
    """Sliding-window dataset over per-frame embeddings, with labels scaled by 1/10.

    Each item is a pair ``(frames, labels)``:

    * ``frames`` -- float32 tensor of shape ``(window_size, n_features)`` built
      from every column of ``df`` at position 3 and beyond.
    * ``labels`` -- float32 tensor ``[mean_arousal, mean_valence]`` averaged over
      the frames of the window, computed from the ``arousal`` / ``valence``
      columns divided by 10 (arousal first, valence second).

    Rows are grouped by the parent directory name of their ``path`` value and a
    window never spans two videos; a video with fewer than ``window_size``
    frames contributes no window.

    Args:
        df: DataFrame with ``path``, ``arousal`` and ``valence`` columns; all
            columns from position 3 onward are used as numeric features. The
            frame passed in is not modified.
        window_size: number of consecutive frames per window (>= 1).
        stride: step between the starts of consecutive windows (>= 1).

    Raises:
        ValueError: if ``window_size`` or ``stride`` is not positive.
    """

    def __init__(self, df, window_size=10, stride=5):
        if window_size < 1 or stride < 1:
            raise ValueError("window_size and stride must be positive integers")
        # Scale on a private copy: rescaling the caller's frame in place would
        # divide the labels by 10 again every time a dataset is rebuilt from it.
        self.df = df.copy()
        self.df['arousal'] = self.df['arousal'] / 10.
        self.df['valence'] = self.df['valence'] / 10.
        self.window_size = window_size
        self.stride = stride
        # Materialise the feature columns once as a single float32 matrix. This
        # avoids a full-table boolean scan per frame and building a tensor from
        # a Python list of ndarrays on every __getitem__ call.
        self._embeddings = self.df.loc[:, self.df.columns[3:]].to_numpy(dtype=np.float32)
        # Row positions (into self._embeddings) of each window; filled by
        # prepare_windows() in the same order as self.video_windows.
        self._window_rows = []
        self.video_windows, self.labels_windows = self.prepare_windows()

    def __len__(self):
        """Return the number of windows."""
        return len(self.video_windows)

    def __getitem__(self, idx):
        """Return ``(frames_tensor, labels_tensor)`` for window ``idx``."""
        rows = self._window_rows[idx]
        # Indexing with a list of positions copies the rows, so the tensor
        # returned here does not alias the cached matrix.
        frames_tensor = torch.from_numpy(self._embeddings[rows])

        labels = self.labels_windows[idx]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        return frames_tensor, labels_tensor

    def prepare_windows(self):
        """Group rows by video and cut each video into overlapping windows.

        Returns:
            ``(video_windows, labels_windows)`` where ``video_windows[i]`` is the
            list of frame paths in window ``i`` and ``labels_windows[i]`` is the
            tuple ``(mean_arousal, mean_valence)`` over that window.
        """
        paths = self.df['path'].tolist()
        arousals = self.df['arousal'].tolist()
        valences = self.df['valence'].tolist()

        # dicts keep insertion order, so videos are processed in the order of
        # their first appearance in the frame.
        rows_by_video = {}
        for row_pos, path in enumerate(paths):
            rows_by_video.setdefault(self.extract_video_info(path), []).append(row_pos)

        video_windows = []
        labels_windows = []
        window_rows = []
        for rows in rows_by_video.values():
            for start in range(0, len(rows) - self.window_size + 1, self.stride):
                chunk = rows[start:start + self.window_size]
                video_windows.append([paths[r] for r in chunk])
                avg_arousal = sum(arousals[r] for r in chunk) / len(chunk)
                avg_valence = sum(valences[r] for r in chunk) / len(chunk)
                labels_windows.append((avg_arousal, avg_valence))
                window_rows.append(chunk)

        self._window_rows = window_rows
        return video_windows, labels_windows

    def extract_video_info(self, file_path):
        """Return the video id: the second-to-last '/'-separated path component."""
        parts = file_path.split('/')
        video_id = parts[-2]
        return video_id


# Read the train / dev / test splits from the mounted Drive folder.
train_df, dev_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/AFEW-VA_radiant_fog_160_train.csv',
        '/content/drive/MyDrive/AFEW-VA_radiant_fog_160_dev.csv',
        '/content/drive/MyDrive/AFEW-VA_radiant_fog_160_test.csv',
    )
)

# Hyperparameters
window_size = 10
input_size, hidden_size, output_size = 256, 128, 2  # features per frame, LSTM state width, (arousal, valence)
num_layers = 2  # stacked LSTM layers
learning_rate, epochs = 0.001, 10

# Windowed datasets, one per split.
train_dataset, dev_dataset, test_dataset = (
    CustomVideoDataset(split_df, window_size)
    for split_df in (train_df, dev_df, test_df)
)

# Batch the windows; only the training split is shuffled.
batch_size = 64  # was 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader, test_loader = (
    DataLoader(split_dataset, batch_size=batch_size, shuffle=False)
    for split_dataset in (dev_dataset, test_dataset)
)


# LSTM Network
class LSTMNetwork(nn.Module):
    """(Bi)LSTM sequence encoder followed by a linear head and tanh.

    Args:
        input_size: number of features per time step.
        hidden_size: LSTM hidden-state width per direction.
        output_size: number of outputs; each is squashed to (-1, 1) by tanh.
        num_layers: number of stacked LSTM layers.
        bidirectional: run the LSTM in both directions and feed the head with
            the concatenation of both directions' final states.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=2, bidirectional=True):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional

        # Recurrent encoder; batch_first=True means inputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)

        # The head sees one hidden vector per direction.
        fc_input_size = hidden_size * 2 if bidirectional else hidden_size
        self.fc = nn.Linear(fc_input_size, output_size)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Map ``x`` of shape ``(batch, seq_len, input_size)`` to ``(batch, output_size)``."""
        # out: (batch, seq_len, num_directions * hidden_size), top layer only.
        out, _ = self.lstm(x)

        if self.bidirectional:
            # The forward direction has read the whole sequence at the last
            # step, but the backward direction has read it all only at step 0;
            # at the last step it has seen a single frame. Take each direction
            # at the step where its summary is complete.
            forward_final = out[:, -1, :self.hidden_size]
            backward_final = out[:, 0, self.hidden_size:]
            summary = torch.cat((forward_final, backward_final), dim=1)
        else:
            summary = out[:, -1, :]

        return self.tanh(self.fc(summary))


# Initialize the model
model = LSTMNetwork(input_size, hidden_size, output_size, num_layers)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Halve the learning rate when the validation loss stops improving.
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5, min_lr=1e-6, verbose=True)

# Early-stopping state
early_stopping_patience = 5
best_val_loss = float('inf')
patience_counter = 0

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    total_train_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        num_batches += 1

    avg_train_loss = total_train_loss / num_batches
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

    # ---- validation pass ----
    model.eval()
    with torch.no_grad():
        total_val_loss = 0
        for inputs, labels in dev_loader:
            outputs = model(inputs)
            val_loss = criterion(outputs, labels)
            total_val_loss += val_loss.item()
        avg_val_loss = total_val_loss / len(dev_loader)
        print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Step the scheduler exactly once per epoch. It used to be stepped twice,
    # which made the scheduler count every epoch double and cut the learning
    # rate after about half the configured patience.
    scheduler.step(avg_val_loss)

    # Early stopping on the validation loss.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        # Save the model if desired
    else:
        patience_counter += 1
        if patience_counter >= early_stopping_patience:
            print("Early stopping triggered")
            break


def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and return per-dimension MAE / RMSE.

    Column 0 of both the labels and the model output is arousal and column 1 is
    valence: that is the order in which the dataset builds its label tuples.

    Args:
        model: callable module mapping a batch of inputs to a ``(batch, 2)`` tensor.
        test_loader: iterable yielding ``(inputs, labels)`` batches with
            ``labels`` of shape ``(batch, 2)``.

    Returns:
        Tuple ``(mae_valence, rmse_valence, mae_arousal, rmse_arousal)`` of floats.

    Raises:
        ValueError: if ``test_loader`` yields no batch.
    """
    model.eval()
    true_batches = []
    pred_batches = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            # .cpu() keeps the conversion valid when tensors live on an accelerator.
            true_batches.append(labels.detach().cpu().numpy())
            pred_batches.append(outputs.detach().cpu().numpy())

    if not true_batches:
        raise ValueError("test_loader yielded no batches")

    # Accumulate in float64 so the metrics do not inherit float32 rounding.
    y_true = np.concatenate(true_batches, axis=0).astype(np.float64)
    y_pred = np.concatenate(pred_batches, axis=0).astype(np.float64)

    def _mae_rmse(column):
        # MAE and RMSE of one label column.
        err = y_true[:, column] - y_pred[:, column]
        return float(np.mean(np.abs(err))), float(np.sqrt(np.mean(err ** 2)))

    mae_arousal, rmse_arousal = _mae_rmse(0)
    mae_valence, rmse_valence = _mae_rmse(1)

    return mae_valence, rmse_valence, mae_arousal, rmse_arousal


# Evaluate the model on test data.
# The tuple is unpacked in the order evaluate_model returns it:
# (mae_valence, rmse_valence, mae_arousal, rmse_arousal).
mae_valence, rmse_valence, mae_arousal, rmse_arousal = evaluate_model(model, test_loader)


# Report MAE and RMSE for each emotion dimension, four decimals each.
print(f"Test MAE Valence: {mae_valence:.4f}, Test RMSE Valence: {rmse_valence:.4f}")
print(f"Test MAE Arousal: {mae_arousal:.4f}, Test RMSE Arousal: {rmse_arousal:.4f}")


  frames_tensor = torch.tensor(embeddings, dtype=torch.float32).squeeze(1)


Epoch 1/10, Training Loss: 6.7890
Epoch 1/10, Validation Loss: 8.9403
Epoch 2/10, Training Loss: 6.3134
Epoch 2/10, Validation Loss: 8.9619
Epoch 00004: reducing learning rate of group 0 to 5.0000e-04.
Epoch 3/10, Training Loss: 6.2975
Epoch 3/10, Validation Loss: 8.9598
Epoch 4/10, Training Loss: 6.2606
Epoch 4/10, Validation Loss: 8.9346
Epoch 5/10, Training Loss: 6.2737
Epoch 5/10, Validation Loss: 8.9331
Epoch 6/10, Training Loss: 6.2445
Epoch 6/10, Validation Loss: 8.9400
Epoch 00012: reducing learning rate of group 0 to 2.5000e-04.
Epoch 7/10, Training Loss: 6.2097
Epoch 7/10, Validation Loss: 8.9432
Epoch 8/10, Training Loss: 6.2041
Epoch 8/10, Validation Loss: 8.9460
Epoch 00015: reducing learning rate of group 0 to 1.2500e-04.
Epoch 9/10, Training Loss: 6.2112
Epoch 9/10, Validation Loss: 8.9352
Epoch 00018: reducing learning rate of group 0 to 6.2500e-05.
Epoch 10/10, Training Loss: 6.2379
Epoch 10/10, Validation Loss: 8.9375
Early stopping triggered
Test MAE Valence: 2.2378,