In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [4]:
def predict_on_dev(model, dev_loader):
  y_valence_true = []
  y_valence_pred = []
  y_arousal_true = []
  y_arousal_pred = []

  model.eval()
  with torch.no_grad():
      for inputs, labels in dev_loader:
          outputs = model(inputs)
          labels_valence = labels[:, 0]
          labels_arousal = labels[:, 1]
          outputs_valence = outputs[:, 0]
          outputs_arousal = outputs[:, 1]

          y_valence_true.extend(labels_valence.cpu().numpy())
          y_valence_pred.extend(outputs_valence.cpu().numpy())
          y_arousal_true.extend(labels_arousal.cpu().numpy())
          y_arousal_pred.extend(outputs_arousal.cpu().numpy())

  # Calculate metrics
  mae_valence = mean_absolute_error(y_valence_true, y_valence_pred)
  rmse_valence = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
  mae_arousal = mean_absolute_error(y_arousal_true, y_arousal_pred)
  rmse_arousal = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

  return (mae_valence, rmse_valence, mae_arousal, rmse_arousal)

In [6]:
import torch
import pandas as pd
import torch.nn as nn
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.utils.data import DataLoader, TensorDataset

# Define a function to load and preprocess a dataset
def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)

    # Extract features and labels
    X = data.loc[:, 'emb_0':].values
    y = data[["valence", "arousal"]].values  # Use valence and arousal as labels

    # Convert data to PyTorch tensors
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32)

    return X_tensor, y_tensor

# Load and preprocess each dataset
X_train_tensor, y_train_tensor = load_and_preprocess_dataset("/content/drive/MyDrive/SEWA_radiant_fog_160_train.csv")
X_dev_tensor, y_dev_tensor = load_and_preprocess_dataset("/content/drive/MyDrive/SEWA_radiant_fog_160_dev.csv")
X_test_tensor, y_test_tensor = load_and_preprocess_dataset("/content/drive/MyDrive/SEWA_radiant_fog_160_test.csv")

# Create DataLoaders for each dataset
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

dev_dataset = TensorDataset(X_dev_tensor, y_dev_tensor)
dev_loader = DataLoader(dev_dataset, batch_size=64, shuffle=False)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define your fully connected neural network for regression with tanh activation
class FullyConnectedNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FullyConnectedNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.tanh = nn.Tanh()  # Apply tanh activation to the output
        self.output_dim = output_dim

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.tanh(x)  # Apply tanh activation to the output
        return x

# Define hyperparameters
input_dim = X_train_tensor.shape[1]
hidden_dim = 64
output_dim = 2
learning_rate = 0.001
epochs = 100

# Instantiate the model
model = FullyConnectedNN(input_dim, hidden_dim, output_dim)

# Define separate loss functions for arousal and valence
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        return torch.sqrt(self.mse(yhat, y))

criterion_arousal = RMSELoss()
criterion_valence = RMSELoss()

# Define optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Define early stopping parameters
patience = 10  # Number of epochs to wait for improvement
min_val_loss = float('inf')
counter = 0  # Counter for epochs without improvement
best_val_loss = float('inf')
best_epoch = 0
best_mae_arousal = float('inf')
best_mae_valence = float('inf')
best_rmse_arousal = float('inf')
best_rmse_valence = float('inf')
stop_training = False

# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)

        # Split predicted values into arousal and valence
        predicted_arousal = outputs[:, 0]
        predicted_valence = outputs[:, 1]

        # Split ground truth labels into arousal and valence
        labels_arousal = labels[:, 0]
        labels_valence = labels[:, 1]

        # Calculate separate losses for arousal and valence
        loss_arousal = criterion_arousal(predicted_arousal, labels_arousal)
        loss_valence = criterion_valence(predicted_valence, labels_valence)
        loss = loss_arousal + loss_valence

        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Predict on dev data using the trained model
    model.eval()
    dev_mae_valence, dev_rmse_valence, dev_mae_arousal, dev_rmse_arousal = predict_on_dev(model, dev_loader)
    general_rmse_metric = (dev_rmse_valence + dev_rmse_arousal) / 2.

    print(f"Validation RMSE: {general_rmse_metric:.4f}")

    # Check Early stopping criteria
    if general_rmse_metric < min_val_loss:  # Check if the validation loss has improved
      min_val_loss = general_rmse_metric
      counter = 0

    # Save the model weights if RMSE is lower than the best value
      #if dev_rmse_arousal < best_rmse_arousal and dev_rmse_valence < best_rmse_valence:
      best_rmse_arousal = dev_rmse_arousal
      best_mae_arousal = dev_mae_arousal
      best_rmse_valence = dev_rmse_valence
      best_mae_valence = dev_mae_valence
      best_epoch = epoch
      torch.save(model.state_dict(), 'best_model_SEWA.pth')
    else:
        counter += 1
    # If the validation loss hasn't improved for 'patience' epochs, set the stop_training variable
    if counter >= patience:
        print(f"Early stopping at epoch {epoch + 1} due to no improvement in validation loss.")
        stop_training = True
    if stop_training:
        break

# Print the last best results and epoch
if stop_training:
    print(f"Best RMSE Arousal: {best_rmse_arousal:.4f} at epoch {best_epoch + 1}")
    print(f"Best MAE Arousal: {best_mae_arousal:.4f} at epoch {best_epoch + 1}")
    print(f"Best RMSE Valence: {best_rmse_valence:.4f} at epoch {best_epoch + 1}")
    print(f"Best MAE Valence: {best_mae_valence:.4f} at epoch {best_epoch + 1}")
else:
    print("Training completed without early stopping")

Validation RMSE: 0.0889
Validation RMSE: 0.0887
Validation RMSE: 0.0917
Validation RMSE: 0.0892
Validation RMSE: 0.0905
Validation RMSE: 0.0897
Validation RMSE: 0.0899
Validation RMSE: 0.0903
Validation RMSE: 0.0910
Validation RMSE: 0.0940
Validation RMSE: 0.0911
Validation RMSE: 0.0932
Early stopping at epoch 12 due to no improvement in validation loss.
Best RMSE Arousal: 0.0804 at epoch 2
Best MAE Arousal: 0.0594 at epoch 2
Best RMSE Valence: 0.0971 at epoch 2
Best MAE Valence: 0.0685 at epoch 2


In [7]:
def evaluate_on_test(model, test_loader):
    model.eval()
    y_valence_true = []
    y_valence_pred = []
    y_arousal_true = []
    y_arousal_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            labels_valence = labels[:, 0]
            labels_arousal = labels[:, 1]
            outputs_valence = outputs[:, 0]
            outputs_arousal = outputs[:, 1]

            y_valence_true.extend(labels_valence.cpu().numpy().flatten())
            y_valence_pred.extend(outputs_valence.cpu().numpy().flatten())
            y_arousal_true.extend(labels_arousal.cpu().numpy().flatten())
            y_arousal_pred.extend(outputs_arousal.cpu().numpy().flatten())

    # Calculate metrics
    mae_valence_test = mean_absolute_error(y_valence_true, y_valence_pred)
    rmse_valence_test = sqrt(mean_squared_error(y_valence_true, y_valence_pred))
    mae_arousal_test = mean_absolute_error(y_arousal_true, y_arousal_pred)
    rmse_arousal_test = sqrt(mean_squared_error(y_arousal_true, y_arousal_pred))

    return mae_valence_test, rmse_valence_test, mae_arousal_test, rmse_arousal_test

In [8]:
# Load the best model
model.load_state_dict(torch.load('/content/best_model_SEWA.pth'))
model.eval()

# Call the evaluate_on_test function
mae_valence_test, rmse_valence_test, mae_arousal_test, rmse_arousal_test = evaluate_on_test(model, test_loader)

# Print the results for the test dataset
print(f"Test MAE Valence: {mae_valence_test:.4f}, Test RMSE Valence: {rmse_valence_test:.4f}")
print(f"Test MAE Arousal: {mae_arousal_test:.4f}, Test RMSE Arousal: {rmse_arousal_test:.4f}")

Test MAE Valence: 0.0928, Test RMSE Valence: 0.1178
Test MAE Arousal: 0.0878, Test RMSE Arousal: 0.1124


In [None]:
#Only arousal

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from math import sqrt

# Load the training dataset
train_data = pd.read_csv("/content/drive/MyDrive/SEWA_radiant_fog_160_train.csv")

# Load the dev dataset
dev_data = pd.read_csv("/content/drive/MyDrive/SEWA_radiant_fog_160_dev.csv")

# Load the test dataset
test_data = pd.read_csv("/content/drive/MyDrive/SEWA_radiant_fog_160_test.csv")

# Extract features (X) and target (y) for arousal in the training data
X_train = train_data.iloc[:, 5:].values  # Features start from column index 5
y_arousal_train = train_data["arousal"].values

# Extract features (X) and target (y) for arousal in the dev data
X_dev = dev_data.iloc[:, 5:].values
y_arousal_dev = dev_data["arousal"].values

# Extract features (X) and target (y) for arousal in the test data
X_test = test_data.iloc[:, 5:].values
y_arousal_test = test_data["arousal"].values


# Create a parameter grid for SVR
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Concatenate train and dev
concat_x_train_dev_arousal = np.concatenate((X_train, X_dev), axis=0)
concat_y_arousal_train_dev = np.concatenate((y_arousal_train, y_arousal_dev), axis=0)

# Generate indices for training and development parts
split_index_arousal = [-1 for _ in range(X_train.shape[0])] + [0 for _ in range(X_dev.shape[0])]

# Create PredefinedSplit
pds_arousal = PredefinedSplit(test_fold=split_index_arousal)

# Use PredefinedSplit in GridSearchCV for Arousal
svr_arousal = SVR()
grid_search_arousal = GridSearchCV(svr_arousal, param_grid, cv=pds_arousal, scoring='neg_mean_squared_error')
grid_search_arousal.fit(concat_x_train_dev_arousal, concat_y_arousal_train_dev)

# Get the best estimators
best_svr_arousal = grid_search_arousal.best_estimator_

# Predict on the dev data for arousal
y_arousal_dev_pred = best_svr_arousal.predict(X_dev)

# Calculate regression metrics for arousal on the dev data
mse_arousal_dev = mean_squared_error(y_arousal_dev, y_arousal_dev_pred)
r2_arousal_dev = sqrt(mse_arousal_dev)

print("Results for Arousal on Dev Data:")
print("Arousal - Mean Squared Error:", mse_arousal_dev)
print("Arousal - R-squared:", r2_arousal_dev)

# Predict on the test data for arousal
y_arousal_test_pred = best_svr_arousal.predict(X_test)

# Calculate regression metrics for arousal on the test data
mse_arousal_test = mean_squared_error(y_arousal_test, y_arousal_test_pred)
r2_arousal_test = sqrt(mse_arousal_test)

print("Results for Arousal on Test Data:")
print("Arousal - Mean Squared Error:", mse_arousal_test)
print("Arousal - R-squared:", r2_arousal_test)

In [None]:
#Only valence
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from math import sqrt

# Load the training dataset
train_data = pd.read_csv("/content/drive/MyDrive/SEWA_radiant_fog_160_train.csv")

# Load the dev dataset
dev_data = pd.read_csv("/content/drive/MyDrive/SEWA_radiant_fog_160_dev.csv")

# Load the test dataset
test_data = pd.read_csv("/content/drive/MyDrive/SEWA_radiant_fog_160_test.csv")

# Extract features (X) and target (y) for valence in the training data
X_train = train_data.iloc[:, 5:].values  # Features start from column index 4
y_valence_train = train_data["valence"].values

# Extract features (X) and target (y) for valence in the dev data
X_dev = dev_data.iloc[:, 5:].values
y_valence_dev = dev_data["valence"].values

# Extract features (X) and target (y) for valence in the test data
X_test = test_data.iloc[:, 5:].values
y_valence_test = test_data["valence"].values

# Create a parameter grid for SVR
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Concatenate train and dev
concat_x_train_dev_valence = np.concatenate((X_train, X_dev), axis=0)
concat_y_valence_train_dev = np.concatenate((y_valence_train, y_valence_dev), axis=0)

# Generate indices for training and development parts
split_index_valence = [-1 for _ in range(X_train.shape[0])] + [0 for _ in range(X_dev.shape[0])]

# Create PredefinedSplit
pds_valence = PredefinedSplit(test_fold=split_index_valence)

# Use PredefinedSplit in GridSearchCV for Valence
svr_valence = SVR()
grid_search_valence = GridSearchCV(svr_valence, param_grid, cv=pds_valence, scoring='neg_mean_squared_error')
grid_search_valence.fit(concat_x_train_dev_valence, concat_y_valence_train_dev)

# Get the best estimators
best_svr_valence = grid_search_valence.best_estimator_

# Predict on the dev data for valence
y_valence_dev_pred = best_svr_valence.predict(X_dev)

# Calculate regression metrics for valence on the dev data
mse_valence_dev = mean_squared_error(y_valence_dev, y_valence_dev_pred)
r2_valence_dev = sqrt(mse_valence_dev)

print("Results for Valence on Dev Data:")
print("Valence - Mean Squared Error:", mse_valence_dev)
print("Valence - R-squared:", r2_valence_dev)

# Predict on the test data for valence
y_valence_test_pred = best_svr_valence.predict(X_test)

# Calculate regression metrics for valence on the test data
mse_valence_test = mean_squared_error(y_valence_test, y_valence_test_pred)
r2_valence_test = sqrt(mse_valence_test)

print("Results for Valence on Test Data:")
print("Valence - Mean Squared Error:", mse_valence_test)
print("Valence - R-squared:", r2_valence_test)