In [None]:
# Mount Google Drive at /content/drive/ so files stored under it can be
# read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
#1 SEC done

import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.utils.data import DataLoader, TensorDataset

# Function to load and preprocess audio dataset
def load_and_preprocess_dataset(filename):
    """Read a feature CSV and return ``(features, targets)`` as float32 tensors.

    Args:
        filename: Anything ``pd.read_csv`` accepts. The CSV must contain an
            ``x_0`` column as well as ``valence`` and ``arousal`` columns.

    Returns:
        A tuple ``(X_tensor, y_tensor)``. ``X_tensor`` holds every column from
        ``x_0`` through the last column of the file; ``y_tensor`` has two
        columns ordered ``[valence, arousal]``.
    """
    frame = pd.read_csv(filename)

    # Features are everything from the "x_0" column to the right edge.
    first_feature = frame.columns.get_loc("x_0")
    feature_values = frame.iloc[:, first_feature:].values
    target_values = frame[["valence", "arousal"]].values

    return (
        torch.tensor(feature_values, dtype=torch.float32),
        torch.tensor(target_values, dtype=torch.float32),
    )

# Define a fully connected neural network for regression
class FullyConnectedNN(nn.Module):
    """Two-layer perceptron for regression: Linear -> ReLU -> Linear -> Tanh.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of regression outputs.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Attribute names determine the state_dict keys, so they stay fixed.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Map a batch of inputs to outputs squashed into [-1, 1] by tanh."""
        hidden = self.relu(self.fc1(x))
        return self.tanh(self.fc2(hidden))

# RMSE Loss Function
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: Small constant added under the square root. The derivative of
            ``sqrt`` is infinite at zero, so without it a batch that is
            predicted exactly produces NaN gradients during backprop.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)

# Function to predict on dev set
def predict_on_dev(model, dev_loader):
    """Run ``model`` over ``dev_loader`` and score valence and arousal separately.

    Column 0 of both the labels and the model outputs is treated as valence,
    column 1 as arousal. The model is switched to eval mode and gradients are
    disabled while predicting.

    Args:
        model: Module called as ``model(inputs)`` on each batch.
        dev_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(mae_valence, rmse_valence, mae_arousal, rmse_arousal)``.
    """
    target_batches, output_batches = [], []

    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in dev_loader:
            batch_outputs = model(batch_inputs)
            target_batches.append(batch_labels.cpu().numpy())
            output_batches.append(batch_outputs.cpu().numpy())

    targets = np.concatenate(target_batches)
    outputs = np.concatenate(output_batches)

    # Calculate metrics per target column: 0 = valence, 1 = arousal
    scores = []
    for column in (0, 1):
        truth, guess = targets[:, column], outputs[:, column]
        scores.append(mean_absolute_error(truth, guess))
        scores.append(sqrt(mean_squared_error(truth, guess)))

    return tuple(scores)

train_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_train.csv"
dev_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_dev.csv"
test_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_test.csv"

# Load and preprocess datasets
X_train, y_train = load_and_preprocess_dataset(train_file)
X_dev, y_dev = load_and_preprocess_dataset(dev_file)
X_test, y_test = load_and_preprocess_dataset(test_file)

# Create DataLoaders (only the training split is shuffled)
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(TensorDataset(X_dev, y_dev), batch_size=batch_size, shuffle=False)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False)

# Neural Network Hyperparameters
input_dim = X_train.shape[1]
hidden_dim = 64
output_dim = 2
learning_rate = 0.001
epochs = 100

# Model, Loss Function, and Optimizer
model = FullyConnectedNN(input_dim, hidden_dim, output_dim)
criterion_arousal = RMSELoss()
criterion_valence = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Multiply the learning rate by 0.1 once the validation loss has failed to
# improve for more than 7 epochs. The scheduler's `verbose` flag is deprecated,
# so learning-rate reductions are logged explicitly inside the loop instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=7)

# Initialize early stopping parameters
patience = 10
min_val_loss = float('inf')
counter = 0
best_epoch = 0

for epoch in range(epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # Targets are ordered [valence, arousal], so column 0 is valence.
        loss_valence = criterion_valence(outputs[:, 0], labels[:, 0])
        loss_arousal = criterion_arousal(outputs[:, 1], labels[:, 1])
        loss = loss_valence + loss_arousal
        loss.backward()
        optimizer.step()

    # Validation phase
    val_mae_valence, val_rmse_valence, val_mae_arousal, val_rmse_arousal = predict_on_dev(model, dev_loader)

    # Early stopping checks use the mean of the two validation RMSEs
    val_loss = (val_rmse_valence + val_rmse_arousal) / 2
    print(f'Epoch {epoch+1}, Val Loss: {val_loss:.4f}')

    # Update the learning rate scheduler and report any reduction
    previous_lr = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']
    if current_lr < previous_lr:
        print(f'Epoch {epoch+1}: reducing learning rate to {current_lr:.4e}')

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        counter = 0
        best_epoch = epoch
        # Save the best model
        torch.save(model.state_dict(), 'best_model_audio-ONE-SEC.pth')
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered")
            break

# Report the epoch training actually ended on, and separately the epoch whose
# weights were checkpointed.
print(f"Training stopped after {epoch+1} epochs (best epoch: {best_epoch+1}, val loss: {min_val_loss:.4f})")

def evaluate_on_test(model, test_loader):
    """Score ``model`` on ``test_loader`` and print MAE/RMSE for both targets.

    The prediction loop and metric computation are the same as for the dev
    set, so they are delegated to ``predict_on_dev`` instead of being
    duplicated here.

    Args:
        model: Module called as ``model(inputs)`` on each batch.
        test_loader: Iterable yielding ``(inputs, labels)`` batches, with
            labels ordered ``[valence, arousal]``.

    Returns:
        None. The four metrics are printed.
    """
    mae_valence, rmse_valence, mae_arousal, rmse_arousal = predict_on_dev(model, test_loader)

    print(f"Test MAE Valence: {mae_valence:.4f}, RMSE Valence: {rmse_valence:.4f}")
    print(f"Test MAE Arousal: {mae_arousal:.4f}, RMSE Arousal: {rmse_arousal:.4f}")

# Restore the weights checkpointed at the best validation epoch
best_state = torch.load('best_model_audio-ONE-SEC.pth')
model.load_state_dict(best_state)

# Report metrics on the held-out test split
evaluate_on_test(model, test_loader)



Epoch 1, Val Loss: 0.1266
Epoch 2, Val Loss: 0.1270
Epoch 3, Val Loss: 0.1267
Epoch 4, Val Loss: 0.1267
Epoch 5, Val Loss: 0.1258
Epoch 6, Val Loss: 0.1260
Epoch 7, Val Loss: 0.1265
Epoch 8, Val Loss: 0.1253
Epoch 9, Val Loss: 0.1251
Epoch 10, Val Loss: 0.1285
Epoch 11, Val Loss: 0.1255
Epoch 12, Val Loss: 0.1279
Epoch 13, Val Loss: 0.1276
Epoch 14, Val Loss: 0.1271
Epoch 15, Val Loss: 0.1254
Epoch 16, Val Loss: 0.1256
Epoch 17, Val Loss: 0.1254
Epoch 00017: reducing learning rate of group 0 to 1.0000e-04.
Epoch 18, Val Loss: 0.1252
Epoch 19, Val Loss: 0.1255
Early stopping triggered
Training stopped after 9 epochs
Test MAE Valence: 0.1326, RMSE Valence: 0.1669
Test MAE Arousal: 0.1339, RMSE Arousal: 0.1796


In [None]:
# Sanity check: reload the training split and print its tensor shapes and values.
X_train, y_train = load_and_preprocess_dataset(train_file)
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(X_train)

X_train shape: torch.Size([9478, 768]), y_train shape: torch.Size([9478, 2])
tensor([[-0.0729,  0.0021,  0.1717,  ..., -0.1230, -0.0004, -0.1102],
        [-0.0766,  0.0117,  0.0313,  ...,  0.0146, -0.0082, -0.0802],
        [-0.0092,  0.0022, -0.0328,  ..., -0.1054, -0.0195, -0.0545],
        ...,
        [-0.2134,  0.0506,  0.0328,  ...,  0.0360,  0.0300, -0.0583],
        [-0.2995,  0.0570,  0.1754,  ..., -0.0725,  0.0316, -0.0728],
        [-0.0641, -0.0285,  0.0678,  ..., -0.0521, -0.0176, -0.1609]])


In [None]:
#2 SEC done

import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.utils.data import DataLoader, TensorDataset

# Function to load and preprocess audio dataset
def load_and_preprocess_dataset(filename):
    """Read a feature CSV and return ``(features, targets)`` as float32 tensors.

    Args:
        filename: Anything ``pd.read_csv`` accepts. The CSV must contain an
            ``x_0`` column as well as ``valence`` and ``arousal`` columns.

    Returns:
        A tuple ``(X_tensor, y_tensor)``. ``X_tensor`` holds every column from
        ``x_0`` through the last column of the file; ``y_tensor`` has two
        columns ordered ``[valence, arousal]``.
    """
    frame = pd.read_csv(filename)

    # Features are everything from the "x_0" column to the right edge.
    first_feature = frame.columns.get_loc("x_0")
    feature_values = frame.iloc[:, first_feature:].values
    target_values = frame[["valence", "arousal"]].values

    return (
        torch.tensor(feature_values, dtype=torch.float32),
        torch.tensor(target_values, dtype=torch.float32),
    )

# Define a fully connected neural network for regression
class FullyConnectedNN(nn.Module):
    """Two-layer perceptron for regression: Linear -> ReLU -> Linear -> Tanh.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of regression outputs.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Attribute names determine the state_dict keys, so they stay fixed.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Map a batch of inputs to outputs squashed into [-1, 1] by tanh."""
        hidden = self.relu(self.fc1(x))
        return self.tanh(self.fc2(hidden))

# RMSE Loss Function
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: Small constant added under the square root. The derivative of
            ``sqrt`` is infinite at zero, so without it a batch that is
            predicted exactly produces NaN gradients during backprop.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)

# Function to predict on dev set
def predict_on_dev(model, dev_loader):
    """Run ``model`` over ``dev_loader`` and score valence and arousal separately.

    Column 0 of both the labels and the model outputs is treated as valence,
    column 1 as arousal. The model is switched to eval mode and gradients are
    disabled while predicting.

    Args:
        model: Module called as ``model(inputs)`` on each batch.
        dev_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(mae_valence, rmse_valence, mae_arousal, rmse_arousal)``.
    """
    target_batches, output_batches = [], []

    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in dev_loader:
            batch_outputs = model(batch_inputs)
            target_batches.append(batch_labels.cpu().numpy())
            output_batches.append(batch_outputs.cpu().numpy())

    targets = np.concatenate(target_batches)
    outputs = np.concatenate(output_batches)

    # Calculate metrics per target column: 0 = valence, 1 = arousal
    scores = []
    for column in (0, 1):
        truth, guess = targets[:, column], outputs[:, column]
        scores.append(mean_absolute_error(truth, guess))
        scores.append(sqrt(mean_squared_error(truth, guess)))

    return tuple(scores)

train_file = "/content/drive/MyDrive/2sec/SEWA_features_wav2vec_2_seconds_train.csv"
dev_file = "/content/drive/MyDrive/2sec/SEWA_features_wav2vec_2_seconds_dev.csv"
test_file = "/content/drive/MyDrive/2sec/SEWA_features_wav2vec_2_seconds_test.csv"

# Load and preprocess datasets
X_train, y_train = load_and_preprocess_dataset(train_file)
X_dev, y_dev = load_and_preprocess_dataset(dev_file)
X_test, y_test = load_and_preprocess_dataset(test_file)

# Create DataLoaders (only the training split is shuffled)
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(TensorDataset(X_dev, y_dev), batch_size=batch_size, shuffle=False)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False)

# Neural Network Hyperparameters
input_dim = X_train.shape[1]
hidden_dim = 64
output_dim = 2
learning_rate = 0.001
epochs = 100

# Model, Loss Function, and Optimizer
model = FullyConnectedNN(input_dim, hidden_dim, output_dim)
criterion_arousal = RMSELoss()
criterion_valence = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Multiply the learning rate by 0.1 once the validation loss has failed to
# improve for more than 7 epochs. The scheduler's `verbose` flag is deprecated,
# so learning-rate reductions are logged explicitly inside the loop instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=7)

# Initialize early stopping parameters
patience = 10
min_val_loss = float('inf')
counter = 0
best_epoch = 0

for epoch in range(epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # Targets are ordered [valence, arousal], so column 0 is valence.
        loss_valence = criterion_valence(outputs[:, 0], labels[:, 0])
        loss_arousal = criterion_arousal(outputs[:, 1], labels[:, 1])
        loss = loss_valence + loss_arousal
        loss.backward()
        optimizer.step()

    # Validation phase
    val_mae_valence, val_rmse_valence, val_mae_arousal, val_rmse_arousal = predict_on_dev(model, dev_loader)

    # Early stopping checks use the mean of the two validation RMSEs
    val_loss = (val_rmse_valence + val_rmse_arousal) / 2
    print(f'Epoch {epoch+1}, Val Loss: {val_loss:.4f}')

    # Update the learning rate scheduler and report any reduction
    previous_lr = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']
    if current_lr < previous_lr:
        print(f'Epoch {epoch+1}: reducing learning rate to {current_lr:.4e}')

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        counter = 0
        best_epoch = epoch
        # Save the best model
        torch.save(model.state_dict(), 'best_model_audio-TWO-SEC.pth')
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered")
            break

# Report the epoch training actually ended on, and separately the epoch whose
# weights were checkpointed.
print(f"Training stopped after {epoch+1} epochs (best epoch: {best_epoch+1}, val loss: {min_val_loss:.4f})")

def evaluate_on_test(model, test_loader):
    """Score ``model`` on ``test_loader`` and print MAE/RMSE for both targets.

    The prediction loop and metric computation are the same as for the dev
    set, so they are delegated to ``predict_on_dev`` instead of being
    duplicated here.

    Args:
        model: Module called as ``model(inputs)`` on each batch.
        test_loader: Iterable yielding ``(inputs, labels)`` batches, with
            labels ordered ``[valence, arousal]``.

    Returns:
        None. The four metrics are printed.
    """
    mae_valence, rmse_valence, mae_arousal, rmse_arousal = predict_on_dev(model, test_loader)

    print(f"Test MAE Valence: {mae_valence:.4f}, RMSE Valence: {rmse_valence:.4f}")
    print(f"Test MAE Arousal: {mae_arousal:.4f}, RMSE Arousal: {rmse_arousal:.4f}")

# Restore the weights checkpointed at the best validation epoch
best_state = torch.load('best_model_audio-TWO-SEC.pth')
model.load_state_dict(best_state)

# Report metrics on the held-out test split
evaluate_on_test(model, test_loader)



Epoch 1, Val Loss: 0.1258
Epoch 2, Val Loss: 0.1277
Epoch 3, Val Loss: 0.1261
Epoch 4, Val Loss: 0.1252
Epoch 5, Val Loss: 0.1254
Epoch 6, Val Loss: 0.1264
Epoch 7, Val Loss: 0.1257
Epoch 8, Val Loss: 0.1262
Epoch 9, Val Loss: 0.1248
Epoch 10, Val Loss: 0.1252
Epoch 11, Val Loss: 0.1250
Epoch 12, Val Loss: 0.1252
Epoch 13, Val Loss: 0.1265
Epoch 14, Val Loss: 0.1271
Epoch 15, Val Loss: 0.1260
Epoch 16, Val Loss: 0.1263
Epoch 17, Val Loss: 0.1273
Epoch 00017: reducing learning rate of group 0 to 1.0000e-04.
Epoch 18, Val Loss: 0.1252
Epoch 19, Val Loss: 0.1249
Early stopping triggered
Training stopped after 9 epochs
Test MAE Valence: 0.1347, RMSE Valence: 0.1698
Test MAE Arousal: 0.1376, RMSE Arousal: 0.1843


In [None]:
#3 SEC done

import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.utils.data import DataLoader, TensorDataset

# Function to load and preprocess audio dataset
def load_and_preprocess_dataset(filename):
    """Read a feature CSV and return ``(features, targets)`` as float32 tensors.

    Args:
        filename: Anything ``pd.read_csv`` accepts. The CSV must contain an
            ``x_0`` column as well as ``valence`` and ``arousal`` columns.

    Returns:
        A tuple ``(X_tensor, y_tensor)``. ``X_tensor`` holds every column from
        ``x_0`` through the last column of the file; ``y_tensor`` has two
        columns ordered ``[valence, arousal]``.
    """
    frame = pd.read_csv(filename)

    # Features are everything from the "x_0" column to the right edge.
    first_feature = frame.columns.get_loc("x_0")
    feature_values = frame.iloc[:, first_feature:].values
    target_values = frame[["valence", "arousal"]].values

    return (
        torch.tensor(feature_values, dtype=torch.float32),
        torch.tensor(target_values, dtype=torch.float32),
    )

# Define a fully connected neural network for regression
class FullyConnectedNN(nn.Module):
    """Two-layer perceptron for regression: Linear -> ReLU -> Linear -> Tanh.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of regression outputs.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Attribute names determine the state_dict keys, so they stay fixed.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Map a batch of inputs to outputs squashed into [-1, 1] by tanh."""
        hidden = self.relu(self.fc1(x))
        return self.tanh(self.fc2(hidden))

# RMSE Loss Function
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: Small constant added under the square root. The derivative of
            ``sqrt`` is infinite at zero, so without it a batch that is
            predicted exactly produces NaN gradients during backprop.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)

# Function to predict on dev set
def predict_on_dev(model, dev_loader):
    """Run ``model`` over ``dev_loader`` and score valence and arousal separately.

    Column 0 of both the labels and the model outputs is treated as valence,
    column 1 as arousal. The model is switched to eval mode and gradients are
    disabled while predicting.

    Args:
        model: Module called as ``model(inputs)`` on each batch.
        dev_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(mae_valence, rmse_valence, mae_arousal, rmse_arousal)``.
    """
    target_batches, output_batches = [], []

    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in dev_loader:
            batch_outputs = model(batch_inputs)
            target_batches.append(batch_labels.cpu().numpy())
            output_batches.append(batch_outputs.cpu().numpy())

    targets = np.concatenate(target_batches)
    outputs = np.concatenate(output_batches)

    # Calculate metrics per target column: 0 = valence, 1 = arousal
    scores = []
    for column in (0, 1):
        truth, guess = targets[:, column], outputs[:, column]
        scores.append(mean_absolute_error(truth, guess))
        scores.append(sqrt(mean_squared_error(truth, guess)))

    return tuple(scores)

train_file = "/content/drive/MyDrive/3sec/SEWA_features_wav2vec_3_seconds_train.csv"
dev_file = "/content/drive/MyDrive/3sec/SEWA_features_wav2vec_3_seconds_dev.csv"
test_file = "/content/drive/MyDrive/3sec/SEWA_features_wav2vec_3_seconds_test.csv"

# Load and preprocess datasets
X_train, y_train = load_and_preprocess_dataset(train_file)
X_dev, y_dev = load_and_preprocess_dataset(dev_file)
X_test, y_test = load_and_preprocess_dataset(test_file)

# Create DataLoaders (only the training split is shuffled)
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(TensorDataset(X_dev, y_dev), batch_size=batch_size, shuffle=False)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False)

# Neural Network Hyperparameters
input_dim = X_train.shape[1]
hidden_dim = 64
output_dim = 2
learning_rate = 0.001
epochs = 100

# Model, Loss Function, and Optimizer
model = FullyConnectedNN(input_dim, hidden_dim, output_dim)
criterion_arousal = RMSELoss()
criterion_valence = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Multiply the learning rate by 0.1 once the validation loss has failed to
# improve for more than 7 epochs. The scheduler's `verbose` flag is deprecated,
# so learning-rate reductions are logged explicitly inside the loop instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=7)

# Initialize early stopping parameters
patience = 10
min_val_loss = float('inf')
counter = 0
best_epoch = 0

for epoch in range(epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # Targets are ordered [valence, arousal], so column 0 is valence.
        loss_valence = criterion_valence(outputs[:, 0], labels[:, 0])
        loss_arousal = criterion_arousal(outputs[:, 1], labels[:, 1])
        loss = loss_valence + loss_arousal
        loss.backward()
        optimizer.step()

    # Validation phase
    val_mae_valence, val_rmse_valence, val_mae_arousal, val_rmse_arousal = predict_on_dev(model, dev_loader)

    # Early stopping checks use the mean of the two validation RMSEs
    val_loss = (val_rmse_valence + val_rmse_arousal) / 2
    print(f'Epoch {epoch+1}, Val Loss: {val_loss:.4f}')

    # Update the learning rate scheduler and report any reduction
    previous_lr = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']
    if current_lr < previous_lr:
        print(f'Epoch {epoch+1}: reducing learning rate to {current_lr:.4e}')

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        counter = 0
        best_epoch = epoch
        # Save the best model
        torch.save(model.state_dict(), 'best_model_audio-THREE-SEC.pth')
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered")
            break

# Report the epoch training actually ended on, and separately the epoch whose
# weights were checkpointed.
print(f"Training stopped after {epoch+1} epochs (best epoch: {best_epoch+1}, val loss: {min_val_loss:.4f})")

def evaluate_on_test(model, test_loader):
    """Score ``model`` on ``test_loader`` and print MAE/RMSE for both targets.

    The prediction loop and metric computation are the same as for the dev
    set, so they are delegated to ``predict_on_dev`` instead of being
    duplicated here.

    Args:
        model: Module called as ``model(inputs)`` on each batch.
        test_loader: Iterable yielding ``(inputs, labels)`` batches, with
            labels ordered ``[valence, arousal]``.

    Returns:
        None. The four metrics are printed.
    """
    mae_valence, rmse_valence, mae_arousal, rmse_arousal = predict_on_dev(model, test_loader)

    print(f"Test MAE Valence: {mae_valence:.4f}, RMSE Valence: {rmse_valence:.4f}")
    print(f"Test MAE Arousal: {mae_arousal:.4f}, RMSE Arousal: {rmse_arousal:.4f}")

# Restore the weights checkpointed at the best validation epoch
best_state = torch.load('best_model_audio-THREE-SEC.pth')
model.load_state_dict(best_state)

# Report metrics on the held-out test split
evaluate_on_test(model, test_loader)



Epoch 1, Val Loss: 0.1257
Epoch 2, Val Loss: 0.1268
Epoch 3, Val Loss: 0.1255
Epoch 4, Val Loss: 0.1247
Epoch 5, Val Loss: 0.1241
Epoch 6, Val Loss: 0.1265
Epoch 7, Val Loss: 0.1248
Epoch 8, Val Loss: 0.1247
Epoch 9, Val Loss: 0.1247
Epoch 10, Val Loss: 0.1249
Epoch 11, Val Loss: 0.1275
Epoch 12, Val Loss: 0.1274
Epoch 13, Val Loss: 0.1253
Epoch 00013: reducing learning rate of group 0 to 1.0000e-04.
Epoch 14, Val Loss: 0.1251
Epoch 15, Val Loss: 0.1248
Early stopping triggered
Training stopped after 5 epochs
Test MAE Valence: 0.1325, RMSE Valence: 0.1665
Test MAE Arousal: 0.1379, RMSE Arousal: 0.1863


In [None]:
#4 SEC done

import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from torch.utils.data import DataLoader, TensorDataset

# Function to load and preprocess audio dataset
def load_and_preprocess_dataset(filename):
    """Read a feature CSV and return ``(features, targets)`` as float32 tensors.

    Args:
        filename: Anything ``pd.read_csv`` accepts. The CSV must contain an
            ``x_0`` column as well as ``valence`` and ``arousal`` columns.

    Returns:
        A tuple ``(X_tensor, y_tensor)``. ``X_tensor`` holds every column from
        ``x_0`` through the last column of the file; ``y_tensor`` has two
        columns ordered ``[valence, arousal]``.
    """
    frame = pd.read_csv(filename)

    # Features are everything from the "x_0" column to the right edge.
    first_feature = frame.columns.get_loc("x_0")
    feature_values = frame.iloc[:, first_feature:].values
    target_values = frame[["valence", "arousal"]].values

    return (
        torch.tensor(feature_values, dtype=torch.float32),
        torch.tensor(target_values, dtype=torch.float32),
    )

# Define a fully connected neural network for regression
class FullyConnectedNN(nn.Module):
    """Two-layer perceptron for regression: Linear -> ReLU -> Linear -> Tanh.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of regression outputs.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Attribute names determine the state_dict keys, so they stay fixed.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Map a batch of inputs to outputs squashed into [-1, 1] by tanh."""
        hidden = self.relu(self.fc1(x))
        return self.tanh(self.fc2(hidden))

# RMSE Loss Function
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: Small constant added under the square root. The derivative of
            ``sqrt`` is infinite at zero, so without it a batch that is
            predicted exactly produces NaN gradients during backprop.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)

# Function to predict on dev set
def predict_on_dev(model, dev_loader):
    """Run ``model`` over ``dev_loader`` and score valence and arousal separately.

    Column 0 of both the labels and the model outputs is treated as valence,
    column 1 as arousal. The model is switched to eval mode and gradients are
    disabled while predicting.

    Args:
        model: Module called as ``model(inputs)`` on each batch.
        dev_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(mae_valence, rmse_valence, mae_arousal, rmse_arousal)``.
    """
    target_batches, output_batches = [], []

    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in dev_loader:
            batch_outputs = model(batch_inputs)
            target_batches.append(batch_labels.cpu().numpy())
            output_batches.append(batch_outputs.cpu().numpy())

    targets = np.concatenate(target_batches)
    outputs = np.concatenate(output_batches)

    # Calculate metrics per target column: 0 = valence, 1 = arousal
    scores = []
    for column in (0, 1):
        truth, guess = targets[:, column], outputs[:, column]
        scores.append(mean_absolute_error(truth, guess))
        scores.append(sqrt(mean_squared_error(truth, guess)))

    return tuple(scores)

train_file = "/content/drive/MyDrive/4sec/SEWA_features_wav2vec_4_seconds_train.csv"
dev_file = "/content/drive/MyDrive/4sec/SEWA_features_wav2vec_4_seconds_dev.csv"
test_file = "/content/drive/MyDrive/4sec/SEWA_features_wav2vec_4_seconds_test.csv"

# Load and preprocess datasets
X_train, y_train = load_and_preprocess_dataset(train_file)
X_dev, y_dev = load_and_preprocess_dataset(dev_file)
X_test, y_test = load_and_preprocess_dataset(test_file)

# Create DataLoaders (only the training split is shuffled)
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(TensorDataset(X_dev, y_dev), batch_size=batch_size, shuffle=False)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False)

# Neural Network Hyperparameters
input_dim = X_train.shape[1]
hidden_dim = 64
output_dim = 2
learning_rate = 0.001
epochs = 100

# Model, Loss Function, and Optimizer
model = FullyConnectedNN(input_dim, hidden_dim, output_dim)
criterion_arousal = RMSELoss()
criterion_valence = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Multiply the learning rate by 0.1 once the validation loss has failed to
# improve for more than 7 epochs. The scheduler's `verbose` flag is deprecated,
# so learning-rate reductions are logged explicitly inside the loop instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=7)

# Initialize early stopping parameters
patience = 10
min_val_loss = float('inf')
counter = 0
best_epoch = 0

for epoch in range(epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # Targets are ordered [valence, arousal], so column 0 is valence.
        loss_valence = criterion_valence(outputs[:, 0], labels[:, 0])
        loss_arousal = criterion_arousal(outputs[:, 1], labels[:, 1])
        loss = loss_valence + loss_arousal
        loss.backward()
        optimizer.step()

    # Validation phase
    val_mae_valence, val_rmse_valence, val_mae_arousal, val_rmse_arousal = predict_on_dev(model, dev_loader)

    # Early stopping checks use the mean of the two validation RMSEs
    val_loss = (val_rmse_valence + val_rmse_arousal) / 2
    print(f'Epoch {epoch+1}, Val Loss: {val_loss:.4f}')

    # Update the learning rate scheduler and report any reduction
    previous_lr = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']
    if current_lr < previous_lr:
        print(f'Epoch {epoch+1}: reducing learning rate to {current_lr:.4e}')

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        counter = 0
        best_epoch = epoch
        # Save the best model
        torch.save(model.state_dict(), 'best_model_audio-FOUR-SEC.pth')
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered")
            break

# Report the epoch training actually ended on, and separately the epoch whose
# weights were checkpointed.
print(f"Training stopped after {epoch+1} epochs (best epoch: {best_epoch+1}, val loss: {min_val_loss:.4f})")

def evaluate_on_test(model, test_loader):
    """Score ``model`` on ``test_loader`` and print MAE/RMSE for both targets.

    The prediction loop and metric computation are the same as for the dev
    set, so they are delegated to ``predict_on_dev`` instead of being
    duplicated here.

    Args:
        model: Module called as ``model(inputs)`` on each batch.
        test_loader: Iterable yielding ``(inputs, labels)`` batches, with
            labels ordered ``[valence, arousal]``.

    Returns:
        None. The four metrics are printed.
    """
    mae_valence, rmse_valence, mae_arousal, rmse_arousal = predict_on_dev(model, test_loader)

    print(f"Test MAE Valence: {mae_valence:.4f}, RMSE Valence: {rmse_valence:.4f}")
    print(f"Test MAE Arousal: {mae_arousal:.4f}, RMSE Arousal: {rmse_arousal:.4f}")

# Restore the weights checkpointed at the best validation epoch
best_state = torch.load('best_model_audio-FOUR-SEC.pth')
model.load_state_dict(best_state)

# Report metrics on the held-out test split
evaluate_on_test(model, test_loader)



Epoch 1, Val Loss: 0.1285
Epoch 2, Val Loss: 0.1259
Epoch 3, Val Loss: 0.1262
Epoch 4, Val Loss: 0.1256
Epoch 5, Val Loss: 0.1264
Epoch 6, Val Loss: 0.1257
Epoch 7, Val Loss: 0.1275
Epoch 8, Val Loss: 0.1249
Epoch 9, Val Loss: 0.1266
Epoch 10, Val Loss: 0.1258
Epoch 11, Val Loss: 0.1260
Epoch 12, Val Loss: 0.1260
Epoch 13, Val Loss: 0.1270
Epoch 14, Val Loss: 0.1250
Epoch 15, Val Loss: 0.1242
Epoch 16, Val Loss: 0.1260
Epoch 17, Val Loss: 0.1245
Epoch 18, Val Loss: 0.1244
Epoch 19, Val Loss: 0.1259
Epoch 20, Val Loss: 0.1249
Epoch 21, Val Loss: 0.1250
Epoch 22, Val Loss: 0.1256
Epoch 23, Val Loss: 0.1286
Epoch 00023: reducing learning rate of group 0 to 1.0000e-04.
Epoch 24, Val Loss: 0.1249
Epoch 25, Val Loss: 0.1252
Early stopping triggered
Training stopped after 15 epochs
Test MAE Valence: 0.1338, RMSE Valence: 0.1672
Test MAE Arousal: 0.1339, RMSE Arousal: 0.1810


In [None]:
#SVR 1 sec

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    """Read a feature CSV and return ``(X, y_arousal, y_valence)`` as NumPy arrays.

    Args:
        filename: Anything ``pd.read_csv`` accepts. The CSV must contain an
            ``x_0`` column as well as ``arousal`` and ``valence`` columns.

    Returns:
        ``X`` holds every column from ``x_0`` through the last column of the
        file; ``y_arousal`` and ``y_valence`` are the corresponding label
        columns.
    """
    frame = pd.read_csv(filename)
    first_feature = frame.columns.get_loc("x_0")
    features = frame.iloc[:, first_feature:].values  # x_0 through the last column
    return features, frame['arousal'].values, frame['valence'].values

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    """Standardise all three splits using statistics fitted on ``X_train`` only.

    Returns:
        ``(X_train_scaled, X_dev_scaled, X_test_scaled)``.
    """
    scaler = StandardScaler().fit(X_train)
    return tuple(scaler.transform(split) for split in (X_train, X_dev, X_test))

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    """Pick SVR hyper-parameters by scoring each candidate on the dev split.

    Train and dev are concatenated and handed to ``GridSearchCV`` with a
    ``PredefinedSplit`` so that every candidate is fitted on the training rows
    and scored (negative MSE) on the dev rows.

    NOTE: ``GridSearchCV`` is left at its default ``refit=True``, so the
    returned estimator is refitted on the concatenated train+dev data. Metrics
    later computed on the dev split with that estimator are therefore not
    held-out scores.

    Args:
        X_train, y_train: Training features and targets.
        X_dev, y_dev: Validation features and targets.
        param_grid: Parameter grid passed to ``GridSearchCV``.

    Returns:
        The best estimator found by the search.
    """
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)

    # PredefinedSplit indices: -1 keeps a row in the training fold for every
    # split, 0 assigns it to the single validation fold.
    split_index = np.concatenate((
        np.full(len(X_train), -1),
        np.zeros(len(X_dev), dtype=int),
    ))
    pds = PredefinedSplit(test_fold=split_index)

    # Candidates are independent of each other, so fit them in parallel on all
    # available cores; this does not change which candidate wins.
    grid_search = GridSearchCV(
        SVR(),
        param_grid,
        cv=pds,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    """Compute MSE and RMSE of ``model`` on the dev and test splits.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_dev, y_dev: Dev features and targets.
        X_test, y_test: Test features and targets.

    Returns:
        ``(mse_dev, rmse_dev, mse_test, rmse_test)``.
    """
    def _mse_and_rmse(features, targets):
        # One prediction pass per split; RMSE is derived from the MSE.
        mse = mean_squared_error(targets, model.predict(features))
        return mse, sqrt(mse)

    return _mse_and_rmse(X_dev, y_dev) + _mse_and_rmse(X_test, y_test)

# Paths to datasets
# 1-second wav2vec feature CSVs on the mounted Drive
train_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_train.csv"
dev_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_dev.csv"
test_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid: 4 values of C x 4 kernels = 16 candidates per target
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model: search hyper-parameters, then score on dev and test
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Valence Model: same procedure with the valence targets
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


KeyboardInterrupt: 

In [None]:
#1 sec, decision tree
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from numpy import sqrt

def load_and_preprocess_dataset(filename):
    """Read one feature CSV and split it into features and emotion targets.

    Every column from "x_0" through the last column is taken as a feature;
    the "arousal" and "valence" columns are the regression targets.

    Returns:
        Tuple ``(X, y_arousal, y_valence)`` of NumPy arrays.
    """
    frame = pd.read_csv(filename)
    first_feature = frame.columns.get_loc("x_0")
    feature_matrix = frame.iloc[:, first_feature:].values
    targets = {name: frame[name].values for name in ("arousal", "valence")}
    return feature_matrix, targets["arousal"], targets["valence"]

def scale_features(X, scaler=None):
    """Standardize a feature matrix with ``StandardScaler``.

    Args:
        X: Feature matrix to standardize.
        scaler: Optional already-fitted ``StandardScaler``. When given, ``X`` is
            transformed with that scaler's stored statistics. When omitted, a
            new scaler is fitted on ``X`` itself (the original behaviour).

    Returns:
        The standardized feature matrix.
    """
    if scaler is None:
        return StandardScaler().fit_transform(X)
    return scaler.transform(X)

# Load and preprocess the datasets
train_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_train.csv"
dev_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_dev.csv"
test_file = "/content/drive/MyDrive/1sec/SEWA_features_wav2vec_1_seconds_test.csv"

X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Fit the scaler on the training split only and reuse it for dev/test.
# Fitting a separate scaler per split would shift dev/test features by their
# own mean/std, so the tree's thresholds (learned in train-scaled units) would
# be compared against values on a different scale.
# NOTE: metrics recorded from runs that scaled each split independently will
# differ from what this cell now produces.
feature_scaler = StandardScaler().fit(X_train)
X_train_scaled = scale_features(X_train, feature_scaler)
X_dev_scaled = scale_features(X_dev, feature_scaler)
X_test_scaled = scale_features(X_test, feature_scaler)

# Decision Tree Regressor for Arousal (fixed seed for reproducible trees)
regressor_arousal = DecisionTreeRegressor(random_state=42)
regressor_arousal.fit(X_train_scaled, y_arousal_train)

# Arousal predictions and metrics on dev, then test
y_arousal_dev_pred = regressor_arousal.predict(X_dev_scaled)
mse_arousal_dev = mean_squared_error(y_arousal_dev, y_arousal_dev_pred)
rmse_arousal_dev = sqrt(mse_arousal_dev)

y_arousal_test_pred = regressor_arousal.predict(X_test_scaled)
mse_arousal_test = mean_squared_error(y_arousal_test, y_arousal_test_pred)
rmse_arousal_test = sqrt(mse_arousal_test)

# Decision Tree Regressor for Valence (independent model, same seed)
regressor_valence = DecisionTreeRegressor(random_state=42)
regressor_valence.fit(X_train_scaled, y_valence_train)

# Valence predictions and metrics on dev, then test
y_valence_dev_pred = regressor_valence.predict(X_dev_scaled)
mse_valence_dev = mean_squared_error(y_valence_dev, y_valence_dev_pred)
rmse_valence_dev = sqrt(mse_valence_dev)

y_valence_test_pred = regressor_valence.predict(X_test_scaled)
mse_valence_test = mean_squared_error(y_valence_test, y_valence_test_pred)
rmse_valence_test = sqrt(mse_valence_test)

# Results Output
print("Arousal - Dev RMSE:", rmse_arousal_dev, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev RMSE:", rmse_valence_dev, "Test RMSE:", rmse_valence_test)


Arousal - Dev RMSE: 0.1973916847658415 Test RMSE: 0.24247219920806407
Valence - Dev RMSE: 0.21595636185233033 Test RMSE: 0.23319716413073385


In [None]:
#2 sec, decision tree
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from numpy import sqrt

def load_and_preprocess_dataset(filename):
    """Read one feature CSV and split it into features and emotion targets.

    Every column from "x_0" through the last column is taken as a feature;
    the "arousal" and "valence" columns are the regression targets.

    Returns:
        Tuple ``(X, y_arousal, y_valence)`` of NumPy arrays.
    """
    frame = pd.read_csv(filename)
    first_feature = frame.columns.get_loc("x_0")
    feature_matrix = frame.iloc[:, first_feature:].values
    targets = {name: frame[name].values for name in ("arousal", "valence")}
    return feature_matrix, targets["arousal"], targets["valence"]

def scale_features(X, scaler=None):
    """Standardize a feature matrix with ``StandardScaler``.

    Args:
        X: Feature matrix to standardize.
        scaler: Optional already-fitted ``StandardScaler``. When given, ``X`` is
            transformed with that scaler's stored statistics. When omitted, a
            new scaler is fitted on ``X`` itself (the original behaviour).

    Returns:
        The standardized feature matrix.
    """
    if scaler is None:
        return StandardScaler().fit_transform(X)
    return scaler.transform(X)

# Load and preprocess the datasets
train_file = "/content/drive/MyDrive/2sec/SEWA_features_wav2vec_2_seconds_train.csv"
dev_file = "/content/drive/MyDrive/2sec/SEWA_features_wav2vec_2_seconds_dev.csv"
test_file = "/content/drive/MyDrive/2sec/SEWA_features_wav2vec_2_seconds_test.csv"

X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Fit the scaler on the training split only and reuse it for dev/test.
# Fitting a separate scaler per split would shift dev/test features by their
# own mean/std, so the tree's thresholds (learned in train-scaled units) would
# be compared against values on a different scale.
# NOTE: metrics recorded from runs that scaled each split independently will
# differ from what this cell now produces.
feature_scaler = StandardScaler().fit(X_train)
X_train_scaled = scale_features(X_train, feature_scaler)
X_dev_scaled = scale_features(X_dev, feature_scaler)
X_test_scaled = scale_features(X_test, feature_scaler)

# Decision Tree Regressor for Arousal (fixed seed for reproducible trees)
regressor_arousal = DecisionTreeRegressor(random_state=42)
regressor_arousal.fit(X_train_scaled, y_arousal_train)

# Arousal predictions and metrics on dev, then test
y_arousal_dev_pred = regressor_arousal.predict(X_dev_scaled)
mse_arousal_dev = mean_squared_error(y_arousal_dev, y_arousal_dev_pred)
rmse_arousal_dev = sqrt(mse_arousal_dev)

y_arousal_test_pred = regressor_arousal.predict(X_test_scaled)
mse_arousal_test = mean_squared_error(y_arousal_test, y_arousal_test_pred)
rmse_arousal_test = sqrt(mse_arousal_test)

# Decision Tree Regressor for Valence (independent model, same seed)
regressor_valence = DecisionTreeRegressor(random_state=42)
regressor_valence.fit(X_train_scaled, y_valence_train)

# Valence predictions and metrics on dev, then test
y_valence_dev_pred = regressor_valence.predict(X_dev_scaled)
mse_valence_dev = mean_squared_error(y_valence_dev, y_valence_dev_pred)
rmse_valence_dev = sqrt(mse_valence_dev)

y_valence_test_pred = regressor_valence.predict(X_test_scaled)
mse_valence_test = mean_squared_error(y_valence_test, y_valence_test_pred)
rmse_valence_test = sqrt(mse_valence_test)

# Results Output
print("Arousal - Dev RMSE:", rmse_arousal_dev, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev RMSE:", rmse_valence_dev, "Test RMSE:", rmse_valence_test)


Arousal - Dev RMSE: 0.19844405179331806 Test RMSE: 0.2416438084199608
Valence - Dev RMSE: 0.2526523052868014 Test RMSE: 0.2273068760610807


In [None]:
#3 sec, decision tree
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from numpy import sqrt

def load_and_preprocess_dataset(filename):
    """Read one feature CSV and split it into features and emotion targets.

    Every column from "x_0" through the last column is taken as a feature;
    the "arousal" and "valence" columns are the regression targets.

    Returns:
        Tuple ``(X, y_arousal, y_valence)`` of NumPy arrays.
    """
    frame = pd.read_csv(filename)
    first_feature = frame.columns.get_loc("x_0")
    feature_matrix = frame.iloc[:, first_feature:].values
    targets = {name: frame[name].values for name in ("arousal", "valence")}
    return feature_matrix, targets["arousal"], targets["valence"]

def scale_features(X, scaler=None):
    """Standardize a feature matrix with ``StandardScaler``.

    Args:
        X: Feature matrix to standardize.
        scaler: Optional already-fitted ``StandardScaler``. When given, ``X`` is
            transformed with that scaler's stored statistics. When omitted, a
            new scaler is fitted on ``X`` itself (the original behaviour).

    Returns:
        The standardized feature matrix.
    """
    if scaler is None:
        return StandardScaler().fit_transform(X)
    return scaler.transform(X)

# Load and preprocess the datasets
train_file = "/content/drive/MyDrive/3sec/SEWA_features_wav2vec_3_seconds_train.csv"
dev_file = "/content/drive/MyDrive/3sec/SEWA_features_wav2vec_3_seconds_dev.csv"
test_file = "/content/drive/MyDrive/3sec/SEWA_features_wav2vec_3_seconds_test.csv"

X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Fit the scaler on the training split only and reuse it for dev/test.
# Fitting a separate scaler per split would shift dev/test features by their
# own mean/std, so the tree's thresholds (learned in train-scaled units) would
# be compared against values on a different scale.
# NOTE: metrics recorded from runs that scaled each split independently will
# differ from what this cell now produces.
feature_scaler = StandardScaler().fit(X_train)
X_train_scaled = scale_features(X_train, feature_scaler)
X_dev_scaled = scale_features(X_dev, feature_scaler)
X_test_scaled = scale_features(X_test, feature_scaler)

# Decision Tree Regressor for Arousal (fixed seed for reproducible trees)
regressor_arousal = DecisionTreeRegressor(random_state=42)
regressor_arousal.fit(X_train_scaled, y_arousal_train)

# Arousal predictions and metrics on dev, then test
y_arousal_dev_pred = regressor_arousal.predict(X_dev_scaled)
mse_arousal_dev = mean_squared_error(y_arousal_dev, y_arousal_dev_pred)
rmse_arousal_dev = sqrt(mse_arousal_dev)

y_arousal_test_pred = regressor_arousal.predict(X_test_scaled)
mse_arousal_test = mean_squared_error(y_arousal_test, y_arousal_test_pred)
rmse_arousal_test = sqrt(mse_arousal_test)

# Decision Tree Regressor for Valence (independent model, same seed)
regressor_valence = DecisionTreeRegressor(random_state=42)
regressor_valence.fit(X_train_scaled, y_valence_train)

# Valence predictions and metrics on dev, then test
y_valence_dev_pred = regressor_valence.predict(X_dev_scaled)
mse_valence_dev = mean_squared_error(y_valence_dev, y_valence_dev_pred)
rmse_valence_dev = sqrt(mse_valence_dev)

y_valence_test_pred = regressor_valence.predict(X_test_scaled)
mse_valence_test = mean_squared_error(y_valence_test, y_valence_test_pred)
rmse_valence_test = sqrt(mse_valence_test)

# Results Output
print("Arousal - Dev RMSE:", rmse_arousal_dev, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev RMSE:", rmse_valence_dev, "Test RMSE:", rmse_valence_test)


Arousal - Dev RMSE: 0.19954608690173675 Test RMSE: 0.229942982178242
Valence - Dev RMSE: 0.19275209310988592 Test RMSE: 0.2130777565836836


In [None]:
#4 sec, decision tree
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from numpy import sqrt

def load_and_preprocess_dataset(filename):
    """Read one feature CSV and split it into features and emotion targets.

    Every column from "x_0" through the last column is taken as a feature;
    the "arousal" and "valence" columns are the regression targets.

    Returns:
        Tuple ``(X, y_arousal, y_valence)`` of NumPy arrays.
    """
    frame = pd.read_csv(filename)
    first_feature = frame.columns.get_loc("x_0")
    feature_matrix = frame.iloc[:, first_feature:].values
    targets = {name: frame[name].values for name in ("arousal", "valence")}
    return feature_matrix, targets["arousal"], targets["valence"]

def scale_features(X, scaler=None):
    """Standardize a feature matrix with ``StandardScaler``.

    Args:
        X: Feature matrix to standardize.
        scaler: Optional already-fitted ``StandardScaler``. When given, ``X`` is
            transformed with that scaler's stored statistics. When omitted, a
            new scaler is fitted on ``X`` itself (the original behaviour).

    Returns:
        The standardized feature matrix.
    """
    if scaler is None:
        return StandardScaler().fit_transform(X)
    return scaler.transform(X)

# Load and preprocess the datasets
train_file = "/content/drive/MyDrive/4sec/SEWA_features_wav2vec_4_seconds_train.csv"
dev_file = "/content/drive/MyDrive/4sec/SEWA_features_wav2vec_4_seconds_dev.csv"
test_file = "/content/drive/MyDrive/4sec/SEWA_features_wav2vec_4_seconds_test.csv"

X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Fit the scaler on the training split only and reuse it for dev/test.
# Fitting a separate scaler per split would shift dev/test features by their
# own mean/std, so the tree's thresholds (learned in train-scaled units) would
# be compared against values on a different scale.
# NOTE: metrics recorded from runs that scaled each split independently will
# differ from what this cell now produces.
feature_scaler = StandardScaler().fit(X_train)
X_train_scaled = scale_features(X_train, feature_scaler)
X_dev_scaled = scale_features(X_dev, feature_scaler)
X_test_scaled = scale_features(X_test, feature_scaler)

# Decision Tree Regressor for Arousal (fixed seed for reproducible trees)
regressor_arousal = DecisionTreeRegressor(random_state=42)
regressor_arousal.fit(X_train_scaled, y_arousal_train)

# Arousal predictions and metrics on dev, then test
y_arousal_dev_pred = regressor_arousal.predict(X_dev_scaled)
mse_arousal_dev = mean_squared_error(y_arousal_dev, y_arousal_dev_pred)
rmse_arousal_dev = sqrt(mse_arousal_dev)

y_arousal_test_pred = regressor_arousal.predict(X_test_scaled)
mse_arousal_test = mean_squared_error(y_arousal_test, y_arousal_test_pred)
rmse_arousal_test = sqrt(mse_arousal_test)

# Decision Tree Regressor for Valence (independent model, same seed)
regressor_valence = DecisionTreeRegressor(random_state=42)
regressor_valence.fit(X_train_scaled, y_valence_train)

# Valence predictions and metrics on dev, then test
y_valence_dev_pred = regressor_valence.predict(X_dev_scaled)
mse_valence_dev = mean_squared_error(y_valence_dev, y_valence_dev_pred)
rmse_valence_dev = sqrt(mse_valence_dev)

y_valence_test_pred = regressor_valence.predict(X_test_scaled)
mse_valence_test = mean_squared_error(y_valence_test, y_valence_test_pred)
rmse_valence_test = sqrt(mse_valence_test)

# Results Output
print("Arousal - Dev RMSE:", rmse_arousal_dev, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev RMSE:", rmse_valence_dev, "Test RMSE:", rmse_valence_test)


Arousal - Dev RMSE: 0.17957207692538585 Test RMSE: 0.22671376994901732
Valence - Dev RMSE: 0.19899053427001887 Test RMSE: 0.22913537936780798
