In [2]:
#SVR 1 sec TEST ONLY AROUSAL 

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_arousal = data['arousal'].values
    return X, y_arousal

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "1sec/SEWA_features_wav2vec_1_seconds_train.csv"
dev_file = "1sec/SEWA_features_wav2vec_1_seconds_dev.csv"
test_file = "1sec/SEWA_features_wav2vec_1_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train= load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev= load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test= load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)


Arousal - Dev MSE: 0.011893248013856774 Dev RMSE: 0.10905616907748399 Test MSE: 0.03340598663453017 Test RMSE: 0.18277304679446083


In [None]:
#1 sec only val
#SVR 1 sec 

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_valence = data['valence'].values
    return X, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "1sec/SEWA_features_wav2vec_1_seconds_train.csv"
dev_file = "1sec/SEWA_features_wav2vec_1_seconds_dev.csv"
test_file = "1sec/SEWA_features_wav2vec_1_seconds_test.csv"

# Load and preprocess datasets
X_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


In [None]:
#SVR 1 sec 

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_arousal = data['arousal'].values
    y_valence = data['valence'].values
    return X, y_arousal, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "1sec/SEWA_features_wav2vec_1_seconds_train.csv"
dev_file = "1sec/SEWA_features_wav2vec_1_seconds_dev.csv"
test_file = "1sec/SEWA_features_wav2vec_1_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [None]:
#SVR 2 sec 

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_arousal = data['arousal'].values
    y_valence = data['valence'].values
    return X, y_arousal, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "2sec/SEWA_features_wav2vec_2_seconds_train.csv"
dev_file = "2sec/SEWA_features_wav2vec_2_seconds_dev.csv"
test_file = "2sec/SEWA_features_wav2vec_2_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


In [None]:
#SVR 3 sec 

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_arousal = data['arousal'].values
    y_valence = data['valence'].values
    return X, y_arousal, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "3sec/SEWA_features_wav2vec_3_seconds_train.csv"
dev_file = "3sec/SEWA_features_wav2vec_3_seconds_dev.csv"
test_file = "3sec/SEWA_features_wav2vec_3_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)


In [None]:
#SVR 4 sec 

import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from math import sqrt

def load_and_preprocess_dataset(filename):
    data = pd.read_csv(filename)
    features_start_col = data.columns.get_loc("x_0")
    X = data.iloc[:, features_start_col:].values  # Adjusted to slice till the end
    y_arousal = data['arousal'].values
    y_valence = data['valence'].values
    return X, y_arousal, y_valence

# Scale features (function)
def scale_features(X_train, X_dev, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_dev_scaled = scaler.transform(X_dev)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_dev_scaled, X_test_scaled

# SVR Grid Search (function)
def svr_grid_search(X_train, y_train, X_dev, y_dev, param_grid):
    concat_x_train_dev = np.concatenate((X_train, X_dev), axis=0)
    concat_y_train_dev = np.concatenate((y_train, y_dev), axis=0)
    split_index = [-1 for _ in X_train] + [0 for _ in X_dev]  # PredefinedSplit indices
    pds = PredefinedSplit(test_fold=split_index)

    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=pds, scoring='neg_mean_squared_error')
    grid_search.fit(concat_x_train_dev, concat_y_train_dev)
    return grid_search.best_estimator_

# Evaluate Model (function)
def evaluate_model(model, X_dev, y_dev, X_test, y_test):
    # Dev set
    y_dev_pred = model.predict(X_dev)
    mse_dev = mean_squared_error(y_dev, y_dev_pred)
    rmse_dev = sqrt(mse_dev)
    # Test set
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_test = sqrt(mse_test)
    return mse_dev, rmse_dev, mse_test, rmse_test

# Paths to datasets
train_file = "4sec/SEWA_features_wav2vec_4_seconds_train.csv"
dev_file = "4sec/SEWA_features_wav2vec_4_seconds_dev.csv"
test_file = "4sec/SEWA_features_wav2vec_4_seconds_test.csv"

# Load and preprocess datasets
X_train, y_arousal_train, y_valence_train = load_and_preprocess_dataset(train_file)
X_dev, y_arousal_dev, y_valence_dev = load_and_preprocess_dataset(dev_file)
X_test, y_arousal_test, y_valence_test = load_and_preprocess_dataset(test_file)

# Scale features
X_train_scaled, X_dev_scaled, X_test_scaled = scale_features(X_train, X_dev, X_test)

# SVR parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Arousal Model
best_svr_arousal = svr_grid_search(X_train_scaled, y_arousal_train, X_dev_scaled, y_arousal_dev, param_grid)
mse_arousal_dev, rmse_arousal_dev, mse_arousal_test, rmse_arousal_test = evaluate_model(best_svr_arousal, X_dev_scaled, y_arousal_dev, X_test_scaled, y_arousal_test)

# Valence Model
best_svr_valence = svr_grid_search(X_train_scaled, y_valence_train, X_dev_scaled, y_valence_dev, param_grid)
mse_valence_dev, rmse_valence_dev, mse_valence_test, rmse_valence_test = evaluate_model(best_svr_valence, X_dev_scaled, y_valence_dev, X_test_scaled, y_valence_test)

# Results
print("Arousal - Dev MSE:", mse_arousal_dev, "Dev RMSE:", rmse_arousal_dev, "Test MSE:", mse_arousal_test, "Test RMSE:", rmse_arousal_test)
print("Valence - Dev MSE:", mse_valence_dev, "Dev RMSE:", rmse_valence_dev, "Test MSE:", mse_valence_test, "Test RMSE:", rmse_valence_test)
