In [None]:
%matplotlib inline
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import xgboost as xgb
from pytorch_lightning import Trainer
from sklearn.metrics import accuracy_score, f1_score, r2_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset

from utils import EmbeddingsDatasetReg, MultipleRegression

In [None]:
# --- Experiment configuration -------------------------------------------------
# n_classes: 2 keeps only the two gendered target classes; any other value also
#            keeps the "Mixed" class (see the label-selection cell).
# modality:  'music', 'speech', or 'video'
# which:     embedding family within the modality --
#            'mfcc', 'msd' or 'openl3' for music, 'slow_fast' for video,
#            'hubert' for speech
# voice:     False switches the embedding directory to its '_novoice' variant
n_classes = 2
modality = 'music'
which = 'openl3'
voice = True

# File-name suffix appended to each stimulus id, per modality / embedding family.
# Alternatives: openl3 -> '_music' or '_env'; slow_fast -> '_slow' or '_fast';
# hubert -> '_wave_encoder' or '_transformer'.
fn_suffix = dict(
    music=dict(mfcc='', msd='_backend', openl3='_music'),
    video=dict(slow_fast='_slow'),
    speech=dict(hubert='_wave_encoder'),
)

# Embedding width per family. Two families depend on which sub-branch was
# selected through the suffix above.
if fn_suffix['video']['slow_fast'] == '_slow':
    _slow_fast_dim = 2048
else:
    _slow_fast_dim = 256

if fn_suffix['speech']['hubert'] == '_transformer':
    _hubert_dim = 1024
else:
    _hubert_dim = 512

embedding_dimensions = dict(
    video=dict(slow_fast=_slow_fast_dim),
    music=dict(mfcc=60, msd=256, openl3=512),
    speech=dict(hubert=_hubert_dim),
)

## Load ground truth

In [None]:
# Ground-truth table, indexed by stimulus id so rows can be looked up per stimulus.
groundtruth_df = pd.read_csv("groundtruth_merged.csv").set_index("stimulus_id")
groundtruth_df.head()

In [None]:
# Sanity check: every stimulus in the ground truth must have an embedding file.
# The directory is built exactly as in the loading cell -- including the
# '_novoice' variant when `voice` is False -- so the check validates the files
# that will actually be read.
embeddings_dir = f"{modality}/embeddings_{which}{'' if voice else '_novoice'}"

missing_ids = [
    stimulus_id
    for stimulus_id in groundtruth_df.index
    if not os.path.exists(f"{embeddings_dir}/{stimulus_id}{fn_suffix[modality][which]}.npy")
]

for stimulus_id in missing_ids:
    print(f"Embedding for {stimulus_id} not found")

# Kept as a count for parity with the previous version of this cell.
not_found = len(missing_ids)

assert not_found == 0, f"{not_found} embedding file(s) missing under {embeddings_dir}/"

In [None]:
# Mid-level feature table, indexed by stimulus id.
mid_level_features = pd.read_csv("mid_level_features.csv").set_index("stimulus_id")
mid_level_features.head()

## Load embeddings

In [None]:
# Build one fixed-size row per stimulus by averaging its stored embedding over
# axis 0 (presumably the frame/time axis -- TODO confirm against the extractor).
embedding_dim = embedding_dimensions[modality][which]
n_stimuli = groundtruth_df.shape[0]

embeddings_dir = f"{modality}/embeddings_{which}{'' if voice else '_novoice'}"
file_suffix = fn_suffix[modality][which]

# Preallocated float64 matrix; every row is overwritten in the loop below.
all_embeddings = np.empty((n_stimuli, embedding_dim))

for row, stimulus_id in enumerate(groundtruth_df.index):
    stimulus_embedding = np.load(f"{embeddings_dir}/{stimulus_id}{file_suffix}.npy")
    all_embeddings[row] = stimulus_embedding.mean(axis=0)

all_embeddings.shape

In [None]:
# Choose which target classes take part in the experiment.
if n_classes == 2:
    classes = ["Girls/women", "Boys/men"]
else:
    classes = ["Girls/women", "Mixed", "Boys/men"]

# Boolean row selector: keep only stimuli whose target is one of `classes`.
mask = groundtruth_df["target"].isin(classes)

X = all_embeddings[mask]
y = groundtruth_df.loc[mask, "target"].values

# Map the string targets to integer class ids.
le = LabelEncoder()
y = le.fit_transform(y)

In [None]:
# k-fold cross-validation with xgboost
# `kf` uses a fixed seed and is reused by the model cells below, so every model
# is evaluated on the same folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Classification objective: with 'multi:softmax', `predict` returns the predicted
# class index for each row, so the output can be scored directly against the
# label-encoded targets. 'num_class' is required by this objective.
param = {'max_depth': 3, 'eta': 0.3, 'objective': 'multi:softmax', 'num_class': n_classes}
num_round = 30

accuracies = []
f1s = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    bst = xgb.train(param, dtrain, num_round)

    # Predictions come back as floats holding class indices; cast so they
    # compare cleanly with the integer labels.
    y_pred = bst.predict(dtest).astype(int)

    # Per-fold scores, summarised in the next cell.
    accuracies.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='weighted'))

    # print classification report
    # print(classification_report(y_test, y_pred, target_names=classes))



In [None]:
# Summarise the per-fold scores collected above as mean ± standard deviation.
acc_mean, acc_std = np.mean(accuracies), np.std(accuracies)
f1_mean, f1_std = np.mean(f1s), np.std(f1s)

print(f"Accuracy: {acc_mean:.2f} ± {acc_std:.2f}")
print(f"F1: {f1_mean:.2f} ± {f1_std:.2f}")

In [None]:
# k-fold cross-validation with SVM, on the same folds as the other models
from sklearn.svm import SVC

accuracies = []
f1s = []
for train_idx, test_idx in kf.split(X):
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # `fit` returns the estimator itself, so construction and training chain.
    clf = SVC(gamma='auto').fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    fold_accuracy = accuracy_score(y_test, y_pred)
    fold_f1 = f1_score(y_test, y_pred, average='weighted')
    accuracies.append(fold_accuracy)
    f1s.append(fold_f1)

    # print classification report
    # print(classification_report(y_test, y_pred, target_names=classes))

# Mean ± standard deviation over the folds.
acc_mean, acc_std = np.mean(accuracies), np.std(accuracies)
f1_mean, f1_std = np.mean(f1s), np.std(f1s)
print(f"Accuracy: {acc_mean:.2f} ± {acc_std:.2f}")
print(f"F1: {f1_mean:.2f} ± {f1_std:.2f}")


In [None]:
# k-fold cross-validation with the pytorch-lightning MultipleRegression model.
# The model regresses the label-encoded target(s). R-squared is reported per
# fold, and when there is a single target column the prediction is rounded to
# the nearest class id so accuracy / F1 can be summarised in the next cell.
accuracies = []
f1s = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # View targets as (n_samples, n_targets): `y` is a 1-D vector after label
    # encoding, so `shape[1]` would not exist without this reshape.
    y_train_2d = y_train.reshape(len(y_train), -1)
    y_test_2d = y_test.reshape(len(y_test), -1)

    # Cast to float32 up front: X is float64 and y is integer, while the test
    # forward pass below already feeds the model float32.
    # NOTE(review): assumes MultipleRegression expects float targets -- confirm in utils.
    X_train_t = torch.from_numpy(X_train).float()
    X_test_t = torch.from_numpy(X_test).float()
    y_train_t = torch.from_numpy(y_train_2d).float()
    y_test_t = torch.from_numpy(y_test_2d).float()

    train_dataset = TensorDataset(X_train_t, y_train_t)
    test_dataset = TensorDataset(X_test_t, y_test_t)

    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

    model = MultipleRegression(input_dim=X_train.shape[1], n_regressions=y_train_2d.shape[1])

    # `Trainer` is imported directly from pytorch_lightning (there is no `pl` alias).
    trainer = Trainer(max_epochs=30)
    trainer.fit(model, train_loader)

    # Inference only: switch off training-time behaviour and gradient tracking.
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_t).numpy()
    # Normalise to (n_samples, n_targets) whether the model returns (n,) or (n, k).
    y_pred = y_pred.reshape(len(y_test), -1)

    r2_values = r2_score(y_test_2d, y_pred, multioutput='raw_values')
    print(f'R-squared values for each target: {r2_values}')

    if y_test_2d.shape[1] == 1:
        # Single encoded class target: nearest valid class id is the predicted label.
        y_pred_labels = np.clip(np.rint(y_pred[:, 0]), 0, n_classes - 1).astype(int)
        accuracies.append(accuracy_score(y_test_2d[:, 0], y_pred_labels))
        f1s.append(f1_score(y_test_2d[:, 0], y_pred_labels, average='weighted'))

    # print classification report
    # print(classification_report(y_test, y_pred, target_names=classes))


In [None]:
# Summarise the per-fold scores collected above as mean ± standard deviation.
acc_mean, acc_std = np.mean(accuracies), np.std(accuracies)
f1_mean, f1_std = np.mean(f1s), np.std(f1s)

print(f"Accuracy: {acc_mean:.2f} ± {acc_std:.2f}")
print(f"F1: {f1_mean:.2f} ± {f1_std:.2f}")