In [1]:
import numpy as np
import pandas as pd

import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torch.utils.data import DataLoader
from utils import (
    MegaModelV1,
    EmbeddingsDataset2,
    load_embeddings_and_labels,
)

from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import r2_score, accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from scipy.stats import pearsonr
from statsmodels.stats.multitest import multipletests
import yaml

# Reading the settings from config.yaml is currently switched off; the values
# used by this notebook are hard-coded further down instead. The disabled
# loader is kept here for reference:
#
#   # load config
#   with open("config.yaml", 'r') as stream:
#       config = yaml.safe_load(stream)
#
#   # assign a variable to each key in the config dictionary for convenience
#   n_tar_cls = config["n_tar_cls"]
#   modality = config["modality"]
#   which = config["which"]
#   drop_non_significant = config["drop_non_significant"]
#   voice = config["voice"]
#   repetitions = config["repetitions"]
#   folds = config["folds"]

# Hard-coded replacements for the config entries above.
modality = "music"           # first path component of the embedding folders
drop_non_significant = True  # drop the columns flagged as non-significant when loading responses
repetitions = 10             # presumably the number of repeated runs -- TODO confirm against the training cells
folds = 5                    # presumably the number of cross-validation folds -- TODO confirm

In [2]:
# ------------------------------------------------------------------
# Ground truth and responses, both indexed by `stimulus_id`
# ------------------------------------------------------------------

groundtruth_df = pd.read_csv("groundtruth_merged.csv").set_index("stimulus_id")

# responses table (one row per stimulus)
emotions_and_mid_level = pd.read_csv("emotions_and_mid_level.csv").set_index("stimulus_id")

# number of emotion columns before any column is dropped
n_emotions = 7

if drop_non_significant:
    # Columns excluded from the regression targets, with the reason for each.
    to_drop = [
        # extremely low correlations with all the mid-level features
        "Amusing",
        # the next three: non-significant differences between targets (ANOVA)
        "Wide/Narrow pitch variation",
        "Repetitive/Non-repetitive",
        "Fast tempo/Slow tempo",
    ]
    emotions_and_mid_level = emotions_and_mid_level.drop(columns=to_drop)
    # "Amusing" is the only emotion in `to_drop`, so one emotion fewer remains
    n_emotions -= 1


In [3]:
# Classification tasks and their classes. When this dict is passed to
# load_embeddings_and_labels_multitask, each key is read as a column of the
# ground-truth table and a label's position in its list becomes its integer
# class id; values that are not listed are encoded as -1.
cls_dict = {
    # skip mixed
    "target": [
        "Girls/women",
        "Boys/men",
    ],
    "voice_gender": [
        "Feminine",
        "Masculine",
        "There are no voices",
        "BOTH feminine and masculine voices",
    ],
    "voice_exagg": [
        "No, all voices are normal-sounding",
        "Yes a masculine voice is gender exaggerated",
        "Yes a feminine voice is gender exaggerated",
        "There are no voices",
    ],
    "voice_age": [
        "Adults (including young adults)",
        "There are no voices",
        "BOTH children and adults",
        "Children",
    ],
    "voice_type": [
        "BOTH spoken and sung",
        "Spoken",
        "Sung",
        "There are no voices",
    ],
}

# Multitask loader: returns the embeddings and regression targets and, in
# addition, integer labels for every classification task listed in `cls_dict`.
# (Presumably the multitask counterpart of `load_embeddings_and_labels` from
# utils -- TODO confirm.)
def load_embeddings_and_labels_multitask(groundtruth_df, emotions_and_mid_level, which, modality, voice, cls_dict):

    """ Load embeddings and labels for a given modality and embedding type.

    Every row of ``groundtruth_df`` yields one sample: its embedding file is
    loaded and averaged over axis 0, its regression targets are looked up by
    ``stimulus_id`` in ``emotions_and_mid_level``, and its classification
    labels are encoded as integer class ids.

    Args:
        groundtruth_df (pd.DataFrame): ground truth dataframe, indexed by
            stimulus id and containing one column per key of ``cls_dict``
        emotions_and_mid_level (pd.DataFrame): dataframe containing the
            regressors, indexed by stimulus id
        which (str): which embeddings to load
        modality (str): which modality to load
        voice (bool): whether to load voice or no-voice embeddings
            (truthy -> ``embeddings_{which}``, falsy -> ``embeddings_{which}_novoice``)
        cls_dict (dict): dictionary containing the classes for each classification task
    Returns:
        X (np.array): embeddings, one row per row of ``groundtruth_df``
        y_reg (np.array): regressors, row-aligned with ``X``
        y_cls (dict): dictionary containing the labels for each classification
            task; each value is an integer array row-aligned with ``X`` where
            a label is its position in the task's class list, or -1 when the
            ground-truth value is not in that list
    Raises:
        KeyError: if a stimulus id of ``groundtruth_df`` is missing from
            ``emotions_and_mid_level``, or a task of ``cls_dict`` is not a
            column of ``groundtruth_df``

    Example of cls_dict:
    cls_dict = {
        "target": [
            "Girls/women", 
            "Boys/men"
            ], # skip mixed
        "this_is_another_task": [
            'class1',
            'class2',
            ...
            ],
        }
    """

    n_stimuli = groundtruth_df.shape[0]

    # NOTE(review): `embedding_dimensions` and `fn_suffix` are read as
    # module-level names; they are neither defined nor imported in the cells
    # visible here -- confirm they are in scope before this function is called.
    X = np.empty((n_stimuli, embedding_dimensions[modality][which]))
    # Size the targets by the ground-truth rows (the rows the loop below
    # fills). Sizing by the responses table would leave uninitialised rows, or
    # overflow, whenever the two tables differ in length.
    y_reg = np.empty((n_stimuli, emotions_and_mid_level.shape[1]))

    # the folder does not depend on the stimulus, so build it once
    embeddings_dir = f"{modality}/embeddings_{which}{'' if voice else '_novoice'}/"

    for i, stimulus_id in enumerate(groundtruth_df.index):
        embedding = np.load(embeddings_dir +
                            f"{stimulus_id}{fn_suffix[modality][which]}.npy")
        # collapse axis 0 (presumably time/frames -- TODO confirm) into a
        # single vector per stimulus
        X[i] = embedding.mean(axis=0)
        y_reg[i] = emotions_and_mid_level.loc[stimulus_id].values

    y_cls = {}
    for task, classes in cls_dict.items():
        # first occurrence wins, mirroring list.index()
        class_to_id = {}
        for class_id, class_name in enumerate(classes):
            class_to_id.setdefault(class_name, class_id)
        # values outside `classes` (e.g. a skipped class or a missing value) -> -1
        y_cls[task] = np.array([class_to_id.get(value, -1) for value in groundtruth_df[task].values])

    return X, y_reg, y_cls