In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import umap

from sklearn.preprocessing import StandardScaler

# Global plot styling: darkgrid theme with enlarged fonts, applied in one call
# (`sns.set` is an alias of `sns.set_theme`, so a single call yields the same state).
sns.set_theme(style="darkgrid", font_scale=1.8)
# Colour-blind-friendly palette, kept available for later cells.
colors = sns.color_palette("colorblind")


# Load groundtruth

In [None]:
# Experiment selector; it is interpolated into the ratings file names read below
# and into the names of the exported figure and CSV.
# 'mm' for multimodal (aesthetic emotions), 'mf' for music-focused (perceptual descriptors)
experiment = 'mf'
# Ground-truth annotations, indexed by the `stimulus_id` column.
groundtruth_df = pd.read_csv("groundtruth.csv", index_col="stimulus_id")

In [None]:
# Gender annotation of each stimulus, taken from the `all_genders` column.
target_groundtruth = groundtruth_df['all_genders'].copy()

# Full annotation text used when no actor/presenter is identifiable.
no_actors_label = 'There are no actors/presenters or you can never see their faces'

# For every annotation value, collect the index labels (stimulus ids) of the matching rows.
boys_commercials = target_groundtruth.loc[target_groundtruth.eq('Boys/men')].index
girls_commercials = target_groundtruth.loc[target_groundtruth.eq('Girls/women')].index
mixed_commercials = target_groundtruth.loc[target_groundtruth.eq('Mixed')].index
no_actors_commercials = target_groundtruth.loc[target_groundtruth.eq(no_actors_label)].index

# Load the ratings

In [None]:
# Read the three per-experiment tables; every file name is prefixed with `experiment`.
participants_df, ratings_df, control_ratings_df = (
    pd.read_csv(f"{experiment}_{table}.csv")
    for table in ("participants", "ratings", "control_ratings")
)

In [None]:
# Exclude the control stimuli (those listed in control_ratings_df) before
# counting the number of ratings per stimulus.
temp = ratings_df[~ratings_df.stimulus_id.isin(control_ratings_df.stimulus_id)]
# size() counts rows per group. The previous count().iloc[:, 0] counted the
# non-null cells of whichever column happened to come first, so the result
# depended on the column layout and on missing values in that column.
ratings_per_stimulus = temp.groupby('stimulus_id').size().rename("ratings per stimulus")
ratings_per_stimulus.describe()

## Compute ratings means by `stimulus_id`

In [None]:
# Remove the participant identifier so it is not part of the per-stimulus averages.
# errors="ignore" keeps the cell re-runnable: on a second execution the column
# is already gone and a plain drop would raise KeyError.
ratings_df = ratings_df.drop(columns=["prolific_id"], errors="ignore")
# One row per stimulus, holding the mean of every remaining column.
ratings_means_df = ratings_df.groupby('stimulus_id').mean()
ratings_means_df.head()

In [None]:
# Attach a short target label to every stimulus, one group of stimulus ids at a time.
# The groups are processed in the same order as before.
for target_label, stimulus_ids in (
    ('no_actors', no_actors_commercials),
    ('Masc', boys_commercials),
    ('Fem', girls_commercials),
    ('Mix', mixed_commercials),
):
    ratings_means_df.loc[stimulus_ids, 'target'] = target_label

# Manifold learning
To reproduce the paper's figures, uncomment the alternative list of group combinations in the plotting cell below.

In [None]:
# Standardise the per-stimulus mean ratings, then project them to 2-D with UMAP.
# The seed is fixed via random_state.
reducer = umap.UMAP(n_neighbors=20, random_state=42)
X = ratings_means_df.drop(["target"], axis=1).values
X_scaled = StandardScaler().fit_transform(X)
embeddings = reducer.fit_transform(X_scaled)

# plot by target
palette = {'Mix':'C2','Fem':'C3','Masc':'C0','no_actors':'C1'}
# Each entry is a "/"-separated set of targets drawn together in one figure.
group_sets = ['Fem/Mix/Masc'] #['Fem/Mix/Masc/no_actors', 'Fem/Mix/Masc', 'Fem/Masc']
for groups in group_sets:
    # Rows whose target belongs to the current set of groups.
    mask = ratings_means_df.target.isin(groups.split("/"))
    fig, ax = plt.subplots(figsize=(12,8))
    g = sns.scatterplot(
            x=embeddings[mask,0],
            y=embeddings[mask,1],
            hue=ratings_means_df.target.values[mask],
            palette=palette, s=50,
            ax=ax
        )
    # The embedding axes are hidden: no ticks, no tick labels, no grid.
    g.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
    g.grid(False)
    g.legend(loc='upper left' if experiment=="mf" else "lower left")
    # A single group set keeps the original file name. With several sets the
    # set is appended to the name, otherwise each figure would overwrite the
    # previous one.
    suffix = "" if len(group_sets) == 1 else "_" + groups.replace("/", "-")
    fig.savefig(f"UMAP_{experiment}{suffix}.pdf", bbox_inches="tight")
    plt.show()

In [None]:
# Export the 2-D coordinates together with each stimulus' target label and id.
# Stimuli labelled 'no_actors' are left out of the exported table.
embeddings_df = (
    pd.DataFrame(embeddings, columns=["x","y"])
    .assign(
        target=ratings_means_df.target.values,
        stimulus_id=ratings_means_df.index,
    )
    .loc[lambda frame: ~frame.target.isin(['no_actors'])]
    .reset_index(drop=True)
)
embeddings_df.to_csv(f"UMAP_{experiment}.csv", index=False)