In [None]:
import pandas as pd
#import seaborn as sns
#import matplotlib.pyplot as plt
import umap

from utils import anova_by_variable

#sns.set_theme(style="darkgrid")
#sns.set(font_scale = 1.8)
#colors = sns.color_palette("colorblind")


# Load groundtruth

In [None]:
# Which experiment to analyse:
#   'mm' -> multimodal (aesthetic emotions)
#   'mf' -> music-focused (perceptual descriptors)
experiment = "mf"

# Ground-truth table, indexed by its stimulus_id column.
groundtruth_df = pd.read_csv(
    "groundtruth.csv",
    index_col="stimulus_id",
)

In [None]:
# Value of the 'all_genders' ground-truth column for every stimulus
# (copied so it is independent of groundtruth_df).
target_groundtruth = groundtruth_df['all_genders'].copy()

# Index labels (stimulus ids) of the commercials falling in each category.
boys_commercials = target_groundtruth.loc[target_groundtruth.eq('Boys/men')].index
girls_commercials = target_groundtruth.loc[target_groundtruth.eq('Girls/women')].index
mixed_commercials = target_groundtruth.loc[target_groundtruth.eq('Mixed')].index
no_actors_commercials = target_groundtruth.loc[
    target_groundtruth.eq(
        'There are no actors/presenters or you can never see their faces'
    )
].index

# Load the ratings

In [None]:
# Ratings and control ratings for the selected experiment; both file names
# start with the experiment code.
ratings_path = f"{experiment}_ratings.csv"
control_ratings_path = f"{experiment}_control_ratings.csv"

ratings_df = pd.read_csv(ratings_path)
control_ratings_df = pd.read_csv(control_ratings_path)

In [None]:
# Count the ratings received by each stimulus, leaving out every stimulus
# that appears in the control ratings.
temp = ratings_df[~ratings_df["stimulus_id"].isin(control_ratings_df["stimulus_id"])]
# size() counts rows per stimulus. The previous count().iloc[:, 0] counted the
# non-null values of whichever column happened to come first, so it depended on
# column order and under-counted when that column had missing values.
ratings_per_stimulus = temp.groupby("stimulus_id").size().rename("ratings per stimulus")
# Distribution: how many stimuli received each number of ratings.
ratings_per_stimulus.value_counts()

## Compute ratings means by `stimulus_id`

In [None]:
# Drop the prolific_id column before averaging. ratings_df is rebound here, so
# on a second run of this cell the column is already gone; errors="ignore"
# keeps the cell re-runnable instead of raising KeyError.
ratings_df = ratings_df.drop(columns=["prolific_id"], errors="ignore")
# Mean of every remaining column, per stimulus.
ratings_means_df = ratings_df.groupby('stimulus_id').mean()
ratings_means_df.head()

In [None]:
# Tag each stimulus with a short 'target' label derived from its ground-truth
# category. Stimuli outside these four index sets are left without a label.
for stimulus_ids, label in (
    (no_actors_commercials, 'no_actors'),
    (boys_commercials, 'masc'),
    (girls_commercials, 'fem'),
    (mixed_commercials, 'mix'),
):
    ratings_means_df.loc[stimulus_ids, 'target'] = label

## One-way ANOVA by gender orientation 
Computed on the mean rating of each stimulus.

NB: the groups may have unequal sample sizes; see https://www.statology.org/anova-unequal-sample-size/ for how this affects a one-way ANOVA.

In [None]:
# ANOVA of the per-stimulus mean ratings across the 'all_genders' categories,
# leaving out the commercials without visible actors/presenters.
# NOTE(review): the exact semantics of each argument live in
# utils.anova_by_variable, which is not shown here.
significant_fields = anova_by_variable(
    "all_genders", ratings_means_df, groundtruth_df, experiment,
    excluded_categories=["There are no actors/presenters or you can never see their faces"],
    alpha=1,  # show also non-significant fields
)

# Swarm plots

In [None]:
# Plotting is currently disabled: the code below is kept inside a throwaway
# string, so running this cell draws nothing. To enable it, remove the string
# wrapper and uncomment the seaborn / matplotlib imports in the first cell
# (the code uses `sns` and `plt`). It would draw one swarm plot per field in
# significant_fields, grouped by 'target' and skipping the 'no_actors' rows.
_ = ''' uncomment to plot
palette = {'mix':'C2','fem':'C3','masc':'C0','no_actors':'C1'}
for field in significant_fields:
    print(f'\033[1m\033[91m.::{field}::.\033[0m')
    plt.figure(figsize=(6,4))
    sns.swarmplot(x='target',y=field, data=ratings_means_df[ratings_means_df["target"]!="no_actors"], palette=palette, size=4)
    plt.show()
'''

# ANOVA by voice type and target

### Fem
Fuse 'Sung' and 'BOTH spoken and sung' into 'Sung'; exclude 'There are no voices'.

In [None]:
# voice_type ANOVA restricted to the 'Girls/women' commercials.
# "Sung" and "BOTH spoken and sung" are passed as categories to fuse under the
# label "Sung"; "There are no voices" is passed as an excluded category.
_ = anova_by_variable(
    "voice_type",
    ratings_means_df.loc[girls_commercials],
    groundtruth_df.loc[girls_commercials],
    experiment,
    categories_to_fuse=["Sung", "BOTH spoken and sung"],
    fused_label="Sung",
    excluded_categories=["There are no voices"],
)

### Masc
Essentially only spoken voices in this group, so there is nothing to compare and no ANOVA is run.

### Mixed
Fuse 'Sung' and 'BOTH spoken and sung' into 'Sung'; exclude 'There are no voices'.

In [None]:
# voice_type ANOVA restricted to the 'Mixed' commercials.
# "Sung" and "BOTH spoken and sung" are passed as categories to fuse under the
# label "Sung"; "There are no voices" is passed as an excluded category.
_ = anova_by_variable(
    "voice_type",
    ratings_means_df.loc[mixed_commercials],
    groundtruth_df.loc[mixed_commercials],
    experiment,
    categories_to_fuse=["Sung", "BOTH spoken and sung"],
    fused_label="Sung",
    excluded_categories=["There are no voices"],
)

# ANOVA by voice age and target

### Fem
Fuse "BOTH children and adults" and "Children" into "Including children"; exclude 'There are no voices'.

In [None]:
# voice_age ANOVA restricted to the 'Girls/women' commercials.
# "BOTH children and adults" and "Children" are passed as categories to fuse
# under the label "Including children"; "There are no voices" is excluded.
_ = anova_by_variable(
    "voice_age",
    ratings_means_df.loc[girls_commercials],
    groundtruth_df.loc[girls_commercials],
    experiment,
    categories_to_fuse=["BOTH children and adults", "Children"],
    fused_label="Including children",
    excluded_categories=["There are no voices"],
)

### Masc
Essentially only adult voices in this group, so there is nothing to compare and no ANOVA is run.

### Mixed
Fuse "BOTH children and adults" and "Children" into "Including children"; exclude 'There are no voices'.

In [None]:
# voice_age ANOVA restricted to the 'Mixed' commercials.
# "BOTH children and adults" and "Children" are passed as categories to fuse
# under the label "Including children"; "There are no voices" is excluded.
_ = anova_by_variable(
    "voice_age",
    ratings_means_df.loc[mixed_commercials],
    groundtruth_df.loc[mixed_commercials],
    experiment,
    categories_to_fuse=["BOTH children and adults", "Children"],
    fused_label="Including children",
    excluded_categories=["There are no voices"],
)

# ANOVA by voice gender and target

### Fem
Essentially only feminine voices in this group, so there is nothing to compare and no ANOVA is run.

### Masc
Essentially only masculine voices in this group, so there is nothing to compare and no ANOVA is run.

### Mixed
Fuse "Unclear" into "BOTH feminine and masculine voices"; exclude 'There are no voices'.

In [None]:
# voice_gender ANOVA restricted to the 'Mixed' commercials.
# "Unclear" is passed together with "BOTH feminine and masculine voices" as
# categories to fuse under the latter label; "There are no voices" is excluded.
_ = anova_by_variable(
    "voice_gender",
    ratings_means_df.loc[mixed_commercials],
    groundtruth_df.loc[mixed_commercials],
    experiment,
    categories_to_fuse=["BOTH feminine and masculine voices", "Unclear"],
    fused_label="BOTH feminine and masculine voices",
    excluded_categories=["There are no voices"],
)

# ANOVA by voice gender exaggeration and target

### Fem
Exclude 'There are no voices'

In [None]:
# voice_exagg ANOVA restricted to the 'Girls/women' commercials;
# "There are no voices" is passed as an excluded category.
_ = anova_by_variable(
    "voice_exagg",
    ratings_means_df.loc[girls_commercials],
    groundtruth_df.loc[girls_commercials],
    experiment,
    excluded_categories=["There are no voices"],
)

### Masc
Exclude ‘There are no voices’

In [None]:
# voice_exagg ANOVA restricted to the 'Boys/men' commercials;
# "There are no voices" is passed as an excluded category.
_ = anova_by_variable(
    "voice_exagg",
    ratings_means_df.loc[boys_commercials],
    groundtruth_df.loc[boys_commercials],
    experiment,
    excluded_categories=["There are no voices"],
)

### Mixed

Not enough data to run the ANOVA; the call is kept below for reference only:

    _ = anova_by_variable("voice_exagg",
                    ratings_means_df.loc[mixed_commercials],
                    groundtruth_df.loc[mixed_commercials],
                    experiment,
                    excluded_categories=['There are no voices'])