### Mixed ANOVA

Based on [McDermott et al (2016)](https://www.nature.com/articles/nature18635)

This is the Python notebook I used to simulate the data and generate the figures for my lecture on repeated-measures ANOVA.
To run the code, click on a block of code (a "cell") and then click the "play" button near the top of the page. To run all of the cells at once, use the "fast-forward" button (the two right arrows), which is also at the top of the page.

*This was partly written with the help of Microsoft Copilot.*

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg
from scipy.stats import f

# Set styles for plotting: apply seaborn's 'whitegrid' style and 'talk' context
# to every figure created below (the figures were generated for a lecture)
sns.set_style('whitegrid')
sns.set_context('talk')

### Generate the data

In [None]:
# Fix the random seed so that the simulated data are reproducible
np.random.seed(42)

# Design: two groups of participants (between-subject factor), each of whom
# rates two chord types (within-subject factor)
groups = ['US', 'Tsimane']
n_subjects = [47, 50]  # participants per group, in the same order as `groups`
n_groups = len(groups)
levels_chord = ['Consonant', 'Dissonant']
n_chord = len(levels_chord)

# Mean rating for each cell of the design, with values based on Fig 3
# (rows are US / Tsimane, columns are Cons / Diss)
means = np.array([[2.9, 2], [3, 3]])
std_dev_sbj = 0.5   # st. dev. of the per-participant offset (across participants)
std_dev_wthn = 0.3  # st. dev. of the rating noise within a participant

# Build one row per participant x chord. The random draws are made in a fixed
# order (participant offset first, then one draw per chord), so the seed above
# always yields the same table.
data = []
subject_id = 0
for grp_idx, (group_name, group_size) in enumerate(zip(groups, n_subjects)):
    for _ in range(group_size):
        subject_id += 1
        # Offset shared by all of this participant's ratings
        offset = np.random.normal(0, std_dev_sbj)
        for chord_idx, chord_name in enumerate(levels_chord):
            rating = np.random.normal(means[grp_idx, chord_idx], std_dev_wthn) + offset
            # Keep the rating between 1 and 4
            rating = min(4, max(1, rating))
            data.append([subject_id, group_name, chord_name, rating])

# Collect the rows in a long-format DataFrame and summarise every column
df = pd.DataFrame(data, columns=['Subject', 'Group', 'Chord', 'Pleasantness'])
df.describe(include='all')

In [None]:
# Plot the data: one panel per group, with each participant's ratings drawn
# as a line across the two chord types
fig, ax = plt.subplots(1, 2, figsize=(9,5))
for panel, group_name, colours in zip(ax, ['US', 'Tsimane'], ['crest', 'flare']):
    group_rows = df.loc[df['Group'] == group_name]
    sns.lineplot(data=group_rows, ax=panel, x='Chord', y='Pleasantness',
                 hue='Subject', marker='o', palette=colours)
    panel.set_title(group_name)
    panel.set_ylim([1, 4.2])
    # The hue is only used to colour the lines by subject, so hide its legend
    panel.get_legend().set_visible(False)
plt.tight_layout()
plt.show()

In [None]:
# Box plot of every Group x Chord cell, with the individual ratings overlaid
# as a strip plot
box_fig, box_ax = plt.subplots(figsize=(6,5))
sns.boxplot(data=df, x='Group', hue='Chord', y='Pleasantness', ax=box_ax)
# Capture the legend entries before drawing the strip plot, so that the legend
# built below lists only the box plot entries
handles, labels = box_ax.get_legend_handles_labels()
sns.stripplot(data=df, x='Group', hue='Chord', y='Pleasantness', dodge=True,
              palette='dark', ax=box_ax)
box_ax.legend(handles, labels, bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Line plot of the mean rating for each Group x Chord cell (error bars show
# the standard error).
# catplot is a figure-level function that creates its own figure, so a
# preceding plt.figure() call would only leave an empty extra figure behind.
# The figure size is therefore set with height/aspect: 4 in tall, 4 * 1.25 = 5 in wide.
sns.catplot(data=df, x='Chord', y='Pleasantness', hue='Group', kind='point',
            errorbar='se', height=4, aspect=1.25)
plt.show()

### Run the mixed ANOVA

In [None]:
# Run the mixed ANOVA: Group is the between-subject factor and Chord is the
# within-subject factor
aov = pg.mixed_anova(data=df, dv='Pleasantness', subject='Subject',
                     between='Group', within='Chord', effsize="ng2")
# Display the overall results
print(aov)

In [None]:
# mixed_anova does not display the error terms, so calculate the sums of
# squares (SS) manually and use them to re-derive the F-statistics
grand_mean = df['Pleasantness'].mean()
SS_total = ((df['Pleasantness'] - grand_mean) ** 2).sum()
print(f'Total SS = {SS_total}')
# Each subject's mean stands for n_chord observations, hence the multiplication
sbj_mean = df[['Subject','Pleasantness']].groupby('Subject').mean()
SS_sbj = ((sbj_mean['Pleasantness'] - grand_mean) ** 2).sum() * n_chord
print(f'Between-subject SS = {SS_sbj}')
print(f'Within-subject SS = {SS_total - SS_sbj}')

# Look up each effect by its name in the 'Source' column rather than by row
# position, so the calculation does not depend on the order of the ANOVA table
effects = aov.set_index('Source')
SS_group = effects.loc['Group', 'SS']
SS_chord = effects.loc['Chord', 'SS']
SS_inter = effects.loc['Interaction', 'SS']

# Calculate within-subject and between-subject errors: whatever is left of the
# within-/between-subject SS after removing the effects tested at that level
SSws_err = SS_total - SS_sbj - SS_chord - SS_inter
SSbs_err = SS_sbj - SS_group
print(f"Within-subject error SS = {SSws_err}")
print(f"Between-subject error SS = {SSbs_err}")

# Re-calculate the F-statistics, make sure they match the values in the table.
# F = (SS_effect / DF1) / (SS_error / DF2); dividing by DF1 keeps this correct
# even if a factor has more than two levels (here DF1 is 1 for every effect).
F_group = (SS_group/effects.loc['Group', 'DF1'])/(SSbs_err/effects.loc['Group', 'DF2'])
F_chord = (SS_chord/effects.loc['Chord', 'DF1'])/(SSws_err/effects.loc['Chord', 'DF2'])
F_inter = (SS_inter/effects.loc['Interaction', 'DF1'])/(SSws_err/effects.loc['Interaction', 'DF2'])
print(f'Expected F_group = {F_group}')
print(f'Expected F_chord = {F_chord}')
print(f'Expected F_interaction = {F_inter}')


### Multiple comparisons

In [None]:
# Post-hoc pairwise t-tests over the Group (between-subject) and Chord
# (within-subject) factors, with Bonferroni-adjusted p-values
posthocs = pg.pairwise_tests(data=df, dv='Pleasantness', subject='Subject',
                             between='Group', within='Chord',
                             within_first=False, padjust='bonf')
print(posthocs)