### Repeated measures ANOVA with two within-subject factors

Based on [Breska & Deouell (2017)](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.2001665)

This is the Python notebook I used to simulate the data and generate the figures for my lecture on repeated-measures ANOVA.
To run the code, click on a block of code (a "cell") and then click the "play" button near the top of the page. To run all of the cells at once, use the button with the two right arrows (the "fast-forward" icon), which is also at the top of the page.

*This was partly written with the help of Microsoft Copilot.*

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg
from scipy.stats import f

# Set styles for plotting: configure seaborn once, before any figure is drawn,
# with the 'whitegrid' style and the 'talk' context
sns.set_style('whitegrid')
sns.set_context('talk')

### Generate the data

In [None]:
# Set random seed for reproducibility
np.random.seed(42)

# Number of subjects and the levels of the two within-subject factors
n_subjects = 21
levels_target = ['Valid', 'Invalid']
n_targets = len(levels_target)
levels_temporal = ['Rhythmic', 'Random']
n_temporal = len(levels_temporal)

# Cell means: rows are Rhythmic / Random, columns are Valid / Invalid
means = np.array([[225, 295], [280, 300]])
std_dev_sbj = 115  # st. dev. of the per-subject offset (across participants)
std_dev_wthn = 40  # st. dev. of each observation around its cell mean (within participants)

# Simulate one observation per subject and condition. Each subject gets a single
# random offset that is added to all of that subject's observations. The values
# are NOT clipped or rescaled to any range.
data = []
for subject in range(1, n_subjects + 1):
    # Drawn once per subject, before that subject's cell values, so the order of
    # random draws (and therefore the seeded data set) stays fixed
    sbj_avg = np.random.normal(0, std_dev_sbj)
    for ltmp, temporal in enumerate(levels_temporal):
        for ltrgt, target in enumerate(levels_target):
            value = np.random.normal(means[ltmp, ltrgt], std_dev_wthn) + sbj_avg
            data.append([subject, temporal, target, value])

# Create a long-format DataFrame: one row per subject x Temporal x Target cell
df = pd.DataFrame(data, columns=['Subject', 'Temporal', 'Target', 'Response time'])

In [None]:
# Plot the data: one panel per Temporal condition, one line per subject
g = sns.FacetGrid(data=df, col='Temporal', height=5, aspect=0.7)
# Draw the per-subject lines exactly once on each panel
g.map_dataframe(sns.lineplot, x='Target', y='Response time', hue='Subject', marker='o')
plt.tight_layout()
# Read the legend entries from the axes at row 0, column 1 of the grid and show
# only the first five of them, placed outside the plotting area
handles, labels = g.facet_axis(0,1).get_legend_handles_labels()
plt.legend(handles[0:5], labels[0:5], title='Subject', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Show every Temporal x Target cell as a box plot with the individual
# observations overlaid as a strip plot
plt.figure(figsize=(7, 5))
ax = sns.boxplot(data=df, x='Temporal', y='Response time', hue='Target')
# Capture the legend entries before the strip plot is drawn, so the legend
# built below lists only the box-plot entries
box_handles, box_labels = ax.get_legend_handles_labels()
sns.stripplot(data=df, x='Temporal', y='Response time', hue='Target', dodge=True, palette='dark')
ax.legend(box_handles, box_labels, bbox_to_anchor=(1.05, 1), loc='upper left')
ax.set_ylabel('Response time (ms)')
plt.show()

### Run the 2x2 rmANOVA

In [None]:
# Two-way repeated-measures ANOVA with Temporal and Target as within-subject factors
aov = pg.rm_anova(
    data=df,
    dv='Response time',
    within=['Temporal', 'Target'],
    subject='Subject',
    detailed=True,
    effsize="ng2",
)
# Show the resulting ANOVA table
print(aov)

In [None]:
# rm_anova does not display the between-subjects sum of squares, so partition
# the total sum of squares manually.
# NOTE: rows 0, 1 and 2 of `aov` are taken to be Temporal, Target and their
# interaction, matching the variable names used below.
SS_total = sum(np.power(df['Response time'] - df['Response time'].mean(),2))
print(f'Total SS = {SS_total}')
sbj_mean = df[['Subject','Response time']].groupby('Subject').mean()
# Each subject mean summarises n_targets * n_temporal observations, hence the multiplier
SS_sbj = sum(np.power(sbj_mean['Response time'] - df['Response time'].mean(),2)) * n_targets * n_temporal
print(f'Between-subject SS = {SS_sbj}')
print(f'Within-subject SS = {SS_total - SS_sbj}')
print(f"Error SS = {SS_total - SS_sbj - aov['SS'][0] - aov['SS'][1] - aov['SS'][2]}")

# Calculate individual WS error by inverting each F ratio:
#   F = (SS / ddof1) / (SS_err / ddof2)   =>   SS_err = SS * ddof2 / (F * ddof1)
# Dividing by ddof1 keeps this correct for factors with more than two levels.
SSerr_tmp = aov['SS'][0]/aov['F'][0]*aov['ddof2'][0]/aov['ddof1'][0]
SSerr_trg = aov['SS'][1]/aov['F'][1]*aov['ddof2'][1]/aov['ddof1'][1]
SSerr_int = aov['SS'][2]/aov['F'][2]*aov['ddof2'][2]/aov['ddof1'][2]
print(f"Err(Temp) SS = {SSerr_tmp}")
print(f"Err(Trgt) SS = {SSerr_trg}")
print(f"Err(TempXTrgt) SS = {SSerr_int}")
print(f"Expected WS Error SS = {SSerr_tmp + SSerr_trg + SSerr_int}")

# Does SS_sbj + SS_condition + SS_error = SS_total?
# SS_condition must include all three effects (both main effects AND the
# interaction) and SS_error all three error terms; otherwise the sum cannot
# reproduce SS_total.
SS_cond = aov['SS'][0] + aov['SS'][1] + aov['SS'][2]
SS_err = SSerr_tmp + SSerr_trg + SSerr_int
print(f"SS_sbj + SS_cond + SS_error = {SS_sbj + SS_cond + SS_err}")

### Multiple comparisons

In [None]:
# Post hoc pairwise comparisons (paired t-tests) across the two within-subject
# factors, with Bonferroni-adjusted p-values
posthocs = pg.pairwise_tests(
    data=df,
    dv='Response time',
    within=['Temporal', 'Target'],
    subject='Subject',
    padjust='bonf',
)
print(posthocs)