In [2]:
import pingouin as pg
import os
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
import statsmodels.api as sm
from statsmodels.stats.anova import AnovaRM 

In [3]:
# Read in the control-block trial data for every participant.
#
# Layout read by this cell (relative to the working directory):
#   data/<participant>/control*/trials.csv
# Every trials.csv found is tagged with a 1-based subject number (SN) and a
# 1-based block number (BN); all of them are stacked into `merged_df`.
current_directory = os.getcwd()

# Participant folder names; SN is the 1-based position in this tuple.
participants = ('SN001', 'SN002', 'SN003', 'SN004', 'SN005', 'SN006', 'SN007', 'SN008', 'SN009', 'SN010', 'SN011', 'SN012', 'SN013', 'SN014', 'SN015')

# One DataFrame per trials.csv that was found.
data = []

for SN, p in enumerate(participants, start=1):
    root_subject = os.path.join('data', p)

    # os.scandir yields entries in arbitrary, platform-dependent order, so the
    # folders are sorted by path to make the BN numbering reproducible.
    # NOTE(review): the sort is lexicographic ('control_10' < 'control_2');
    # confirm the folder names sort in the intended block order.
    with os.scandir(root_subject) as entries:
        control_folders = sorted(
            f.path for f in entries
            if f.is_dir() and f.name.startswith('control')
        )

    # BN counts every control folder, including ones without a trials.csv.
    for BN, b in enumerate(control_folders, start=1):
        trials_path = os.path.join(b, 'trials.csv')
        if os.path.isfile(trials_path):
            data_temp = pd.read_csv(trials_path)
            # Insert BN first so the final column order is SN, BN, <csv columns>.
            data_temp.insert(0, 'BN', BN)
            data_temp.insert(0, 'SN', SN)
            data.append(data_temp)

merged_df = pd.concat(data, ignore_index=True)
merged_df

Unnamed: 0,SN,BN,noise,block,trial,emg,score,soa,gender
0,1,1,0.1,0,0,1,0.609251,4,0
1,1,1,0.0,0,1,1,0.621880,5,0
2,1,1,0.1,0,2,1,0.662875,5,0
3,1,1,0.0,0,3,1,0.650879,5,0
4,1,1,0.2,0,4,1,0.574849,4,0
...,...,...,...,...,...,...,...,...,...
1195,15,4,0.3,0,15,0,0.847151,9,1
1196,15,4,0.2,0,16,0,0.890040,8,1
1197,15,4,0.1,0,17,0,0.852177,9,1
1198,15,4,0.0,0,18,0,0.774337,9,1


In [4]:
# Split the trials on the `emg` flag; `grouped` is reused by later cells.
grouped = merged_df.groupby(merged_df['emg'])

# Rows with emg == 0 (presumably the joystick-controlled blocks, going by the
# variable name — confirm).
stick_df = grouped.get_group(0)
stick_df

Unnamed: 0,SN,BN,noise,block,trial,emg,score,soa,gender
20,1,2,0.2,0,0,0,0.845762,8,0
21,1,2,0.3,0,1,0,0.872959,8,0
22,1,2,0.1,0,2,0,0.894951,9,0
23,1,2,0.0,0,3,0,0.898567,9,0
24,1,2,0.3,0,4,0,0.858086,8,0
...,...,...,...,...,...,...,...,...,...
1195,15,4,0.3,0,15,0,0.847151,9,1
1196,15,4,0.2,0,16,0,0.890040,8,1
1197,15,4,0.1,0,17,0,0.852177,9,1
1198,15,4,0.0,0,18,0,0.774337,9,1


In [5]:
# Rows with emg == 1 (presumably the EMG-controlled blocks — confirm).
# Relies on `grouped`, the groupby-on-emg object created in an earlier cell.
emg_df = grouped.get_group(1)
emg_df

Unnamed: 0,SN,BN,noise,block,trial,emg,score,soa,gender
0,1,1,0.1,0,0,1,0.609251,4,0
1,1,1,0.0,0,1,1,0.621880,5,0
2,1,1,0.1,0,2,1,0.662875,5,0
3,1,1,0.0,0,3,1,0.650879,5,0
4,1,1,0.2,0,4,1,0.574849,4,0
...,...,...,...,...,...,...,...,...,...
1175,15,3,0.2,0,15,1,0.602124,4,1
1176,15,3,0.1,0,16,1,0.785187,6,1
1177,15,3,0.0,0,17,1,0.681725,8,1
1178,15,3,0.2,0,18,1,0.786586,9,1


In [6]:
# Two-way ANOVA (type II sums of squares) of `soa` on the categorical factors
# `emg` and `noise`, including their interaction.
# NOTE(review): OLS treats every trial as an independent observation, although
# each subject (SN) contributes many trials — confirm this is intended.
two_way_formula = 'soa ~ C(emg) + C(noise) + C(emg):C(noise)'
model = ols(two_way_formula, data=merged_df).fit()
sm.stats.anova_lm(model, typ=2)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(emg),2088.240833,1.0,1003.392664,2.835779e-160
C(noise),145.7425,3.0,23.342926,1.101258e-14
C(emg):C(noise),8.9825,3.0,1.438687,0.2298701
Residual,2480.766667,1192.0,,


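Homoscedasticity (homogeneity of variances) is the assumption that the groups being compared have equal or similar variances. It is an important assumption of parametric tests such as ANOVA, because these tests are sensitive to differences in group variance: when the variances are unequal, the resulting test statistics and p-values can be biased. Levene's test is used below to check this assumption; a p-value below 0.05 indicates that the group variances differ significantly.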

In [7]:
# Levene's test: is the variance of `soa` equal across the noise levels?
pg.homoscedasticity(
    data=merged_df,
    dv='soa',
    group='noise',
    method='levene',
)

Unnamed: 0,W,pval,equal_var
levene,0.897233,0.441964,True


In [8]:
# Levene's test: is the variance of `soa` equal across the two `emg` groups?
pg.homoscedasticity(
    data=merged_df,
    dv='soa',
    group='emg',
    method='levene',
)

Unnamed: 0,W,pval,equal_var
levene,104.61243,1.3412360000000002e-23,False


In [9]:
# One-way between-groups ANOVA of `soa` with noise level as the only factor.
# NOTE(review): this treats every trial as an independent observation, while
# the same subjects (SN) contribute trials at several noise levels — confirm
# that a between-groups design is what is wanted here.
aov = pg.anova(
    data=merged_df,
    dv='soa',
    between='noise',
)
aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,np2
0,noise,3,1196,12.691744,3.61115e-08,0.030853


If the p-value is less than 0.05, we reject the null hypothesis that the mean `soa` is the same at every noise level and conclude that at least one noise level differs significantly from the others.

In [10]:
# One-way repeated-measures ANOVA of `soa` with noise level as the
# within-subject factor. Trials are aggregated with np.mean before the test.
noise_rm_anova = AnovaRM(
    data=merged_df,
    depvar='soa',
    subject='SN',
    within=['noise'],
    aggregate_func=np.mean,
)
noise_rm_results = noise_rm_anova.fit()
# print() is used so the results object is rendered as its summary table.
print(noise_rm_results)

               Anova
      F Value Num DF  Den DF Pr > F
-----------------------------------
noise 25.6912 3.0000 42.0000 0.0000



In [11]:
# One-way repeated-measures ANOVA of `soa` with the `emg` flag as the
# within-subject factor. Trials are aggregated with np.mean before the test.
emg_rm_anova = AnovaRM(
    data=merged_df,
    depvar='soa',
    subject='SN',
    within=['emg'],
    aggregate_func=np.mean,
)
emg_rm_results = emg_rm_anova.fit()
# print() is used so the results object is rendered as its summary table.
print(emg_rm_results)

              Anova
    F Value Num DF  Den DF Pr > F
---------------------------------
emg 94.0659 1.0000 14.0000 0.0000

