In [13]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
from scipy.stats import f_oneway

from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.multivariate.manova import MANOVA

from IPython.display import display

# Which connectivity estimate to analyse; it selects the derivative CSV below.
connectivity_kind = 'partial_correlation'

# Spaces in the kind are turned into underscores to form the file name.
csv_path = 'data/julia2018/derivatives/connectivity/dosenbach2010_{}.csv'.format(
    connectivity_kind.replace(' ', '_'))
data = pd.read_csv(csv_path)

# Lookup from source region to its network (first value seen per region).
region2network = data.groupby('region_src')['network_src'].first().to_dict()

# Binary group flag: 1 when the subject id contains 'AVGP', otherwise 0.
data['group'] = data['subject'].map(lambda subject_id: int('AVGP' in subject_id))

# Tag every row with its "<source network>_<destination network>" pair.
data['connectivity_name'] = data['network_src'] + '_' + data['network_dst']
# Connectivity values are kept signed; the magnitude-only variant would be:
# data['connectivity'] = data['connectivity'].abs()

# cleanup
# Reshape to one row per (subject, group) and one column per network pair;
# rows that share a subject and a pair are averaged.
data = data.pivot_table(index=['subject', 'group'], columns='connectivity_name', values='connectivity', aggfunc='mean')

# Keep exactly one column per unordered between-network pair and drop the
# within-network columns (source == destination).
# The first column encountered for a pair is kept under its real name, so the
# selection is the same on every run and always refers to an existing column
# (rebuilding names from a set could yield either 'A_B' or 'B_A').
# NOTE(review): assumes network names contain no '_' themselves, and that the
# 'A_B' and 'B_A' columns carry the same information -- confirm for the data.
seen_pairs = set()
cols = []
for name in data.columns:
    src, dst = name.split('_', 1)
    pair = frozenset((src, dst))
    if src == dst or pair in seen_pairs:
        continue
    seen_pairs.add(pair)
    cols.append(name)
data = data[cols].reset_index()

# ANOVA
# Fit one univariate model per network pair with `group` as a categorical
# factor, and report only the pairs whose group effect passes the threshold.
# NOTE(review): each p-value is compared to the threshold on its own; no
# multiple-comparison correction is applied across the pairs.
alpha = .05
for col in cols:
    ols_model = ols(f'{col} ~ C(group)', data=data).fit()
    res = anova_lm(ols_model)
    p_value = res.loc['C(group)', 'PR(>F)']
    if not p_value < alpha:
        # Not significant (or p-value undefined): nothing to report.
        continue
    print(f'[ANOVA] {col} is significant (AVGP != NVGP)')
    display(res)

# MANOVA
# All selected network pairs enter as joint dependent variables, with `group`
# as the single categorical predictor.
manova_formula = '{}~ C(group)'.format('+'.join(cols))
manova_model = MANOVA.from_formula(manova_formula, data=data)

print('[MANOVA]')
# Last expression of the cell, so the summary tables are rendered as output.
manova_model.mv_test().summary()

[ANOVA] CER_CON is significant (AVGP != NVGP)


Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(group),1.0,1e-05,1e-05,5.020268,0.032609
Residual,30.0,5.8e-05,2e-06,,


[ANOVA] SMN_CON is significant (AVGP != NVGP)


Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(group),1.0,3e-06,3.208056e-06,5.801592,0.022361
Residual,30.0,1.7e-05,5.529613e-07,,


[MANOVA]


0,1,2,3
,,,

0,1,2,3,4,5,6
,Intercept,Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.0066,15.0000,16.0000,159.7114,0.0000
,Pillai's trace,0.9934,15.0000,16.0000,159.7114,0.0000
,Hotelling-Lawley trace,149.7294,15.0000,16.0000,159.7114,0.0000
,Roy's greatest root,149.7294,15.0000,16.0000,159.7114,0.0000

0,1,2,3
,,,

0,1,2,3,4,5,6
,C(group),Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.4699,15.0000,16.0000,1.2032,0.3580
,Pillai's trace,0.5301,15.0000,16.0000,1.2032,0.3580
,Hotelling-Lawley trace,1.1280,15.0000,16.0000,1.2032,0.3580
,Roy's greatest root,1.1280,15.0000,16.0000,1.2032,0.3580


In [10]:
# F oneway anova (using scipy)
# Cross-check of the per-pair group tests using scipy's one-way ANOVA.
# Groups come from the `group` column (1 = subject id contains 'AVGP',
# 0 = everything else) and the tested columns from `cols`, both created by
# the preprocessing cell, so this check sees the same subjects and the same
# network pairs as the statsmodels analysis.

avgp = data[data['group'] == 1]
nvgp = data[data['group'] == 0]

for col in cols:
    f = f_oneway(avgp[col], nvgp[col])
    # Report only pairs whose uncorrected p-value is below 0.05.
    if f.pvalue < 0.05:
        print(col, f)


CER_VIS F_onewayResult(statistic=4.332338414387276, pvalue=0.046027338580300024)
