In [29]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
import xarray as xr
from scipy.stats import f_oneway

from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.multivariate.manova import MANOVA

from sklearn.pipeline import Pipeline
from src.multimodal.preprocessing import TimeseriesAggregator

preproc_pipe = Pipeline([
    ('time_agg', TimeseriesAggregator(strategy='network')),
])

atlas = 'dosenbach2010'
connectivity_kind = 'tangent'

with xr.open_dataset(f'data/Julia2018/timeseries_{atlas}.nc5') as ds:
    ds.load()
    ds = preproc_pipe.fit_transform(ds)

timeseries = []
y = []
for subject in ds['timeseries'].coords['subject'].values:
    subject_ts = ds['timeseries'].sel(subject=subject).to_pandas()
    subject_ts.insert(0, 'subject', subject)
    y.append((subject, subject[:4]))
    timeseries.append(subject_ts)
timeseries = pd.concat(timeseries).reset_index()
y = pd.DataFrame(y, columns=['subject', 'group']).set_index('subject')['group']

from tsfresh import extract_features
features = extract_features(
    timeseries,
    column_id='subject',
    column_sort='timepoint')

valid_features_mask = (features.std() != 0)
feature = features.loc[:, valid_features_mask]

features['y'] = features.index.map(lambda x: x[:4])
features.dropna(axis=1, inplace=True)
features.columns = [
    c.replace('.','_').replace('"','_').replace('(','_').replace(')','_').replace(',','_').replace('-','_').replace(' ','_') for c in features.columns]



Feature Extraction: 100%|██████████| 20/20 [00:11<00:00,  1.80it/s]


In [33]:
# ANOVA

significant_features = []
for feature in features.columns:
    if feature == 'y':
        continue
    ols_model = ols(f'{feature} ~ C(y)', data=features).fit()
    res = anova_lm(ols_model)
    if res.loc['C(y)', 'PR(>F)'] < .05:
        # print(f'[ANOVA] {feature} is significant (AVGP != NVGP)')
        # display(res)
        significant_features.append(feature)

print(f'{len(significant_features)}/{len(features.columns) - 1} '  # minus 'y'
      'features are significantly different.')


149/3804 features are significantly different.


In [None]:

features['y'] = features.index.apply(lambda x: x[:4])

data['connectivity_name'] = data['network_src'] + '_' + data['network_dst']
# data['connectivity'] = data['connectivity'].abs()

# cleanup
data = data.pivot_table(index=['subject', 'group'], columns='connectivity_name', values='connectivity', aggfunc='mean')
# cols = [set(c.split('_')) for c in data.columns
#         if c.split('_')[0] != c.split('_')[1]]
# cc = []
# [cc.append(c) for c in cols if c not in cc]
# cc = [list(c) for c in cc]
# cols = [f'{c[0]}_{c[1]}' for c in cc]

# data = data[cols].reset_index()

# ANOVA
for col in data.columns:
    ols_model = ols(f'{col} ~ C(group)', data=data.reset_index()).fit()
    res = anova_lm(ols_model)
    if res.loc['C(group)', 'PR(>F)'] < .05:
        print(f'[ANOVA] {col} is significant (AVGP != NVGP)')
        display(res)

# MANOVA
manova_model = MANOVA.from_formula('+'.join(data.columns) + '~ C(group)',
                                   data.reset_index())

print('[MANOVA]')
manova_model.mv_test().summary()

In [None]:
# F oneway anova (using scipy)
import seaborn as sns
import matplotlib.pyplot as plt

data.columns = data.columns.str.replace('_', '\N{left right arrow}')
avgp = data.query('subject.str.contains("AVGP")')
nvgp = data.query('subject.str.contains("NVGP")')


significant_cols = []

for col in data.columns:
    if '\N{left right arrow}' in col:
        f = f_oneway(avgp[col].tolist(), nvgp[col].tolist())
        if f.pvalue < 0.05:
            print(col, f)
            significant_cols.append(col)

In [None]:
from matplotlib.patches import Rectangle

data_long = data.reset_index().melt(id_vars=['subject', 'group'], var_name='conn', value_name=connectivity_kind)

# \N{downwards arrow}


sns.set_theme('talk', 'ticks', font_scale=2)
g = sns.displot(data=data_long, hue='group',
                x=connectivity_kind,
                col='conn', col_wrap=4,
                element='step',
                facet_kws=dict(sharex=True, sharey=False),
                aspect=1.5, edgecolor='white',
                kind='hist', kde=True, fill=True)

g.set_titles(col_template='{col_name}', fontweight='bold')

for ax in g.axes.flatten():
    if ax.get_title() in significant_cols:
        for s in ax.spines:
            ax.spines[s].set_visible(True)
            ax.spines[s].set_linewidth(8)
            ax.spines[s].set_color('red')
    ax.set_xlabel(None)
    ax.set_ylabel('count')

# move legend of the g to lower right
g._legend.set_bbox_to_anchor([0.83, 0.12])
g._legend.set_title(None)
plt.suptitle(f'Dosenbach2010 {connectivity_kind} (ConnAgg-between)',
             fontweight='bold', fontsize='xx-large')
plt.tight_layout()


# Within Network

In [None]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
from scipy.stats import f_oneway

from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.multivariate.manova import MANOVA

from IPython.display import display

data  = pd.read_csv('data/julia2018/derivatives/connectivity/dosenbach2010_{}_cagg-within.csv'.format(connectivity_kind.replace(' ','_')))

data['group'] = data['subject'].apply(lambda x: x[:4])

# cleanup
data = data.pivot_table(index=['subject', 'group'], columns='network', values='connectivity', aggfunc='mean')
# cols = [set(c.split('_')) for c in data.columns
#         if c.split('_')[0] != c.split('_')[1]]
# cc = []
# [cc.append(c) for c in cols if c not in cc]
# cc = [list(c) for c in cc]
# cols = [f'{c[0]}_{c[1]}' for c in cc]

# data = data[cols].reset_index()

# ANOVA
for col in data.columns:
    ols_model = ols(f'{col} ~ C(group)', data=data.reset_index()).fit()
    res = anova_lm(ols_model)
    if res.loc['C(group)', 'PR(>F)'] < .05:
        print(f'[ANOVA] {col} is significant (AVGP != NVGP)')
        display(res)

# MANOVA
manova_model = MANOVA.from_formula('+'.join(data.columns) + '~ C(group)',
                                   data.reset_index())

print('[MANOVA]')
display(manova_model.mv_test().summary())

# F oneway anova (using scipy)
import seaborn as sns
import matplotlib.pyplot as plt

avgp = data.query('subject.str.contains("AVGP")')
nvgp = data.query('subject.str.contains("NVGP")')

significant_cols = []

for col in data.columns:
    f = f_oneway(avgp[col].tolist(), nvgp[col].tolist())
    if f.pvalue < 0.05:
        print(col, f)
        significant_cols.append(col)
    else:
        print(f'{col} is not significant', f)


from matplotlib.patches import Rectangle

data_long = data.reset_index().melt(id_vars=['subject', 'group'], var_name='conn', value_name=connectivity_kind)

# \N{downwards arrow}
sns.set_theme('talk', 'ticks', font_scale=2)
g = sns.displot(data=data_long, hue='group',
                x=connectivity_kind,
                col='conn', col_wrap=4,
                element='step',
                facet_kws=dict(sharex=True, sharey=False),
                aspect=1.5, edgecolor='white',
                kind='hist', kde=True, fill=True)

g.set_titles(col_template='{col_name}', fontweight='bold')

for ax in g.axes.flatten():
    if ax.get_title() in significant_cols:
        for s in ax.spines:
            ax.spines[s].set_visible(True)
            ax.spines[s].set_linewidth(8)
            ax.spines[s].set_color('red')
    ax.set_xlabel(None)
    ax.set_ylabel('count')

# move legend of the g to lower right
g._legend.set_bbox_to_anchor([0.83, 0.12])
g._legend.set_title(None)
plt.suptitle(f'Dosenbach2010 {connectivity_kind} (ConnAgg-within)',
             fontweight='bold', fontsize='xx-large')
plt.tight_layout()
