# Nilearn connectivity features


In [1]:
%reload_ext autoreload
%autoreload 2

import xarray as xr
import pandas as pd
from sklearn.pipeline import Pipeline
from src.multimodal.preprocessing import TimeseriesAggregator, ConnectivityExtractor
import statsmodels.formula.api as smf
import statsmodels.api as sm

In [7]:

atlas = 'dosenbach2010'
connectivity_kinds = ['correlation', 'partial correlation', 'tangent', 'covariance', 'precision']


all_features = []  # store all the connectivity measures in one dataframe

for connectivity_kind in connectivity_kinds:
    print(f'[{connectivity_kind}]')
    preproc_pipe = Pipeline([
        ('time_agg', TimeseriesAggregator(strategy='network')),
        ('conn', ConnectivityExtractor(kind=connectivity_kind))
    ])

    with xr.open_dataset(f'data/Julia2018/timeseries_{atlas}.nc5') as ds:
        ds.load()
        ds = preproc_pipe.fit_transform(ds)

    features = ds['connectivity'].to_dataframe().reset_index()

    features['process'] = features.apply(lambda x: set(x[['network_src', 'network_dst']]), axis=1)
    features = features.groupby('subject').apply(lambda x: x.drop_duplicates('process'), include_groups=False)
    features = features[features['process'].map(len) == 2]
    features['process'] = features['process'].map(lambda x: '_'.join(x))
    features = features.drop(columns=['network_src', 'network_dst'])

    features = features.reset_index(level=1, drop=True).reset_index()
    features['label'] = features['subject'].map(lambda x: x[:4])


    features = features.pivot(index=['subject', 'label'], columns='process', values='connectivity')
    features = features.reset_index()
    features.columns.name = ds.attrs['connectivity_kind']
    
    # ANOVA

    significant_features = []
    for feature in features.columns:
        if feature in ['subject', 'label', 'spi']:
            continue
        model = smf.glm(f'label ~ {feature}', features, family=sm.families.Binomial()).fit()
        res = model.summary2()
        pvalue = res.tables[1]['P>|z|'][feature]
        if pvalue < .05:
            # display(res)
            significant_features.append(feature)

    print('{} features (out of {}) are significant: {}'.format(
              len(significant_features),
              len(features.columns) - 1,
              significant_features))

    features['spi'] = connectivity_kind.replace(' ', '_')
    features.insert(2, 'spi', features.pop('spi'))   # move spi to the front

    all_features.append(features)

pd.concat(all_features).to_csv(f'data/Julia2018/spis_{atlas}_network_nilearn.csv', index=False)

[correlation]
3 features (out of 16) are significant: ['CON_SMN', 'FPN_SMN', 'VIS_DMN']
[partial correlation]
3 features (out of 16) are significant: ['CER_CON', 'CON_SMN', 'FPN_VIS']
[tangent]
2 features (out of 16) are significant: ['CON_SMN', 'FPN_SMN']
[covariance]
0 features (out of 16) are significant: []
[precision]
3 features (out of 16) are significant: ['CON_SMN', 'FPN_SMN', 'FPN_VIS']
