**Note:** use the `pyspi` conda environment to run this code.

In [1]:
from tqdm.auto import tqdm
import xarray as xr
from pyspi.calculator import CalculatorFrame
from pyspi.data import Data

from src.multimodal.preprocessing import TimeseriesAggregator

# The only preprocessing step: collapse region-wise timeseries into
# network-wise timeseries.
preproc_pipe = TimeseriesAggregator(strategy='network')

atlas = 'dosenbach2010'
connectivity_kind = 'tangent'

with xr.open_dataset(f'data/Julia2018/timeseries_{atlas}.nc5') as ds:
    # load() pulls the data into memory and returns the same dataset, so the
    # aggregated result stays usable after the file is closed.
    ds = preproc_pipe.fit_transform(ds.load())


# Network labels are the same for every subject, so look them up once.
feature_names = ds['network'].values

# One pyspi Data object per subject, named after the subject.
datasets = []
for subject in ds['subject'].values:
    # NOTE(review): the transpose presumably yields the layout pyspi's Data
    # expects by default -- confirm against the dims of `timeseries`.
    ts = ds['timeseries'].sel(subject=subject).values.T
    dataset = Data(ts, procnames=feature_names, name=subject)
    datasets.append(dataset)

# Run the 'fast' subset of SPIs on every subject.
calc = CalculatorFrame(
    datasets=datasets,
    subset='fast',
    name=f'Julia2018_{atlas}',
    names=[d.name for d in datasets],
)
calc.compute()

Checking if optional dependencies exist...
Starting JVM with java class /home/morteza/miniforge3/envs/pyspi/lib/python3.9/site-packages/pyspi/lib/jidt/infodynamics.jar.
Loading configuration file: /home/morteza/miniforge3/envs/pyspi/lib/python3.9/site-packages/pyspi/fast_config.yaml
*** Importing module .statistics.basic
[0] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'EmpiricalCovariance'})
Succesfully initialised SPI with identifier "cov_EmpiricalCovariance" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[1] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'GraphicalLasso'})
Succesfully initialised SPI with identifier "cov_GraphicalLasso" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[2] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'GraphicalLassoCV'})
Succesfully initialised SPI with identifier "cov_GraphicalLassoCV" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[3] Adding SPI .stati

Detecting 1 CUDA device(s).


[46] Adding SPI .statistics.causal.AdditiveNoiseModel(x,y)...
Succesfully initialised SPI with identifier "anm" and labels ['unsigned', 'causal', 'unordered', 'linear', 'directed']
[47] Adding SPI .statistics.causal.ConditionalDistributionSimilarity(x,y)...
Succesfully initialised SPI with identifier "cds" and labels ['unsigned', 'causal', 'unordered', 'nonlinear', 'directed']
[48] Adding SPI .statistics.causal.RegressionErrorCausalInference(x,y)...
Succesfully initialised SPI with identifier "reci" and labels ['unsigned', 'causal', 'unordered', 'nonlinear', 'directed']
[49] Adding SPI .statistics.causal.InformationGeometricConditionalIndependence(x,y)...
Succesfully initialised SPI with identifier "igci" and labels ['causal', 'directed', 'nonlinear', 'unsigned', 'unordered']
*** Importing module .statistics.infotheory
[50] Adding SPI .statistics.infotheory.JointEntropy(x,y,{'estimator': 'gaussian'})
Succesfully initialised SPI with identifier "je_gaussian" and labels ['unsigned', 'inf

Frequency minimum set to 0; overriding to 1e-5.


[188] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fs': 1})
Succesfully initialised SPI with identifier "psi_wavelet_mean_fs-1_fmin-0_fmax-0-5_mean" and labels ['unsigned', 'wavelet', 'undirected']
[189] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fmin': 0, 'fmax': 0.25})
Succesfully initialised SPI with identifier "psi_wavelet_mean_fs-1_fmin-0_fmax-0-25_mean" and labels ['unsigned', 'wavelet', 'undirected']
[190] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fmin': 0.25, 'fmax': 0.5})
Succesfully initialised SPI with identifier "psi_wavelet_mean_fs-1_fmin-0-25_fmax-0-5_mean" and labels ['unsigned', 'wavelet', 'undirected']
[191] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fmin': 0, 'fmax': 0.5, 'statistic': 'max'})
Succesfully initialised SPI with identifier "psi_wavelet_max_fs-1_fmin-0_fmax-0-5_max" and labels ['unsigned', 'wavelet', 'undirected']
[192] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fmin': 0, 'fmax': 0.25, 'statistic': 'max'})
S

pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
Processing [AVGP01: phase_multitaper_mean_fs-1_fmin-0_fmax-0-5]:  34%|███▍      | 73/216 [00:04<00:22,  6.39it/s]Mean of empty slice
Processing [AVGP01: phase_multitaper_max_fs-1_fmin-0_fmax-0-5]:  34%|███▍      | 73/216 [00:04<00:22,  6.39it/s]    All-NaN slice encountered
Processing [AVGP01: sgc_nonparametric_mean_fs-1_fmin-0_fmax-0-5]:  72%|███████▏  | 156/216 [00:08<00:05, 10.37it/s]   Mean of empty slice
Processing [AVGP01: sgc_nonparametric_max_fs-1_fmin-0_fmax-0-5]:  76%|███████▋  | 165/216 [00:08<00:03, 12.85it/s]    All-NaN slice encountered
Processing [AVGP01: pec_orth_log_abs]: 100%|██████████| 216/216 [00:09<00:00, 23.92it/s]                                      
Processing [AVGP02: phase_multitaper_m

In [2]:

import pandas as pd

# Reshape every subject's SPI table into long form, keep one value per
# (SPI, unordered process pair), then pivot to one row per (subject, SPI)
# with one column per process pair and write the result to CSV.
spis = []

for _, c in tqdm(calc.calculators.itertuples(), total=calc.n_calculators):
    # rename_axis returns a new frame, so the calculator's own table is not
    # mutated (assigning to `c.table.index.name` would modify it in place).
    feats = c.table.rename_axis(index='process_1').reset_index()
    feats.columns.names = ['spi', 'process_2']
    melted = pd.melt(feats, id_vars='process_1', var_name=['spi', 'process_2'], value_name='value')

    # Order-independent key for a process pair. frozenset is hashable (a plain
    # set is not), which is what drop_duplicates needs, and building it from
    # zipped columns avoids a slow row-wise DataFrame.apply.
    pair_key = [frozenset(pair) for pair in zip(melted['process_1'], melted['process_2'])]

    melted = (
        melted
        .assign(pair=pair_key)
        # keep the first occurrence of each unordered pair within each SPI
        .drop_duplicates(subset=['spi', 'pair'])
        # stable sort keeps the first-seen pair order inside each SPI
        .sort_values('spi', kind='stable')
        .assign(process=lambda d: d['process_1'] + '-' + d['process_2'])
        .loc[:, ['spi', 'value', 'process']]
        .reset_index(drop=True)
        # NOTE(review): the first four characters of the subject name are used
        # as the class label -- presumably a group prefix; confirm.
        .assign(subject=c.name, label=c.name[:4])
    )
    spis.append(melted)

spi_df = pd.concat(spis)
spi_df_wide = spi_df.pivot_table(index=['subject', 'label', 'spi'], columns=['process'], values='value', aggfunc='mean').reset_index()
spi_df_wide.to_csv(f'data/Julia2018/spis_{atlas}.csv', index=False)

100%|██████████| 32/32 [01:33<00:00,  2.93s/it]


## SPI Classifiers

In [31]:
from IPython.display import clear_output
import pandas as pd
from sklearn.model_selection import cross_val_score, StratifiedShuffleSplit, LeaveOneOut
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from xgboost import XGBClassifier

def score_spi(s, n_splits=1000, test_size=8, random_state=0):
    """Cross-validate an SVM that predicts the group label from one SPI.

    Intended to be used with ``DataFrame.groupby('spi').apply(score_spi)``.

    Parameters
    ----------
    s : pandas.DataFrame
        Rows of a single SPI. Must contain a ``label`` column; every column
        other than ``subject``, ``spi`` and ``label`` is used as a feature.
    n_splits : int, default 1000
        Number of stratified shuffle-split resamples.
    test_size : int or float, default 8
        Test-set size passed to ``StratifiedShuffleSplit``.
    random_state : int or None, default 0
        Seed for the shuffle splits. A fixed seed makes the scores
        reproducible and evaluates every SPI on the same splits; pass
        ``None`` for unseeded splits.

    Returns
    -------
    float
        Mean accuracy over all splits.
    """
    # `name` is set by groupby.apply; fall back gracefully on a plain frame.
    print(getattr(s, 'name', ''), '...', end=' ')
    estimator = Pipeline([
        ('scaler', StandardScaler()),
        ('clf', SVC()),
    ])
    # errors='ignore': depending on the pandas version, groupby.apply may not
    # pass the grouping column ('spi') to this function.
    X = s.drop(columns=['subject', 'spi', 'label'], errors='ignore').values
    y = LabelEncoder().fit_transform(s['label'].values)
    cv = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size,
                                random_state=random_state)
    score = cross_val_score(estimator, X, y, cv=cv, n_jobs=-1, scoring='accuracy')
    print('Done!')
    return score.mean()

atlas = 'dosenbach2010'
spi_df_wide = pd.read_csv(f'data/Julia2018/spis_{atlas}.csv')

# Count the missing cells of each SPI across all of its rows and columns.
s = spi_df_wide.groupby(['spi']).apply(lambda grp: grp.isna().sum().sum())

# Discard every SPI that has at least one missing value.
null_spis = s.index[s > 0]
spi_df_wide = spi_df_wide[~spi_df_wide['spi'].isin(null_spis)]

# Score each remaining SPI separately, best first.
scores = spi_df_wide.groupby(['spi']).apply(score_spi)
scores = scores.sort_values(ascending=False)

# Remove the per-SPI progress lines printed by score_spi.
clear_output()
scores

spi
dcoh_multitaper_mean_fs-1_fmin-0_fmax-0-5        0.738250
phase_multitaper_max_fs-1_fmin-0-25_fmax-0-5     0.722000
gpdcoh_multitaper_mean_fs-1_fmin-0_fmax-0-25     0.721500
pdcoh_multitaper_mean_fs-1_fmin-0_fmax-0-25      0.718250
ddtf_multitaper_mean_fs-1_fmin-0_fmax-0-5        0.716750
                                                   ...   
coint_johansen_max_eig_stat_order-1_ardiff-1     0.353750
coint_johansen_max_eig_stat_order-0_ardiff-1     0.345625
xcorr_mean_sig-False                             0.342750
dswpli_multitaper_max_fs-1_fmin-0-25_fmax-0-5    0.304375
psi_multitaper_mean_fs-1_fmin-0_fmax-0-5         0.282000
Length: 209, dtype: float64