# Node profile dissimilarity between conditions

The aim of node profile dissimilarity analysis is to find network nodes (ROIs) with the highest variability of module assignment between task conditions. Highly variable nodes have different node profiles depending on the task condition. The **node profile** of node $i$ is a single row / column of the group-level agreement matrix. The $j$th element of the node profile vector reflects the probability that nodes $i$ and $j$ will be placed inside the same community in a randomly selected individual network. Correlations between node profile vectors from different conditions are calculated to assess the similarity between node profiles. The average over all six condition pairs (rew+ ↔ rew-, rew+ ↔ pun+, ...) is taken as the mean similarity. Since raw correlation values are hard to interpret, the mean similarity is z-scored across all ROIs. The raw mean similarities are stored in the `dissim` vector and their z-scores in `dissim_zscore`.

Lower `dissim` values indicate the ROIs with the most between-condition variability in node profile. Dissimilarity significance is calculated using a Monte Carlo procedure. First, individual module assignment vectors are randomly shuffled. Then the same procedure is applied to calculate the null distribution of dissimilarity: agreement is calculated for individual conditions, then for each ROI the node profile vectors are correlated across conditions yielding dissimilarity values, which are averaged and z-scored. This procedure is repeated `n_nulls` times. The uncorrected p-value of a ROI is the fraction of null z-scores that are lower than its observed z-score. The entire procedure is applied to each gamma separately, independently of the other gammas.

Finally, dissimilarity p-values are FDR corrected to reveal (for each gamma) the set of ROIs with a significantly variable node profile.


> **Analysis type**: Multiple γ (calculations)

In [None]:
import json
from itertools import combinations
from os.path import join

import numpy as np
import pandas as pd
from bct.algorithms.clustering import agreement
from dn_utils.networks import zscore_vector
from dn_utils.path import path
from statsmodels.stats.multitest import fdrcorrection
from tqdm.notebook import tqdm

In [None]:
# Name of the atlas / parcellation; used below as a subdirectory name when
# building the paths to the correlation-matrix files.
atlas = "combined_roi"
# Gamma values analysed independently of each other: 0.5, 1.0, 1.5 and 2.0
# (the stop value 2.5 is excluded by np.arange). These floats are later used
# both as dict keys and to build the "gamma_<value>" directory names.
# NOTE(review): gamma is presumably the resolution parameter of the community
# detection that produced the module assignments -- confirm.
gamma_range = np.arange(0.5, 2.5, 0.5)

In [None]:
# Input locations: <bsc>/corrmats/<atlas>/unthr/gamma_<value>/
path_corrmats = join(path["bsc"], "corrmats")
path_atlas = join(path_corrmats, atlas)
path_corrmats_unthr = join(path_atlas, "unthr")

# Aggregated module assignments, one array per gamma (keyed by the gamma value)
m = dict()
for gamma in gamma_range:
    gamma_str = str(float(gamma)).replace(".", "_")
    path_corrmats_unthr_gamma = join(path_corrmats_unthr, f"gamma_{gamma_str}")
    path_m_aggregated = join(path_corrmats_unthr_gamma, "m_aggregated.npy")
    m[gamma] = np.load(path_m_aggregated)

# Subject exclusion table; the "ok_all" column is later used as a mask that
# selects the subjects kept in the analysis
path_exclusion = join(path["nistats"], "exclusion/exclusion.csv")
df_exclusion = pd.read_csv(path_exclusion, index_col=0)
ok_index = df_exclusion["ok_all"]

# Table describing the ROIs of the selected atlas
df_roi = pd.read_csv(join(path_atlas, "roi_table_filtered.csv"))

# Meta information about the dimensions of the aggregated arrays
with open(join(path_atlas, "corrmats_aggregated.json"), "r") as f:
    corrmats_meta = json.load(f)

# Sizes of the analysed data: included subjects, conditions, prediction error
# signs and ROIs
n_subjects_ok = sum(ok_index)
n_conditions, n_perr_sign, n_roi = (
    len(corrmats_meta[dim_key]) for dim_key in ("dim2", "dim3", "dim4")
)

In [None]:
def corr_rowwise(arr1, arr2):
    """Calculate correlations between corresponding rows of two arrays.

    Args:
        arr1: 2D array-like of shape (n_rows, n_cols).
        arr2: 2D array-like with the same shape as `arr1`.

    Returns:
        1D array of length n_rows. Element i is the Pearson correlation
        between `arr1[i]` and `arr2[i]`. A row without any variance yields
        NaN for that element.

    Raises:
        ValueError: If the two arrays do not have the same shape.
    """
    arr1 = np.asarray(arr1, dtype=float)
    arr2 = np.asarray(arr2, dtype=float)
    if arr1.shape != arr2.shape:
        raise ValueError(
            f"arrays must have the same shape, got {arr1.shape} and {arr2.shape}"
        )

    # Only the n correlations between matching rows are needed, so compute
    # them directly instead of building the full (2n x 2n) correlation matrix
    # with np.corrcoef and discarding everything but one diagonal.
    dev1 = arr1 - arr1.mean(axis=1, keepdims=True)
    dev2 = arr2 - arr2.mean(axis=1, keepdims=True)
    numerator = np.sum(dev1 * dev2, axis=1)
    denominator = np.linalg.norm(dev1, axis=1) * np.linalg.norm(dev2, axis=1)

    # Clip to the valid range, as np.corrcoef does, to absorb round-off error.
    return np.clip(numerator / denominator, -1, 1)

def shuffle_along_axis(a, axis):
    """Return a new array with the entries of `a` permuted along `axis`.

    Each 1D slice of `a` taken along `axis` receives its own permutation.
    Randomness comes from NumPy's global random state (np.random.rand), so
    the result is reproducible after np.random.seed.
    """
    # Sorting random keys along the axis yields, for every slice, a random
    # ordering of its indices.
    random_keys = np.random.rand(*a.shape)
    permutation = np.argsort(random_keys, axis=axis)
    return np.take_along_axis(a, permutation, axis=axis)

def calculate_dissimiliarty(m):
    """Calculate mean between-condition correlations of node profiles.

    For each of the four task conditions an agreement matrix is computed from
    the module assignments. Corresponding rows (node profiles) of two
    agreement matrices are then correlated, and these row-wise correlations
    are averaged over different sets of condition pairs. The returned values
    are mean correlations, so lower values mean less similar node profiles.

    Args:
        m: Array of module assignments indexed as
            m[partition, condition, prediction error sign, node], where
            condition 0 / 1 is reward / punishment and sign 0 / 1 is
            increasing / decreasing (naming taken from the variables below).
            NOTE(review): the first axis is presumably subjects -- confirm.

    Returns:
        Tuple (dissim, dissim_perr, dissim_con) of 1D arrays with one value
        per node (length m.shape[-1]):
            dissim: mean over all six condition pairs,
            dissim_perr: mean over the two pairs differing only in prediction
                error sign (rew+ vs rew-, pun+ vs pun-),
            dissim_con: mean over the two pairs differing only in condition
                (rew+ vs pun+, rew- vs pun-).
    """
    # Number of partitions entering each agreement matrix. Derived from the
    # input rather than from a module-level variable, so the function gives
    # correct results for any subset of partitions.
    n_partitions = m.shape[0]

    # Condition dependent agreements (agreement expects nodes x partitions)
    profiles = {
        "rew_inc": agreement(m[:, 0, 0].T),
        "rew_dec": agreement(m[:, 0, 1].T),
        "pun_inc": agreement(m[:, 1, 0].T),
        "pun_dec": agreement(m[:, 1, 1].T),
    }

    # A node shares a module with itself in every partition, so the diagonal
    # is set to the maximal possible agreement.
    # NOTE(review): assumes `agreement` does not already set it -- confirm.
    for profile in profiles.values():
        np.fill_diagonal(profile, n_partitions)

    # Row-wise correlations for every pair of conditions, computed only once
    # and reused by the three averages below.
    pair_corr = {
        (first, second): corr_rowwise(profiles[first], profiles[second])
        for first, second in combinations(profiles, 2)
    }

    # All combinations
    dissim = np.zeros((m.shape[-1]))
    for corr in pair_corr.values():
        dissim = dissim + corr
    dissim = dissim / len(pair_corr)

    # Between prediction errors
    dissim_perr = np.zeros((m.shape[-1]))
    dissim_perr = dissim_perr + pair_corr["rew_inc", "rew_dec"]
    dissim_perr = dissim_perr + pair_corr["pun_inc", "pun_dec"]
    dissim_perr = dissim_perr / 2

    # Between conditions
    dissim_con = np.zeros((m.shape[-1]))
    dissim_con = dissim_con + pair_corr["rew_inc", "pun_inc"]
    dissim_con = dissim_con + pair_corr["rew_dec", "pun_dec"]
    dissim_con = dissim_con / 2

    return dissim, dissim_perr, dissim_con

In [None]:
# Number of Monte Carlo repetitions; the seed makes the shuffles reproducible
n_nulls = 10_000
np.random.seed(0)

for gamma in gamma_range:
    print(f"γ = {gamma}")

    gamma_str = str(float(gamma)).replace(".", "_")
    path_corrmats_unthr_gamma = join(path_corrmats_unthr, f"gamma_{gamma_str}")
    # Module assignments of the included subjects only
    mt = m[gamma][ok_index]

    # Observed values and their z-scores
    dissim, dissim_perr, dissim_con = calculate_dissimiliarty(mt)
    dissim_zscore, dissim_perr_zscore, dissim_con_zscore = (
        zscore_vector(values) for values in (dissim, dissim_perr, dissim_con)
    )

    # Null distributions of the z-scores: one row per repetition, one column
    # per ROI. Each repetition shuffles the module assignments along the last
    # axis and repeats the calculation done for the observed data.
    dissim_null, dissim_perr_null, dissim_con_null = (
        np.zeros((n_nulls, n_roi)) for _ in range(3)
    )
    null_tables = (dissim_null, dissim_perr_null, dissim_con_null)
    for rep in tqdm(range(n_nulls)):
        m_null = shuffle_along_axis(mt, 3)
        for null_table, null_values in zip(
            null_tables, calculate_dissimiliarty(m_null)
        ):
            null_table[rep] = zscore_vector(null_values)

    # Uncorrected p-value: fraction of null z-scores below the observed one
    pval = np.mean(dissim_null < dissim_zscore, axis=0)
    pval_perr = np.mean(dissim_perr_null < dissim_perr_zscore, axis=0)
    pval_con = np.mean(dissim_con_null < dissim_con_zscore, axis=0)

    # FDR corrected p-values (second element returned by fdrcorrection)
    _, pval_fdr = fdrcorrection(pval)
    _, pval_perr_fdr = fdrcorrection(pval_perr)
    _, pval_con_fdr = fdrcorrection(pval_con)

    # Collect the results as new columns of the ROI table and save them
    new_columns = {
        f"dissim_{gamma_str}": dissim,
        f"dissim_perr_{gamma_str}": dissim_perr,
        f"dissim_con_{gamma_str}": dissim_con,
        f"dissim_zscore_{gamma_str}": dissim_zscore,
        f"dissim_perr_zscore_{gamma_str}": dissim_perr_zscore,
        f"dissim_con_zscore_{gamma_str}": dissim_con_zscore,
        f"pval_unc_{gamma_str}": pval,
        f"pval_perr_unc_{gamma_str}": pval_perr,
        f"pval_con_unc_{gamma_str}": pval_con,
        f"pval_fdr_{gamma_str}": pval_fdr,
        f"pval_perr_fdr_{gamma_str}": pval_perr_fdr,
        f"pval_con_fdr_{gamma_str}": pval_con_fdr,
    }
    df_dissim = df_roi.copy()
    for column_name, column_values in new_columns.items():
        df_dissim[column_name] = column_values

    path_output = join(path_corrmats_unthr_gamma, "node_profile_dissimilarity.csv")
    df_dissim.to_csv(path_output)