In [None]:
from mvpa2.suite import *
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from itertools import product, combinations
from numpy.testing import assert_array_equal

In [2]:
%matplotlib inline

# Some functions to filter matrices and to compute averages of those matrices

In [3]:
def compute_within_between_mean(mat, wanted_labels, labels):
    """Average a labelled square matrix within and outside a group of labels.

    Parameters
    ----------
    mat : square 2D array
        Row/column ``i`` is described by ``labels[i]``.
    wanted_labels : container
        Labels that define the group of interest.
    labels : sequence
        One label per row/column of ``mat``.

    Returns
    -------
    (mean_within, mean_between) : tuple
        ``mean_within`` is the mean of the off-diagonal entries whose row
        and column labels are both in ``wanted_labels``; ``mean_between``
        is the mean of the entries where exactly one of the two labels is
        in ``wanted_labels``.
    """
    side = mat.shape[0]
    assert(side == mat.shape[1])
    assert(len(labels) == side)

    inside = np.array([lbl in wanted_labels for lbl in labels], dtype=bool)
    outside = np.logical_not(inside)

    # Both triangles are included in each mask, so the matrix does not have
    # to be symmetric (for a symmetric one the means are unaffected).
    # Within-group entries, leaving out the diagonal.
    within = np.zeros(mat.shape, dtype=bool)
    within[np.ix_(inside, inside)] = True
    np.fill_diagonal(within, False)

    # Entries linking a group member with a non-member, in both directions.
    between = np.zeros(mat.shape, dtype=bool)
    between[np.ix_(inside, outside)] = True
    between[np.ix_(outside, inside)] = True

    return (mat[within].mean(), mat[between].mean())

In [4]:
def filter_matrix(mat, wanted_labels, labels):
    """Extract the sub-matrix of a labelled square matrix for a subset of labels.

    Parameters
    ----------
    mat : square 2D array
        Row/column ``i`` is described by ``labels[i]``.
    wanted_labels : container
        Labels to keep.
    labels : sequence
        One label per row/column of ``mat``.

    Returns
    -------
    pandas.DataFrame
        The rows and columns of ``mat`` whose label is in ``wanted_labels``,
        in the order in which they appear in ``labels``; index and columns
        are set to the kept labels.
    """
    assert(mat.shape[0] == mat.shape[1])
    assert(len(labels) == mat.shape[0])
    mask_labels = [lbl in wanted_labels for lbl in labels]
    # Build a real list rather than calling `filter`: on Python 3 `filter`
    # returns a one-shot iterator, which would be exhausted by `index=` and
    # leave nothing for `columns=`.
    subset_labels = [lbl for lbl in labels if lbl in wanted_labels]
    return pd.DataFrame(mat[np.ix_(mask_labels, mask_labels)],
                        index=subset_labels, columns=subset_labels)

In [5]:
# save correlations within/between across core and extended systems
def compute_correlations(mean_rdms, systems_to_compute, labels):
    """Compute within/between means of every matrix for each requested system.

    Parameters
    ----------
    mean_rdms : iterable of pandas.DataFrame
        Square matrices; only their ``.values`` are used.
    systems_to_compute : iterable of str
        Keys of the module-level ``systems`` dictionary.
    labels : sequence
        One label per row/column of the matrices.

    Returns
    -------
    dict
        Maps each system name to a DataFrame with one row per input matrix
        and the columns ``'within'`` and ``'between'``.
    """
    per_system = dict()
    for system in systems_to_compute:
        # ROI labels belonging to this system (looked up in the global dict)
        members = systems[system]
        rows = [compute_within_between_mean(rdm.values, members, labels)
                for rdm in mean_rdms]
        per_system[system] = pd.DataFrame(np.asarray(rows),
                                          columns=['within', 'between'])
    return per_system

In [6]:
# Dictionary mapping each system name to the list of ROI labels it contains.
# Labels have the form '<ROI> - <hemisphere>'. Not every ROI is listed for
# both hemispheres: 'ventral_core' has 'aFus - R' only and 'anterior_core'
# has 'IFG1 - L' only.
systems = {
    'early_visual': ['EV1 - L', 'EV1 - R', 'EV2 - L', 'EV2 - R'],
    'precuneus': ['dPreCun - L', 'dPreCun - R',
                  'mPreCun - L', 'mPreCun - R',
                  'vPreCun - L', 'vPreCun - R'],
    'dorsal_core': ['pMTG - L', 'pMTG - R',
                    'mMTG - L', 'mMTG - R',
                    'aMTG - L', 'aMTG - R'],
    'ventral_core': ['OccFus - L', 'OccFus - R',
                     'pFus - L', 'pFus - R',
                     'mFus - L', 'mFus - R',
                     'aFus - R'],
    'anterior_core': ['IFG1 - L', 'IFG2 - L', 'IFG2 - R'],
    'theory_of_mind': [ 'TPJ - L', 'TPJ - R', 'MPFC - L', 'MPFC - R']
}

# Composite systems, built by concatenating the label lists defined above.
systems['core'] = systems['dorsal_core'] + systems['ventral_core'] + systems['anterior_core']
systems['extended'] = systems['precuneus'] + systems['theory_of_mind']
# Everything in this dictionary except the 'early_visual' ROIs.
systems['core+extended'] = systems['core'] + systems['extended']

# First using Raiders

In [7]:
# Load the two parts of the level-2 RDMs for the Raiders data
# (h5load is presumably provided by the `mvpa2.suite` star import).
part1 = h5load('raidershpal_rdm_level2_part1.hdf5')
part2 = h5load('raidershpal_rdm_level2_part2.hdf5')

In [8]:
# ROI labels, taken from the 'Abbreviation' column of the ROI table.
# NOTE(review): assumes the rows of roi_coord.csv are in the same order as
# the rows/columns of the loaded matrices -- confirm.
labels = pd.read_csv('roi_coord.csv')['Abbreviation'].tolist()

In [9]:
# Sanity check: the two parts are zipped together when averaging, so they
# must contain the same number of matrices.
len(part1) == len(part2)

True

In [10]:
# The two parts are stored in the same order, so matching entries can be
# paired directly. Each pair is averaged after a Fisher transformation
# (arctanh); the result is therefore still in z-space.
mean_rdms = []
for p1, p2 in zip(part1, part2):
    mean_rdms.append((np.arctanh(p1) + np.arctanh(p2))/2)

In [11]:
# Symmetrize every averaged matrix and rescale it by its own diagonal
mean_rdms_norm = []
for rdm in mean_rdms:
    # average the matrix with its transpose (values are still Fisher-z
    # transformed here), then go back to correlation
    sym = np.tanh((rdm + rdm.T) / 2)

    # normalize by noise values, as in Guntupalli et al., 2016:
    # entry (i, j) is divided by sqrt(diag_i * diag_j)
    diag = np.diag(sym)
    mean_rdms_norm.append(sym / np.sqrt(np.outer(diag, diag)))

In [12]:
# Build a long-format table with one row per (subject pair, ROI pair).
# Only the strict upper triangle (k=1) of each matrix is kept. The label
# matrices below have the same layout as the data matrices and are indexed
# with the same upper-triangle indices, so every value gets the right labels.
pairs = list(product(labels, labels))
pairs_mat = np.array([' + '.join(p) for p in pairs]).reshape((len(labels), -1))

pairs_mat_triu = pairs_mat[np.triu_indices_from(pairs_mat, k=1)]
mean_rdms_triu = [r[np.triu_indices_from(r, k=1)] for r in mean_rdms_norm]

# make also a "system" label: map every ROI to the base system it belongs to
# (composite systems are skipped, they only repeat the same ROIs).
# `.items()` works on both Python 2 and 3, unlike `.iteritems()`.
label2system = dict()
for key, value in systems.items():
    if key in ['core', 'extended', 'core+extended']:
        continue
    for lbl in value:
        label2system[lbl] = key

pairs_system = [(label2system[p1], label2system[p2]) for p1, p2 in pairs]
pairs_system_mat = np.array(['+'.join(p) for p in pairs_system]).reshape((len(labels), -1))
pairs_system_mat_triu = pairs_system_mat[np.triu_indices_from(pairs_system_mat, k=1)]

npairs = len(pairs_mat_triu)
# one entry per unordered pair of the 11 subjects (sub01 ... sub11)
subj_pairs = ['+'.join(c) for c in combinations(['sub{0:02}'.format(i) for i in range(1, 12)], 2)]
nsubj_pairs = len(subj_pairs)
# The subject-pair labels are generated here rather than read from the data,
# so make sure there is exactly one matrix per pair.
assert len(mean_rdms_norm) == nsubj_pairs, \
    'expected {0} matrices (one per subject pair), got {1}'.format(nsubj_pairs, len(mean_rdms_norm))
data = {
    'corr': np.hstack(mean_rdms_triu),
    # each subject pair is repeated for all of its ROI pairs ...
    'subj': np.repeat(subj_pairs, npairs),
    # ... while the ROI/system labels are cycled once per subject pair
    'rois': np.tile(pairs_mat_triu, nsubj_pairs),
    'systems': np.tile(pairs_system_mat_triu, nsubj_pairs)
}

df_hpal = pd.DataFrame(data, columns=['subj', 'rois', 'systems', 'corr'])

In [13]:
# Peek at the first rows of the long-format table
df_hpal.head()

Unnamed: 0,subj,rois,systems,corr
0,sub01+sub02,EV1 - L + EV1 - R,early_visual+early_visual,0.430905
1,sub01+sub02,EV1 - L + EV2 - L,early_visual+early_visual,0.553327
2,sub01+sub02,EV1 - L + EV2 - R,early_visual+early_visual,0.414231
3,sub01+sub02,EV1 - L + OccFus - L,early_visual+ventral_core,0.303334
4,sub01+sub02,EV1 - L + OccFus - R,early_visual+ventral_core,0.296197


In [14]:
# Write the long-format table to disk, without the row index
df_hpal.to_csv('hpal_pairwise_corr.csv', index=False)

In [15]:
# Drop the early visual ROIs: keep only the rows/columns whose label
# belongs to the core or extended systems
mean_rdms_norm_noev = []
for rdm in mean_rdms_norm:
    mean_rdms_norm_noev.append(filter_matrix(rdm, systems['core+extended'], labels))

In [16]:
# Labels of the filtered matrices, taken from the columns of the first one
labels_noev = mean_rdms_norm_noev[0].columns.tolist()

In [17]:
# Systems (keys of `systems`) for which within/between means are computed
systems_to_compute = ['core', 'extended']

In [18]:
# Dict mapping each system to a DataFrame of within/between means,
# one row per filtered matrix
correlations_all = compute_correlations(mean_rdms_norm_noev, systems_to_compute, labels_noev)

In [19]:
# Write one CSV per system; `.items()` works on both Python 2 and 3
# (`.iteritems()` exists on Python 2 dicts only).
for key, df in correlations_all.items():
    df.to_csv('{0}_hpal_withinbetween_correlations.csv'.format(key), index=False)

## Now do the same within each main system

In [20]:
# Restrict the (early-visual-free) matrices to the core system, then compute
# within/between means for each core subsystem relative to the rest of the core.
mean_rdms_norm_core = [filter_matrix(rdm.values, systems['core'], labels_noev) for rdm in mean_rdms_norm_noev]

systems_to_compute = ['dorsal_core', 'ventral_core', 'anterior_core']
correlations_core = compute_correlations(mean_rdms_norm_core, systems_to_compute, mean_rdms_norm_core[0].columns.tolist())

# `.items()` works on both Python 2 and 3 (`.iteritems()` is Python 2 only)
for key, df in correlations_core.items():
    df.to_csv('{0}-withincore_hpal_withinbetween_correlations.csv'.format(key), index=False)

In [21]:
# Restrict the (early-visual-free) matrices to the extended system, then
# compute within/between means for each extended subsystem relative to the
# rest of the extended system.
mean_rdms_norm_ext = [filter_matrix(rdm.values, systems['extended'], labels_noev) for rdm in mean_rdms_norm_noev]

systems_to_compute = ['theory_of_mind', 'precuneus']
correlations_ext = compute_correlations(mean_rdms_norm_ext, systems_to_compute, mean_rdms_norm_ext[0].columns.tolist())

# `.items()` works on both Python 2 and 3 (`.iteritems()` is Python 2 only)
for key, df in correlations_ext.items():
    df.to_csv('{0}-withinext_hpal_withinbetween_correlations.csv'.format(key), index=False)

# Do the same for the task data

In [22]:
# Load the level-2 RDMs for the task data
task_data = h5load('taskdata_rdm_level2.hdf5')

In [23]:
# The sample targets and the feature ROI labels of the first matrix
# (presumably its row and column labels) must be identical and in the same order
assert_array_equal(task_data[0].sa.targets, task_data[0].fa.roi)

In [24]:
# ROI labels for the task data, taken from the first matrix.
# NOTE(review): assumes every matrix in task_data uses this same label
# order -- only the first one is inspected here.
labels_task = task_data[0].sa.targets

These are per-subject distance matrices.

In [25]:
# Number of matrices, shape of the first one, and its summary
len(task_data), task_data[0].shape, task_data[0]

(33, (30, 30), <Dataset: 30x30@float64, <sa: centers,targets>, <fa: roi>>)

In [26]:
# these are distances, not correlations, so go back to correlation
# (correlation = 1 - distance).
# NOTE: this rebinds `task_data` from the loaded datasets to the results of
# `1. - rdm.samples` (presumably plain arrays, which have no `.samples`), so
# the cell cannot be run twice without re-running the cell that loads task_data.
task_data = [1. - rdm.samples for rdm in task_data]

In [27]:
# Build a long-format table with one row per (subject, ROI pair).
# Only the strict upper triangle (k=1) of each matrix is kept. The label
# matrices below have the same layout as the data matrices and are indexed
# with the same upper-triangle indices, so every value gets the right labels.
pairs = list(product(labels_task, labels_task))
pairs_mat = np.array([' + '.join(p) for p in pairs]).reshape((len(labels_task), -1))

pairs_mat_triu = pairs_mat[np.triu_indices_from(pairs_mat, k=1)]
task_data_triu = [r[np.triu_indices_from(r, k=1)] for r in task_data]

# make also a "system" label: map every ROI to the base system it belongs to
# (composite systems are skipped, they only repeat the same ROIs).
# `.items()` works on both Python 2 and 3, unlike `.iteritems()`.
label2system = dict()
for key, value in systems.items():
    if key in ['core', 'extended', 'core+extended']:
        continue
    for lbl in value:
        label2system[lbl] = key

pairs_system = [(label2system[p1], label2system[p2]) for p1, p2 in pairs]
# The system-label matrix must be shaped from the task labels (not from the
# Raiders `labels`), since `pairs` was built from `labels_task`.
pairs_system_mat = np.array(['+'.join(p) for p in pairs_system]).reshape((len(labels_task), -1))
pairs_system_mat_triu = pairs_system_mat[np.triu_indices_from(pairs_system_mat, k=1)]

npairs = len(pairs_mat_triu)
# subject identifiers sub01 ... sub33
subj = ['sub{0:02}'.format(i) for i in range(1, 34)]
nsubj = len(subj)
# The subject labels are generated here rather than read from the data,
# so make sure there is exactly one matrix per subject.
assert len(task_data) == nsubj, \
    'expected {0} matrices (one per subject), got {1}'.format(nsubj, len(task_data))
data_task = {
    'corr': np.hstack(task_data_triu),
    # each subject is repeated for all of its ROI pairs ...
    'subj': np.repeat(subj, npairs),
    # ... while the ROI/system labels are cycled once per subject
    'rois': np.tile(pairs_mat_triu, nsubj),
    'systems': np.tile(pairs_system_mat_triu, nsubj)
}

df_task = pd.DataFrame(data_task, columns=['subj', 'rois', 'systems', 'corr'])

In [28]:
# Peek at the first rows of the long-format task table
df_task.head()

Unnamed: 0,subj,rois,systems,corr
0,sub01,IFG1 - L + EV2 - L,anterior_core+early_visual,0.15864
1,sub01,IFG1 - L + EV1 - R,anterior_core+early_visual,0.057679
2,sub01,IFG1 - L + mFus - R,anterior_core+ventral_core,0.015006
3,sub01,IFG1 - L + mFus - L,anterior_core+ventral_core,0.194959
4,sub01,IFG1 - L + EV1 - L,anterior_core+early_visual,0.299822


In [29]:
# Write the long-format task table to disk, without the row index
df_task.to_csv('task_pairwise_corr.csv', index=False)

In [30]:
# Drop the early visual ROIs: keep only the rows/columns whose label
# belongs to the core or extended systems
task_data_noev = []
for rdm in task_data:
    task_data_noev.append(filter_matrix(rdm, systems['core+extended'], labels_task))

In [31]:
# Labels of the filtered task matrices, taken from the columns of the first one
labels_task_noev = task_data_noev[0].columns.tolist()

In [32]:
# Systems (keys of `systems`) for which within/between means are computed
systems_to_compute = ['core', 'extended']

In [33]:
# Dict mapping each system to a DataFrame of within/between means,
# one row per filtered task matrix
correlations_all_task = compute_correlations(task_data_noev, systems_to_compute, labels_task_noev)

In [34]:
# Write one CSV per system; `.items()` works on both Python 2 and 3
# (`.iteritems()` exists on Python 2 dicts only).
for key, df in correlations_all_task.items():
    df.to_csv('{0}_task_withinbetween_correlations.csv'.format(key), index=False)

## Now do the same within each main system

In [35]:
# Restrict the (early-visual-free) task matrices to the core system, then
# compute within/between means for each core subsystem.
# The matrices must be filtered with the *task* labels (labels_task_noev):
# the task ROIs are stored in a different order than the Raiders ones, so
# using the Raiders labels (labels_noev) would select the wrong rows/columns.
task_data_core = [filter_matrix(rdm.values, systems['core'], labels_task_noev) for rdm in task_data_noev]

systems_to_compute = ['dorsal_core', 'ventral_core', 'anterior_core']
correlations_core_task = compute_correlations(task_data_core, systems_to_compute, task_data_core[0].columns.tolist())

# `.items()` works on both Python 2 and 3 (`.iteritems()` is Python 2 only)
for key, df in correlations_core_task.items():
    df.to_csv('{0}-withincore_task_withinbetween_correlations.csv'.format(key), index=False)

In [36]:
# Restrict the (early-visual-free) task matrices to the extended system, then
# compute within/between means for each extended subsystem.
# The matrices must be filtered with the *task* labels (labels_task_noev):
# the task ROIs are stored in a different order than the Raiders ones, so
# using the Raiders labels (labels_noev) would select the wrong rows/columns.
task_data_ext = [filter_matrix(rdm.values, systems['extended'], labels_task_noev) for rdm in task_data_noev]

systems_to_compute = ['theory_of_mind', 'precuneus']
correlations_ext_task = compute_correlations(task_data_ext, systems_to_compute, task_data_ext[0].columns.tolist())

# `.items()` works on both Python 2 and 3 (`.iteritems()` is Python 2 only)
for key, df in correlations_ext_task.items():
    df.to_csv('{0}-withinext_task_withinbetween_correlations.csv'.format(key), index=False)

# Tests

In [37]:
from numpy.testing import assert_array_equal

In [38]:
def test_filter_matrix():
    """Check filter_matrix against direct np.ix_ indexing on a 6x6 matrix."""
    all_labels = range(6)

    # matrix of ones with a block of threes in the lower-right corner
    mat = np.ones((6, 6))
    corner = [3, 4, 5]
    mat[np.ix_(corner, corner)] = 3

    # labels are the row/column positions, so the wanted labels can be used
    # directly as indices to build the expected sub-matrix
    for wanted in (range(3), [3, 4, 5], [0, 1, 3, 5]):
        expected = mat[np.ix_(wanted, wanted)]
        assert_array_equal(filter_matrix(mat, wanted, all_labels), expected)

test_filter_matrix()

In [39]:
def test_compute_within_between_mean():
    """Check compute_within_between_mean on a 10x10 two-block matrix."""
    # two diagonal blocks with constant values, zeros elsewhere
    mat = np.zeros((10, 10))
    mat[:5, :5] = 5
    mat[5:, 5:] = 20
    assert_array_equal(compute_within_between_mean(mat, range(5), range(10)), [5.0, 0.0])
    assert_array_equal(compute_within_between_mean(mat, range(5, 10), range(10)), [20.0, 0.0])

    # Make the check discriminating: a non-zero between-block value, and a
    # diagonal that would change the within mean if it were not excluded.
    mat[:5, 5:] = 2
    mat[5:, :5] = 2
    np.fill_diagonal(mat, 1000)
    assert_array_equal(compute_within_between_mean(mat, range(5), range(10)), [5.0, 2.0])
    assert_array_equal(compute_within_between_mean(mat, range(5, 10), range(10)), [20.0, 2.0])
test_compute_within_between_mean()