# Investigating which areas show significant second level effects at which times

In [1]:
%matplotlib notebook
import numpy as np
import helpers
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import source_visualisations as sv
import source_statistics as ss
import mne

## Data preparation
Which data?

In [2]:
# baseline [-0.3, 0], trialregs_dot=5
# basefile = 'source_HCPMMP1_allsubs_201703301614.h5'

# baseline None, trialregs_dot=5
#basefile = 'source_HCPMMP1_allsubs_201706091054.h5'

# baseline (-0.3, 0), trialregs_dot=5, GLM in source space
#basefile = 'source_sequential_201706141650.h5'

# baseline (-0.3, 0), trialregs_dot=5, GLM in source space, move_dist, sum_dot_y
#basefile = 'source_sequential_201706191442.h5'

# baseline (-0.3, 0), trialregs_dot=5, GLM in source space, move_dist, 
# sum_dot_y, constregs=0 for 1st dot
#basefile = 'source_sequential_201706201654.h5'

# baseline (-0.3, 0), only first 3 dots, trialregs_dot=3, GLM in source space, move_dist, 
# sum_dot_y, constregs=0 for 1st dot
#basefile = 'source_sequential_201706261151.h5'

# label mode = (abs)max, baseline (-0.3, 0), only first 3 dots, trialregs_dot=3, GLM in source space, 
# move_dist, sum_dot_y, constregs=0 for 1st dot
#basefile = 'source_sequential_201706281100.h5'

# baseline (-0.3, 0), only first 3 dots, trialregs_dot=3, move_dist, 
# sum_dot_y, constregs=0 for 1st dot, 
# label_tc normalised across trials, times and subjects
#basefile = 'source_sequential_201707031206.h5'

# label mode = mean, baseline (-0.3, 0), only first 3 dots, 
# trialregs_dot=3, source GLM, move_dist, sum_dot_y, constregs=0 for 1st dot, 
# label_tc normalised across trials, times and subjects
#basefile = 'source_sequential_201708241011.h5'

# Active selection (all alternatives listed above are commented out):
# label mode = mean, baseline (-0.3, 0), only first 3 dots, 
# trialregs_dot=0, source GLM, sum_dot_y, motoprep, constregs=0 for 1st dot, 
# subject-specific normalisation of DM without centering and scaling by std
# label_tc normalised across trials, times and subjects
basefile = 'source_sequential_201709011758.h5'

choose measure and identify threshold

In [3]:
# name of the measure that is thresholded and clustered below,
# e.g.: mu_p_large, mu_testval, mu_t, mu_mean
#measure = 'mu_mean'
measure = 'mu_p_large'

# which regressors should define the empirical value distribution?
# set regressors = None for all
#regressors = ['accev', 'dot_x', 'dot_y', 'abs_dot_x', 'abs_dot_y']
regressors = ['motoprep', 'response', 'trial_time', 'entropy']

# do not use these regressors to define empirical value distribution
# NOTE(review): `exclude` is neither assigned nor passed to
# find_slabs_threshold in this cell, so these two options currently do nothing
#exclude = 'trialregs'
#exclude = 'dotregs'

# the empirical error threshold for a single time point and area
# (the selected threshold will be the quantile 1-alpha of the 
# empirical distribution of measure values)
alpha = 0.05

# returns the threshold and, because return_cdf=True, a second object that is
# later handed to get_fdrcorr_clusters as `measure_cdf`
threshold, measure_cdf = ss.find_slabs_threshold(
    basefile, measure, quantile=1-alpha, regressors=regressors, 
    verbose=1, return_cdf=True)

print('\nthreshold = {:.4f}'.format(threshold))

adding motoprep
adding response
adding trial_time
adding entropy
N = 101360

threshold = 0.8070


what are large values for the possible measures (used for setting y-lim in plotting)

In [4]:
# per-measure "large" value; passed together with `threshold` to
# sv.show_timecourses in the plotting cells below
ylims = {'mu_p_large': 1, 'mu_t': 8, 'mu_z': 8, 'mu_testval': 0.02, 'mu_mean': 0.04}
# per-measure flag forwarded as `logy=` to sv.show_timecourses
logy = {'mu_p_large': False, 'mu_t': False, 'mu_z': False, 'mu_testval': True, 'mu_mean': False}

## Load data for all selected regressors and identify significant clusters in time
The idea is to identify sequences of high measure values that are very unlikely to arise by chance. This is all based on the empirical measure distribution used above. This distribution defines the probability with which I would get a particular measure value if I were to draw one at random across locations, time points and regressors. So it is a permutation distribution. I then use the cumulative distribution function (cdf) of this distribution to first select a cluster threshold, i.e., a measure value which has to be exceeded in order to form a sequence cluster. Then I identify clusters, i.e., sequences of measure values within an area that exceed the threshold. The p-value of a found cluster is the product of 1-cdf(value) over the measure values in the sequence.

In [5]:
# Collect the clusters of all selected regressors in one table.
# NOTE(review): fdr_alpha presumably is the level of the FDR correction
# applied to the cluster p-values -- confirm in source_statistics
clusters = ss.get_fdrcorr_clusters(basefile, regressors, measure, threshold, measure_cdf,
                                   fdr_alpha=0.001)

# number of clusters found per regressor
print('cluster counts:')
print(clusters.label.groupby(level='regressor').count())

# display the table grouped by regressor; the preceding sort by start_t is
# meant to order the clusters of each regressor by onset
clusters.sort_values('start_t').sort_index(level='regressor', sort_remaining=False)

cluster counts:
regressor
motoprep      304
response       78
trial_time    260
entropy        13
Name: label, dtype: int64


Unnamed: 0_level_0,Unnamed: 1_level_0,label,start_t,end_t,log10p,pval_corrected,region
regressor,cluster,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
motoprep,577,R_EC_ROI-rh,0,40,-7.974979,1.151800e-07,medial temporal cortex
motoprep,148,L_FST_ROI-lh,0,80,-16.758410,7.068667e-16,MT+ complex and neighboring visual areas
motoprep,434,R_13l_ROI-rh,0,20,-5.586145,1.570787e-05,orbital and polar frontal cortex
motoprep,157,L_IP0_ROI-lh,0,10,-3.602604,8.696131e-04,inferior parietal cortex
motoprep,693,R_PHA1_ROI-rh,0,30,-7.760767,1.790150e-07,medial temporal cortex
motoprep,173,L_LO1_ROI-lh,0,20,-5.394607,2.358185e-05,MT+ complex and neighboring visual areas
motoprep,177,L_LO3_ROI-lh,0,80,-20.300524,2.789497e-19,MT+ complex and neighboring visual areas
motoprep,696,R_PHA2_ROI-rh,0,30,-6.732450,1.528610e-06,medial temporal cortex
motoprep,824,R_V7_ROI-rh,0,30,-6.006772,6.711040e-06,dorsal stream visual cortex
motoprep,397,L_p24_ROI-lh,0,50,-9.042400,1.247945e-08,anterior cingulate and medial prefrontal cortex


## Find all clusters of a given area or region

In [7]:
# substring that has to occur somewhere in the area label
area = 'v23ab'
clusters[clusters.label.map(lambda lab: area in lab)]

Unnamed: 0_level_0,Unnamed: 1_level_0,label,start_t,end_t,log10p,pval_corrected,region
regressor,cluster,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
accev,516,L_v23ab_ROI-lh,50,440,-84.896615,9.668077e-83,posterior cingulate cortex
accev,1084,R_v23ab_ROI-rh,40,450,-94.620351,7.305735e-92,posterior cingulate cortex
dot_x,382,L_v23ab_ROI-lh,110,130,-6.50416,3.458962e-06,posterior cingulate cortex
dot_x,383,L_v23ab_ROI-lh,150,190,-11.076668,1.950184e-10,posterior cingulate cortex
dot_x,384,L_v23ab_ROI-lh,300,490,-51.541657,3.5028169999999997e-50,posterior cingulate cortex
dot_x,747,R_v23ab_ROI-rh,110,120,-4.476397,0.0002207585,posterior cingulate cortex
dot_x,748,R_v23ab_ROI-rh,150,200,-12.001178,2.512187e-11,posterior cingulate cortex
dot_x,749,R_v23ab_ROI-rh,300,490,-57.797886,2.311581e-56,posterior cingulate cortex


In [8]:
# substring that has to occur somewhere in the region name
region = 'frontal'
clusters[clusters.region.map(lambda name: region in name)]

Unnamed: 0_level_0,Unnamed: 1_level_0,label,start_t,end_t,log10p,pval_corrected,region
regressor,cluster,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
accev,7,L_10r_ROI-lh,560,580,-4.762206,1.242919e-04,anterior cingulate and medial prefrontal cortex
accev,12,L_10v_ROI-lh,460,480,-4.814143,1.137707e-04,anterior cingulate and medial prefrontal cortex
accev,14,L_10v_ROI-lh,560,580,-5.258838,4.526926e-05,anterior cingulate and medial prefrontal cortex
accev,44,L_25_ROI-lh,440,470,-6.270397,5.415157e-06,anterior cingulate and medial prefrontal cortex
accev,46,L_25_ROI-lh,530,570,-9.594306,4.645072e-09,anterior cingulate and medial prefrontal cortex
accev,47,L_25_ROI-lh,650,670,-4.714587,1.349181e-04,anterior cingulate and medial prefrontal cortex
accev,69,L_33pr_ROI-lh,200,450,-55.565895,3.450714e-54,anterior cingulate and medial prefrontal cortex
accev,70,L_33pr_ROI-lh,470,490,-4.849118,1.059991e-04,anterior cingulate and medial prefrontal cortex
accev,81,L_44_ROI-lh,110,120,-3.952338,6.604948e-04,inferior frontal cortex
accev,152,L_8Ad_ROI-lh,220,260,-8.475903,5.198233e-08,dorsolateral prefrontal cortex


## Investigate individual regressors

In [14]:
# regressor that all following cells operate on
r_name = 'motoprep'
#r_name = 'trial_time'
# clusters of that regressor only; copied so that columns added to
# r_clusters later on do not end up in `clusters`
r_clusters = clusters.xs(r_name, level='regressor').copy()

# results of this regressor; the cells below index it by (label, time)
src_df = ss.load_src_df(basefile, r_name)

In [17]:
# Show the documentation of ss.load_src_df. This replaces the IPython-only
# `src_df_masked = ss.load_src_df?` query, which looked like an assignment
# but never assigned anything and is not valid Python outside IPython.
help(ss.load_src_df)

In [23]:
# NOTE(review): this is the same call that produced `src_df`; no masking
# argument is passed here, so the name may be misleading -- confirm intent
src_df_masked = ss.load_src_df(basefile, r_name)

In [24]:
# reshape mu_mean to wide format: one row per time point, one column per label
src_df_masked.mu_mean.reset_index('label').pivot(columns='label')

Unnamed: 0_level_0,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean,mu_mean
label,???-lh,???-rh,L_10d_ROI-lh,L_10pp_ROI-lh,L_10r_ROI-lh,L_10v_ROI-lh,L_11l_ROI-lh,L_13l_ROI-lh,L_1_ROI-lh,L_23c_ROI-lh,...,R_p24_ROI-rh,R_p24pr_ROI-rh,R_p32_ROI-rh,R_p32pr_ROI-rh,R_p47r_ROI-rh,R_p9-46v_ROI-rh,R_pOFC_ROI-rh,R_s32_ROI-rh,R_s6-8_ROI-rh,R_v23ab_ROI-rh
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,0.043901,0.034538,0.046118,0.053425,0.038276,0.028967,0.051728,0.036065,0.048666,0.025735,...,0.035931,0.048893,0.034143,0.033547,0.032161,0.044650,0.064914,0.038109,0.035353,0.039839
10,0.361928,0.086091,0.243153,0.284743,0.124399,0.094229,0.150971,0.325207,0.055046,0.168448,...,0.178863,0.060567,0.101554,0.029349,0.039108,0.184785,0.156507,0.061704,0.116651,0.100442
20,0.213201,0.083428,0.123182,0.127035,0.041220,0.034687,0.045120,0.150629,0.048058,0.088870,...,0.096501,0.047914,0.040346,0.040847,0.045606,0.061716,0.055350,0.039095,0.061056,0.114951
30,0.059133,0.040350,0.055692,0.071923,0.037995,0.038349,0.035020,0.077627,0.038964,0.069150,...,0.062199,0.045166,0.039034,0.052979,0.044601,0.037651,0.053455,0.027458,0.029884,0.053695
40,0.035101,0.035025,0.073773,0.066980,0.049376,0.055246,0.071471,0.063822,0.053890,0.041604,...,0.067802,0.036841,0.058219,0.038430,0.040709,0.041532,0.066340,0.039334,0.030546,0.049699
50,0.049290,0.035809,0.036917,0.046169,0.047343,0.048515,0.044333,0.062273,0.050518,0.039478,...,0.067856,0.040214,0.054218,0.044033,0.045197,0.047164,0.056145,0.043067,0.040480,0.043627
60,0.034262,0.039269,0.042352,0.046851,0.045065,0.051625,0.028577,0.057608,0.047129,0.037774,...,0.044884,0.037356,0.039834,0.024751,0.038570,0.035187,0.048663,0.037184,0.032014,0.047193
70,0.042730,0.039907,0.047408,0.054914,0.054107,0.049137,0.041558,0.044806,0.035794,0.039180,...,0.039736,0.040075,0.041901,0.028160,0.037209,0.034811,0.048676,0.032606,0.032962,0.036028
80,0.044111,0.029706,0.036830,0.061747,0.050120,0.041056,0.040877,0.044763,0.038810,0.029337,...,0.041607,0.039270,0.030441,0.026107,0.033813,0.037267,0.056471,0.025352,0.024697,0.032573
90,0.047772,0.034577,0.023789,0.033627,0.030807,0.027592,0.029444,0.025636,0.032111,0.026979,...,0.034283,0.031488,0.026093,0.027170,0.025513,0.031022,0.052749,0.024507,0.021462,0.027207


### Check the times at which we see significant clusters

In [15]:
# Boolean (area x time) table: True where the area lies inside one of the
# significant clusters of the currently selected regressor.
times = src_df[measure].index.levels[1]
sigareas = r_clusters.label.unique()
significant = pd.DataFrame(np.zeros((sigareas.size, times.size), bool), 
                           index=sigareas, columns=times)
for row in r_clusters.itertuples():
    # label-based slicing: start_t and end_t are both included
    significant.loc[row.label, slice(row.start_t, row.end_t)] = True

fig, ax = plt.subplots()

# left edge of each bar: bars are 10 wide and centred on their time point
left = significant.columns - 5

# Iterating the property cycler yields one dict per style (e.g.
# {'color': ...}); this avoids reading the private `_left` attribute.
colors = list(plt.rcParams['axes.prop_cycle'])

# first character of the label encodes the hemisphere ('L' or 'R')
hemi = significant.index.map(lambda l: l[0])

# plot left hemi
# align='edge' is given explicitly because `left` holds left bar edges; with
# the 'center' default of newer matplotlib every bar would be shifted by half
# a bin and no longer agree with the x-limits set below
numlh = significant[hemi == 'L'].sum()
ax.bar(left, height=numlh, width=10, bottom=0, color=colors[0]['color'],
       align='edge')
# plot right hemi, stacked on top of the left-hemisphere counts
ax.bar(left, 
       height=significant[hemi == 'R'].sum(), 
       width=10, bottom=numlh, color=colors[1]['color'], align='edge')
ax.set_xlabel('time');
ax.set_ylabel('number of areas with significant effect');
ax.legend(['L', 'R']);
ax.set_xlim(left[0], left[-1]+10);

<IPython.core.display.Javascript object>

### Count number of time points at which areas are in significant cluster

In [16]:
# number of time points inside a significant cluster per area, largest first
atmp = significant.sum(axis=1).sort_values(ascending=False)
# region name for every area, aligned on the same index
regions = pd.Series([ss.get_Glasser_section(a) for a in atmp.index],
                    index=atmp.index)
atmp = pd.concat([atmp, regions], axis=1, keys=['count', 'region'])
print(atmp.head(20))
# temporary helpers only; do not keep them in the notebook namespace
del atmp, regions

               count                                           region
R_OFC_ROI-rh      36                 orbital and polar frontal cortex
R_TA2_ROI-rh      31                      auditory association cortex
R_pOFC_ROI-rh     31                 orbital and polar frontal cortex
R_p24_ROI-rh      27  anterior cingulate and medial prefrontal cortex
L_TGv_ROI-lh      27                          lateral temporal cortex
R_TGd_ROI-rh      26                          lateral temporal cortex
R_13l_ROI-rh      26                 orbital and polar frontal cortex
L_LO3_ROI-lh      24         MT+ complex and neighboring visual areas
R_23c_ROI-rh      21                       posterior cingulate cortex
R_EC_ROI-rh       20                           medial temporal cortex
R_46_ROI-rh       19                   dorsolateral prefrontal cortex
R_DVT_ROI-rh      17                       posterior cingulate cortex
L_2_ROI-lh        17                   somatosensory and motor cortex
R_V6A_ROI-rh      16

### Identify areas

In [11]:
def get_areas(times):
    """Return the areas that are significant at every time point in `times`.

    Parameters
    ----------
    times : scalar or sequence of scalars
        Column label(s) of the notebook-level boolean table `significant`.
        A single time point, a list, a tuple, a NumPy array or a pandas
        Index are accepted.

    Returns
    -------
    pandas.Index
        Row labels of `significant` that are True in all selected columns.

    Raises
    ------
    KeyError
        If a requested time point is not a column of `significant`.
    """
    # A bare time point is wrapped; any other sequence is turned into a plain
    # list so that it selects several columns (a tuple would otherwise be
    # looked up as one single column key and fail).
    if np.isscalar(times):
        times = [times]
    else:
        times = list(times)

    return significant[significant[times].all(axis=1)].index

def active_areas(times, times2=None):
    """List the areas that are significant at all time points in `times`.

    When `times2` is given, the areas found for `times` and `times2` are
    split into three groups: areas found only for `times` ('only 1'), areas
    found for both ('intersect') and areas found only for `times2`
    ('only 2'). Without `times2` everything ends up in 'only 1'.

    The current value of the notebook variable `r_name` is printed as a
    side effect.

    Returns a DataFrame with the columns 'area' and 'region', indexed by
    group name and position within the group.
    """
    print(r_name)

    first = get_areas(times)
    second = pd.Index([]) if times2 is None else get_areas(times2)

    groups = [pd.Series(first.difference(second)),
              pd.Series(first.intersection(second)),
              pd.Series(second.difference(first))]
    stacked = pd.concat(groups, keys=['only 1', 'intersect', 'only 2'])

    table = pd.DataFrame(stacked, columns=['area'])
    # look up the region each area belongs to
    table['region'] = table.area.apply(ss.get_Glasser_section)

    return table

In [12]:
# compare the areas significant at time point 120 with those at 220
active_areas(120, 220)

response


Unnamed: 0,Unnamed: 1,area,region
only 1,0,L_23d_ROI-lh,posterior cingulate cortex
only 1,1,R_V6_ROI-rh,dorsal stream visual cortex
intersect,0,L_d23ab_ROI-lh,posterior cingulate cortex
intersect,1,R_23d_ROI-rh,posterior cingulate cortex
intersect,2,R_d23ab_ROI-rh,posterior cingulate cortex
only 2,0,L_V6_ROI-lh,dorsal stream visual cortex


In [13]:
# areas significant at time point 550
active_areas(550)

response


Unnamed: 0,Unnamed: 1,area,region
only 1,0,L_24dd_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,1,L_24dv_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,2,L_3a_ROI-lh,somatosensory and motor cortex
only 1,3,L_SCEF_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,4,L_V3CD_ROI-lh,MT+ complex and neighboring visual areas
only 1,5,L_VVC_ROI-lh,ventral stream visual cortex
only 1,6,R_24dv_ROI-rh,paracentral lobular and mid cingulate cortex
only 1,7,R_p24pr_ROI-rh,anterior cingulate and medial prefrontal cortex


In [91]:
aa = active_areas(200)

# split labels such as 'L_23d_ROI-lh' into the hemisphere letter (first
# character) and the bare area name (without 'L_' prefix and '_ROI-lh' suffix)
hemi_letter = aa.area.map(lambda label: label[0])
bare_name = aa.area.map(lambda label: label[2:-7])
df = pd.concat([hemi_letter, bare_name], axis=1, keys=['hemi', 'area'])

lh = df.area[df.hemi == 'L']
rh = df.area[df.hemi == 'R']

# area names found in one hemisphere only, then those found in both
for header, names in (('\nonly L:', np.setdiff1d(lh.values, rh.values)),
                      ('\nonly R:', np.setdiff1d(rh.values, lh.values)),
                      ('\ncommon:', np.intersect1d(rh.values, lh.values))):
    print(header)
    print(', '.join(names))

abs_dot_y

only L:
31pd, 31pv, 7m, d23ab

only R:
PHA1, VVC, v23ab

common:
23d


In [12]:
aa = active_areas(580, 650)

# print the three groups one after the other, each under its own heading
for header, group in (('\nonly 1:', 'only 1'),
                      ('\nintersect:', 'intersect'),
                      ('\nonly 2:', 'only 2')):
    print(header)
    print(aa.loc[group])

response

only 1:
            area                                    region
0   L_MST_ROI-lh  MT+ complex and neighboring visual areas
1  L_V3CD_ROI-lh  MT+ complex and neighboring visual areas

intersect:
              area                                           region
0       L_1_ROI-lh                   somatosensory and motor cortex
1     L_23d_ROI-lh                       posterior cingulate cortex
2    L_24dd_ROI-lh     paracentral lobular and mid cingulate cortex
3    L_24dv_ROI-lh     paracentral lobular and mid cingulate cortex
4       L_2_ROI-lh                   somatosensory and motor cortex
5     L_31a_ROI-lh                       posterior cingulate cortex
6    L_33pr_ROI-lh  anterior cingulate and medial prefrontal cortex
7      L_3a_ROI-lh                   somatosensory and motor cortex
8      L_3b_ROI-lh                   somatosensory and motor cortex
9       L_4_ROI-lh                   somatosensory and motor cortex
10     L_5m_ROI-lh     paracentral lobular an

In [None]:
# compare the areas significant at time point 490 with those at 600
active_areas(490, 600)

In [None]:
# compare the areas significant at time point 580 with those at 650
active_areas(580, 650)

### Show significant clusters

In [12]:
def get_cluster_tcs(clus, baseval=0):
    """Return the time courses of `measure` restricted to the given clusters.

    Reads the notebook variables `src_df` and `measure`.

    Parameters
    ----------
    clus : DataFrame
        One row per cluster with the fields `label`, `start_t` and `end_t`.
    baseval : scalar, default 0
        Value used wherever an area is outside all of the given clusters.

    Returns
    -------
    DataFrame
        Rows follow the remaining (time) index level, one column per area
        whose maximum exceeds `baseval`. Columns are a MultiIndex
        ('hemi', 'label'): the upper-cased second-to-last character of the
        original label and the label without its first two and last seven
        characters.
    """
    values = src_df[measure]

    # mark every (label, time) entry that lies inside one of the clusters;
    # the label-based slice includes both start_t and end_t
    in_cluster = pd.Series(np.zeros_like(values, dtype=bool), index=values.index)
    for cl in clus.itertuples():
        in_cluster.loc[cl.label, slice(cl.start_t, cl.end_t)] = True

    # blank out everything else and reshape to one column per label
    tcs = (values.where(in_cluster, other=baseval)
                 .reset_index('label')
                 .pivot(columns='label')[measure])

    # keep only areas that rise above the base value somewhere
    peak = tcs.max()
    kept = peak.index[peak > baseval]
    tcs = tcs[kept]

    # e.g. 'L_8Ad_ROI-lh' -> hemisphere 'L', area name '8Ad'
    tcs.columns = pd.MultiIndex.from_arrays(
        [kept.map(lambda l: l[-2].upper()), kept.map(lambda l: l[2:-7])],
        names=['hemi', 'label'])

    return tcs

### clusters from a particular region

In [14]:
print(r_name)

# clusters whose region name starts with this prefix are shown
region = 'dorso'

in_region = r_clusters.region.map(lambda name: name.startswith(region))
sv.show_timecourses(get_cluster_tcs(r_clusters[in_region]),
                    [threshold, ylims[measure]], logy=logy[measure])

dot_x


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### clusters associated with a particular area

In [None]:
print(r_name)

# put the full area name in underscores such as '_1_' to match exactly
area = '_VMV1_'

# Plain containment test: a match at the very start of a label (position 0)
# counts as well, which the previous `find(area) > 0` silently dropped.
has_area = r_clusters.label.apply(lambda s: area in s)

sv.show_timecourses(get_cluster_tcs(r_clusters[has_area]), 
                    [threshold, ylims[measure]], logy=logy[measure])

### clusters with reliable effect around a particular time

In [15]:
print(r_name)

# time point that has to lie inside the cluster (both ends included)
time = 230

covers_time = r_clusters.start_t.le(time) & r_clusters.end_t.ge(time)
sv.show_timecourses(get_cluster_tcs(r_clusters[covers_time]),
                    [threshold, ylims[measure]], logy=logy[measure])

dot_x


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Investigate consistency

In [15]:
# all stored values for a single (label, time) entry
src_df.loc[('L_8Ad_ROI-lh', 350)]

mu_mean         0.024773
mu_std          0.006247
mu_t            3.965776
mu_testval      0.001365
mu_p_large      0.791000
sigma_mean      0.024500
sigma_std       0.005728
theta_mean      0.802082
theta_std       0.102769
lp_mean        61.247414
lp_std          1.619361
overlap        -1.858091
consistency     0.691500
Name: (L_8Ad_ROI-lh, 350), dtype: float64

In [16]:
# attach the mean and the maximum of 'consistency' over the time span of
# each cluster (label-based slice, both ends included)
for cl in r_clusters.itertuples():
    cons = src_df.loc[(cl.label, slice(cl.start_t, cl.end_t)), 'consistency']
    r_clusters.loc[cl.Index, 'mean_consistency'] = cons.mean()
    r_clusters.loc[cl.Index, 'max_consistency'] = cons.max()

# clusters that never reach a consistency of 0.5, ordered by onset
r_clusters.loc[r_clusters.max_consistency < 0.5].sort_values('start_t')

Unnamed: 0_level_0,label,start_t,end_t,log10p,pval_corrected,region,mean_consistency,max_consistency
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
95,L_52_ROI-lh,180,190,-4.736629,0.000198,insular and frontal opercular cortex,0.043,0.058
475,L_a32pr_ROI-lh,300,320,-5.077124,0.0001,anterior cingulate and medial prefrontal cortex,0.288667,0.383
724,R_EC_ROI-rh,490,510,-4.929821,0.000136,medial temporal cortex,0.168167,0.2185
772,R_LBelt_ROI-rh,510,530,-4.513193,0.000307,early auditory cortex,0.306667,0.4055
511,L_s32_ROI-lh,540,570,-5.9676,1.5e-05,anterior cingulate and medial prefrontal cortex,0.317125,0.368
709,R_AIP_ROI-rh,570,580,-3.978405,0.000918,superior parietal cortex,0.0825,0.141
335,L_POS2_ROI-lh,640,660,-4.692268,0.000213,posterior cingulate cortex,0.401667,0.438
