# Investigating which areas show significant second level effects at which times

In [1]:
%matplotlib notebook
import numpy as np
import helpers
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import source_visualisations as sv
import mne

## Data preparation
Which data?

In [2]:
# Name of the HDF5 result file analysed in this notebook. Exactly one
# `basefile` assignment is active; the commented-out assignments are
# alternative result files, and the comment above each one lists the analysis
# settings it is associated with.

# baseline [-0.3, 0], trialregs_dot=5
# basefile = 'source_HCPMMP1_allsubs_201703301614.h5'

# baseline None, trialregs_dot=5
#basefile = 'source_HCPMMP1_allsubs_201706091054.h5'

# baseline (-0.3, 0), trialregs_dot=5, GLM in source space
#basefile = 'source_sequential_201706141650.h5'

# baseline (-0.3, 0), trialregs_dot=5, GLM in source space, move_dist, sum_dot_y
#basefile = 'source_sequential_201706191442.h5'

# baseline (-0.3, 0), trialregs_dot=5, GLM in source space, move_dist, 
# sum_dot_y, constregs=0 for 1st dot
#basefile = 'source_sequential_201706201654.h5'

# baseline (-0.3, 0), only first 3 dots, trialregs_dot=3, GLM in source space, move_dist, 
# sum_dot_y, constregs=0 for 1st dot
#basefile = 'source_sequential_201706261151.h5'

# label mode = (abs)max, baseline (-0.3, 0), only first 3 dots, trialregs_dot=3, GLM in source space, 
# move_dist, sum_dot_y, constregs=0 for 1st dot
#basefile = 'source_sequential_201706281100.h5'

# baseline (-0.3, 0), only first 3 dots, trialregs_dot=3, move_dist, 
# sum_dot_y, constregs=0 for 1st dot, 
# label_tc normalised across trials, times and subjects
basefile = 'source_sequential_201707031206.h5'

Choose the measure and identify the threshold.

In [3]:
# measure to analyse, e.g. one of: mu_p_large, mu_testval, mu_t, mu_mean
measure = 'mu_p_large'

# regressors whose values make up the empirical distribution of the measure
# (regressors = None selects all of them)
regressors = ['dot_x', 'dot_y', 'abs_dot_x', 'abs_dot_y']

# regressors that are left out of the empirical distribution
exclude = 'trialregs'

# empirical error level for a single time point and area: the threshold is
# the (1 - alpha) quantile of the empirical distribution of measure values
alpha = 0.05

threshold, measure_cdf = sv.find_slabs_threshold(
    basefile,
    measure,
    regressors=regressors,
    exclude=exclude,
    quantile=1 - alpha,
    return_cdf=True,
    verbose=1,
)

print('\nthreshold = {:.4f}'.format(threshold))

excluding:
intercept
entropy
response
trial_time

adding abs_dot_x
adding abs_dot_y
adding dot_x
adding dot_y
N = 101360

threshold = 0.5100


Typical large values of each possible measure (used to set the y-axis limit when plotting), and whether the measure is plotted on a logarithmic y-axis.

In [4]:
# upper y-axis limit used when plotting each measure
ylims = dict(
    mu_p_large=1,
    mu_t=8,
    mu_z=8,
    mu_testval=0.02,
    mu_mean=0.04,
)

# whether each measure is plotted on a logarithmic y-axis
logy = dict(
    mu_p_large=False,
    mu_t=False,
    mu_z=False,
    mu_testval=True,
    mu_mean=False,
)

## Load data for all selected regressors and identify significant clusters in time
The idea is to identify sequences of high measure values that are unlikely to happen by chance. This is all based on the empirical measure distribution used above. This distribution defines the probability with which I would get a particular measure value if I were to draw one randomly across locations, time points and regressors. So it is a permutation distribution. I then use the cumulative distribution function (cdf) of this distribution to first select a cluster threshold, i.e., a measure value which has to be exceeded in order to form a sequence cluster. Then I identify clusters, i.e., sequences of measure values within an area that exceed the threshold. The p-value of a found cluster is the product of 1-cdf(value) over the measure values in the sequence.

In [5]:
# Collect the time clusters of every selected regressor, one table each.
# NOTE(review): this loop needs `regressors` to be an explicit list; the
# `regressors = None` option mentioned where it is defined would fail here.
clusters = []
for r_name in regressors:
    srcfile = basefile[:-3] + '_slabs_%s.h5' % r_name
    file = 'mne_subjects/fsaverage/bem/' + srcfile
    src_df = pd.read_hdf(file, key='second_level_src')
    clusters += [sv.get_time_clusters(src_df[measure], threshold, measure_cdf)]

# stack the per-regressor tables under a (regressor, cluster) index
clusters = pd.concat(clusters, names=['regressor', 'cluster'], keys=regressors)

# number of clusters found per regressor
clusters['label'].groupby(level='regressor').count()

regressor
dot_x        1088
dot_y         512
abs_dot_x     852
abs_dot_y     573
Name: label, dtype: int64

Now FDR-correct the p-values of the found clusters.

In [6]:
# FDR-correct the cluster p-values (stored as log10) and keep only the
# clusters for which the null hypothesis is rejected.
# NOTE: `clusters` is replaced by its filtered version, so running this cell
# a second time corrects and filters the already reduced table.
cluster_alpha = 0.001
reject, pval = mne.stats.fdr_correction(10 ** clusters['log10p'], alpha=cluster_alpha)
clusters = clusters.assign(pval_corrected=pval)[reject]

# add region of area for information
def get_region(area):
    """Return the name of the section that the label `area` belongs to.

    Labels starting with '??' are reported as 'whole hemisphere'. For any
    other label the part `area[2:-7]` (label without its first two and last
    seven characters) is looked up in the 'area name' column of
    `sv.Glasser_areas`; the 'main section' found there selects rows of
    `sv.Glasser_sections`, and the first `name` value of that selection is
    returned.
    """
    if area.startswith('??'):
        return 'whole hemisphere'

    is_area = sv.Glasser_areas['area name'] == area[2:-7]
    main_section = sv.Glasser_areas[is_area]['main section']
    return sv.Glasser_sections.loc[main_section].name.values[0]
# attach the region of every cluster's label
clusters['region'] = clusters['label'].map(get_region)

# show the clusters ordered by start time within each regressor
clusters.sort_values(by='start_t').sort_index(level='regressor', sort_remaining=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,label,start_t,end_t,log10p,pval_corrected,region
regressor,cluster,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
dot_x,103,L_5L_ROI-lh,30,80,-9.612352,6.535747e-09,paracentral lobular and mid cingulate cortex
dot_x,979,R_V2_ROI-rh,100,130,-10.386901,1.193414e-09,early visual cortex
dot_x,718,R_DVT_ROI-rh,100,130,-6.822627,2.556690e-06,posterior cingulate cortex
dot_x,421,L_V3A_ROI-lh,110,130,-5.260115,7.055753e-05,dorsal stream visual cortex
dot_x,908,R_ProS_ROI-rh,110,130,-4.809204,1.784704e-04,posterior cingulate cortex
dot_x,415,L_V2_ROI-lh,110,130,-6.834924,2.544640e-06,early visual cortex
dot_x,998,R_V3_ROI-rh,110,130,-6.442309,5.905340e-06,early visual cortex
dot_x,518,L_v23ab_ROI-lh,110,130,-7.546101,5.659540e-07,posterior cingulate cortex
dot_x,424,L_V3B_ROI-lh,110,140,-6.832061,2.544640e-06,dorsal stream visual cortex
dot_x,1084,R_v23ab_ROI-rh,110,120,-5.361814,5.717196e-05,posterior cingulate cortex


In [7]:
# number of clusters per regressor in the current `clusters` table
clusters['label'].groupby(level='regressor').count()

regressor
dot_x        234
dot_y         46
abs_dot_x     50
abs_dot_y     21
Name: label, dtype: int64

## Find all clusters of a given area or region

In [8]:
# show all clusters whose label contains this text
area = 'v23ab'
clusters[clusters['label'].str.contains(area, regex=False)]

Unnamed: 0_level_0,Unnamed: 1_level_0,label,start_t,end_t,log10p,pval_corrected,region
regressor,cluster,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
dot_x,518,L_v23ab_ROI-lh,110,130,-7.546101,5.65954e-07,posterior cingulate cortex
dot_x,519,L_v23ab_ROI-lh,150,190,-11.856874,4.725651e-11,posterior cingulate cortex
dot_x,521,L_v23ab_ROI-lh,290,560,-72.412068,5.85635e-70,posterior cingulate cortex
dot_x,1084,R_v23ab_ROI-rh,110,120,-5.361814,5.717196e-05,posterior cingulate cortex
dot_x,1085,R_v23ab_ROI-rh,150,200,-12.16807,2.416775e-11,posterior cingulate cortex
dot_x,1087,R_v23ab_ROI-rh,290,560,-74.232264,1.7719910000000001e-71,posterior cingulate cortex
abs_dot_x,850,R_v23ab_ROI-rh,580,600,-4.351296,0.000424982,posterior cingulate cortex
abs_dot_y,570,R_v23ab_ROI-rh,190,210,-4.325864,0.0004477858,posterior cingulate cortex


In [9]:
# show all clusters whose region name contains this text
region = 'dorsolat'
clusters[clusters['region'].str.contains(region, regex=False)]

Unnamed: 0_level_0,Unnamed: 1_level_0,label,start_t,end_t,log10p,pval_corrected,region
regressor,cluster,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
dot_x,147,L_8BL_ROI-lh,420,430,-4.026945,0.000824,dorsolateral prefrontal cortex
dot_x,155,L_8C_ROI-lh,440,460,-5.489748,4.3e-05,dorsolateral prefrontal cortex
dot_x,377,L_SFL_ROI-lh,450,480,-6.862985,2e-06,dorsolateral prefrontal cortex
dot_x,1049,R_i6-8_ROI-rh,440,460,-5.257434,7.1e-05,dorsolateral prefrontal cortex
abs_dot_x,348,L_a9-46v_ROI-lh,290,310,-4.002392,0.000869,dorsolateral prefrontal cortex
abs_dot_x,370,L_s6-8_ROI-lh,310,340,-6.66279,4e-06,dorsolateral prefrontal cortex
abs_dot_x,489,R_8Av_ROI-rh,310,320,-3.950332,0.000969,dorsolateral prefrontal cortex
abs_dot_x,494,R_8C_ROI-rh,330,350,-4.882728,0.000152,dorsolateral prefrontal cortex
abs_dot_x,503,R_9p_ROI-rh,320,330,-3.951344,0.000969,dorsolateral prefrontal cortex


## Investigate individual regressors

In [10]:
# regressor to inspect and an independent copy of its clusters
r_name = 'dot_x'
r_clusters = clusters.xs(key=r_name, level='regressor').copy()

# load the 'second_level_src' table stored for this regressor
srcfile = basefile[:-3] + '_slabs_%s.h5' % r_name
file = 'mne_subjects/fsaverage/bem/' + srcfile
src_df = pd.read_hdf(file, key='second_level_src')

### Check the times at which we see significant clusters

In [11]:
# Boolean table (areas x times): True where a time point of an area lies
# inside one of the clusters of the selected regressor.
times = src_df[measure].index.levels[1]
sigareas = r_clusters.label.unique()
significant = pd.DataFrame(np.zeros((sigareas.size, times.size), bool), 
                           index=sigareas, columns=times)
for row in r_clusters.itertuples():
    significant.loc[row.label, slice(row.start_t, row.end_t)] = True
    
fig, ax = plt.subplots()
# left edges of the bars: every bar is 10 wide and centred on its time point
left = significant.columns - 5
# iterate the property cycle through its public interface (yields one
# {'color': ...} dict per entry) instead of reading the private `_left`
colors = list(plt.rcParams['axes.prop_cycle'])

# plot left hemi 
numlh = significant[significant.index.map(lambda l: l[0]) == 'L'].sum()
# align='edge' makes `left` the left edge of each bar; with the default
# 'center' alignment the bars are shifted by half a bin relative to the
# x-limits set below
ax.bar(left, height=numlh, width=10, bottom=0, align='edge',
       color=colors[0]['color'])
# plot right hemi, stacked on top of the left-hemisphere counts
ax.bar(left, 
       height=significant[significant.index.map(lambda l: l[0]) == 'R'].sum(), 
       width=10, bottom=numlh, align='edge', color=colors[1]['color'])
ax.set_xlabel('time');
ax.set_ylabel('number of areas with significant effect');
ax.legend(['L', 'R']);
ax.set_xlim(left[0], left[-1]+10);

<IPython.core.display.Javascript object>

### Identify areas

In [12]:
def get_areas(times):
    """Return the areas that are significant at every time point in `times`.

    Parameters
    ----------
    times : scalar or sequence
        A single time point, or any sequence of time points (list, tuple,
        array, index). Each one must be a column of the module-level
        `significant` table.

    Returns
    -------
    Index with the row labels of `significant` whose entries are True in all
    selected columns.
    """
    # wrap a single time point; accept any sequence, not only lists
    if np.isscalar(times):
        times = [times]
    else:
        times = list(times)

    return significant[significant[times].all(axis=1)].index

def active_areas(times, times2=None):
    """Tabulate the areas that are significant at all time points in `times`.

    When `times2` is given, the areas found for `times` and `times2` are
    split into three groups: 'only 1' (found for `times` only), 'intersect'
    (found for both) and 'only 2' (found for `times2` only). Without
    `times2` the second set of areas is empty.

    Prints the current regressor name `r_name` and returns a DataFrame with
    the columns 'area' and 'region', indexed by group and position.
    """
    print(r_name)

    first = get_areas(times)
    second = pd.Index([]) if times2 is None else get_areas(times2)

    # one Series per group, stacked under the group names
    groups = [first.difference(second),
              first.intersection(second),
              second.difference(first)]
    stacked = pd.concat([pd.Series(group) for group in groups],
                        keys=['only 1', 'intersect', 'only 2'])

    table = pd.DataFrame(stacked, columns=['area'])
    table['region'] = table.area.apply(get_region)

    return table

In [13]:
# compare the areas significant at time 120 with those significant at time 170
active_areas(times=120, times2=170)

dot_x


Unnamed: 0,Unnamed: 1,area,region
only 1,0,L_V1_ROI-lh,primary visual cortex
only 1,1,L_V3B_ROI-lh,dorsal stream visual cortex
only 1,2,R_ProS_ROI-rh,posterior cingulate cortex
only 1,3,R_RI_ROI-rh,early auditory cortex
only 1,4,R_V3_ROI-rh,early visual cortex
intersect,0,L_V2_ROI-lh,early visual cortex
intersect,1,L_V3A_ROI-lh,dorsal stream visual cortex
intersect,2,L_V3_ROI-lh,early visual cortex
intersect,3,L_v23ab_ROI-lh,posterior cingulate cortex
intersect,4,R_DVT_ROI-rh,posterior cingulate cortex


In [14]:
# compare the areas significant at time 320 with those significant at time 490
active_areas(times=320, times2=490)

dot_x


Unnamed: 0,Unnamed: 1,area,region
only 1,0,L_31a_ROI-lh,posterior cingulate cortex
only 1,1,L_31pd_ROI-lh,posterior cingulate cortex
only 1,2,L_5L_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,3,L_5m_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,4,L_5mv_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,5,L_7Am_ROI-lh,superior parietal cortex
only 1,6,L_LO1_ROI-lh,MT+ complex and neighboring visual areas
only 1,7,L_MST_ROI-lh,MT+ complex and neighboring visual areas
only 1,8,L_PCV_ROI-lh,posterior cingulate cortex
only 1,9,L_PIT_ROI-lh,ventral stream visual cortex


In [15]:
# compare the areas significant at time 490 with those significant at time 600
active_areas(times=490, times2=600)

dot_x


Unnamed: 0,Unnamed: 1,area,region
only 1,0,L_1_ROI-lh,somatosensory and motor cortex
only 1,1,L_24dv_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,2,L_2_ROI-lh,somatosensory and motor cortex
only 1,3,L_31pv_ROI-lh,posterior cingulate cortex
only 1,4,L_33pr_ROI-lh,anterior cingulate and medial prefrontal cortex
only 1,5,L_3a_ROI-lh,somatosensory and motor cortex
only 1,6,L_3b_ROI-lh,somatosensory and motor cortex
only 1,7,L_4_ROI-lh,somatosensory and motor cortex
only 1,8,L_6d_ROI-lh,premotor cortex
only 1,9,L_7m_ROI-lh,posterior cingulate cortex


In [16]:
# list the areas significant at time 300
active_areas(times=300)

dot_x


Unnamed: 0,Unnamed: 1,area,region
only 1,0,L_24dv_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,1,L_31pd_ROI-lh,posterior cingulate cortex
only 1,2,L_31pv_ROI-lh,posterior cingulate cortex
only 1,3,L_33pr_ROI-lh,anterior cingulate and medial prefrontal cortex
only 1,4,L_3a_ROI-lh,somatosensory and motor cortex
only 1,5,L_3b_ROI-lh,somatosensory and motor cortex
only 1,6,L_4_ROI-lh,somatosensory and motor cortex
only 1,7,L_5L_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,8,L_5mv_ROI-lh,paracentral lobular and mid cingulate cortex
only 1,9,L_7Am_ROI-lh,superior parietal cortex


### Show significant clusters

In [17]:
def get_cluster_tcs(clus, baseval=0):
    """Return the time courses of `measure` restricted to the given clusters.

    Values of `src_df[measure]` that lie inside one of the clusters in
    `clus` (rows with `label`, `start_t` and `end_t`) are kept; all other
    values are replaced by `baseval`. The result has one row per time point
    and one column per label whose maximum exceeds `baseval`. Columns are
    indexed by ('hemi', 'label'): the upper-cased second-to-last character of
    the original label, and the original label without its first two and
    last seven characters.
    """
    values = src_df[measure]

    # flag every (label, time) entry that is covered by a cluster
    in_cluster = pd.Series(np.zeros_like(values, dtype=bool), index=values.index)
    for cluster in clus.itertuples():
        in_cluster.loc[cluster.label, slice(cluster.start_t, cluster.end_t)] = True

    # blank out everything outside the clusters, then one column per label
    tcs = values.where(in_cluster, other=baseval)
    tcs = tcs.reset_index('label').pivot(columns='label')[measure]

    # keep only the labels that exceed baseval at some time point
    peaks = tcs.max()
    kept = peaks.index[peaks > baseval]
    tcs = tcs[kept]

    # split each label into hemisphere and bare area name
    tcs.columns = pd.MultiIndex.from_arrays(
        [kept.map(lambda l: l[-2].upper()), kept.map(lambda l: l[2:-7])],
        names=['hemi', 'label'])

    return tcs

### clusters from a particular region

In [18]:
print(r_name)

# clusters are selected by the start of their region name
region = 'premotor'

sv.show_timecourses(
    get_cluster_tcs(r_clusters[r_clusters['region'].str.startswith(region)]),
    [threshold, ylims[measure]],
    logy=logy[measure])

dot_x


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### clusters associated with a particular area

In [19]:
print(r_name)

# put the full area name in underscores such as '_1_' to match exactly
area = '_24dv_'

# Plain substring test on the label. Unlike `s.find(area) > 0` it also
# accepts a match at the very start of the label, which agrees with the
# `find(area) >= 0` selection used on the full cluster table.
sv.show_timecourses(
    get_cluster_tcs(r_clusters[r_clusters.label.str.contains(area, regex=False)]),
    [threshold, ylims[measure]], logy=logy[measure])

dot_x


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### clusters with reliable effect around a particular time

In [20]:
print(r_name)

# clusters are selected if this time point lies within [start_t, end_t]
time = 200

sv.show_timecourses(
    get_cluster_tcs(
        r_clusters[(r_clusters['start_t'] <= time) & (r_clusters['end_t'] >= time)]),
    [threshold, ylims[measure]],
    logy=logy[measure])

dot_x


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Investigate consistency

In [21]:
# show all columns of `src_df` (including 'consistency') for one
# (label, time) entry
src_df.loc[('L_8C_ROI-lh', 450)]

mu_mean         0.024796
mu_std          0.004097
mu_t            6.051603
mu_testval      0.000550
mu_p_large      0.936000
sigma_mean      0.014791
sigma_std       0.003744
theta_mean      0.582226
theta_std       0.092782
lp_mean        67.354911
lp_std          1.756538
overlap        -3.061309
consistency     0.032500
Name: (L_8C_ROI-lh, 450), dtype: float64

In [22]:
# Mean of the 'consistency' column over the time span of every cluster.
# The column is built in one assignment, so it is also defined when
# `r_clusters` has no rows; filling it row by row would leave it missing
# then and make the filter below fail. Assumes the cluster index of
# `r_clusters` is unique.
r_clusters['mean_consistency'] = [
    src_df.loc[(cl.label, slice(cl.start_t, cl.end_t)), 'consistency'].mean()
    for cl in r_clusters.itertuples()]

# clusters whose mean consistency is below 0.5
r_clusters[r_clusters.mean_consistency < 0.5]

Unnamed: 0_level_0,label,start_t,end_t,log10p,pval_corrected,region,mean_consistency
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
11,L_10v_ROI-lh,550,570,-4.155024,0.0006338043,anterior cingulate and medial prefrontal cortex,0.374167
66,L_33pr_ROI-lh,610,630,-4.680578,0.0002270399,anterior cingulate and medial prefrontal cortex,0.477667
98,L_52_ROI-lh,180,190,-4.520593,0.0003082058,insular and frontal opercular cortex,0.05275
109,L_5m_ROI-lh,360,390,-5.982309,1.522111e-05,paracentral lobular and mid cingulate cortex,0.47875
147,L_8BL_ROI-lh,420,430,-4.026945,0.0008240648,dorsolateral prefrontal cortex,0.474
152,L_8BM_ROI-lh,370,390,-4.411649,0.0003781938,anterior cingulate and medial prefrontal cortex,0.415167
155,L_8C_ROI-lh,440,460,-5.489748,4.333801e-05,dorsolateral prefrontal cortex,0.0925
177,L_FOP2_ROI-lh,580,600,-4.927244,0.0001391708,insular and frontal opercular cortex,0.137833
272,L_OP2-3_ROI-lh,180,190,-5.115736,9.575768e-05,posterior opercular cortex,0.12325
334,L_POS2_ROI-lh,640,660,-4.71996,0.0002088605,posterior cingulate cortex,0.404167
