### Recalculate distance from primary cortex, including gustatory and olfactory primary cortex

In [1]:
# adapted from 3_regionwise_dist.py


import pandas as pd, numpy as np, nibabel as nib
import xml.etree.ElementTree as ET

# Machine-specific absolute locations of the project data and of the
# Yerkes19 surface files.
data_dir = '/Users/sabine/workspace/distconnect_macaque/data'
yerkes_dir = '/Users/sabine/data/Yerkes19'

# data
# Pairwise table; its unique `source` entries define the regions below.
df = pd.read_pickle('%s/df_pairwise.pkl' % data_dir)

# Second column of the (header-less) M132 annotation and key files.
# NOTE: `areas_annot` and `cort` are reassigned further down in this cell
# when the Yerkes19 surface is loaded.
areas_annot = pd.read_csv('%s/downloads/M132LH/areas.csv' % data_dir,
                          header=None).values[:, 1]
areas_key = pd.read_csv('%s/downloads/M132LH/key.csv' % data_dir,
                        header=None).values[:, 1]

# Positions in the annotation whose value differs from the key index of
# 'MedialWall'.  The key index is looked up once instead of once per entry.
medial_wall_idx = np.where(areas_key == 'MedialWall')[0]
cort = np.array([n for n, val in enumerate(areas_annot)
                 if val != medial_wall_idx])

# One row per area that occurs as a source in the pairwise table.
df_regionwise = pd.DataFrame({'area': df.source.unique()})


# vertices of injection sites
# Every <Focus> element of the foci file describes one injection: the area
# label is the text of child 5 up to its first underscore, and the surface
# nodes are the whitespace-separated integers in child [16][3][1].
foci_file = '%s/M132.L.InjSites_20150701.32k_fs.wb.foci' % yerkes_dir
foci_root = ET.parse(foci_file).getroot()

inj_areas = []     # area labels, in order of first appearance
inj_groups = []    # per area: one node list for each injection found
row_of_area = {}   # area label -> position in the two lists above

for focus in foci_root.findall('Focus'):
    label = focus[5].text
    area = label[:label.index('_')]
    sites = [int(n) for n in focus[16][3][1].text.split()]
    if area in row_of_area:
        inj_groups[row_of_area[area]].append(sites)
    else:
        row_of_area[area] = len(inj_areas)
        inj_areas.append(area)
        inj_groups.append([sites])

# Areas with several injections get the de-duplicated union of all their
# nodes; areas with a single injection keep their node list untouched.
# Pooling happens before the frame is built, so no cell of the frame has to
# be overwritten through chained indexing afterwards.
inj_sites = []
for groups in inj_groups:
    if len(groups) == 1:
        inj_sites.append(groups[0])
    else:
        inj_sites.append(list(set(n for sites in groups for n in sites)))

df_inj = pd.DataFrame({'area': inj_areas, 'injection sites': inj_sites})

# These two labels are hyphenated in the foci file but written with an
# underscore in the region table.
df_inj = df_inj.replace({'area': {'9-46d': '9_46d',
                                  '9-46v': '9_46v'}})

# Left merge: every region keeps its row; regions without an injection get
# NaN in 'injection sites'.
df_regionwise = pd.merge(df_regionwise, df_inj, how='left', on='area')
del df_inj


# Yerkes19 surface
# NOTE(review): 32492 is presumably the number of surface nodes of one
# hemisphere and 28261 the number of left-hemisphere entries at the start of
# the dlabel file -- confirm against the Yerkes19 data.
N_SURFACE_NODES = 32492
N_LH_LABELS = 28261

mwall = np.load('%s/medial_wall.lh.label.npy' % yerkes_dir)
# All node ids that are not listed in the medial-wall label, in ascending
# order (one vectorised set difference instead of a per-node membership scan).
cort = np.setdiff1d(np.arange(N_SURFACE_NODES), mwall)
gdist_mat = np.load('%s/yerkes19_gdist_node-by-node.npy' % yerkes_dir)

f_M132 = '%s/MarkovCC12_M132_91-area.32k_fs_LR.dlabel.nii' % yerkes_dir
# np.asanyarray(img.dataobj) is nibabel's documented replacement for the
# deprecated img.get_data() and returns the same array.
areas_annot = np.asanyarray(nib.load(f_M132).dataobj).squeeze()[:N_LH_LABELS]
areas_key_yerkes = np.load('%s/M132_key.npy' % yerkes_dir)


# vertices of each region
# For each area: find its index in the key, select the annotation entries
# carrying that index, and translate their positions to node ids via `cort`.
vertices = []
for area in df_regionwise.area:
    area_idx = np.where(areas_key_yerkes == area)[0][0]
    vertices.append(list(cort[np.where(areas_annot == area_idx)[0]]))
df_regionwise['vertices'] = vertices



# distance from primary
def _nearest_primary(nodes, dist_mat, primary_names, primary_vertex_lists):
    """Return ``(name, distance)`` of the primary region closest to *nodes*.

    For every primary region, each node's distance to the region is the
    minimum of ``dist_mat`` over that region's vertices; these per-node
    minima are then averaged.  The region with the smallest average is
    returned together with that average.
    """
    dists = [dist_mat[np.ix_(nodes, prim)].min(axis=1).mean()
             for prim in primary_vertex_lists]
    best = int(np.argmin(np.array(dists)))
    return primary_names[best], dists[best]


def _nearest_primary_columns(node_lists, dist_mat, primary_names,
                             primary_vertex_lists):
    """Return the (name, distance) columns for a column of node lists.

    Rows whose entry is null stay NaN in both returned Series.  The name
    Series is created with object dtype so that strings and NaN can coexist.
    """
    names, dists = [], []
    for nodes, present in zip(node_lists, node_lists.notnull()):
        if present:
            name, dist = _nearest_primary(nodes, dist_mat, primary_names,
                                          primary_vertex_lists)
        else:
            name, dist = np.nan, np.nan
        names.append(name)
        dists.append(dist)
    return (pd.Series(names, index=node_lists.index, dtype=object),
            pd.Series(dists, index=node_lists.index, dtype='float64'))


# Names and vertex lists of the primary regions, both in table order so that
# position i of one list matches position i of the other.
is_primary = df_regionwise.area.isin(['V1', 'F1',
                                      'Core', '3',
                                      'Pir', 'Gu'])
primary_regions = list(df_regionwise.area[is_primary])
primary_vertices = list(df_regionwise.vertices[is_primary])

# minimum distance of injection from nearest primary cortex
inj_name, inj_dist = _nearest_primary_columns(
    df_regionwise['injection sites'], gdist_mat,
    primary_regions, primary_vertices)
df_regionwise['nearest primary region from injection site'] = inj_name
df_regionwise['injection distance from nearest primary region'] = inj_dist

# average minimum distance from all vertices within region to closest primary cortex
avg_name, avg_dist = _nearest_primary_columns(
    df_regionwise['vertices'], gdist_mat,
    primary_regions, primary_vertices)
df_regionwise['average nearest primary region'] = avg_name
df_regionwise['average distance from nearest primary region'] = avg_dist


# save
df_regionwise.to_pickle('%s/df_revrequest_regionwise.pkl' % data_dir)


In [17]:
# adapted from 4_pairwise_dist.py

import pandas as pd, numpy as np

df_regionwise = pd.read_pickle('%s/df_revrequest_regionwise.pkl' % data_dir)
df_pairwise = pd.read_pickle('%s/df_pairwise.pkl' % data_dir)

# 'EuclDIST' is only created here; nothing in this cell fills it.
for col in ('GDIST', 'EuclDIST', 'GDIST min'):
    df_pairwise[col] = np.nan


# gdist
# `gdist_mat` is the node-by-node distance matrix loaded in the first cell.

# area -> vertex list (the first row wins if an area were listed twice).
vertices_of = {}
for name, verts in zip(df_regionwise.area, df_regionwise['vertices']):
    vertices_of.setdefault(name, verts)

injected = df_regionwise[df_regionwise['injection sites'].notnull()]

for target, target_nodes in zip(injected.area, injected['injection sites']):
    to_target = df_pairwise.target == target

    for source in df_pairwise.source[to_target].unique():
        # Distances between every injection node of the target and every
        # vertex of the source region, extracted once and reused.
        sub = gdist_mat[np.ix_(target_nodes,
                               vertices_of[source])].astype('float32')
        pair = to_target & (df_pairwise.source == source)

        # .loc writes into df_pairwise itself; the chained form
        # df['col'][mask] = value may write into a temporary copy.
        df_pairwise.loc[pair, 'GDIST'] = sub.mean()
        df_pairwise.loc[pair, 'GDIST min'] = sub.min()


df_pairwise.to_pickle('%s/df_revrequest_pairwise.pkl' % data_dir)

In [None]:
# adapted from 5_regionwise_DLN.py

df_regionwise = pd.read_pickle('%s/df_revrequest_regionwise.pkl' % data_dir)
df_pairwise = pd.read_pickle('%s/df_revrequest_pairwise.pkl' % data_dir)


def _neuron_weighted_mean(pairs, dist_col):
    """Return the NEURONS-weighted mean of ``pairs[dist_col]``.

    Only rows where both the distance and the neuron count are present take
    part, in the numerator and in the denominator alike.  (Dividing by the
    neuron total of all rows while the sum of products skips NaN distances
    would treat unknown distances as zero.)  Returns NaN when no usable row
    with positive total weight exists.
    """
    usable = pairs[dist_col].notnull() & pairs['NEURONS'].notnull()
    weights = pairs.loc[usable, 'NEURONS']
    total = weights.sum()
    if not total > 0:
        return np.nan
    return (weights * pairs.loc[usable, dist_col]).sum() / total


for col in ('incoming', 'incoming_mindist', 'outgoing', 'outgoing_mindist'):
    df_regionwise[col] = np.nan

# Incoming: over all pairs projecting to an injected target.
has_injection = df_regionwise['injection sites'].notnull()
for target in df_regionwise.area[has_injection]:
    afferents = df_pairwise[df_pairwise.target == target]
    row = df_regionwise.area == target
    df_regionwise.loc[row, 'incoming'] = \
        _neuron_weighted_mean(afferents, 'GDIST')
    df_regionwise.loc[row, 'incoming_mindist'] = \
        _neuron_weighted_mean(afferents, 'GDIST min')

# Outgoing: over all pairs originating from each region.
for source in df_regionwise.area:
    efferents = df_pairwise[df_pairwise.source == source]
    row = df_regionwise.area == source
    df_regionwise.loc[row, 'outgoing'] = \
        _neuron_weighted_mean(efferents, 'GDIST')
    df_regionwise.loc[row, 'outgoing_mindist'] = \
        _neuron_weighted_mean(efferents, 'GDIST min')


# save
df_regionwise.to_pickle('%s/df_revrequest_regionwise.pkl' % data_dir)

In [30]:
# adapted from 6_regionwise_covariates.py


import pandas as pd, numpy as np

df_r = pd.read_pickle('%s/df_revrequest_regionwise.pkl' % data_dir)
df_p = pd.read_pickle('%s/df_revrequest_pairwise.pkl' % data_dir)


# region size
# Number of vertices of each region, stored as float like the other covariates.
df_r['region_size'] = pd.Series([len(v) for v in df_r['vertices']],
                                index=df_r.index, dtype='float64')

# location of injection site within the cortex
# Mean distance from the injection nodes to all nodes in `cort`; NaN for
# regions without an injection.  `gdist_mat` and `cort` come from the first
# cell of this notebook.
location = [gdist_mat[np.ix_(nodes, cort)].astype('float32').mean()
            if isinstance(nodes, list) else np.nan
            for nodes in df_r['injection sites']]
df_r['location'] = pd.Series(location, index=df_r.index, dtype='float64')


# region's average geodesic distance from injection sites (covariate for outgoing)
known_gdist = df_p[df_p['GDIST'].notnull()]


def _mean_known_gdist(src):
    """Return the mean non-null GDIST over pairs whose source is *src*.

    Returns NaN when *src* has no pair with a known distance.
    """
    vals = known_gdist['GDIST'][known_gdist.source == src].values
    return np.mean(vals) if len(vals) else np.nan


df_r['gdist_from_injs'] = pd.Series(
    [_mean_known_gdist(src) for src in df_r.area],
    index=df_r.index, dtype='float64')


# save
df_r.to_pickle('%s/df_revrequest_regionwise.pkl' % data_dir)

# Analysis / Notebook

In [37]:
% matplotlib inline

import pandas as pd, numpy as np
import seaborn as sns
import utils.stats as stats
import utils.viz as viz
import statsmodels.formula.api as smf
import utils.viz as viz
from scipy.stats import shapiro
import matplotlib.pyplot as plt

sns.set_style('whitegrid')

# Reload the region-wise and pair-wise tables written by the cells above.
df_r, df_p = (pd.read_pickle(template % data_dir)
              for template in ('%s/df_revrequest_regionwise.pkl',
                               '%s/df_revrequest_pairwise.pkl'))

In [38]:
# Short, formula-friendly names for the two columns used in the models below.
short_names = {
    'injection distance from nearest primary region': "dist_inj_prim",
    'incoming': "connectivity_distance",
}
df_r = df_r.rename(columns=short_names)

# Main analysis

In [45]:
# Areas to leave out of the main analysis.
excl_areas = ['9', '10', '11', '12', '9_46d', '9_46v', '46d', '46v', 'TEpd']

# True for rows whose area is on the exclusion list.
fltr = df_r.area.isin(excl_areas)

# Show the model variables of the excluded areas.
shown_columns = ['area', 'connectivity_distance', 'dist_inj_prim']
df_r.loc[fltr, shown_columns]

Unnamed: 0,area,connectivity_distance,dist_inj_prim
21,TEpd,14.170338,27.890625
42,9_46d,17.711961,19.8125
44,9_46v,20.402974,19.125
63,46d,11.692618,25.078125
64,46v,,
68,11,,
69,12,,
81,10,16.634001,24.53125
82,9,,


In [46]:
# Drop a row if its area is excluded or it has no connectivity distance.
is_excluded = df_r.area.isin(excl_areas)
lacks_distance = df_r['connectivity_distance'].isnull()
fltr = is_excluded | lacks_distance


In [None]:
# Covariates-only model, fitted on the rows that pass the filter.
included_rows = df_r[~fltr]
null = smf.ols(formula='connectivity_distance ~ location + region_size',
               data=included_rows).fit()

In [48]:
# Full model: the two covariates plus the injection's distance from the
# nearest primary region, fitted on the rows that pass the filter.
model = smf.ols(formula='connectivity_distance ~ location \
                                                 + region_size \
                                                 + dist_inj_prim', data=df_r[~fltr]).fit()
# Call form of print: valid on Python 2 and Python 3 alike.
print(model.summary())

                              OLS Regression Results                             
Dep. Variable:     connectivity_distance   R-squared:                       0.725
Model:                               OLS   Adj. R-squared:                  0.684
Method:                    Least Squares   F-statistic:                     17.61
Date:                   Sat, 14 Apr 2018   Prob (F-statistic):           7.83e-06
Time:                           14:31:58   Log-Likelihood:                -54.952
No. Observations:                     24   AIC:                             117.9
Df Residuals:                         20   BIC:                             122.6
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept       