In [3]:
# Load the neuroelectro data dump.  
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer,Imputer
import pandas
# The curated article metadata is published as a tab-separated file.
df = pandas.read_csv(
    'http://dev.neuroelectro.org/static/src/article_ephys_metadata_curated.csv',
    sep='\t',
)

In [4]:
# define some python functions for working with the neurotree API

import urllib2
import json

def get_neurotree_node_id_from_pmid(pmid):
    """Look up the neurotree node id of the author matched to a publication.

    Queries neurotree's ``check_pmid.php`` endpoint with ``pmid`` and scans the
    returned author records for the first one whose ``authorRank`` is 0 and
    whose ``value`` is greater than 0.3.
    NOTE(review): ``authorRank == 0`` presumably selects the primary author and
    ``value`` presumably is a match score -- confirm against the neurotree API.

    Parameters
    ----------
    pmid : int or str
        PubMed id; interpolated into the query URL.

    Returns
    -------
    The ``pid`` field of the record that passed the filter, or ``None`` when
    the response is empty or no record passes.
    """
    query_url = 'http://neurotree.org/beta/include/check_pmid.php?term=%s' % pmid
    response = urllib2.urlopen(query_url)
    try:
        data = json.load(response)
    finally:
        # Release the HTTP connection even if the body is not valid JSON.
        response.close()

    if not data:
        return None
    for author in data:
        if author['authorRank'] == 0 and author['value'] > .3:
            # Use the matching record's own pid: the record that passes the
            # filter is not guaranteed to be the first element of ``data``.
            return author['pid']
    return None

def get_neurotree_node_info(neurotree_node_id):
    """Fetch the neurotree record for a single node.

    Sends a ``querytype=node`` request to neurotree's ``jsonQuery.php`` for
    ``neurotree_node_id`` and returns the first element of the decoded JSON
    response.
    """
    url_template = 'http://neurotree.org/neurotree/jsonQuery.php?querytype=node&pid=%s'
    records = json.load(urllib2.urlopen(url_template % neurotree_node_id))
    return records[0]
    
def get_investigator_path_len(neurotree_node_id_1, neurotree_node_id_2):
    """Query neurotree for the path length between two investigator nodes.

    Calls neurotree's ``distance.php`` with a step limit of
    ``DEFAULT_MAX_STEPS`` and prints the decoded response.

    Parameters
    ----------
    neurotree_node_id_1, neurotree_node_id_2 : int, str or None
        Neurotree node ids; ``None`` means the investigator is unknown.

    Returns
    -------
    (path_len, common_inv_name) : tuple
        * ``(None, None)`` -- either id is ``None``, the request or the JSON
          decoding failed, or the response carries no usable ``stepcount``.
        * ``(0, None)`` -- both ids are equal (no request is made).
        * ``(np.inf, None)`` -- the response is empty, or its ``stepstaken``
          equals ``DEFAULT_MAX_STEPS``.
        * ``(stepcount, path1[0])`` -- otherwise. Despite its name, the second
          element is the node id at the head of ``path1``, not a name.
    """
    # Test for missing ids first: None == None would otherwise be reported as
    # a path of length 0 between two unknown investigators.
    if neurotree_node_id_1 is None or neurotree_node_id_2 is None:
        return None, None
    if neurotree_node_id_1 == neurotree_node_id_2:
        return 0, None

    DEFAULT_MAX_STEPS = 20
    query_url = 'http://neurotree.org/neurotree/distance.php?pid1=%s&pid2=%s&refresh=1&includera=1&includepd=1&includers=1&backonly=1&dispformat=json&maxsteps=%s' %  (neurotree_node_id_1, neurotree_node_id_2, DEFAULT_MAX_STEPS)
    try:
        response = urllib2.urlopen(query_url)
        try:
            data = json.load(response)
        finally:
            response.close()
    except Exception:
        # Best effort: a failed request or undecodable body means "unknown".
        # Catching Exception (not a bare except) lets KeyboardInterrupt through,
        # so a long batch of queries can still be interrupted.
        return None, None

    print(data)
    if not data:
        return np.inf, None
    if data.get('stepstaken') == DEFAULT_MAX_STEPS:
        # The search used every allowed step.
        return np.inf, None
    if data.get('stepcount'):
        return data['stepcount'], data['path1'][0]
    return None, None
    

# Exercise each helper once with hard-coded ids.
get_neurotree_node_info(4800)  # return value is discarded
node_id = get_neurotree_node_id_from_pmid(9185537)
get_investigator_path_len(77014, 4800)
# Last expression of the cell: its (path_len, common_inv_name) tuple is the cell's displayed result.
get_investigator_path_len(2218, 196)

{u'path2': [2715, 77014], u'pathcount': 2, u'stepstaken': 1, u'stepcount': 2, u'path1': [2715, 4800], u'name2': u'Richard C Gerkin', u'name1': u'Shreejoy J Tripathy', u'pid1': 77014, u'pid2': 4800, u'backonly': u'1'}
{u'path2': None, u'pathcount': None, u'stepstaken': 8, u'stepcount': None, u'path1': None, u'name2': u'Claude  Bernard', u'name1': u'Daniel L Alkon', u'pid1': 2218, u'pid2': 196, u'backonly': u'1'}


(None, None)

In [5]:
# Select the rows reporting on CA1 pyramidal cells and collect their distinct PubMed ids.
ca1_match = df['NeuronName'].isin(['Hippocampus CA1 pyramidal cell'])
pmid_list = list(set(df.loc[ca1_match, 'Pmid']))

# Resolve each pmid to a neurotree node id; entries stay aligned with pmid_list
# and are None where get_neurotree_node_id_from_pmid finds no match.
neurotree_id_list = []
for pmid in pmid_list:
    print(pmid)
    node_id = get_neurotree_node_id_from_pmid(pmid)
    neurotree_id_list += [node_id]

9185537
17884930
14645379
11495958
8931019
21160001
18353290
20646048
20510860
15107470
10864941
17991696
12895506
22171027
22279188
16002454
11080262
10878106
10218778
16481438
20639007
20421280
11283235
10799755
12815013
11287462
17202472
9120553
11483306
19302151
15381293
12843280
20014384
24166578
21215795
9405530
16367774
22423094
19261881
24399930
16542781
12867518
11247989
23536065
22157122
19103683
15140933
19675296
16533575
17291464
9503336
16237175
20592204
11067982
15190096
18674519
21289178
19914331
10087347
12091536
22131424
12699792
21593314
26224869
9310439
18683240
23177962
16571755
14573548
22367983
21635953
11404402
12379251
12379253
19710327
25628536
15661817
16192378
11739583
17715197
10924671


In [6]:
# Summarise how many CA1 pyramidal cell publications could be matched to a neurotree node.

print(neurotree_id_list)

# neurotree_id_list holds one entry per pmid in pmid_list (None = no match), so
# this counts matched publications: an author id matched by several pmids is
# counted once per pmid. The variable name is kept for compatibility.
unique_neurotree_nodes = sum(x is not None for x in neurotree_id_list)

unique_pubs_len = len(pmid_list)

# Labels say what is actually counted: publications, not authors.
print('unique publications on CA1 pyramidal cells: %s' % unique_pubs_len)
print('publications with a neurotree author id: %s' % unique_neurotree_nodes)
print('distinct neurotree author ids: %s' % len(set(neurotree_id_list) - {None}))

[u'1157', u'3677', None, u'2218', None, u'2613', u'18036', None, u'2223', None, u'9675', None, None, u'2492', None, u'1484', u'3776', u'13466', None, u'10162', u'81966', None, u'9675', None, u'458', u'5362', None, u'1645', None, u'50988', u'1645', None, u'900', u'64854', u'8474', None, u'29322', u'196', u'455', u'3776', None, None, u'9024', u'458', u'2701', u'2347', u'9675', u'1404', None, u'3355', u'4836', u'9675', None, u'1484', u'458', None, u'28752', u'7080', None, u'5067', u'2084', u'10080', None, None, u'8152', None, u'1484', None, u'3542', u'28749', u'2534', u'9675', u'1645', u'30514', u'5894', u'6909', None, u'4810', u'42508', u'5594', None]
unique authors publishing on CA1 pyramidal cells: 81
found neurotree author ids: 55


In [None]:
# Example query: path length between the nodes matched to the first and sixth pmids.
# Either entry may be None if that pmid had no neurotree match.
get_investigator_path_len(neurotree_id_list[0], neurotree_id_list[5])

{u'path2': [146, 24589, 3924, 24618, 2017, 1835, 1157], u'pathcount': 2, u'stepstaken': 6, u'stepcount': 12, u'path1': [146, 6684, 115, 114, 1545, 4452, 2613], u'name2': u'Allan T Gulledge', u'name1': u'Karel  Svoboda', u'pid1': 1157, u'pid2': 2613, u'backonly': u'1'}


(12, 146)

In [None]:
# For every ordered pair of matched neurotree nodes, record the path length and
# the common-ancestor value returned by get_investigator_path_len.

unique_neurotree_id_list = list(set(neurotree_id_list))
print(unique_neurotree_id_list)

num_nodes = len(unique_neurotree_id_list)
# One entry per (id1, id2) pair, appended in row-major order.
common_ancestor_list = []
# Rows and columns follow the order of unique_neurotree_id_list; cells for
# which no path length is returned stay NaN.
path_len_mat = np.empty((num_nodes, num_nodes))
path_len_mat[:] = np.nan
for ind1, id1 in enumerate(unique_neurotree_id_list):
    print(ind1)
    # Track the column index together with id2 so each result lands in its own
    # cell rather than all of them being written to a single column.
    for ind2, id2 in enumerate(unique_neurotree_id_list):
        pathlen, common_ancestor = get_investigator_path_len(id1, id2)
        common_ancestor_list.append(common_ancestor)
        if pathlen is not None:
            path_len_mat[ind1, ind2] = pathlen

[u'458', u'2218', u'2613', u'3776', u'30514', u'196', u'8152', u'10162', u'13466', u'9675', u'18036', u'1484', u'42508', u'4836', u'3677', u'6909', u'2347', u'10080', u'2223', u'28749', u'1404', u'1645', u'5067', None, u'1157', u'2701', u'455', u'3355', u'50988', u'81966', u'5894', u'8474', u'5362', u'7080', u'900', u'4810', u'5594', u'3542', u'2492', u'64854', u'2084', u'29322', u'28752', u'2534', u'9024']
0
{u'path2': None, u'pathcount': None, u'stepstaken': 20, u'stepcount': None, u'path1': None, u'name2': u'Daniel L Alkon', u'name1': u'John  Disterhoft', u'pid1': 458, u'pid2': 2218, u'backonly': u'1'}
{u'path2': [115, 110, 111, 143, 458], u'pathcount': 2, u'stepstaken': 4, u'stepcount': 8, u'path1': [115, 114, 1545, 4452, 2613], u'name2': u'Allan T Gulledge', u'name1': u'John  Disterhoft', u'pid1': 458, u'pid2': 2613, u'backonly': u'1'}
{u'path2': [115, 110, 111, 143, 458], u'pathcount': 2, u'stepstaken': 4, u'stepcount': 7, u'path1': [115, 114, 600, 3776], u'name2': u'Ole  Paulsen

In [None]:
# Display the matrix of pairwise path lengths.
path_len_mat