In [1]:
from similarity import similarity

# Computes the intra- and inter-semantic similarity between two neurons.
# in:  n1@str, n2@str: names of the two neurons whose similarity is measured
# out: (score@float, [(matching_properties, explanation@str)])
# Alternative example pair, kept for reference:
#similarity('PV Layer 1 cortex acetylcholine neuron', 'Layer 1a striatum gaba fast-spiking neuron')
similarity('Periventricular zone neuron', 'Hypothalamic medial zone neuron')


('s_intra', (0.5, [[], (['ABA_REGION:1097'], 'sibling regions')]), 's_inter', (0, []))


(0.5, [[], (['ABA_REGION:1097'], 'sibling regions')])

In [None]:
import pandas as pd


# Fetch the NeuroElectro summary spreadsheet directly from its URL and load it
# into a pandas data frame: tab-delimited, first row is the header and the
# first column becomes the index.
neuroelectro_dataset_url = "http://dev.neuroelectro.org/static/src/article_ephys_metadata_curated.csv"
table = pd.read_csv(
    neuroelectro_dataset_url,
    sep='\t',
    index_col=0,
    header=0,
)

In [49]:
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache


In [50]:
# Build an Allen SDK mouse-connectivity cache using its default arguments.
mcc = MouseConnectivityCache()
# Ontology object from the cache; `shorten_annotation` further down indexes it
# by integer region id (`onto[reg_id]`).
onto = mcc.get_ontology()
# Tabular view of the ontology (the following cell displays its `id` column).
dataframe = onto.df

In [7]:
# Display the `id` column of the ontology table.
dataframe.id

id
997                997
8                    8
567                567
688                688
695                695
315                315
184                184
68                  68
667                667
500                500
107                107
219                219
299                299
644                644
947                947
985                985
320                320
943                943
648                648
844                844
882                882
993                993
656                656
962                962
767                767
1021              1021
1085              1085
453                453
12993            12993
12994            12994
               ...    
116                116
124                124
129                129
140                140
145                145
153                153
164                164
1024              1024
1032              1032
1055              1055
1063              1063
1071              1071
1078    

In [1]:
from sherlok import Sherlok
s = Sherlok('neuroner')

from similarity import _cleanup, _normalize

# Annotate one free-text neuron name with the 'neuroner' pipeline, clean the
# raw annotations up, then print (1) the shortened/normalized name and
# (2) the cleaned annotation list itself.
neuron_name = 'somatostatin-expressing with depolarizing fast spiking AHP Layer 5a pyramidal cell'
r = s.annotate(neuron_name)
#annot_list = format_annots(neuron_name, r.annotations)
al = _cleanup(r.annotations, neuron_name)
# print(...) with a single argument prints the same text under Python 2 and 3.
print(_normalize(al, shorten=True))

#print annot_list
print(al)

Sst with depolarizing F S AHP L5a Pyr
[u'NCBI_GENE:20604', 'Missing:with depolarizing', u'HBP_EPHYS:0000080', u'HBP_EPHYS_TRIGGER:0000003', 'Missing:AHP', u'HBP_LAYER:0000050', u'HBP_MORPHOLOGY:0000001']


In [3]:
from operator import itemgetter
from itertools import groupby

def format_annots(neuron_long_name, annotations):
    """Turn sherlok neuroner annotations into a sorted, gap-filled annotation list.

    Args:
        neuron_long_name: the string that was annotated.
        annotations: iterable of indexable records where ``a[0]`` and ``a[1]``
            are the start/end character offsets, ``a[2]`` the matched text,
            ``a[3]`` the annotation type and ``a[4]`` a mapping that may hold
            an ``'ontologyId'`` entry.

    Returns:
        A list of ``[start, end, text, label]`` lists sorted by ``start``.
        For annotations, ``label`` is the ontology id when one is present and
        the annotation type otherwise. Every stretch of the name not covered
        by a kept annotation is added as ``[first, last, text, 'Missing']``
        where ``first``/``last`` are inclusive indices and ``text`` is the raw
        slice of the name (it may include surrounding whitespace).
    """
    # Annotation types that are dropped before formatting.
    filt_attrib_list = ['Neuron', 'PreNeuron', 'PostNeuron', 'Electrophysiology', 'ProteinTrigger']

    kept = sorted(
        (a for a in annotations if a[3] not in filt_attrib_list),
        key=lambda tup: tup[0],
    )

    struct_annots = []
    for a in kept:
        label = a[4]['ontologyId'] if 'ontologyId' in a[4] else a[3]
        struct_annots.append([a[0], a[1], a[2], label])

    # Character positions covered by an annotation. The end offset itself is
    # counted as covered too (as the original code did), and building the set
    # from an explicit range also behaves for zero-width annotations.
    covered = set()
    for a in struct_annots:
        covered.update(range(a[0], a[1] + 1))

    # Scan from index 0 so that an unannotated first character is not lost.
    unfound_inds = [i for i in range(len(neuron_long_name)) if i not in covered]

    # Collapse consecutive uncovered indices into (first, last) runs: within a
    # run, position-in-list minus index value is constant.
    ranges = []
    for _, run in groupby(enumerate(unfound_inds), lambda pair: pair[0] - pair[1]):
        indices = [idx for _, idx in run]
        ranges.append((indices[0], indices[-1]))

    for first, last in ranges:
        struct_annots.append([first, last, neuron_long_name[first:last + 1], 'Missing'])

    return sorted(struct_annots, key=lambda tup: tup[0])

In [38]:
import oboparser

import yaml
import glob
import os

# config.yml provides `onto_root`, the location of the ontology files.
with open("config.yml", 'r') as ymlfile:
    # safe_load only builds plain Python data, which is all a config needs;
    # yaml.load() without an explicit Loader is deprecated and rejected by
    # newer PyYAML releases.
    cfg = yaml.safe_load(ymlfile)
onto_root = cfg['onto_root']

# Maps term id -> parsed stanza, merged across every ontology file found.
big_onto = {}

# os.path.join keeps the patterns valid whether or not onto_root ends with a
# path separator; glob.glob already returns a list.
file_name_list = glob.glob(os.path.join(onto_root, "*.robo"))
file_name_list.extend(glob.glob(os.path.join(onto_root, "*.obo")))
#onto_file_names = [onto_root + 'GeneNames.obo', onto_root + 'hbp_morphology_ontology.obo']
for fn in file_name_list:
    for stanza in oboparser.parse(fn):
        # Stanzas without an id cannot be keyed, so they are skipped.
        if 'id' in stanza:
            big_onto[stanza['id']] = stanza

In [39]:
# Inspect one parsed term to see which fields were stored for it.
big_onto[u'HBP_LAYER:0000001']

defaultdict(<type 'list'>, {'acronym': ['L1'], 'rsynonym': ['"[Ll]ayer[- ]?[1I]" EXACT ALTERNATE_SPELLING []', '"[Ll]1" BROAD ACRONYM []'], '@type': 'Term', 'name': 'layer 1', 'id': 'HBP_LAYER:0000001'})

In [54]:
def shorten_annotation(annot_list):
    """Print a short label for each annotation, one per line.

    Each item of ``annot_list`` is indexed as ``a[2]`` (text) and ``a[3]``
    (label / ontology id). The label decides what is printed:

    * contains ``'HBP'``: the first ``'acronym'`` of the matching ``big_onto``
      entry when it has one, otherwise its ``'name'``;
    * contains ``'ABA'``: the acronym of the region looked up in ``onto`` by
      the integer that follows the colon in the id;
    * anything else: the annotation text ``a[2]`` unchanged.

    Nothing is returned; the function relies on the notebook globals
    ``big_onto`` and ``onto``.
    """
    for a in annot_list:
        onto_id = a[3]
        if 'HBP' in onto_id:
            term = big_onto[onto_id]
            if 'acronym' in term:
                print(term['acronym'][0])
            else:
                print(term['name'])
        elif 'ABA' in onto_id:
            # Take the numeric part after the colon instead of slicing at a
            # hard-coded offset (11 == len('ABA_REGION:')).
            reg_id = int(onto_id.rsplit(':', 1)[-1])
            print(onto[reg_id].acronym.item())
        else:
            print(a[2])

Periventricular zone
PVZ
neuron
neuron
neuron
L1
Pyr
cell


In [1]:
from similarity_intra import load_ontologies

# Load the ontologies through the project's own loader.
# NOTE(review): the recorded output of this cell shows one term repr per entry
# being printed during the call, and ABA 'acronym' values that look like pandas
# Series rather than plain strings - confirm both are intended.
ontologies = load_ontologies()


defaultdict(<type 'list'>, {'acronym': id
8    grey
Name: acronym, dtype: object, 'synonym': ['"grey matter" []', '"basic cell groups" []', '"gray matter" []', '"Basic cell groups and regions" []', '"basic cell groups and regions" []', '"central nervous system gray matter" []', '"Substantia grisea" []'], '@type': 'Term', 'name': 'Basic cell groups and regions', 'id': 'ABA_REGION:8'})
defaultdict(<type 'list'>, {'acronym': id
694    AIv2/3
Name: acronym, dtype: object, 'synonym': ['"Agranular insular area, ventral part, layer 2/3" []'], '@type': 'Term', 'name': 'Agranular insular area, ventral part, layer 2/3', 'id': 'ABA_REGION:694'})
defaultdict(<type 'list'>, {'acronym': id
695    CTXpl
Name: acronym, dtype: object, 'synonym': ['"Cortex cerebralis" []', '"cerebral cortex" []', '"cortical plate" []', '"Cortical plate" []', '"Cortex cerebri" []'], '@type': 'Term', 'name': 'Cortical plate', 'id': 'ABA_REGION:695'})
defaultdict(<type 'list'>, {'acronym': id
696    AUDpo1
Name: acronym, d