# What are the preexisting neuronal cell type identities? 

In [2]:
import pymongo 
import seaborn as sns
import numpy as np
import collections
import pandas as pd
# Apply seaborn's default theme to subsequent matplotlib figures.
sns.set()

In [3]:
# Connect to the local MongoDB server and open the `aibs` database.
# Standard MongoDB URI form is mongodb://host:port/ -- the port follows the
# host, before the path slash (the previous URI had the port after the slash).
client = pymongo.MongoClient('mongodb://localhost:27017/')
db = client.aibs
# Function-call form of print, matching the later cells; valid on Python 2 and 3.
print(db.collection_names())


[u'nmorpho', u'morphology', u'neurons']


In [4]:
# Handles to the three collections in the `aibs` database, looked up by name.
n_coll = db['neurons']
m_coll = db['morphology']
nm_coll = db['nmorpho']

In [5]:
# Report how many documents each collection holds, neuromorpho first.
for template, coll in (
    ("{} records in neuromorpho collection", nm_coll),
    ("{} records in morphology collection", m_coll),
):
    print(template.format(coll.count()))


20567 records in neuromorpho collection
20568 records in morphology collection


In [6]:
# Peek at a single document from the neuromorpho collection (no filter is
# passed) to see which metadata fields an entry carries.
nm_coll.find_one()

{u'_id': ObjectId('5a99bcfa9d477a4072e45622'),
 u'_links': {u'measurements': {u'href': u'http://neuromorpho.org/api/morphometry/id/860'},
  u'self': {u'href': u'http://neuromorpho.org/api/neuron/id/860'}},
 u'age_classification': u'adult',
 u'archive': u'Martone',
 u'attributes': u'Diameter, 3D, Angles',
 u'brain_region': [u'ventral striatum', u'nucleus accumbens'],
 u'cell_type': [u'medium spiny', u'principal cell'],
 u'corrected_value': None,
 u'corrected_xy': None,
 u'corrected_z': None,
 u'deposition_date': u'2005-12-31',
 u'domain': u'Dendrites, Soma, Axon',
 u'experiment_condition': u'Control',
 u'magnification': u'60',
 u'max_age': u'2.0',
 u'max_weight': None,
 u'min_age': u'2.0',
 u'min_weight': None,
 u'neuron_id': 860,
 u'neuron_name': u'ACC1',
 u'note': u'When originally released, this reconstruction had been incompletely processed, and this issue was fixed in release 6.1 (May 2015). The pre-6.1 version of the processed file is available for download <a href=" dableFiles/pr

In [7]:
# Peek at a single document from the morphology collection (no filter is
# passed) to see which morphometric fields an entry carries.
m_coll.find_one()

{u'_id': ObjectId('5a99bd509d477a4072e4a679'),
 u'bif_ampl_local': 93.2768,
 u'branch_Order': 113.0,
 u'contraction': 0.97921,
 u'depth': 100.5,
 u'diameter': 0.554892,
 u'eucDistance': 93.2874,
 u'fractal_Dim': 1.05337,
 u'fragmentation': 1427.0,
 u'height': 102.321,
 u'length': 1851.27,
 u'n_bifs': 394.0,
 u'n_branch': 793.0,
 u'n_stems': 5.0,
 u'neuron_id': 860,
 u'neuron_name': u'ACC1',
 u'partition_asymmetry': 0.957281,
 u'pathDistance': 162.339,
 u'pk_classic': 1.79627,
 u'soma_Surface': 174.071,
 u'surface': 3489.29,
 u'volume': 892.123,
 u'width': 66.7084}

In [8]:
# Map each neuron_id to its (cell_type, age_classification, brain_region)
# triple, taken from every document in the neuromorpho collection.
labels = collections.defaultdict(tuple)
labels.update(
    (
        doc['neuron_id'],
        (doc['cell_type'], doc['age_classification'], doc['brain_region']),
    )
    for doc in nm_coll.find()
)

In [9]:
# Show the first five (neuron_id, labels) pairs. Materialise the items as a
# list before slicing: on Python 3 `dict.items()` is a view and cannot be
# subscripted, while on Python 2 this yields the same list as before.
list(labels.items())[:5]

[(79899,
  ([u'Glia', u'microglia', u'Iba1-positive'],
   u'young',
   [u'basal ganglia', u'striatum'])),
 (75724, ([u'principal cell', u'pyramidal'], u'embryonic', [u'neocortex'])),
 (71360, ([u'principal cell', u'pyramidal'], u'embryonic', [u'hippocampus'])),
 (71142,
  ([u'principal cell', u'pyramidal'],
   u'adult',
   [u'neocortex', u'layer 5', u'left'])),
 (75943, ([u'principal cell', u'ganglion'], u'adult', [u'retina', u'right']))]

In [23]:
# One row per neuromorpho document that has a neuron_id, keeping only the
# labelling fields; `age_classification` is stored under the shorter column
# name `age_class`.
df = pd.DataFrame(
    data=[
        (
            doc['neuron_id'],
            doc['cell_type'],
            doc['age_classification'],
            doc['brain_region'],
            doc['species'],
        )
        for doc in nm_coll.find({'neuron_id': {'$exists': True}})
    ],
    columns=['neuron_id', 'cell_type', 'age_class', 'brain_region', 'species']
)

In [24]:
# Preview the first rows of the label table.
df.head()

Unnamed: 0,neuron_id,cell_type,age_class,brain_region,species
0,860,"[medium spiny, principal cell]",adult,"[ventral striatum, nucleus accumbens]",mouse
1,862,"[Purkinje, principal cell]",adult,"[cerebellum, cerebellar cortex, vermis, Purkin...",mouse
2,863,"[Purkinje, principal cell]",adult,"[cerebellum, cerebellar cortex, vermis, Purkin...",mouse
3,864,"[Purkinje, principal cell]",adult,"[cerebellum, cerebellar cortex, vermis, Purkin...",mouse
4,865,"[Purkinje, principal cell]",adult,"[cerebellum, cerebellar cortex, vermis, Purkin...",mouse


In [28]:
# Tally how often each cell_type label list occurs. The lists are compared
# as-is, so the same labels in a different order are counted separately.
df['cell_type'].value_counts().to_frame()

Unnamed: 0,cell_type
"[principal cell, pyramidal]",3296
"[pyramidal, principal cell]",2731
[interneuron],603
"[ganglion, principal cell]",565
"[Glia, microglia, Iba1-positive]",557
"[medium spiny, principal cell]",524
"[principal cell, glutamatergic]",496
"[granule, interneuron]",478
"[interneuron, Inhibitory]",442
"[principal cell, medium spiny, projection]",440


In [29]:
# Tally how often each brain_region label list occurs.
df['brain_region'].value_counts().to_frame()

Unnamed: 0,brain_region
[retina],1638
"[hippocampus, dentate gyrus, granule layer]",1520
[neocortex],1027
"[retina, ganglion layer]",949
"[spinal cord, lumbar, dorsal horn, low-threshold mechanoreceptor-recipient zone (LTMR-RZ)]",650
"[neocortex, frontal, layer 3]",634
[hippocampus],582
"[hippocampus, CA1]",576
"[peripheral nervous system, dorsal root ganglion]",482
"[neocortex, somatosensory, layer 4]",453


In [30]:
# Tally how many neurons fall into each age classification.
df['age_class'].value_counts().to_frame()

Unnamed: 0,age_class
adult,10094
young,4338
young adult,2766
embryonic,1230
not reported,1206
neonatal,553
old,378
fetus,2


## Cell type observations 
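Cell-type labels are not standardized: the same type appears under differently ordered label lists (e.g. `[principal cell, pyramidal]` vs. `[pyramidal, principal cell]`), so raw value counts split what should be a single category. Next step: look into the latest cell-type ontology representations to normalize these labels.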