In [2]:
import pymongo
import pandas as pd
import collections

## Find the labels of cells in the *drosophila* database

from [note](../raw/notes/180517thu.ipynb)

In [3]:
# Access the MongoDB database containing the NeuroMorpho collection.
# The port is separated from the host by a colon; with a slash
# ("localhost/27017/") the "27017/" part is parsed as a database name.
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client.aibs
nm_coll = db.nmorpho
# Show one document to see which fields are available
nm_coll.find_one()

{'_id': ObjectId('5a99bcfa9d477a4072e45622'),
 'corrected_z': None,
 'domain': 'Dendrites, Soma, Axon',
 'protocol': 'in vitro',
 'experiment_condition': 'Control',
 'scientific_name': '',
 'original_format': 'Neurolucida.asc',
 'reference_doi': None,
 'strain': 'C57BL/6',
 'archive': 'Martone',
 'max_age': '2.0',
 'cell_type': ['medium spiny', 'principal cell'],
 'slicing_thickness': '100.00',
 'objective_type': 'water',
 'reported_value': None,
 'note': 'When originally released, this reconstruction had been incompletely processed, and this issue was fixed in release 6.1 (May 2015). The pre-6.1 version of the processed file is available for download <a href=" dableFiles/previous/v6.1/martone/ACC1.CNG.swc ">here</a>.',
 'magnification': '60',
 '_links': {'self': {'href': 'http://neuromorpho.org/api/neuron/id/860'},
  'measurements': {'href': 'http://neuromorpho.org/api/morphometry/id/860'}},
 'age_classification': 'adult',
 'shrinkage_reported': 'Not reported',
 'stain': 'lucifer yell

In [19]:
# Get only the drosophila data
drosophila_query = {"species": "drosophila melanogaster"}
drosophila = nm_coll.find(drosophila_query)

# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0, so count
# the matching documents on the collection with the same filter instead.
n_drosophila = nm_coll.count_documents(drosophila_query)
print(f"There are {n_drosophila} drosophila cells.")

There are 27934 drosophila cells.


### `brain_region` and `cell_type` are label candidates

In [5]:
# Load labels into pandas dataframe
# Iterating the cursor exhausts it; rewind first so that re-running this cell
# does not produce an empty zip (and a ValueError on unpacking).
drosophila.rewind()
neuron_id, brain_region, cell_type = zip(
    *[(doc["neuron_id"], doc["brain_region"], doc["cell_type"]) for doc in drosophila]
)
# One row per neuron, indexed by neuron_id; each column holds that neuron's list of labels
data = {"brain_region": brain_region, "cell_type": cell_type}
df = pd.DataFrame(data, index=neuron_id)
df.head()

Unnamed: 0,brain_region,cell_type
1173,"[antennal lobe, glomerulus VC2]","[iPN, uniglomerular projection, principal cell]"
1174,"[antennal lobe, glomerulus DL5]","[iPN, uniglomerular projection, principal cell]"
1175,"[antennal lobe, glomerulus DL6]","[iPN, uniglomerular projection, principal cell]"
1176,"[antennal lobe, glomerulus VM3]","[iPN, uniglomerular projection, principal cell]"
1177,"[antennal lobe, glomerulus VM3]","[iPN, uniglomerular projection, principal cell]"


### There are multiple labels per neuron.  

Find the most common labels for cell_type and brain_region

In [9]:
# Join each neuron's labels into a single string and tally the 20 most
# frequent label combinations.
collections.Counter(map(" ".join, cell_type)).most_common(20)

[('Embryo-born Unknown neurotransmitter principal cell', 2620),
 ('Embryo-born GABAergic principal cell', 1616),
 ('day7 Born Unknown neurotransmitter principal cell', 854),
 ('Embryo-born cholinergic principal cell', 622),
 ('Embryo-born GABAergic interneuron', 617),
 ('Embryo-born Unknown neurotransmitter interneuron', 567),
 ('Embryo-born cholinergic interneuron', 567),
 ('day5 Born glutamatergic principal cell', 555),
 ('day4 Born glutamatergic principal cell', 532),
 ('principal cell Kenyon cell alpha-beta surface-targeting', 480),
 ('Embryo-born glutamatergic principal cell', 470),
 ('day6 Born glutamatergic principal cell', 452),
 ('day7 Born GABAergic principal cell', 450),
 ('day5 Born Unknown neurotransmitter principal cell', 407),
 ('day2 Born Unknown neurotransmitter principal cell', 377),
 ('day4 Born glutamatergic interneuron', 373),
 ('day4 Born Unknown neurotransmitter principal cell', 371),
 ('day7 Born glutamatergic principal cell', 368),
 ('day3 Born Unknown neurotra

In [27]:
# Fetch one example document for the most common label combination found above
nm_coll.find_one(
    {
        "cell_type": [
            'Embryo-born',
            "Unknown neurotransmitter",
            "principal cell",
        ]
    }
)

{'_id': ObjectId('5ad653a59d477a7106d1c7ad'),
 'neuron_id': 11377,
 'neuron_name': '5-HT1B-F-000004',
 'archive': 'Chiang',
 'note': 'Cell types and Brain regions were assigned with a <a href=" techDocFlyData.jsp?code=1">heuristic process</a> based on available metadata. This dataset was processed with a <a href=" techDocFlyData.jsp?code=2">streamlined automated variant</a> of the standardization procedure, additional details of which are published <a href=" http://www.ncbi.nlm.nih.gov/pubmed/?term=25576225" target="_blank"">here</a>. Digital reconstruction used a <a href=" http://www.ncbi.nlm.nih.gov/pubmed/?term=23028271" target="_blank">custom method</a> after image segmentation by Amira. The SWC units for the neurons in this dataset are not expressed in micrometers. The exact scaling factors, which are different for each neuron, are not known, but our approximate estimate is 0.32 in XY and 0.26 in Z. Thus, the morphometric measurements can be compared within other neurons of this a

In [None]:
# NOTE(review): this cell was left unfinished (`{"cell_type": }` is a
# SyntaxError and stops Restart & Run All). Assumed intent: select neurons by
# a single label rather than by the full label list - a scalar filter on an
# array field should match documents whose array contains that value.
# TODO confirm which label was intended.
nm_coll.find({"cell_type": "principal cell"})

In [13]:
# Flatten the per-neuron label lists into one list of individual labels
celltypes_list = [label for labels in cell_type for label in labels]

# Unique labels seen for each candidate field
celltypes = set(celltypes_list)
brainregion = {region for regions in brain_region for region in regions}

# How often each individual cell_type label occurs
celltypes_counter = collections.Counter(celltypes_list)
celltypes_counter.most_common(n=20)

[('principal cell', 18488),
 ('interneuron', 8202),
 ('Embryo-born', 7931),
 ('Unknown neurotransmitter', 7237),
 ('glutamatergic', 6016),
 ('GABAergic', 3702),
 ('cholinergic', 3379),
 ('day7 Born', 3032),
 ('serotonergic', 2212),
 ('day2 Born', 2164),
 ('day5 Born', 2146),
 ('day3 Born', 2078),
 ('day4 Born', 2067),
 ('day1 Born', 1583),
 ('day6 Born', 1521),
 ('Kenyon cell', 1293),
 ('sensory receptor', 1090),
 ('alpha-beta', 1081),
 ('Multidendritic-dendritic arborization (DA)', 984),
 ('dopaminergic', 862)]

In [14]:
# Flatten the per-neuron brain_region lists and count each individual label
brainregion_list = [region for regions in brain_region for region in regions]
brainregion_counter = collections.Counter(brainregion_list)

brainregion_counter.most_common(n=20)

[('protocerebrum', 9269),
 ('optic Lobe', 7106),
 ('medulla', 3853),
 ('ventrolateral neuropils', 2166),
 ('antennal lobe', 1972),
 ('Right Medulla', 1807),
 ('left', 1729),
 ('mushroom body', 1659),
 ('right', 1582),
 ('Right Mushroom Body', 1579),
 ('Left Lobula Complex', 1528),
 ('Left Medulla', 1512),
 ('Right Lobula Complex', 1511),
 ('Right Adult Central Complex', 1376),
 ('Left Adult Central Complex', 1315),
 ('Right Protocerebral Bridge', 1231),
 ('peripheral nervous system', 1177),
 ('left Lobula', 1140),
 ('Left Protocerebral Bridge', 1133),
 ('right Lobula', 1117)]

### Todo: 
- find out how these cells were labeled  
 * from Neuroinformatics. 2015 January ; 13(1): 127–129. doi:10.1007/s12021-014-9257-y: 
> * Neuron Type Assignment  
The distinction between principal (projection) cells and (local) interneurons was based on the flycircuit.tw list of regions invaded by the neurite terminals of every neuron. We considered a neuron as an interneuron if 95% or more of its terminals were contained within the somatic region and its adjacent brain regions. Conversely, we marked a neuron as a principal cell if more than 5% of its terminals were found in non-adjacent regions. This definition yielded 10,079 principal cells and 5971 interneurons. We further sub-divided all neurons on the basis of their putative neurotransmitter and, lastly, by their birth date.

- principal (projection) neurons should be relatively easy to distinguish from interneurons.  Create a binary classifier.  
- How were putative neurotransmitters determined? See if neuron clusters form according to neurotransmitter labels.  