In [2]:
import sys
import os
import pandas as pd
import requests

In [3]:
# Query every gene known to the Allen Brain Atlas. No row count is requested here,
# so by default only the first 50 rows of the query come back.
all_genes_query = "http://api.brain-map.org/api/v2/data/Gene/query.json"
all_genes_response = requests.get(url=all_genes_query).json()

# The result rows are read from the 'msg' key and loaded into a pandas dataframe.
all_genes = pd.DataFrame(data=all_genes_response['msg'])
all_genes.head(n=5)

Unnamed: 0,acronym,alias_tags,chromosome_id,ensembl_id,entrez_id,genomic_reference_update_id,homologene_id,id,legacy_ensembl_gene_id,name,organism_id,original_name,original_symbol,reference_genome_id,sphinx_id,version_status
0,A1BG,A1B ABG DKFZp686F0970 GAB HYST2477,12,,1,491928275,11167.0,2,,alpha-1-B glycoprotein,1,alpha-1-B glycoprotein,A1BG,,131297,no change
1,A2M,A2MD CPAMD5 DKFZp779B086 FWP007 S863-7,5,,2,491928275,37248.0,3,,alpha-2-macroglobulin,1,alpha-2-macroglobulin,A2M,,109391,no change
2,A2MP1,A2MP,5,,3,491928275,,4,,alpha-2-macroglobulin pseudogene 1,1,alpha-2-macroglobulin pseudogene 1,A2MP1,,131617,no change
3,NAT1,AAC1 MNAT NAT-1 NATI,27,,9,491928275,37329.0,6,,N-acetyltransferase 1 (arylamine N-acetyltrans...,1,N-acetyltransferase 1 (arylamine N-acetyltrans...,NAT1,,110027,no change
4,NAT2,AAC2 NAT-2 PNAT,27,,10,491928275,115468.0,7,,N-acetyltransferase 2 (arylamine N-acetyltrans...,1,N-acetyltransferase 2 (arylamine N-acetyltrans...,NAT2,,110308,no change


In [5]:
# Genes that belong to the developmental mouse product: there are 2107.
# The query string is "<gene endpoint>?<criteria>&<row limit>".
dev_mouse_criteria = "criteria=model::Gene,rma::criteria,products[abbreviation$eq'DevMouse']"
all_rows = "num_rows=all"
dev_mouse_genes_query = "{}?{}&{}".format(all_genes_query, dev_mouse_criteria, all_rows)
dev_mouse_genes_response = requests.get(url=dev_mouse_genes_query).json()

# Load the rows found under 'msg' into a pandas dataframe.
dev_mouse_genes = pd.DataFrame(data=dev_mouse_genes_response['msg'])

print(dev_mouse_genes.shape)
dev_mouse_genes.head(n=5)

(2107, 16)


Unnamed: 0,acronym,alias_tags,chromosome_id,ensembl_id,entrez_id,genomic_reference_update_id,homologene_id,id,legacy_ensembl_gene_id,name,organism_id,original_name,original_symbol,reference_genome_id,sphinx_id,version_status
0,Slc1a6,EAAT4,35.0,,20513.0,491928275.0,21055.0,20275,,solute carrier family 1 (high affinity asparta...,2,solute carrier family 1 (high affinity asparta...,Slc1a6,,102670,no change
1,Lhx3,Lim3 mLim-3 mLIM3 P-LIM,47.0,,16871.0,491928275.0,7814.0,16642,,LIM homeobox protein 3,2,LIM homeobox protein 3,Lhx3,,91840,no change
2,Dlx2,AW121999 Dlx-2 Tes-1,47.0,,13392.0,491928275.0,3244.0,13171,,distal-less homeobox 2,2,distal-less homeobox 2,Dlx2,,92911,no change
3,Ncam2,Ncam-2 Ocam RNCAM,42.0,,17968.0,491928275.0,3336.0,17735,,neural cell adhesion molecule 2,2,neural cell adhesion molecule 2,Ncam2,,89989,no change
4,Hcrtr1,MGC141357 Ox1r,49.0,,230777.0,491928275.0,37492.0,87114,,hypocretin (orexin) receptor 1,2,hypocretin (orexin) receptor 1,Hcrtr1,,99831,no change


In [6]:
# Keep only the developmental mouse genes whose acronym is one we care about.
genes_of_interest = ['Robo2', 'Cdh5', 'Bmpr2']
our_genes = dev_mouse_genes.loc[dev_mouse_genes['acronym'].isin(genes_of_interest)]
our_genes.head(n=5)

Unnamed: 0,acronym,alias_tags,chromosome_id,ensembl_id,entrez_id,genomic_reference_update_id,homologene_id,id,legacy_ensembl_gene_id,name,organism_id,original_name,original_symbol,reference_genome_id,sphinx_id,version_status
851,Bmpr2,2610024H22Rik AL117858 AW546137 BB189135 BMP-2...,34.0,,12168.0,491928275.0,929.0,11954,,"bone morphogenetic protein receptor, type II (...",2,"bone morphogenic protein receptor, type II (se...",Bmpr2,,358483,updated
1111,Robo2,2600013A04Rik 9430089E08Rik BB097918 D230004I2...,42.0,,268902.0,491928275.0,43188.0,92611,,roundabout homolog 2 (Drosophila),2,roundabout homolog 2 (Drosophila),Robo2,,103280,no change
1447,Cdh5,7B4 AA408225 Cd144 CD144 Vec VEC VEcad VE-Cad ...,53.0,,12562.0,491928275.0,1359.0,12347,,cadherin 5,2,cadherin 5,Cdh5,,101182,no change


In [7]:
# Gene ids come from the "id" column; pull them out as a plain Python list.
our_gene_ids = our_genes['id'].values.tolist()
our_gene_ids

[11954, 92611, 12347]

In [8]:
# all experiments (section data sets) in developmental mouse
all_data_query = "http://api.brain-map.org/api/v2/data/query.json"

# criteria to get experimental data from developmental mouse
# NOTE: this rebinds dev_mouse_criteria, which the gene-listing cell sets to a
# model::Gene criteria string; from here on it targets model::SectionDataSet.
dev_mouse_criteria = "criteria=model::SectionDataSet,rma::criteria,products[abbreviation$eq'DevMouse']"
all_rows = "num_rows=all"


def gene_query_by_id(gene_id):
    """Return the criteria fragment that filters on a single gene id.

    Args:
        gene_id: gene id to filter on; it is converted with str() before
            being embedded in the fragment.

    Returns:
        A string of the form "genes[id$eq'<gene_id>']".
    """
    return "genes[id$eq'" + str(gene_id) + "']"


# one query url per gene of interest, keyed by gene id
experiment_queries = {
    gene_id: all_data_query + "?" + dev_mouse_criteria + ',' + gene_query_by_id(gene_id) + "&" + all_rows
    for gene_id in our_gene_ids
}
experiment_queries

{11954: "http://api.brain-map.org/api/v2/data/query.json?criteria=model::SectionDataSet,rma::criteria,products[abbreviation$eq'DevMouse'],genes[id$eq'11954']&num_rows=all",
 12347: "http://api.brain-map.org/api/v2/data/query.json?criteria=model::SectionDataSet,rma::criteria,products[abbreviation$eq'DevMouse'],genes[id$eq'12347']&num_rows=all",
 92611: "http://api.brain-map.org/api/v2/data/query.json?criteria=model::SectionDataSet,rma::criteria,products[abbreviation$eq'DevMouse'],genes[id$eq'92611']&num_rows=all"}

In [10]:
# Collect the experiment (section data set) rows for every gene of interest into
# one pandas dataframe, tagging each row with the gene id it was queried for.
experiment_frames = []
for gene_id, query in experiment_queries.items():
    print(query)
    query_response = requests.get(query).json()
    query_df = pd.DataFrame(query_response['msg'])
    query_df['gene_id'] = gene_id
    experiment_frames.append(query_df)

# Concatenate once instead of re-copying the accumulated frame on every iteration.
# ignore_index is deliberately not set, so each per-gene frame keeps its own
# 0-based index and index labels repeat across genes.
# pd.concat raises on an empty list, so with no queries the result stays None.
our_experiments = pd.concat(experiment_frames) if experiment_frames else None

our_experiments

http://api.brain-map.org/api/v2/data/query.json?criteria=model::SectionDataSet,rma::criteria,products[abbreviation$eq'DevMouse'],genes[id$eq'11954']&num_rows=all
http://api.brain-map.org/api/v2/data/query.json?criteria=model::SectionDataSet,rma::criteria,products[abbreviation$eq'DevMouse'],genes[id$eq'92611']&num_rows=all
http://api.brain-map.org/api/v2/data/query.json?criteria=model::SectionDataSet,rma::criteria,products[abbreviation$eq'DevMouse'],genes[id$eq'12347']&num_rows=all


Unnamed: 0,blue_channel,delegate,expression,failed,failed_facet,green_channel,id,name,plane_of_section_id,qc_date,red_channel,reference_space_id,rnaseq_design_id,section_thickness,specimen_id,sphinx_id,storage_directory,weight,gene_id
0,,True,True,False,734881840,,100046444,,2,2009-07-28T16:18:33Z,,3,,20.0,4908,148630,/external/devmouse/prod165/image_series_100046...,5200,11954
1,,True,True,False,734881840,,100042306,,2,2009-02-18T14:28:00Z,,8,,25.0,4797,83381,/external/devmouse/prod165/image_series_100042...,5200,11954
2,,True,True,False,734881840,,69529382,,2,2009-05-02T22:47:10Z,,10,,25.0,69486038,34806,/external/aibssan/production32/prod330/image_s...,5470,11954
3,,True,True,False,734881840,,100057140,,2,2009-05-04T16:12:12Z,,5,,20.0,6226,43380,/external/devmouse/prod170/image_series_100057...,5200,11954
4,,True,True,False,734881840,,100057296,,2,2009-05-06T15:44:29Z,,6,,20.0,6403,58993,/external/devmouse/prod170/image_series_100057...,5200,11954
5,,True,True,False,734881840,,100046631,,2,2009-06-23T09:52:00Z,,2,,20.0,4949,77369,/external/devmouse/prod181/image_series_100046...,5200,11954
6,,True,True,False,734881840,,100042432,,2,2009-02-05T13:43:02Z,,7,,25.0,4860,55775,/external/devmouse/prod171/image_series_100042...,5200,11954
7,,True,True,False,734881840,,100081744,,2,2009-10-09T14:34:30Z,,1,,20.0,701571,36776,/external/devmouse/prod216/image_series_100081...,5200,11954
0,,True,True,False,734881840,,100047274,,2,2009-07-30T11:51:16Z,,3,,20.0,5384,32889,/external/devmouse/prod167/image_series_100047...,5200,92611
1,,False,True,False,734881840,,75080998,,1,2009-05-02T23:06:21Z,,9,,25.0,74899344,18597,/external/aibssan/production32/prod342/image_s...,5470,92611


In [16]:
# Experiment (section data set) ids for all of the collected experiments,
# shown as one comma-separated string.
our_experiment_ids = our_experiments['id'].values.tolist()
'experiment_ids:' + ','.join(map(str, our_experiment_ids))

'experiment_ids:\n100046444,100042306,69529382,100057140,100057296,100046631,100042432,100081744,100047274,75080998,100045469,100046870,100072173,100047596,100071976,71281319,100045355,77931975,100058500,100078477,100084794,100058480,100082796,100056407,100056766,100072907,100056389'

In [None]:
# let's view responses for section_data_set, given its id
experiment_criteria = "criteria=model::SectionImage"


def experiment_query_by_id(data_set_id):
    """Return the filter fragment matching a single section data set id.

    Args:
        data_set_id: section data set (experiment) id; it is converted with
            str() before being embedded in the fragment.

    Returns:
        A string of the form "[data_set_id$eq<data_set_id>]".
    """
    return "[data_set_id$eq" + str(data_set_id) + "]"


# query the section images of one experiment; 100046444 is the first id in our_experiment_ids
experiment_query = all_data_query + "?" + experiment_criteria + ',rma::criteria,' + experiment_query_by_id(100046444)
# raw HTTP response object; call .json() on it to get the parsed payload
experiment_response = requests.get(experiment_query)

In [None]:
# Put the "src" directory under the current working directory on sys.path.
# The append is guarded so that re-running this cell does not keep adding
# duplicate entries.
src_dir = os.path.join(os.getcwd(), "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)
from src.ecallen.ecallen import images as ecimg

# 100046444 is the first experiment (section data set) id collected above.
# NOTE(review): presumably returns the ids of all section images in that data set;
# confirm against ecallen.images.
all_image_ids = ecimg.get_all_section_image_ids(100046444)