# Differential gene expression search via the Allen Brain Atlas (ABA) API

In [1]:
import pandas as pd
from allensdk.api.queries.ontologies_api import OntologiesApi, StructureTree

## Get main olfactory bulb (MOB) structure IDs from the ontology structure graph

In [2]:
# Query the Allen ontology service for the structure graph.
oapi = OntologiesApi()
structure_graph = oapi.get_structures_with_sets([1])  # 1 is the id of the adult mouse structure graph

# This removes some unused fields returned by the query
structure_graph = StructureTree.clean_structures(structure_graph)  
# Wrap the cleaned structure records in a StructureTree; the following cells
# use `tree` for lookups by name and for child queries.
tree = StructureTree(structure_graph)

In [3]:
# Look up the main olfactory bulb (MOB) by its full name. The lookup takes a
# list of names and the result is indexed with [0], i.e. the first match is used.
mob = tree.get_structures_by_name(['Main olfactory bulb'])
# children() is called with a one-element list of ids; [0] selects the entry
# belonging to that single id, i.e. the child structures of MOB.
mob_structures = tree.children([mob[0]['id']])[0]

# Preview the first two child structure records.
mob_structures[:2]

[{'acronym': 'MOBgl',
  'rgb_triplet': [130, 199, 174],
  'graph_id': 1,
  'graph_order': 381,
  'id': 212,
  'name': 'Main olfactory bulb, glomerular layer',
  'structure_id_path': [997, 8, 567, 688, 695, 698, 507, 212],
  'structure_set_ids': [10, 12]},
 {'acronym': 'MOBgr',
  'rgb_triplet': [130, 199, 174],
  'graph_id': 1,
  'graph_order': 382,
  'id': 220,
  'name': 'Main olfactory bulb, granule layer',
  'structure_id_path': [997, 8, 567, 688, 695, 698, 507, 220],
  'structure_set_ids': [10, 12]}]

## Functions for differential gene expression search

In [4]:
def retrieve_ABA(url, start_row=0, num_rows=2000):
    '''Retrieve an ABA query result as CSV in multiple steps using paging.

    Pages are requested one after another until a page without any data rows
    comes back.

    Parameters
    ----------
    url : str
        Query URL template containing the %-style placeholders
        ``%(start_row)s`` and ``%(num_rows)s``; both are filled in for every
        page request.
    start_row : int, optional
        Row offset of the first page (default 0).
    num_rows : int, optional
        Page size; ``start_row`` advances by this amount after every
        non-empty page (default 2000). Must be at least 1.

    Returns
    -------
    pandas.DataFrame
        All non-empty pages concatenated in request order. ``reset_index()``
        is applied to the result, so rows are labelled 0..n-1 and the
        per-page row labels are kept in an ``index`` column. If the very
        first page has no rows, that empty frame is returned (with the same
        ``index`` column) instead of raising.

    Raises
    ------
    ValueError
        If ``num_rows`` is smaller than 1; the offset would never advance,
        so the paging loop could not terminate on a non-empty result.
    '''
    if num_rows < 1:
        raise ValueError('num_rows must be at least 1, got %r' % (num_rows,))

    pages = []
    while True:
        page = pd.read_csv(url % {'start_row': start_row, 'num_rows': num_rows})
        if len(page) == 0:
            # A page without rows marks the end of the result set.
            break
        pages.append(page)
        start_row += num_rows

    if not pages:
        # pd.concat([]) raises "No objects to concatenate"; hand back the
        # empty first page so callers still get the query's columns.
        return page.reset_index()

    return pd.concat(pages).reset_index()

In [5]:
from allensdk.api.queries.connected_services import ConnectedServices

def build_differential_search_url(target_structures, contrast_structures, threshold=1):
    '''Build a CSV query URL template for the ABA `mouse_differential` service.

    target_structures is passed to the service as `structures2` and
    contrast_structures as `structures1`. `threshold2` is sent as
    [threshold, 50] and `threshold1` as [0, 50].

    The returned URL still contains the %-style placeholders
    `%(start_row)s` and `%(num_rows)s`, which retrieve_ABA fills in for each
    page it requests.
    '''
    # For parameters see: http://help.brain-map.org/display/api/Connected+Services+and+Pipes#ConnectedServicesandPipes-service::mouse_differential
    service_params = {
        'set': 'mouse_coronal',
        'structures2': target_structures,
        'structures1': contrast_structures,
        'threshold2': [threshold, 50],
        'threshold1': [0, 50],
        # Paging placeholders; resolved later by retrieve_ABA.
        'start_row': '%(start_row)s',
        'num_rows': '%(num_rows)s',
    }
    json_url = ConnectedServices().build_url('mouse_differential', kwargs=service_params)

    # The service URL is built for JSON; switch the endpoint to CSV output.
    return json_url.replace('query.json', 'query.csv')

## Test that URL construction and download work

In [6]:
# Smoke test: the first MOB substructure is the target and all remaining
# substructures form the contrast set.
mob_url = build_differential_search_url(mob_structures[0]['id'], [x['id'] for x in mob_structures[1:]])
print(mob_url)  # the start_row / num_rows placeholders are still unfilled here
# Download every page of the result and preview the first rows.
mob_df = retrieve_ABA(mob_url)
mob_df.head()

http://api.brain-map.org/api/v2/data/query.csv?q=service::mouse_differential[num_rows$eq%(num_rows)s][set$eqmouse_coronal][start_row$eq%(start_row)s][structures1$eq220,228,236,244][structures2$eq212][threshold1$eq0,50][threshold2$eq1,50]


Unnamed: 0,index,id,name,gene-id,gene-symbol,gene-name,entrez-id,chromosome,plane-of-section,specimen-id,fold-change,target-sum,contrast-sum,num-target-samples,num-contrast-samples
0,0,73520993,RP_051101_02_G12,88550,Kctd12,potassium channel tetramerisation domain conta...,239217,14,coronal,,4.234,963.874,1020.211,83,372
1,1,74357573,RP_050915_03_C03,88861,Tspan18,tetraspanin 18,241556,2,coronal,,4.183,142.255,156.017,80,367
2,2,72008121,RP_051017_01_B10,14038,Fmo1,flavin containing monooxygenase 1,14261,1,coronal,,4.041,211.137,234.167,83,372
3,3,74512017,RP_060220_03_D03,83946,Phldb2,"pleckstrin homology-like domain, family B, mem...",208177,16,coronal,,3.971,222.445,251.422,82,368
4,4,73929578,RP_050927_03_H11,21585,Thbs2,thrombospondin 2,21826,17,coronal,,3.922,175.864,200.959,83,372


In [7]:
from urllib.parse import unquote

# Reference query: a hand-crafted, percent-encoded URL from the ABA website.
# unquote() decodes the %5B / %5D / %27 escapes into brackets and quotes; the
# %(start_row)s and %(num_rows)s paging placeholders are left in place for
# retrieve_ABA to fill in.
test_url = unquote(
    'http://mouse.brain-map.org/api/v2/data/query.csv?criteria=model::Structure,rma::criteria,structure_sets%5Bid$eq2%5D,rma::options%5Bonly$eq%27id%27%5D,pipe::list%5Bxstructures$eq%27id%27%5D,service::differential_rows%5Bset$eq%27P56coronal%27%5D%5Bdomain1$eq%27220,228,236,244%27%5D%5Bdomain1_threshold$eq%270,50%27%5D%5Bdomain2$eq%27212%27%5D%5Bdomain2_threshold$eq%271,50%27%5D%5Bstart_row$eq%(start_row)s%5D%5Bnum_rows$eq%(num_rows)s%5D'
)
print(test_url)

# Download the reference result with the same paging helper and preview it.
mob_manual = retrieve_ABA(test_url)
mob_manual.head()

http://mouse.brain-map.org/api/v2/data/query.csv?criteria=model::Structure,rma::criteria,structure_sets[id$eq2],rma::options[only$eq'id'],pipe::list[xstructures$eq'id'],service::differential_rows[set$eq'P56coronal'][domain1$eq'220,228,236,244'][domain1_threshold$eq'0,50'][domain2$eq'212'][domain2_threshold$eq'1,50'][start_row$eq%(start_row)s][num_rows$eq%(num_rows)s]


Unnamed: 0,index,id,name,gene-id,gene-symbol,gene-name,entrez-id,chromosome,plane-of-section,specimen-id,fold-change,target-sum,contrast-sum,num-target-samples,num-contrast-samples
0,0,73520993,RP_051101_02_G12,88550,Kctd12,potassium channel tetramerisation domain conta...,239217,14,coronal,,4.234,963.874,1020.211,83,372
1,1,74357573,RP_050915_03_C03,88861,Tspan18,tetraspanin 18,241556,2,coronal,,4.183,142.255,156.017,80,367
2,2,72008121,RP_051017_01_B10,14038,Fmo1,flavin containing monooxygenase 1,14261,1,coronal,,4.041,211.137,234.167,83,372
3,3,74512017,RP_060220_03_D03,83946,Phldb2,"pleckstrin homology-like domain, family B, mem...",208177,16,coronal,,3.971,222.445,251.422,82,368
4,4,73929578,RP_050927_03_H11,21585,Thbs2,thrombospondin 2,21826,17,coronal,,3.922,175.864,200.959,83,372


In [8]:
# The SDK-built query and the hand-crafted query must yield identical tables.
# assert_frame_equal reports which column/values differ on failure, whereas a
# bare `assert df.equals(...)` only raises an uninformative AssertionError.
# check_exact=True keeps the comparison strict (no float tolerance).
pd.testing.assert_frame_equal(mob_df, mob_manual, check_exact=True)

## Perform one-vs-all differential searches for MOB regions

In [9]:
# One-vs-all search: each MOB substructure in turn is the target, with all
# other MOB substructures as the contrast set. Results are keyed by the
# target structure's acronym.
mob_dfs = {}

for target in mob_structures:
    target_id = target['id']
    sibling_ids = [s['id'] for s in mob_structures if s['id'] != target_id]
    mob_dfs[target['acronym']] = retrieve_ABA(
        build_differential_search_url(target_id, sibling_ids)
    )

In [10]:
# Write one CSV file per MOB substructure, named "<acronym>.csv" relative to
# the current working directory; index=False omits the DataFrame row labels.
for acronym, region_df in mob_dfs.items():
    region_df.to_csv(acronym + '.csv', index=False)