In [11]:
import urllib2
import json
import requests
import pandas

# Small example

Big GIM (Gene Interaction Miner) is a Translator Knowledge Source that contains function interaction data for all pairs of genes. Functional interaction data are available from four different sources: 

1. tissue-specific gene expression correlations from healthy tissue samples (GTEx), 
2. tissue-specific gene expression correlations from cancer samples (TCGA), 
3. tissue-specific probabilities of function interaction (GIANT), and 
4. direct interactions (BioGRID). 
   
The data is stored as a Google BigQuery table enabling fast access.

## Swagger api specification

http://biggim.ncats.io/api/

* 1.0 [Query Examples](#query_examples)
    * 1.1 [Simple predefined query](#query_simple)
    * 1.2 [Check the status of the simple predefined query](#query_status)
    * 1.3 [Get the results as dataframe](#query_results)


In [2]:
base_url = 'http://biggim.ncats.io/api'

In [3]:
#a couple of simple helper functions
def post(endpoint, data={}, base_url=base_url):
    req = requests.post('%s/%s' % (base_url,endpoint), data=data)
    req.raise_for_status()
    return req.json()

def get(endpoint, data={}, base_url=base_url):
    req = requests.get('%s/%s' % (base_url,endpoint), data=data)
    req.raise_for_status()
    return req.json()
    

def jprint(dct):
    print json.dumps(dct, indent=2)

In [7]:
try:
    tables = get('/metadata/table')
    #jprint(tables)
except requests.HTTPError as e:
    print e
    print #returned message
    print jprint(e.response.json())

default_table = [t for t in tables if t['default'] == True][0]['name']
print("Default table name: %s" % default_table)

Default table name: BigGIM_70_v1


<a id="query_examples"></a>
## Query examples

<a id="query_simple"></a>
### Simple predefined query

In [41]:
regpairs = pandas.read_table('theo_dump.tab', sep='\t')
#regpairs

ids1 = ",".join(str(regpairs[0:len(regpairs),1])

#for n in range(1,len(regpairs)):
#    ids1.join(str(regpairs[n,1])
    
    
    
ids1

TypeError: unhashable type

In [None]:
tt = 

In [16]:






example_query = {
      # The table to select from.
      "table": default_table, 
      # A comma delimited list of column names to return.
      "columns": "TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue", 
      # A comma delimited list of Entrez gene ids to select.
      "ids1": "5111,6996,57697,6815,889,7112,2176,1019,5888,5706,5722,1111,112,3333",
      # Entrez gene ids to select. If not given, the query selects any gene related to a gene in ids 1. 
      # If given, the query only selects relations that contain a gene in ids1 and a gene in ids2.
      "ids2": "5111,6996,57697,6815,889,7112,2176,1019,5888,5706,3333,1111,112,3333", 
      # The type of join made on restrictions. Either intersect or union
      "restriction_join": "intersect", 
      # A list of pairs of values column name,value with which to restrict
      "restriction_gt": "TCGA_GBM_Correlation,.8, GTEx_Brain_Correlation,.8", 
      # the results of the query to rows where the value of the column is greater than the given value.
      #"restriction_lt": "TCGA_GBM_Pvalue,.05, GTEx_Brain_Pvalue,.01",
      # The maximum number of rows to return.
      "limit": 100 
}
print("Query request:")
jprint(example_query)
try:
    query_submit = get('interactions/query', data=example_query)
    jprint(query_submit)
except requests.HTTPError as e:
    print e
    print #returned message
    print jprint(e.response.json())

Query request:
{
  "restriction_join": "intersect", 
  "limit": 100, 
  "restriction_gt": "TCGA_GBM_Correlation,.8, GTEx_Brain_Correlation,.8", 
  "table": "BigGIM_70_v1", 
  "ids2": "5111,6996,57697,6815,889,7112,2176,1019,5888,5706,3333,1111,112,3333", 
  "columns": "TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue", 
  "ids1": "5111,6996,57697,6815,889,7112,2176,1019,5888,5706,5722,1111,112,3333"
}
{
  "status": "submitted", 
  "request_id": "8e3163ec-05d0-4550-b2fb-708f97f3f28d"
}


<a id="query_status"></a>
### Check the status of the simple predefined query

In [9]:
import time
try:
    while True:
        query_status = get('interactions/query/status/%s'% (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] !='running':
            # query has finished
            break
        else:
            time.sleep(1)
            print "Checking again"
except requests.HTTPError as e:
    print e
    print #returned message
    print jprint(e.response.json())

{
  "status": "complete", 
  "rows": 100, 
  "processed_data": "0B", 
  "request_id": "6fdf5db3-19a8-4eb7-b973-17865ea32a20", 
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/6fdf5db3-19a8-4eb7-b973-17865ea32a20000000000000.csv"
  ], 
  "size": "5.0 KB"
}


<a id="query_results"></a>
### Get the results as dataframe

In [10]:
result = pandas.concat(map(pandas.read_csv, query_status['request_uri']))
#result

pvalue_columns = []
column_names = []
for ss in substudies['substudies']:



Unnamed: 0,GPID,Gene1,Gene2,TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue
0,17940000051284,51284,1794,0.8672,36.81,0.8928,inf
1,17940000057121,57121,1794,0.8196,29.59,0.8704,inf
2,40990000084735,84735,4099,0.9349,54.18,0.9394,inf
3,40990000084504,84504,4099,0.9434,57.69,0.9362,inf
4,69160000007305,7305,6916,0.8629,36.04,0.9469,inf
5,79400000054209,54209,7940,0.8620,35.89,0.9331,inf
6,227970000058475,58475,22797,0.8748,38.22,0.8603,inf
7,227970000054209,54209,22797,0.8365,31.89,0.8852,inf
8,110240000057705,57705,11024,0.8317,31.21,0.8611,inf
9,110240000029760,29760,11024,0.8371,31.97,0.8463,inf


<a id="full_example"></a>
# Full example

Lets get relationships for a certain tissue where any tissue related correlation has a pvalue < .05

 Assume we are interested in issues in lymphoid tissues

In [25]:
# get lymphoid substudies
import numpy as np
query_status = None
# get lymphoid substudies
substudies = get('metadata/tissue/%s' % ("lymphoid_tissue",))

# we only want things in a specific table, we'll use the default
table = get('/metadata/table/%s' %  (default_table,))

print("Using table %s\n%s" % (table['name'], table['description']))

# grab pvalue columns
pvalue_columns = []
column_names = []
for ss in substudies['substudies']:
    for column in ss['columns']:
        # only if column is from our table
        if column['table']['name'] == table['name']:
            # add column to select
            column_names.append(column['name'])
            if column['interactions_type'] == 'P-value (-log10)':
                #add pvalue to where
                pvalue_columns.append(column['name'])
 
# grab columns with lower pvalues
pv = []
for p in pvalue_columns:
    pv.append(p)
    pv.append(str(abs(np.log10(.05))))
query_arg = {}
query_arg['table'] = table['name']
query_arg['columns'] = ','.join(sorted(column_names))
if len(pv):
    query_arg['restriction_gt'] = ','.join(pv)
# get if any of the lymphoid columns have 
query_arg['restriction_join'] = 'union'
query_arg['limit'] = 1000000
print "The constructed query."
jprint(query_arg)

print("Submitting query request.")
try:
    query_submit = get('interactions/query', data=query_arg)
    print("Query request response.")
    jprint(query_submit)
except requests.HTTPError as e:
    print e
    print #returned message
    print jprint(e.response.json())

print("Check query status")
try:
    ctr = 1
    while True:
        
        query_status = get('interactions/query/status/%s'% (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] !='running':
            # query has finished
            break
        else:
            time.sleep(ctr)
            ctr += 1
            #linear backoff
            print "Checking again"
except requests.HTTPError as e:
    print e
    print #returned message
    print jprint(e.response.json())


if query_status and query_status['request_uri']:
    print("Query successful. Getting result.")
    result = pandas.concat(map(pandas.read_csv, query_status['request_uri']))
else:
    print "Error see above"

Using table BigGIM_70_v1
Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.7
The constructed query.
{
  "table": "BigGIM_70_v1", 
  "limit": 1000000, 
  "restriction_gt": "GTEx_Spleen_Pvalue,1.30102999566,TCGA_DLBC_Pvalue,1.30102999566,TCGA_THYM_Pvalue,1.30102999566", 
  "columns": "GIANT_b_lymphocyte_KnownFunctionalInteraction,GIANT_b_lymphocyte_ProbabilityOfFunctionalInteraction,GIANT_dendritic_cell_KnownFunctionalInteraction,GIANT_dendritic_cell_ProbabilityOfFunctionalInteraction,GIANT_lymph_node_KnownFunctionalInteraction,GIANT_lymph_node_ProbabilityOfFunctionalInteraction,GIANT_lymphocyte_KnownFunctionalInteraction,GIANT_lymphocyte_ProbabilityOfFunctionalInteraction,GIANT_natural_killer_cell_KnownFunctionalInteraction,GIANT_natural_killer_cell_P

In [26]:
#Full example result
result.head()

Unnamed: 0,GPID,Gene1,Gene2,GIANT_b_lymphocyte_KnownFunctionalInteraction,GIANT_b_lymphocyte_ProbabilityOfFunctionalInteraction,GIANT_dendritic_cell_KnownFunctionalInteraction,GIANT_dendritic_cell_ProbabilityOfFunctionalInteraction,GIANT_lymph_node_KnownFunctionalInteraction,GIANT_lymph_node_ProbabilityOfFunctionalInteraction,GIANT_lymphocyte_KnownFunctionalInteraction,...,GIANT_thymocyte_KnownFunctionalInteraction,GIANT_thymocyte_ProbabilityOfFunctionalInteraction,GIANT_tonsil_KnownFunctionalInteraction,GIANT_tonsil_ProbabilityOfFunctionalInteraction,GTEx_Spleen_Correlation,GTEx_Spleen_Pvalue,TCGA_DLBC_Correlation,TCGA_DLBC_Pvalue,TCGA_THYM_Correlation,TCGA_THYM_Pvalue
0,91170000171022,171022,9117,,,,,,,,...,,,,,-0.0303,0.12,,,-0.3085,3.19
1,842700101927402,101927402,84270,,,,,,,,...,,,,,-0.2865,2.5,,,,
2,46010000644759,644759,4601,,,,,,,,...,,,,,0.2115,1.51,,,,
3,796400100506686,100506686,79640,,,,,,,,...,,,,,0.204,1.42,,,,
4,237740000729867,729867,23774,,,,,,,,...,,,,,0.2811,2.42,,,,
