In [1]:
# import urllib2
import csv
import json
import time

import numpy as np
import pandas
import requests

# Small example

Big GIM (Gene Interaction Miner) is a Translator Knowledge Source that contains functional interaction data for all pairs of genes. Functional interaction data are available from four different sources:

1. tissue-specific gene expression correlations from healthy tissue samples (GTEx), 
2. tissue-specific gene expression correlations from cancer samples (TCGA), 
3. tissue-specific probabilities of functional interaction (GIANT), and 
4. direct interactions (BioGRID). 
   
The data is stored as a Google BigQuery table enabling fast access.

## Swagger api specification

http://biggim.ncats.io/api/

* 1.0 [Query Examples](#query_examples)
    * 1.1 [Simple predefined query](#query_simple)
    * 1.2 [Check the status of the simple predefined query](#query_status)
    * 1.3 [Get the results as dataframe](#query_results)


In [2]:
# Root URL of the BigGIM REST API; the get/post helpers below append the
# endpoint path to it.
base_url = 'http://biggim.ncats.io/api'

In [3]:
#a couple of simple helper functions
def post(endpoint, data={}, base_url=base_url):
    """POST ``data`` to an API endpoint and return the decoded JSON reply.

    Parameters
    ----------
    endpoint : str
        Path relative to ``base_url``; a leading ``/`` is tolerated.
    data : dict, optional
        Handed to ``requests.post`` as its ``data`` argument. It is never
        mutated here, so the shared default dict is safe.
    base_url : str, optional
        API root; defaults to the notebook-level ``base_url``.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    requests.HTTPError
        Raised by ``raise_for_status()`` when the server reports an error.
    """
    # Trim the slashes at the seam so that both '/metadata/table' and
    # 'metadata/table' resolve to '<base_url>/metadata/table' instead of
    # producing a double slash in the URL.
    url = '%s/%s' % (base_url.rstrip('/'), endpoint.lstrip('/'))
    req = requests.post(url, data=data)
    req.raise_for_status()
    return req.json()

def get(endpoint, data={}, base_url=base_url):
    """GET an API endpoint and return the decoded JSON reply.

    Parameters
    ----------
    endpoint : str
        Path relative to ``base_url``; a leading ``/`` is tolerated.
    data : dict, optional
        Handed to ``requests.get`` as its ``data`` argument. It is never
        mutated here, so the shared default dict is safe.
    base_url : str, optional
        API root; defaults to the notebook-level ``base_url``.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    requests.HTTPError
        Raised by ``raise_for_status()`` when the server reports an error.
    """
    # Trim the slashes at the seam so that both '/metadata/table' and
    # 'metadata/table' resolve to '<base_url>/metadata/table' instead of
    # producing a double slash in the URL.
    url = '%s/%s' % (base_url.rstrip('/'), endpoint.lstrip('/'))
    # NOTE(review): the arguments travel as ``data=`` (request body) rather
    # than ``params=`` (query string). Kept as-is because the queries in this
    # notebook rely on it; confirm against the API spec before changing.
    req = requests.get(url, data=data)
    req.raise_for_status()
    return req.json()
    

def jprint(dct):
    """Print ``dct`` as JSON text indented by two spaces; returns None."""
    rendered = json.dumps(dct, indent=2)
    print(rendered)

In [4]:
# Fetch the metadata of the available tables. On an HTTP error, show the
# server's message and re-raise: the lookup below cannot run without
# `tables`, and continuing would only surface as a confusing NameError.
try:
    tables = get('/metadata/table')
except requests.HTTPError as e:
    print(e)
    print("#returned message")
    # jprint already prints and returns None, so it must not be wrapped in
    # print() (that would emit a stray "None" line).
    jprint(e.response.json())
    raise

# Use the first table whose 'default' flag is set (IndexError if none is).
default_table = [t['name'] for t in tables if t['default'] == True][0]
print("Default table name: %s" % default_table)

Default table name: BigGIM_70_v1


<a id="query_examples"></a>
## Query examples

<a id="query_simple"></a>
### Simple predefined query

In [7]:
# NOTE(review): `ids1` and `ids2` are not assigned in any cell visible here
# (the execution counts jump from In [4] to In [7]). They must be defined
# before this cell and before the query below, otherwise a fresh
# Restart & Run All fails with NameError — TODO restore the defining cell.
ids2;

In [8]:
#tt = 

In [9]:

# Arguments of the simple predefined query. Commented-out keys are optional
# arguments left disabled here.
example_query = {
      # The table to select from.
      "table": default_table, 
      # A comma delimited list of column names to return.
      #"columns": "TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue", 
      # A comma delimited list of Entrez gene ids to select.
      "ids1": ids1,
      # Entrez gene ids to select. If not given, the query selects any gene related to a gene in ids 1. 
      # If given, the query only selects relations that contain a gene in ids1 and a gene in ids2.
      "ids2": ids2, 
      # The type of join made on restrictions. Either intersect or union
      "restriction_join": "intersect", 
      # A list of pairs of values column name,value with which to restrict
      # the results of the query to rows where the value of the column is greater than the given value.
      #"restriction_gt": "TCGA_GBM_Correlation,.8, GTEx_Brain_Correlation,.8", 
      # Presumably the less-than counterpart of restriction_gt — confirm against the API spec.
      #"restriction_lt": "TCGA_GBM_Pvalue,.05, GTEx_Brain_Pvalue,.01",
      # The maximum number of rows to return.
      "limit": 300 
}
print("Query request:")
jprint(example_query)

# Submit the query; the response is expected to carry the 'request_id' that
# the status cell polls.
try:
    query_submit = get('interactions/query', data=example_query)
    jprint(query_submit)
except requests.HTTPError as e:
    print(e)
    print("#returned message")
    # jprint prints by itself and returns None; wrapping it in print() would
    # add a stray "None" line to the output.
    jprint(e.response.json())

Query request:
{
  "limit": 300,
  "ids2": "7391,393,8882,2683,121642,26747,148266,25914,55968,3059,7341,518,8800,9223,81572,9318,79845,5071,84671,8243,201627,54472,94107,147015,84524,51012,11021,10190,4152,80007,55269,100885848,25829,54520,51548,57336,23293,83461,92335,11344,51008,28987,54497,54832,54461,257236,55776,8131,133957,9277,79673,54069,10200,29104,221060,81622,7335,9870,10147,8379,25897,4201,6390,8479,646851,8666,51650,664,153396,5576,51552,7709,6655,84904,4212,9647,22807,282974,23543,64718,92002,5457,126272,159013,3646,1936,64428,79763,79639,27,109504726,514,55716,2804,54344,26190,56957,388695,56851,253769,84261,123775,285600,80233,4594,388969,57470,4913,171017,10489,93210,27349,29058,5481,5863,257,84886,25937,390792,27127,9468,283316,5602,83417,7049,23428,311,5325,5178,2938,266,84076,401505,4868,2939,23576,23283,10274,22929,9282,10644,51592,23548,3766,80045,80194,9906,23129,57678,55658,57556,9584,9215,79443,79651,2686,51161,5232,3981,284076,84650,23155,57623,6093,55171,606

<a id="query_status"></a>
### Check the status of the simple predefined query

In [10]:
# Poll the status endpoint once per second until the job is no longer
# reported as 'running'. `time` comes from the import cell at the top.
try:
    while True:
        query_status = get('interactions/query/status/%s' % (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] != 'running':
            # query has finished
            break
        time.sleep(1)
        print("Checking again")
except requests.HTTPError as e:
    print(e)
    print("#returned message")
    # jprint prints by itself and returns None; wrapping it in print() would
    # add a stray "None" line to the output.
    jprint(e.response.json())

{
  "request_id": "158b5eed-65f4-4d08-8e00-d97f75379c4a",
  "message": "Query job is running.",
  "status": "running"
}
Checking again
{
  "request_id": "158b5eed-65f4-4d08-8e00-d97f75379c4a",
  "message": "Query job is running.",
  "status": "running"
}
Checking again
{
  "request_id": "158b5eed-65f4-4d08-8e00-d97f75379c4a",
  "message": "Query job is running.",
  "status": "running"
}
Checking again
{
  "request_id": "158b5eed-65f4-4d08-8e00-d97f75379c4a",
  "message": "Query job is running.",
  "status": "running"
}
Checking again
{
  "request_id": "158b5eed-65f4-4d08-8e00-d97f75379c4a",
  "message": "Query job is running.",
  "status": "running"
}
Checking again
{
  "request_id": "158b5eed-65f4-4d08-8e00-d97f75379c4a",
  "message": "Query job is running.",
  "status": "running"
}
Checking again
{
  "request_id": "158b5eed-65f4-4d08-8e00-d97f75379c4a",
  "message": "Query job is running.",
  "status": "running"
}
Checking again
{
  "request_id": "158b5eed-65f4-4d08-8e00-d97f75379c4a

<a id="query_results"></a>
### Get the results as dataframe

In [14]:
# Read every CSV listed under query_status['request_uri'] and stack them into
# one frame. ignore_index=True gives the combined frame a single continuous
# row index instead of repeating each file's own 0-based labels.
result = pandas.concat(
    [pandas.read_csv(uri) for uri in query_status['request_uri']],
    ignore_index=True,
)
# Column names of the combined frame, shown as the cell output.
columns = list(result)
columns


['GPID',
 'Gene1',
 'Gene2',
 'TCGA_ACC_Correlation',
 'TCGA_ACC_Pvalue',
 'TCGA_BLCA_Correlation',
 'TCGA_BLCA_Pvalue',
 'TCGA_BRCA_Correlation',
 'TCGA_BRCA_Pvalue',
 'TCGA_CESC_Correlation',
 'TCGA_CESC_Pvalue',
 'TCGA_CHOL_Correlation',
 'TCGA_CHOL_Pvalue',
 'TCGA_COAD_Correlation',
 'TCGA_COAD_Pvalue',
 'TCGA_DLBC_Correlation',
 'TCGA_DLBC_Pvalue',
 'TCGA_ESCA_Correlation',
 'TCGA_ESCA_Pvalue',
 'TCGA_GBM_Correlation',
 'TCGA_GBM_Pvalue',
 'TCGA_HNSC_Correlation',
 'TCGA_HNSC_Pvalue',
 'TCGA_KICH_Correlation',
 'TCGA_KICH_Pvalue',
 'TCGA_KIRC_Correlation',
 'TCGA_KIRC_Pvalue',
 'TCGA_KIRP_Correlation',
 'TCGA_KIRP_Pvalue',
 'TCGA_LAML_Correlation',
 'TCGA_LAML_Pvalue',
 'TCGA_LGG_Correlation',
 'TCGA_LGG_Pvalue',
 'TCGA_LIHC_Correlation',
 'TCGA_LIHC_Pvalue',
 'TCGA_LUAD_Correlation',
 'TCGA_LUAD_Pvalue',
 'TCGA_LUSC_Correlation',
 'TCGA_LUSC_Pvalue',
 'TCGA_MESO_Correlation',
 'TCGA_MESO_Pvalue',
 'TCGA_OV_Correlation',
 'TCGA_OV_Pvalue',
 'TCGA_PAAD_Correlation',
 'TCGA_PAAD_Pva

In [15]:
# Preview the first two rows of the combined result.
result.head(2)

Unnamed: 0,GPID,Gene1,Gene2,TCGA_ACC_Correlation,TCGA_ACC_Pvalue,TCGA_BLCA_Correlation,TCGA_BLCA_Pvalue,TCGA_BRCA_Correlation,TCGA_BRCA_Pvalue,TCGA_CESC_Correlation,...,GIANT_vascular_endothelial_cell_KnownFunctionalInteraction,GIANT_vascular_endothelial_cell_ProbabilityOfFunctionalInteraction,GIANT_vascular_endothelium_KnownFunctionalInteraction,GIANT_vascular_endothelium_ProbabilityOfFunctionalInteraction,GIANT_vermiform_appendix_KnownFunctionalInteraction,GIANT_vermiform_appendix_ProbabilityOfFunctionalInteraction,BioGRID_Interaction,BioGRID_Experimental_System,BioGRID_Experimental_System_Type,BioGRID_Throughput
0,551590000653319,653319,55159,0.039,0.13,-0.013,0.1,0.0927,2.44,0.2651,...,0,0.06115,0,0.052826,0,0.116249,,,,
1,21750000653319,653319,2175,0.1445,0.67,0.0236,0.19,0.2642,16.4,0.4273,...,0,0.069002,0,0.087776,0,0.177012,,,,


<a id="full_example"></a>
# Full example

Let's get relationships for a certain tissue where any tissue-related correlation has a p-value < .05.

 Assume we are interested in issues in lymphoid tissues

In [None]:
# Both stay None when the corresponding request fails, so the later steps can
# detect the failure instead of hitting a NameError or reusing stale values
# from the small example above.
query_submit = None
query_status = None

# get lymphoid substudies
substudies = get('metadata/tissue/%s' % ("lymphoid_tissue",))

# we only want things in a specific table, we'll use the default
table = get('/metadata/table/%s' %  (default_table,))

print("Using table %s\n%s" % (table['name'], table['description']))

# Collect every substudy column that belongs to our table, and remember
# separately which of those are p-value columns.
pvalue_columns = []
column_names = []
for ss in substudies['substudies']:
    for column in ss['columns']:
        # only if column is from our table
        if column['table']['name'] == table['name']:
            # add column to select
            column_names.append(column['name'])
            if column['interactions_type'] == 'P-value (-log10)':
                # add pvalue to where
                pvalue_columns.append(column['name'])

# The p-value columns are labelled '-log10', so "p < .05" is expressed as
# "column > -log10(.05)". Build the flat name,value,name,value,... list.
pvalue_threshold = str(abs(np.log10(.05)))
pv = []
for p in pvalue_columns:
    pv.append(p)
    pv.append(pvalue_threshold)

query_arg = {}
query_arg['table'] = table['name']
query_arg['columns'] = ','.join(sorted(column_names))
if pv:
    query_arg['restriction_gt'] = ','.join(pv)
# 'union': presumably keeps a row when any of the lymphoid p-value columns
# passes the restriction — confirm against the API spec.
query_arg['restriction_join'] = 'union'
query_arg['limit'] = 1000000
print("The constructed query.")
jprint(query_arg)

print("Submitting query request.")
try:
    query_submit = get('interactions/query', data=query_arg)
    print("Query request response.")
    jprint(query_submit)
except requests.HTTPError as e:
    print(e)
    print("#returned message")
    # jprint prints by itself and returns None; do not wrap it in print().
    jprint(e.response.json())

# Only poll when the submission actually produced a response.
if query_submit:
    print("Check query status")
    try:
        ctr = 1
        while True:
            query_status = get('interactions/query/status/%s' % (query_submit['request_id'],))
            jprint(query_status)
            if query_status['status'] != 'running':
                # query has finished
                break
            # linear backoff: wait one second longer after every poll
            time.sleep(ctr)
            ctr += 1
            print("Checking again")
    except requests.HTTPError as e:
        print(e)
        print("#returned message")
        jprint(e.response.json())

# .get() so that a status response without 'request_uri' is reported as an
# error instead of raising KeyError.
if query_status and query_status.get('request_uri'):
    print("Query successful. Getting result.")
    # ignore_index=True: one continuous row index across all result files.
    result = pandas.concat(
        [pandas.read_csv(uri) for uri in query_status['request_uri']],
        ignore_index=True,
    )
else:
    print("Error see above")

In [None]:
# Full example result: preview the first rows of the combined frame.
result.head()