In [1]:
# import urllib2
import requests
import json
import requests
import csv
import pandas

# Small example

Big GIM (Gene Interaction Miner) is a Translator Knowledge Source that contains function interaction data for all pairs of genes. Functional interaction data are available from four different sources: 

1. tissue-specific gene expression correlations from healthy tissue samples (GTEx), 
2. tissue-specific gene expression correlations from cancer samples (TCGA), 
3. tissue-specific probabilities of function interaction (GIANT), and 
4. direct interactions (BioGRID). 
   
The data is stored as a Google BigQuery table enabling fast access.

## Swagger api specification

http://biggim.ncats.io/api/

* 1.0 [LIG1 example](#query_examples)
    * 1.1 [Tissue specific queries](#query_simple)
    * 1.2 [Check the status of the simple predefined query](#query_status)
    * 1.3 [Get the results as dataframe](#query_results)


In [2]:
base_url = 'http://biggim.ncats.io/api'

In [3]:
#a couple of simple helper functions
def post(endpoint, data={}, base_url=base_url):
    req = requests.post('%s/%s' % (base_url,endpoint), data=data)
    req.raise_for_status()
    return req.json()

def get(endpoint, data={}, base_url=base_url):
    req = requests.get('%s/%s' % (base_url,endpoint), data=data)
    req.raise_for_status()
    return req.json()
    

def jprint(dct):
    print(json.dumps(dct, indent=2))

In [16]:
try:
    tables = get('/metadata/table')
    jprint(tables)
except requests.HTTPError as e:
    print(e)
    print("#returned message")
    print(jprint(e.response.json()))

default_table = [t for t in tables if t['default'] == True][0]['name']
print("Default table name: %s" % default_table)

[
  {
    "num_rows": 51090886,
    "description": "Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.7",
    "name": "BigGIM_70_v1",
    "num_bytes": 146013971730,
    "default": true
  },
  {
    "num_rows": 14210893,
    "description": "Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.8",
    "name": "BigGIM_80_v1",
    "num_bytes": 41062787154,
    "default": false
  },
  {
    "num_rows": 1173151,
    "description": "Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, fu

<a id="query_examples"></a>
## LIG1 example

<a id="query_simple"></a>
### Simple predefined query

In [17]:

example_query = {
      # The table to select from.
      "table": "FA_70_v1", 
      # A comma delimited list of column names to return.
      "columns": "TCGA_ACC_Correlation,TCGA_BRCA_Correlation,TCGA_COAD_Correlation,TCGA_ESCA_Correlation,TCGA_GBM_Correlation,TCGA_LAML_Correlation,TCGA_LIHC_Correlation,TCGA_LUAD_Correlation,TCGA_PAAD_Correlation,TCGA_PRAD_Correlation,TCGA_SKCM_Correlation,TCGA_STAD_Correlation,TCGA_TGCT_Correlation,TCGA_THCA_Correlation,GTEx_Adrenal_Gland_Correlation,GTEx_Breast_Correlation,GTEx_Colon_Correlation,GTEx_Esophagus_Correlation,GTEx_Brain_Correlation,GTEx_Blood_Correlation,GTEx_Liver_Correlation,GTEx_Lung_Correlation,GTEx_Pancreas_Correlation,GTEx_Prostate_Correlation,GTEx_Skin_Correlation,GTEx_Stomach_Correlation,GTEx_Testis_Correlation,GTEx_Thyroid_Correlation", 
      # The type of join made on restrictions. Either intersect or union
      "restriction_join": "union", 
      # A list of pairs of values column name,value with which to restrict
      "restriction_gt": "TCGA_ACC_Correlation,.7,TCGA_BRCA_Correlation,.7,TCGA_COAD_Correlation,.7,TCGA_ESCA_Correlation,.7,TCGA_GBM_Correlation,.7,TCGA_LAML_Correlation,.7,TCGA_LIHC_Correlation,.7,TCGA_LUAD_Correlation,.7,TCGA_PAAD_Correlation,.7,TCGA_PRAD_Correlation,.7,TCGA_SKCM_Correlation,.7,TCGA_STAD_Correlation,.7,TCGA_TGCT_Correlation,.7,TCGA_THCA_Correlation,.7,GTEx_Adrenal_Gland_Correlation,.7,GTEx_Breast_Correlation,.7,GTEx_Colon_Correlation,.7,GTEx_Esophagus_Correlation,.7,GTEx_Brain_Correlation,.7,GTEx_Blood_Correlation,.7,GTEx_Liver_Correlation,.7,GTEx_Lung_Correlation,.7,GTEx_Pancreas_Correlation,.7,GTEx_Prostate_Correlation,.7,GTEx_Skin_Correlation,.7,GTEx_Stomach_Correlation,.7,GTEx_Testis_Correlation,.7,GTEx_Thyroid_Correlation,.7", 
      # the results of the query to rows where the value of the column is greater than the given value.
      #"restriction_lt": "TCGA_GBM_Pvalue,.05, GTEx_Brain_Pvalue,.01",
      # The maximum number of rows to return.
      "limit": 1000000 
}
print("Query request:")
jprint(example_query)
try:
    query_submit = get('interactions/query', data=example_query)
    jprint(query_submit)
except requests.HTTPError as e:
    print(e)
    print ("#returned message")
    print( jprint(e.response.json()))

Query request:
{
  "columns": "TCGA_ACC_Correlation,TCGA_BRCA_Correlation,TCGA_COAD_Correlation,TCGA_ESCA_Correlation,TCGA_GBM_Correlation,TCGA_LAML_Correlation,TCGA_LIHC_Correlation,TCGA_LUAD_Correlation,TCGA_PAAD_Correlation,TCGA_PRAD_Correlation,TCGA_SKCM_Correlation,TCGA_STAD_Correlation,TCGA_TGCT_Correlation,TCGA_THCA_Correlation,GTEx_Adrenal_Gland_Correlation,GTEx_Breast_Correlation,GTEx_Colon_Correlation,GTEx_Esophagus_Correlation,GTEx_Brain_Correlation,GTEx_Blood_Correlation,GTEx_Liver_Correlation,GTEx_Lung_Correlation,GTEx_Pancreas_Correlation,GTEx_Prostate_Correlation,GTEx_Skin_Correlation,GTEx_Stomach_Correlation,GTEx_Testis_Correlation,GTEx_Thyroid_Correlation",
  "restriction_gt": "TCGA_ACC_Correlation,.7,TCGA_BRCA_Correlation,.7,TCGA_COAD_Correlation,.7,TCGA_ESCA_Correlation,.7,TCGA_GBM_Correlation,.7,TCGA_LAML_Correlation,.7,TCGA_LIHC_Correlation,.7,TCGA_LUAD_Correlation,.7,TCGA_PAAD_Correlation,.7,TCGA_PRAD_Correlation,.7,TCGA_SKCM_Correlation,.7,TCGA_STAD_Correlation,.

<a id="query_status"></a>
### Check the status of the simple predefined query

In [18]:
import time
try:
    while True:
        query_status = get('interactions/query/status/%s'% (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] !='running':
            # query has finished
            break
        else:
            time.sleep(1)
            print("Checking again")
except requests.HTTPError as e:
    print(e)
    print("#returned message")
    print(jprint(e.response.json()))

{
  "message": "Query job is running.",
  "request_id": "836c66b9-ba9a-4d52-95d4-12baff7b0dc3",
  "status": "running"
}
Checking again
{
  "message": "Query job is running.",
  "request_id": "836c66b9-ba9a-4d52-95d4-12baff7b0dc3",
  "status": "running"
}
Checking again
{
  "message": "Query job is running.",
  "request_id": "836c66b9-ba9a-4d52-95d4-12baff7b0dc3",
  "status": "running"
}
Checking again
{
  "message": "Query job is running.",
  "request_id": "836c66b9-ba9a-4d52-95d4-12baff7b0dc3",
  "status": "running"
}
Checking again
{
  "message": "Query job is running.",
  "request_id": "836c66b9-ba9a-4d52-95d4-12baff7b0dc3",
  "status": "running"
}
Checking again
{
  "message": "Extraction job is running.",
  "request_id": "836c66b9-ba9a-4d52-95d4-12baff7b0dc3",
  "status": "running"
}
Checking again
{
  "message": "Extraction job is running.",
  "request_id": "836c66b9-ba9a-4d52-95d4-12baff7b0dc3",
  "status": "running"
}
Checking again
{
  "message": "Extraction job is running.",


<a id="query_results"></a>
### Get the results as dataframe

In [19]:
result = pandas.concat(map(pandas.read_csv, query_status['request_uri']))
columns = list(result)
columns


['GPID',
 'Gene1',
 'Gene2',
 'TCGA_ACC_Correlation',
 'TCGA_BRCA_Correlation',
 'TCGA_COAD_Correlation',
 'TCGA_ESCA_Correlation',
 'TCGA_GBM_Correlation',
 'TCGA_LAML_Correlation',
 'TCGA_LIHC_Correlation',
 'TCGA_LUAD_Correlation',
 'TCGA_PAAD_Correlation',
 'TCGA_PRAD_Correlation',
 'TCGA_SKCM_Correlation',
 'TCGA_STAD_Correlation',
 'TCGA_TGCT_Correlation',
 'TCGA_THCA_Correlation',
 'GTEx_Adrenal_Gland_Correlation',
 'GTEx_Breast_Correlation',
 'GTEx_Colon_Correlation',
 'GTEx_Esophagus_Correlation',
 'GTEx_Brain_Correlation',
 'GTEx_Blood_Correlation',
 'GTEx_Liver_Correlation',
 'GTEx_Lung_Correlation',
 'GTEx_Pancreas_Correlation',
 'GTEx_Prostate_Correlation',
 'GTEx_Skin_Correlation',
 'GTEx_Stomach_Correlation',
 'GTEx_Testis_Correlation',
 'GTEx_Thyroid_Correlation']

In [20]:
result.head(10)

Unnamed: 0,GPID,Gene1,Gene2,TCGA_ACC_Correlation,TCGA_BRCA_Correlation,TCGA_COAD_Correlation,TCGA_ESCA_Correlation,TCGA_GBM_Correlation,TCGA_LAML_Correlation,TCGA_LIHC_Correlation,...,GTEx_Brain_Correlation,GTEx_Blood_Correlation,GTEx_Liver_Correlation,GTEx_Lung_Correlation,GTEx_Pancreas_Correlation,GTEx_Prostate_Correlation,GTEx_Skin_Correlation,GTEx_Stomach_Correlation,GTEx_Testis_Correlation,GTEx_Thyroid_Correlation
0,10240000002188,2188,1024,,-0.1038,0.2715,0.1771,-0.0056,0.2324,0.1673,...,0.1082,0.8046,0.4699,-0.1003,0.584,-0.3364,0.1581,-0.1202,0.3897,0.0638
1,10240000055120,55120,1024,-0.2393,0.0698,0.0141,-0.0578,0.2579,0.3558,-0.0049,...,0.1069,0.8789,-0.0698,0.0913,0.4098,-0.1256,0.4816,-0.0325,0.1886,0.3548
2,10240000055215,55215,1024,0.1568,0.3583,0.0315,0.0755,0.4437,0.2821,-0.0325,...,0.6209,0.8219,0.1426,0.0986,0.4647,0.1571,0.3938,0.0063,0.371,0.1894
3,10240000057697,57697,1024,0.4603,0.2292,-0.1136,0.0298,0.4228,0.4317,0.389,...,0.7556,0.8911,0.1781,-0.1775,0.5654,0.1609,0.4839,0.1306,0.6534,-0.1103
4,10240000029089,29089,1024,0.2624,0.2862,0.0087,-0.0059,0.3375,0.2709,-0.098,...,0.3328,0.7688,0.3445,-0.1017,0.3023,0.3943,0.4738,-0.0605,0.327,0.2065
5,10240000002176,2176,1024,0.0298,0.2543,,0.1034,0.3063,0.088,0.0986,...,0.6405,0.8316,-0.2262,0.0732,0.4901,-0.0966,0.1695,-0.0218,0.3278,0.1942
6,10240000007516,7516,1024,0.167,0.4253,0.1909,0.0254,0.3551,0.2756,0.2993,...,0.5869,0.814,0.0884,-0.1101,0.1923,0.0623,0.4269,-0.0203,0.5733,0.0671
7,10240000002178,2178,1024,0.1665,0.1735,-0.111,-0.2329,0.1858,0.1163,-0.0003,...,0.5846,0.8052,0.4692,0.0775,0.5588,-0.0317,0.0287,0.133,0.1232,0.0326
8,10240000002177,2177,1024,0.1757,0.4179,-0.0287,0.1781,0.4639,0.2437,0.0795,...,0.6607,0.8185,0.1624,-0.1253,0.4852,-0.0009,0.3874,-0.1038,0.2047,0.0267
9,10240000005889,5889,1024,-0.0754,-0.0164,-0.0524,-0.2117,,0.1523,-0.131,...,0.6072,0.8142,0.3573,-0.187,0.4978,-0.1818,0.3504,0.2912,0.4024,-0.0919


In [70]:
ts = result.iloc[:,3:]>.7
    

In [75]:
ts2 = ts.sum(axis=1)
ts3 = ts2.sort_values
ts3

<bound method Series.sort_values of 0         1
1         1
2         1
3         2
4         1
5         1
6         1
7         1
8         1
9         1
10        2
11        2
12        1
13        1
14        1
15        1
16        2
17        1
18        1
19        1
20        1
21        1
22        1
23        1
24        1
25        1
26        1
27        1
28        1
29        1
         ..
139999    2
140000    1
140001    1
140002    2
140003    1
140004    2
140005    1
140006    1
140007    1
140008    1
140009    1
140010    1
140011    1
140012    2
140013    1
140014    2
140015    1
140016    2
140017    2
140018    1
140019    1
140020    1
140021    1
140022    1
140023    1
140024    2
140025    2
140026    2
140027    2
140028    1
Length: 140029, dtype: int64>