In [48]:
import requests
from pprint import pprint
import json
import pandas as pd

In [49]:
uberon_bto_map = json.loads(open ('bto_uberon_bg.json').read())

In [50]:
def id2term(var, key, return_key, json_blob):
    """
    check json map of uberbon, bto an bg terms for id
    :param var: identifier
    :param key: source of identifier
    :param return_key: key to return in matched object
    :param json_blob: json to search
    :return: bg term name if mapping to that id exists, input var if no mapping exists
    """
    result = None
    for obj in json_blob:
        if obj[key] == var:
            result = obj[return_key]
    return result

In [51]:
tables = requests.get('http://127.0.0.1:5000/metadata/table').json()

In [52]:
table_names = list()
for table in tables:
    table_names.append(table['name'])
table_names

['BigGIM_70_v1',
 'BigGIM_80_v1',
 'BigGIM_90_v1',
 'BigGIM_v1',
 'CRC_70_v1',
 'CRC_80_v1',
 'CRC_90_v1',
 'CRC_v1',
 'FA_70_v1',
 'FA_80_v1',
 'FA_90_v1',
 'FA_v1']

In [53]:
all_columns = list()
sep_columns = dict()
for tn in table_names:
    sep_columns[tn] = list()
    bg_table = requests.get('http://127.0.0.1:5000/metadata/table/{}'.format(tn)).json()
    for column in bg_table['columns']:
        all_columns.append(column['name'])
        sep_columns[tn].append(column['name'])


In [54]:
bg_columns_set = list(set(all_columns))

In [55]:
bg_columns_set

['GIANT_tear_gland_ProbabilityOfFunctionalInteraction',
 'GTEx_Nerve_Pvalue',
 'GIANT_medulla_oblongata_ProbabilityOfFunctionalInteraction',
 'GIANT_thyroid_gland_ProbabilityOfFunctionalInteraction',
 'GIANT_blood_vessel_KnownFunctionalInteraction',
 'GIANT_basophil_ProbabilityOfFunctionalInteraction',
 'GIANT_testis_ProbabilityOfFunctionalInteraction',
 'GIANT_glia_KnownFunctionalInteraction',
 'GIANT_all_tissues_KnownFunctionalInteraction',
 'TCGA_PRAD_Pvalue',
 'GIANT_occipital_pole_KnownFunctionalInteraction',
 'GTEx_Breast_Pvalue',
 'TCGA_COAD_Correlation',
 'GIANT_large_intestine_KnownFunctionalInteraction',
 'GIANT_megakaryocyte_KnownFunctionalInteraction',
 'GIANT_skeletal_muscle_ProbabilityOfFunctionalInteraction',
 'GTEx_Adipose_Tissue_Pvalue',
 'GIANT_cardiac_muscle_ProbabilityOfFunctionalInteraction',
 'GIANT_telencephalon_ProbabilityOfFunctionalInteraction',
 'GIANT_oviduct_KnownFunctionalInteraction',
 'GIANT_cecum_KnownFunctionalInteraction',
 'GIANT_corpus_callosum_Prob

In [56]:
column_map = dict()
for col1 in bg_columns_set:
    col = col1.split('_')
    source = col[0]
    column_map[col1] = {
        "source": source,
        "type": None,
        "tissue": None,
        "cancer_type": None,
    }
    if source == 'BioGRID':
        column_map[col1]["type"] = "_".join(col[1:])
    if source == 'GIANT' or source == 'GTEx':
        tissue = dict()
        bg_term = "_".join(col[1:-1]).lower()
        tissue['bg_label'] = bg_term
        tissue['uberon'] = id2term(var=bg_term, key='bg_label', return_key='uberon_id', json_blob=uberon_bto_map)
        tissue['bto'] = id2term(var=bg_term, key='bg_label', return_key='bto_id', json_blob=uberon_bto_map)
        column_map[col1]['bg_term'] = bg_term
        column_map[col1]['tissue'] = tissue
        column_map[col1]['type'] = col[-1]
    if source == 'TCGA':
        cancer_type = "_".join(col[1:-1])
        column_map[col1]['cancer_type'] = cancer_type
        column_map[col1]['type'] = col[-1]
pprint(column_map)

{'BioGRID_Experimental_System': {'cancer_type': None,
                                 'source': 'BioGRID',
                                 'tissue': None,
                                 'type': 'Experimental_System'},
 'BioGRID_Experimental_System_Type': {'cancer_type': None,
                                      'source': 'BioGRID',
                                      'tissue': None,
                                      'type': 'Experimental_System_Type'},
 'BioGRID_Interaction': {'cancer_type': None,
                         'source': 'BioGRID',
                         'tissue': None,
                         'type': 'Interaction'},
 'BioGRID_Throughput': {'cancer_type': None,
                        'source': 'BioGRID',
                        'tissue': None,
                        'type': 'Throughput'},
 'GIANT_adipose_tissue_KnownFunctionalInteraction': {'bg_term': 'adipose_tissue',
                                                     'cancer_type': None,
                

                                                              'tissue': {'bg_label': 'corpus_callosum',
                                                                         'bto': 'BTO:0000615',
                                                                         'uberon': 'UBERON:0002336'},
                                                              'type': 'ProbabilityOfFunctionalInteraction'},
 'GIANT_corpus_luteum_KnownFunctionalInteraction': {'bg_term': 'corpus_luteum',
                                                    'cancer_type': None,
                                                    'source': 'GIANT',
                                                    'tissue': {'bg_label': 'corpus_luteum',
                                                               'bto': 'BTO:0000292',
                                                               'uberon': 'UBERON:0002512'},
                                                    'type': 'KnownFunctionalInteraction'},
 'GIAN

                                                 'source': 'GIANT',
                                                 'tissue': {'bg_label': 'macrophage',
                                                            'bto': 'BTO:0000801',
                                                            'uberon': None},
                                                 'type': 'KnownFunctionalInteraction'},
 'GIANT_macrophage_ProbabilityOfFunctionalInteraction': {'bg_term': 'macrophage',
                                                         'cancer_type': None,
                                                         'source': 'GIANT',
                                                         'tissue': {'bg_label': 'macrophage',
                                                                    'bto': 'BTO:0000801',
                                                                    'uberon': None},
                                                         'type': 'ProbabilityOfFunctionalInter

 'GIANT_uterine_cervix_ProbabilityOfFunctionalInteraction': {'bg_term': 'uterine_cervix',
                                                             'cancer_type': None,
                                                             'source': 'GIANT',
                                                             'tissue': {'bg_label': 'uterine_cervix',
                                                                        'bto': 'BTO:0001421',
                                                                        'uberon': 'UBERON:0000002'},
                                                             'type': 'ProbabilityOfFunctionalInteraction'},
 'GIANT_uterine_endometrium_KnownFunctionalInteraction': {'bg_term': 'uterine_endometrium',
                                                          'cancer_type': None,
                                                          'source': 'GIANT',
                                                          'tissue': {'bg_label': 'uterine_endom

In [57]:
with open('bg_column_map.json', 'w') as outfile:
    json.dump(column_map, outfile)

In [104]:
things = [{'a':'B'},{'a':'B'},{'a':'C'}]

def search_things(key1, key2, value, data):
    return [item for item in data if item[key1][key2] == value]

In [77]:
search_things(key='a', value='B', data=things)

[{'a': 'B'}, {'a': 'B'}]

In [100]:

# map of columns to metadata objects
meta_columns = json.loads(open ('bg_column_map.json').read())

In [92]:
r1 = requests.get('http://biggim.ncats.io/api/interactions/query?columns=GTEx_Testis_Correlation,GTEx_Testis_Pvalue,TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue&ids1=5111,6996,57697,6815,889,7112,2176,1019,5888,5706,5722,1111,112,3333&ids2=5111,6996,57697,6815,889,7112,2176,1019,5888,5706,3333,1111,112,3333&limit=10000&restriction_gt=TCGA_GBM_Correlation,.2,%20GTEx_Brain_Correlation,.2&restriction_join=union&restriction_lt=TCGA_GBM_Pvalue,.05,%20GTEx_Brain_Pvalue,.01&table=BigGIM_70_v1')
r1 = r1.json()

In [97]:
r2 = requests.get('http://biggim.ncats.io/api/interactions/query/status/{}'.format(r1['request_id']))
r2 = r2.json()
r2

{'processed_data': '0B',
 'request_id': '31c409c1-8899-4ca9-8ebc-be145ba842e8',
 'request_uri': ['https://storage.googleapis.com/ncats_bigquery_results/31c409c1-8899-4ca9-8ebc-be145ba842e8000000000000.csv'],
 'rows': 54,
 'size': '3.3 KB',
 'status': 'complete'}

In [192]:
from collections import defaultdict
def pandas2json(request_uri):
    # use pandas to get csv with request uri and serialize into json for return
    pd_df = pd.read_csv(request_uri)
    out_json = json.loads(pd_df.to_json(orient='records'))

    final_json = list()
    for record in out_json:
        d = defaultdict(list)
        new_record = {
            'Gene1': record['Gene1'],
            'Gene2': record['Gene2'],
            'GPID': record['GPID'],
            'GIANT': [],
            'GTEx': [],
            'BioGRID': [],
            'TCGA': []
        }
        sources = {
            'GIANT': [],
            'GTEx': [],
            'BioGRID': [],
            'TCGA': []
        }
        for k, v in record.items():
            if k in meta_columns.keys():
                col = meta_columns[k]
                sources[col['source']].append({
                    col['type']:  v,
                    'cancer_type': col['cancer_type'],
                    'tissue':  col['tissue']
                })
        for sor in list(sources.keys()):
            for l1, l2 in zip(sources[sor], sources[sor][1:]):
                if l1['tissue'] is not None and l2['tissue'] is not None:
                    if l1['tissue']['bg_label'] == l2['tissue']['bg_label']:
                        l1.update(l2)
                        new_record[sor].append(l1)
                if l1['cancer_type'] is not None and l2['cancer_type'] is not None:
                    if l1['cancer_type'] == l2['cancer_type']:
                        l1.update(l2)
                        new_record[sor].append(l1)
        final_json.append(new_record)

    return final_json

In [193]:
pandas2json(request_uri=r2['request_uri'][0])

[{'BioGRID': [],
  'GIANT': [],
  'GPID': 21760000005888,
  'GTEx': [{'Correlation': 0.4106,
    'Pvalue': 7.65,
    'cancer_type': None,
    'tissue': {'bg_label': 'testis',
     'bto': 'BTO:0001363',
     'uberon': 'UBERON:0000473'}},
   {'Correlation': 0.5403,
    'Pvalue': 95.62,
    'cancer_type': None,
    'tissue': {'bg_label': 'brain',
     'bto': 'BTO:0000142',
     'uberon': 'UBERON:0000955'}}],
  'Gene1': 5888,
  'Gene2': 2176,
  'TCGA': [{'Correlation': 0.543,
    'Pvalue': 9.83,
    'cancer_type': 'GBM',
    'tissue': None}]},
 {'BioGRID': [],
  'GIANT': [],
  'GPID': 11110000005888,
  'GTEx': [{'Correlation': 0.2869,
    'Pvalue': 3.87,
    'cancer_type': None,
    'tissue': {'bg_label': 'testis',
     'bto': 'BTO:0001363',
     'uberon': 'UBERON:0000473'}},
   {'Correlation': -0.0131,
    'Pvalue': 0.19,
    'cancer_type': None,
    'tissue': {'bg_label': 'brain',
     'bto': 'BTO:0000142',
     'uberon': 'UBERON:0000955'}}],
  'Gene1': 5888,
  'Gene2': 1111,
  'TCGA': [