In [320]:
import requests
from pprint import pprint
import json
import pandas as pd

In [321]:
uberon_bto_map = json.loads(open ('bto_uberon_bg.json').read())

In [242]:
def id2term(var, key, return_key, json_blob):
    """
    check json map of uberbon, bto an bg terms for id
    :param var: identifier
    :param key: source of identifier
    :param return_key: key to return in matched object
    :param json_blob: json to search
    :return: bg term name if mapping to that id exists, input var if no mapping exists
    """
    result = None
    for obj in json_blob:
        if obj[key] == var:
            result = obj[return_key]
    return result

In [243]:
tables = requests.get('http://127.0.0.1:5000/metadata/table').json()

In [244]:
table_names = list()
for table in tables:
    table_names.append(table['name'])
table_names

['BigGIM_70_v1',
 'BigGIM_80_v1',
 'BigGIM_90_v1',
 'BigGIM_v1',
 'CRC_70_v1',
 'CRC_80_v1',
 'CRC_90_v1',
 'CRC_v1',
 'FA_70_v1',
 'FA_80_v1',
 'FA_90_v1',
 'FA_v1']

In [245]:
all_columns = list()
sep_columns = dict()
for tn in table_names:
    sep_columns[tn] = list()
    bg_table = requests.get('http://127.0.0.1:5000/metadata/table/{}'.format(tn)).json()
    for column in bg_table['columns']:
        all_columns.append(column['name'])
        sep_columns[tn].append(column['name'])


In [246]:
bg_columns_set = list(set(all_columns))

In [247]:
for cs in bg_columns_set:
    if 'BioGRID' in cs:
        print(cs)

BioGRID_Experimental_System_Type
BioGRID_Interaction
BioGRID_Throughput
BioGRID_Experimental_System


In [248]:
bg_columns_set

['GIANT_tear_gland_ProbabilityOfFunctionalInteraction',
 'GTEx_Nerve_Pvalue',
 'GIANT_medulla_oblongata_ProbabilityOfFunctionalInteraction',
 'GIANT_thyroid_gland_ProbabilityOfFunctionalInteraction',
 'GIANT_blood_vessel_KnownFunctionalInteraction',
 'GIANT_basophil_ProbabilityOfFunctionalInteraction',
 'GIANT_testis_ProbabilityOfFunctionalInteraction',
 'GIANT_glia_KnownFunctionalInteraction',
 'GIANT_all_tissues_KnownFunctionalInteraction',
 'TCGA_PRAD_Pvalue',
 'GIANT_occipital_pole_KnownFunctionalInteraction',
 'GTEx_Breast_Pvalue',
 'TCGA_COAD_Correlation',
 'GIANT_large_intestine_KnownFunctionalInteraction',
 'GIANT_megakaryocyte_KnownFunctionalInteraction',
 'GIANT_skeletal_muscle_ProbabilityOfFunctionalInteraction',
 'GTEx_Adipose_Tissue_Pvalue',
 'GIANT_cardiac_muscle_ProbabilityOfFunctionalInteraction',
 'GIANT_telencephalon_ProbabilityOfFunctionalInteraction',
 'GIANT_oviduct_KnownFunctionalInteraction',
 'GIANT_cecum_KnownFunctionalInteraction',
 'GIANT_corpus_callosum_Prob

In [249]:
column_map = dict()
for col1 in bg_columns_set:
    col = col1.split('_')
    source = col[0]
    column_map[col1] = {
        "source": source,
        "type": None,
        "tissue": None,
        "cancer_type": None,
    }
    if source == 'BioGRID':
        column_map[col1]["type"] = "_".join(col[1:])
    if source == 'GIANT' or source == 'GTEx':
        tissue = dict()
        bg_term = "_".join(col[1:-1]).lower()
        tissue['bg_label'] = bg_term
        tissue['uberon'] = id2term(var=bg_term, key='bg_label', return_key='uberon_id', json_blob=uberon_bto_map)
        tissue['bto'] = id2term(var=bg_term, key='bg_label', return_key='bto_id', json_blob=uberon_bto_map)
        column_map[col1]['bg_term'] = bg_term
        column_map[col1]['tissue'] = tissue
        column_map[col1]['type'] = col[-1]
    if source == 'TCGA':
        cancer_type = "_".join(col[1:-1])
        column_map[col1]['cancer_type'] = cancer_type
        column_map[col1]['type'] = col[-1]
pprint(column_map)

{'BioGRID_Experimental_System': {'cancer_type': None,
                                 'source': 'BioGRID',
                                 'tissue': None,
                                 'type': 'Experimental_System'},
 'BioGRID_Experimental_System_Type': {'cancer_type': None,
                                      'source': 'BioGRID',
                                      'tissue': None,
                                      'type': 'Experimental_System_Type'},
 'BioGRID_Interaction': {'cancer_type': None,
                         'source': 'BioGRID',
                         'tissue': None,
                         'type': 'Interaction'},
 'BioGRID_Throughput': {'cancer_type': None,
                        'source': 'BioGRID',
                        'tissue': None,
                        'type': 'Throughput'},
 'GIANT_adipose_tissue_KnownFunctionalInteraction': {'bg_term': 'adipose_tissue',
                                                     'cancer_type': None,
                

                                                     'source': 'GIANT',
                                                     'tissue': {'bg_label': 'artery',
                                                                'bto': 'BTO:0000573',
                                                                'uberon': 'UBERON:0001637'},
                                                     'type': 'ProbabilityOfFunctionalInteraction'},
 'GIANT_astrocyte_KnownFunctionalInteraction': {'bg_term': 'astrocyte',
                                                'cancer_type': None,
                                                'source': 'GIANT',
                                                'tissue': {'bg_label': 'astrocyte',
                                                           'bto': 'BTO:0000099',
                                                           'uberon': None},
                                                'type': 'KnownFunctionalInteraction'},
 'GIANT_astrocyte_Probabili

                                                                'tissue': {'bg_label': 'cerebellar_cortex',
                                                                           'bto': 'BTO:0000043',
                                                                           'uberon': 'UBERON:0002129'},
                                                                'type': 'ProbabilityOfFunctionalInteraction'},
 'GIANT_cerebellum_KnownFunctionalInteraction': {'bg_term': 'cerebellum',
                                                 'cancer_type': None,
                                                 'source': 'GIANT',
                                                 'tissue': {'bg_label': 'cerebellum',
                                                            'bto': 'BTO:0000232',
                                                            'uberon': 'UBERON:0002037'},
                                                 'type': 'KnownFunctionalInteraction'},
 'GIANT_cerebellum_Prob

                                                                'cancer_type': None,
                                                                'source': 'GIANT',
                                                                'tissue': {'bg_label': 'medulla_oblongata',
                                                                           'bto': 'BTO:0000041',
                                                                           'uberon': 'UBERON:0001896'},
                                                                'type': 'ProbabilityOfFunctionalInteraction'},
 'GIANT_megakaryocyte_KnownFunctionalInteraction': {'bg_term': 'megakaryocyte',
                                                    'cancer_type': None,
                                                    'source': 'GIANT',
                                                    'tissue': {'bg_label': 'megakaryocyte',
                                                               'bto': 'BTO:0000843',
           

                                                     'cancer_type': None,
                                                     'source': 'GIANT',
                                                     'tissue': {'bg_label': 'neuron',
                                                                'bto': 'BTO:0000938',
                                                                'uberon': None},
                                                     'type': 'ProbabilityOfFunctionalInteraction'},
 'GIANT_neutrophil_KnownFunctionalInteraction': {'bg_term': 'neutrophil',
                                                 'cancer_type': None,
                                                 'source': 'GIANT',
                                                 'tissue': {'bg_label': 'neutrophil',
                                                            'bto': 'BTO:0000130',
                                                            'uberon': None},
                                            

                                                     'tissue': {'bg_label': 'uterus',
                                                                'bto': 'BTO:0001424',
                                                                'uberon': 'UBERON:0000995'},
                                                     'type': 'ProbabilityOfFunctionalInteraction'},
 'GIANT_vascular_endothelial_cell_KnownFunctionalInteraction': {'bg_term': 'vascular_endothelial_cell',
                                                                'cancer_type': None,
                                                                'source': 'GIANT',
                                                                'tissue': {'bg_label': 'vascular_endothelial_cell',
                                                                           'bto': 'BTO:0001854',
                                                                           'uberon': None},
                                                          

In [322]:
with open('bg_column_map.json', 'w') as outfile:
    json.dump(column_map, outfile)

In [323]:
# map of columns to metadata objects
meta_columns = json.loads(open ('bg_column_map.json').read())

In [253]:
r1 = requests.get('http://biggim.ncats.io/api/interactions/query?columns=GTEx_Testis_Correlation,GTEx_Testis_Pvalue,TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue&ids1=5111,6996,57697,6815,889,7112,2176,1019,5888,5706,5722,1111,112,3333&ids2=5111,6996,57697,6815,889,7112,2176,1019,5888,5706,3333,1111,112,3333&limit=10000&restriction_gt=TCGA_GBM_Correlation,.2,%20GTEx_Brain_Correlation,.2&restriction_join=union&restriction_lt=TCGA_GBM_Pvalue,.05,%20GTEx_Brain_Pvalue,.01&table=BigGIM_70_v1')
r1 = r1.json()

In [254]:
r2 = requests.get('http://biggim.ncats.io/api/interactions/query/status/{}'.format(r1['request_id']))
r2 = r2.json()
r2

{'processed_data': '0B',
 'request_id': '2b009ac2-4e56-4db7-ae27-9473a82cac92',
 'request_uri': ['https://storage.googleapis.com/ncats_bigquery_results/2b009ac2-4e56-4db7-ae27-9473a82cac92000000000000.csv'],
 'rows': 54,
 'size': '3.3 KB',
 'status': 'complete'}

In [197]:
ex_q1 ='http://biggim.ncats.io/api/interactions/query?columns=BioGRID_Interaction,BioGRID_Experimental_System_Type,BioGRID_Throughput,BioGRID_Experimental_System&ids1=5111,6996,57697,6815,889,7112,2176,1019,5888,5706,5722,1111,112,3333&ids2=5111,6996,57697,6815,889,7112,2176,1019,5888,5706,3333,1111,112,3333&limit=10000&restriction_gt=TCGA_GBM_Correlation,.2,%20GTEx_Brain_Correlation,.2&restriction_join=union&restriction_lt=TCGA_GBM_Pvalue,.05,%20GTEx_Brain_Pvalue,.01&table=BigGIM_70_v1'
r3 = requests.get(ex_q1)
r3 = r3.json()
r3

{'request_id': '514592d5-f6e9-4dc3-8ff9-a39b1b0ef1ff', 'status': 'submitted'}

In [198]:
r4 = requests.get('http://biggim.ncats.io/api/interactions/query/status/{}'.format(r3['request_id']))
r4 = r4.json()
r4

{'processed_data': '0B',
 'request_id': '514592d5-f6e9-4dc3-8ff9-a39b1b0ef1ff',
 'request_uri': ['https://storage.googleapis.com/ncats_bigquery_results/514592d5-f6e9-4dc3-8ff9-a39b1b0ef1ff000000000000.csv'],
 'rows': 54,
 'size': '2.3 KB',
 'status': 'complete'}

In [259]:
r3_pd = pd.read_csv(r4['request_uri'][0])

In [260]:
r3_pd

Unnamed: 0,GPID,Gene1,Gene2,BioGRID_Interaction,BioGRID_Experimental_System_Type,BioGRID_Throughput,BioGRID_Experimental_System
0,58880000007112,7112,5888,,,,
1,58880000057697,57697,5888,,,,
2,57060000006815,6815,5706,,,,
3,57060000007112,7112,5706,,,,
4,57060000057697,57697,5706,,,,
5,57060000006996,6996,5706,,,,
6,69960000007112,7112,6996,,,,
7,69960000057697,57697,6996,,,,
8,11110000002176,2176,1111,,,,
9,11110000006815,6815,1111,,,,


In [365]:

def remove_kv_pair(obj, key):
    obj1 = obj.copy()
    obj1.pop(key)
    return obj1

def pandas2json(request_uri):
    # use pandas to get csv with request uri and serialize into json for return
    pd_df = pd.read_csv(request_uri)
    out_json = json.loads(pd_df.to_json(orient='records'))
    final_json = list()
    for record in out_json:
        d = defaultdict(list)
        new_record = {
            'Gene1': record['Gene1'],
            'Gene2': record['Gene2'],
            'GPID': record['GPID'],
            'interactions': []
        }
        sources = {
            'BioGRID': defaultdict(list),
            'TCGA': defaultdict(list),
            'GTEx': defaultdict(list),
            'GIANT': defaultdict(list)
        }
        for k, v in record.items():
            if k in meta_columns.keys() and v is not None:
                col = meta_columns[k]
                int_source = col['source']
                if int_source == 'BioGRID' and isinstance(v, str):
                    v = v.split(',')
                    v = ",".join(set(v))
                if col['tissue'] is not None:
                    col[col['type']] = v
                    new_col = remove_kv_pair(col, 'type')
                    sources[int_source][col['tissue']['bg_label']].append(new_col)
                if col['cancer_type'] is not None:
                    col[col['type']] = v
                    new_col = remove_kv_pair(col, 'type')
                    sources[int_source][col['cancer_type']].append(new_col)
        for skey in sources.keys():
            for k,v in sources[skey].items():
                for vobj in v[1:]:
                    v[0].update(vobj)
                new_record['interactions'].append(v[0])
        final_json.append(new_record)
    return final_json

In [366]:
pandas2json(request_uri=r2['request_uri'][0])

[{'GPID': 21760000005888,
  'Gene1': 5888,
  'Gene2': 2176,
  'interactions': [{'Correlation': 0.543,
    'Pvalue': 9.83,
    'cancer_type': 'GBM',
    'source': 'TCGA',
    'tissue': None},
   {'Correlation': 0.4106,
    'Pvalue': 7.65,
    'bg_term': 'testis',
    'cancer_type': None,
    'source': 'GTEx',
    'tissue': {'bg_label': 'testis',
     'bto': 'BTO:0001363',
     'uberon': 'UBERON:0000473'}},
   {'Correlation': 0.5403,
    'Pvalue': 95.62,
    'bg_term': 'brain',
    'cancer_type': None,
    'source': 'GTEx',
    'tissue': {'bg_label': 'brain',
     'bto': 'BTO:0000142',
     'uberon': 'UBERON:0000955'}}]},
 {'GPID': 11110000005888,
  'Gene1': 5888,
  'Gene2': 1111,
  'interactions': [{'Correlation': 0.653,
    'Pvalue': 15.2,
    'cancer_type': 'GBM',
    'source': 'TCGA',
    'tissue': None},
   {'Correlation': 0.2869,
    'Pvalue': 3.87,
    'bg_term': 'testis',
    'cancer_type': None,
    'source': 'GTEx',
    'tissue': {'bg_label': 'testis',
     'bto': 'BTO:0001363'

In [273]:
meta_columns

{'BioGRID_Experimental_System': {'cancer_type': None,
  'source': 'BioGRID',
  'tissue': None,
  'type': 'Experimental_System'},
 'BioGRID_Experimental_System_Type': {'cancer_type': None,
  'source': 'BioGRID',
  'tissue': None,
  'type': 'Experimental_System_Type'},
 'BioGRID_Interaction': {'cancer_type': None,
  'source': 'BioGRID',
  'tissue': None,
  'type': 'Interaction'},
 'BioGRID_Throughput': {'cancer_type': None,
  'source': 'BioGRID',
  'tissue': None,
  'type': 'Throughput'},
 'GIANT_adipose_tissue_KnownFunctionalInteraction': {'bg_term': 'adipose_tissue',
  'cancer_type': None,
  'source': 'GIANT',
  'tissue': {'bg_label': 'adipose_tissue',
   'bto': 'BTO:0001487',
   'uberon': 'UBERON:0001013'},
  'type': 'KnownFunctionalInteraction'},
 'GIANT_adipose_tissue_ProbabilityOfFunctionalInteraction': {'bg_term': 'adipose_tissue',
  'cancer_type': None,
  'source': 'GIANT',
  'tissue': {'bg_label': 'adipose_tissue',
   'bto': 'BTO:0001487',
   'uberon': 'UBERON:0001013'},
  'type

In [345]:
th = {
    'a': "itsa",
    'b': "itsb"
}

In [346]:
remove_kv_pair(th, 'a')

{'b': 'itsb'}

In [347]:
th

{'a': 'itsa', 'b': 'itsb'}