In [1]:
import json
from pprint import pprint

import pandas as pd
import requests
from ontobio.ontol_factory import OntologyFactory

In [2]:
# Retrieve the tissue names from the BigGIM API (these are BTO ontology term labels
# with underscores in place of ' ' and '-').
bg_response = requests.get('http://biggim.ncats.io/api/metadata/tissue', timeout=30)
# Fail loudly on an HTTP error rather than with a confusing JSON/KeyError below.
bg_response.raise_for_status()
bg_tissues = bg_response.json()['tissues']
len(bg_tissues)

350

In [3]:
# Load the UBERON and BTO ontologies through ontobio; both are needed below to
# translate BTO ids into UBERON ids via cross-references.
ofactory = OntologyFactory()
ub_ont, bto_ont = (ofactory.create(handle) for handle in ("uberon", "bto"))

In [5]:
# Reverse lookup from BTO label to BTO id, ex: {'brain': 'BTO:0000142'}.
# If several nodes share a label, the last one iterated wins.
bto_nodes = bto_ont.nodes()
bto_term_label = {bto_ont.label(node_id): node_id for node_id in bto_nodes}

In [8]:
# Spot-check the label -> id lookup with a well-known term.
bto_term_label['brain']

'BTO:0000142'

In [10]:
def get_ont_xref(xref_id, ont_factory):
    """
    Find the node of an ontology whose cross-references include ``xref_id``.

    Parameters
    ----------
    xref_id : str
        Identifier to look for among each node's xrefs (ex: 'BTO:0000142').
    ont_factory : object
        Despite the name, this is an ontology object (not a factory); it must
        provide ``nodes()``, ``xrefs(node)`` and ``label(node)``.

    Returns
    -------
    dict
        ``{'id': <node id>, 'label': <node label>}`` for the matching node, or
        both values ``None`` when no node references ``xref_id``. Every node is
        scanned, so if several nodes match, the last one iterated is returned.
    """
    match = {'id': None, 'label': None}
    for candidate in ont_factory.nodes():
        if xref_id not in ont_factory.xrefs(candidate):
            continue
        # Keep overwriting: later matches intentionally replace earlier ones.
        match['id'] = candidate
        match['label'] = ont_factory.label(candidate)
    return match

In [11]:
# Map each BigGIM tissue name to a BTO term/id, then to the UBERON term/id that
# cross-references that BTO id. Tissues with no BTO match keep all-None fields.
bto2bg = list()
for tissue in bg_tissues:
    result = {
        'bg_label': tissue,
        'bto_label': None,
        'bto_id': None,
        'uberon_id': None,
        'uberon_label': None
    }
    # BigGIM writes both ' ' and '-' as '_', so try the spellings in order:
    #   1. all underscores as spaces      ex: adipose_tissue -> adipose tissue
    #   2. first separator as a hyphen    ex: B_lymphocyte   -> B-lymphocyte
    spaced = tissue.replace('_', ' ')
    hyphenated = spaced.replace(' ', '-', 1)
    for candidate in (spaced, hyphenated):
        if candidate not in bto_term_label:
            continue
        bto_id = bto_term_label[candidate]
        uberon = get_ont_xref(xref_id=bto_id, ont_factory=ub_ont)
        result['bto_label'] = candidate
        result['bto_id'] = bto_id
        result['uberon_id'] = uberon['id']
        result['uberon_label'] = uberon['label']
        # First spelling that matches wins; do not fall through to the next one.
        break
    bto2bg.append(result)
# Persist the full mapping (matched and unmatched tissues) as a JSON list of records.
with open('bto_uberon_bg.json', 'w') as outfile:
    json.dump(bto2bg, outfile)

In [12]:
# `bto_df` is not defined in any earlier cell, so displaying it fails on a fresh
# kernel. Build it from the mapping records computed above.
# NOTE(review): the saved output has an 'Unnamed: 0' column, so the original frame
# was presumably read from a CSV not shown here - confirm this rebuild matches it.
bto_df = pd.DataFrame(
    bto2bg,
    columns=['bto_label', 'bto_id', 'bg_label', 'uberon_id', 'uberon_label'],
)
bto_df

Unnamed: 0,bto_label,bto_id,bg_label,uberon_id,uberon_label
0,B-lymphocyte,BTO:0000776,B_lymphocyte,,
1,B-lymphoma cell line,BTO:0001518,B_lymphoma_cell_line,,
2,T-lymphocyte,BTO:0000782,T_lymphocyte,,
3,acute myeloid leukemia cell,BTO:0001545,acute_myeloid_leukemia_cell,,
4,adenocarcinoma cell,BTO:0000604,adenocarcinoma_cell,,
5,adipose tissue,BTO:0001487,adipose_tissue,UBERON:0001013,adipose tissue
6,adrenal cortex,BTO:0000045,adrenal_cortex,UBERON:0001235,adrenal cortex
7,adrenal gland,BTO:0000047,adrenal_gland,UBERON:0002369,adrenal gland
8,adrenal gland cancer cell,BTO:0000592,adrenal_gland_cancer_cell,,
9,adrenocortical carcinoma cell,BTO:0002219,adrenocortical_carcinoma_cell,,
