In [65]:
import csv
import bmt
import requests
import linkml
from linkml_runtime import SchemaView
from collections import defaultdict
from typing import Tuple, List
import requests
import yaml
from pprint import pprint
from biothings_explorer.smartapi_kg.dataload import load_specs
import pandas

tk = bmt.Toolkit('https://raw.githubusercontent.com/biolink/biolink-model/2.2.12/biolink-model.yaml')
tsv_file = open("predicates.tsv", "w")
tsv_attributes = open("attributes.tsv", "w")
tsv_writer = csv.writer(tsv_file, delimiter='\t')
tsv_writer_att = csv.writer(tsv_attributes, delimiter='\t')

In [14]:

def sample_predicates():
    specs = load_specs()
    kp_titles = []
    for spec in specs:
        if 'x-translator' not in spec['info']:
            continue
        if spec['info']['x-translator']['component'] == 'KP':
            kp_titles.append(spec['info']['title'])
        if 'servers' not in spec:
            continue
        else:
            url = spec['servers'][0]['url']
            apititle = '_'.join(spec['info']['title'].split())
            if url.endswith('/'):
                url = url[:-1]
            predicates_url = f'{url}/meta_knowledge_graph'
            print(predicates_url)
            trapi, predicates = get_predicates(predicates_url)
            # if is_trapi:
            if not predicates:
                continue
            else:
                print("got predicates")
                preds, attribs, url = dump_trapi_predicate_results(predicates, predicates_url)
                predicates = set(preds)
                attributes = set(attribs)
                for pred in predicates:
                    tsv_writer.writerow([apititle, url, pred])
                for attrib in attributes:
                    tsv_writer_att.writerow([apititle, url, attrib])
            # else:
            #     dump_smartapi_predicate_results(spec['info']['title'])


In [15]:

def in_biolink_model(predicate):
    is_predicate = tk.is_predicate(predicate)
    return is_predicate


In [16]:

def dump_trapi_predicate_results(predicates, url):
    preds = []
    attribs = []
    for edge in predicates.get('edges'):
        predicate = edge.get('predicate')
        subject = edge.get('subject')
        tobject = edge.get('object')
        if 'attributes' in edge and edge.get('attributes') is not None:
            for attribute in edge.get('attributes'):
                attribs.append(attribute.get('attribute_type_id'))
        preds.append(predicate)
    return preds, attribs, url

            # tsv_writer.writerow([subject, predicate, tobject, url])


    

In [18]:
def get_predicates(pr_url):
    try:
        response = requests.get(pr_url)
        if response.status_code == 200:
            return True, response.json()
        else:
            return False, {}
    except:
        return False, {}


In [19]:

if __name__ == '__main__':
    sample_predicates()


/socio_environmental_exposures_api/v1/meta_knowledge_graph
/cmaq_exposures_api/v1/meta_knowledge_graph
/roadway_proximity_api/v1/meta_knowledge_graph
https://name-resolution-sri.renci.org/meta_knowledge_graph
https://icees-pcd.renci.org/meta_knowledge_graph
got predicates
https://icees-dili-dev.apps.renci.org/meta_knowledge_graph
got predicates
https://www.ebi.ac.uk/proteins/api/meta_knowledge_graph
https://icees-asthma-dev.apps.renci.org/meta_knowledge_graph
got predicates
https://icees-asthma.renci.org/meta_knowledge_graph
got predicates
https://translator-workflow-runner.renci.org/meta_knowledge_graph
https://icees-pcd-dev.apps.renci.org/meta_knowledge_graph
got predicates
https://www.ebi.ac.uk/ols/api/meta_knowledge_graph
https://www.ncbi.nlm.nih.gov/research/bionlp/litvar/api/v1/meta_knowledge_graph
https://icees-dili.renci.org/meta_knowledge_graph
got predicates
https://www.ebi.ac.uk/QuickGO/services/meta_knowledge_graph
https://ontology-kp.apps.renci.org/meta_knowledge_graph
got

In [27]:
sv = SchemaView('https://raw.githubusercontent.com/biolink/biolink-model/master/biolink-model.yaml')

In [35]:
for s in sv.all_slots().values():
    if 'related to' in sv.slot_ancestors(s.name):
        print(s.name)
        print(sv.slot_children(s.name))

related to
['related to at concept level', 'related to at instance level', 'disease has location', 'composed primarily of']
related to at concept level
['superclass of', 'subclass of', 'close match', 'broad match', 'narrow match']
related to at instance level
['associated with', 'opposite of', 'has real world evidence of association with', 'target for', 'has target', 'active in', 'acts upstream of', 'has upstream actor', 'mentions', 'mentioned by', 'contributor', 'has contributor', 'interacts with', 'affects', 'affected by', 'diagnoses', 'is diagnosed by', 'increases amount or activity of', 'decreases amount or activity of', 'gene product of', 'has gene product', 'transcribed to', 'transcribed from', 'translates to', 'translation of', 'coexists with', 'genetic association', 'affects risk for', 'risk affected by', 'contributes to', 'contribution from', 'caused by', 'treated by', 'has phenotype', 'phenotype of', 'occurs in', 'contains process', 'located in', 'location of', 'similar to', 

[]
has excipient
[]
manifestation of
[]
has manifestation
[]
produces
[]
produced by
[]
consumes
[]
consumed by
[]
temporally related to
['precedes', 'preceded by']
precedes
[]
preceded by
[]
directly interacts with
[]
affects expression in
[]
has variant part
[]
variant part of
[]
related condition
[]
is sequence variant of
['is missense variant of', 'is synonymous variant of', 'is nonsense variant of', 'is frameshift variant of', 'is splice site variant of', 'is nearby variant of', 'is non coding variant of']
has sequence variant
['has missense variant', 'has synonymous variant', 'has nonsense variant', 'has frameshift variant', 'has splice site variant', 'has nearby variant', 'has non coding variant']
is missense variant of
[]
has missense variant
[]
is synonymous variant of
[]
has synonymous variant
[]
is nonsense variant of
[]
has nonsense variant
[]
is frameshift variant of
[]
has frameshift variant
[]
is splice site variant of
[]
has splice site variant
[]
is nearby variant of
[

In [55]:
def load_predicate_tree_data(biolink_version: str) -> Tuple[List[dict], str]:
    # Grab Biolink yaml file and load into dictionary tree structures
    response = requests.get(f"https://raw.githubusercontent.com/biolink/biolink-model/"
                            f"{biolink_version if biolink_version else 'master'}/biolink-model.yaml",
                            timeout=10)
    if response.status_code == 200:
        # Build predicates tree
        biolink_model = yaml.safe_load(response.text)
        parent_to_child_dict = defaultdict(set)
        for slot_name_english, info in biolink_model["slots"].items():
            slot_name = convert_predicate_to_trapi_format(slot_name_english)
            parent_name_english = info.get("is_a")
            if parent_name_english:
                parent_name = convert_predicate_to_trapi_format(parent_name_english)
                parent_to_child_dict[parent_name].add(slot_name)
        root_node = {"name": "related_to"}
        predicate_tree = get_tree_node_recursive(root_node, parent_to_child_dict)

        biolink_version = biolink_model["version"]
        return [predicate_tree], biolink_version
    else:
        return [], ""

In [58]:
def get_tree_node_recursive(root_node: dict, parent_to_child_map: dict):
    root_name = root_node["name"]
    children_predicates = parent_to_child_map.get(root_name, [])
    if children_predicates:
        children = []
        for child_predicate in children_predicates:
            child_node = {"name": child_predicate, "parent": root_name}
            child_node = get_tree_node_recursive(child_node, parent_to_child_map)
            children.append(child_node)
        root_node["children"] = sorted(children, key=lambda x: x["name"])
    return root_node

In [66]:
ptd = load_predicate_tree_data("master")
dataframe = pandas.DataFrame()
for d in ptd[0]:
    record = pandas.json_normalize(d)
    dataframe = dataframe.append(record, ignore_index=True)

  dataframe = dataframe.append(record, ignore_index=True)


In [45]:
def convert_predicate_to_trapi_format(english_predicate: str) -> str:
    # Converts a string like "treated by" to "treated_by"
    return english_predicate.replace(' ', '_')