In [1]:
import requests

tk_bio = "https://kba.ncats.io/statements"
riluzole = "wd:Q415744"

# Predicate names that mark a statement's object as a condition of interest.
treatment_predicates = ('therapeutic area', 'medical condition treated')

params = {
    'c': riluzole
}

solr_request = requests.get(tk_bio, params=params)
# Fail loudly on an HTTP error instead of trying to parse an error body as JSON.
solr_request.raise_for_status()
response = solr_request.json()

# Collect the object ids of every returned statement whose predicate is
# one of the treatment predicates above.
disease_set = {doc['object']['id'] for doc in response
               if doc['predicate']['name'] in treatment_predicates}

disease_set

{'wd:Q206901'}

In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Get OMIM ids from wikidata (property P492) for each disease in disease_set

omim_set = set()

sparql = SPARQLWrapper("http://query.wikidata.org/sparql")
# The return format is the same for every query, so configure it once.
sparql.setReturnFormat(JSON)

for disease in disease_set:

    # `disease` is inserted verbatim as the subject of the triple pattern;
    # the doubled braces are literal braces escaped for str.format.
    omim_xrefs = """
        SELECT ?disease
        WHERE
        {{
            {} wdt:P492 ?disease
        }}
        LIMIT 20
    """.format(disease)

    sparql.setQuery(omim_xrefs)
    results = sparql.query().convert()

    # Store each returned value as a prefixed "OMIM:<value>" id.
    for result in results["results"]["bindings"]:
        omim_set.add("OMIM:{}".format(result["disease"]["value"]))

omim_set


{'OMIM:105400'}

In [3]:
import biolink_client
from biolink_client.api_client import ApiClient
from biolink_client.rest import ApiException

# Look up the phenotype associations of every disease id in omim_set

MONARCH_API = "https://api.monarchinitiative.org/api"

# Build the BioentityApi wrapper on top of a client pointed at MONARCH_API.
client = ApiClient(host=MONARCH_API)
client.set_default_header('Content-Type', 'text/plain')
api_instance = biolink_client.BioentityApi(client)

# Map each disease id to the `objects` attribute of its association response
# (rows=500 is passed with every request).
disease_hpo_map = {
    omim_id: api_instance.get_disease_phenotype_associations(omim_id, rows=500).objects
    for omim_id in omim_set
}

disease_hpo_map

{'OMIM:105400': ['HP:0001257',
  'HP:0001324',
  'HP:0001347',
  'HP:0002314',
  'HP:0002380',
  'HP:0002398',
  'HP:0003202',
  'HP:0003394',
  'HP:0007024',
  'HP:0007354',
  'HP:0010535']}

In [4]:
import requests
import pandas as pd

# Get similar diseases

OWLSIM_API = "http://owlsim3.monarchinitiative.org/api"

# Use phenodigm algorithm, keeping matches that score at least 50/100
matcher = 'phenodigm'
score_cutoff = 50

# Only matches whose id starts with one of these prefixes are kept.
disease_prefixes = ("OMIM", "Orphanet", "DOID")

result_set = []

# The endpoint does not depend on the disease, so build it once.
url = "{}/match/{}".format(OWLSIM_API, matcher)

for disease, profile in disease_hpo_map.items():
    # `profile` is the value stored for this disease in disease_hpo_map;
    # it is sent as the `id` query parameter.
    params = {
        'id': profile
    }
    req = requests.get(url, params=params)
    # Fail loudly on an HTTP error instead of parsing an error body as JSON.
    req.raise_for_status()
    owlsim_results = req.json()
    for match in owlsim_results['matches']:
        try:
            if match['matchId'].startswith(disease_prefixes) \
                    and match['rawScore'] >= score_cutoff:
                result = [disease, match['matchId'],
                          match['matchLabel'], match['rawScore']]
                result_set.append(result)
        except TypeError:
            # Raised when rawScore cannot be compared with the numeric cutoff
            # (the original note says this happens when the score is NaN);
            # such matches are skipped.
            continue

# Create a table of query disease, matched disease, match label, and sim score
column_names = ['query_disease', 'match_disease', 'match_label', 'sim_score']
result_frame = pd.DataFrame(data=result_set, columns=column_names)

result_frame.head(30)

Unnamed: 0,query_disease,match_disease,match_label,sim_score
0,OMIM:105400,OMIM:105400,Amyotrophic lateral sclerosis type 1,100.0
1,OMIM:105400,OMIM:614808,Amyotrophic Lateral Sclerosis 18,62.304265
2,OMIM:105400,OMIM:613954,Amyotrophic Lateral Sclerosis 14 With or Witho...,60.378501
3,OMIM:105400,OMIM:312920,"Spastic Paraplegia 2, X-Linked",60.187194
4,OMIM:105400,Orphanet:85162,Facial onset sensory and motor neuronopathy,59.714106
5,OMIM:105400,OMIM:603563,hereditary spastic paraplegia 8,59.450566
6,OMIM:105400,OMIM:182600,Spastic paraplegia 3,58.100485
7,OMIM:105400,OMIM:182601,"Spastic paraplegia 4, autosomal dominant",57.087924
8,OMIM:105400,OMIM:607259,"Spastic Paraplegia 7, Autosomal Recessive",57.087924
9,OMIM:105400,OMIM:600363,hereditary spastic paraplegia 6,55.833389
