In order to perform the REST API requests, the bearer token and organization to query against must be set below.

In [22]:
import os

import numpy as np
import pandas as pd
import requests

# Bearer token for the REST API. It is read from the BIOBOX_TOKEN environment
# variable so the secret does not have to be saved inside the notebook; when the
# variable is unset this falls back to an empty string, which can be replaced
# here by hand.
TOKEN = os.environ.get("BIOBOX_TOKEN", "")
# Base URL that every request in this notebook is issued against.
BX_REST = "https://rest.biobox.app"
# Organization identifier, sent with every request in the x-biobox-orgid header.
ORG_ID = "bx-3079fc5a-1ed1-4e17-a482-e38efdbfe837"

# One shared session so the authentication headers are set once and reused by
# every request made through `s`.
s = requests.Session()
s.headers.update({
  "Authorization": "Bearer {}".format(TOKEN),
  "x-biobox-orgid": ORG_ID,
  "Content-Type": "application/json"
})

The following block of code shows that the queries from the earlier notebook still function properly.

In [23]:
from string import Template
# Query template; ${rel} is the relationship between the variant association
# and the trait.
query = Template('(Trait { uuid="EFO:0000384" })<- ${rel} <-(VariantAssociation)-> has association ->(Gene { displayName="NOD2" })')


def run_variant_association_query(rel, template=query):
    """Render the template for one relationship type and run it.

    Parameters
    ----------
    rel : str
        Relationship label substituted for ``${rel}`` in the template.
    template : string.Template
        Template to render. Bound at definition time so the helper keeps
        working even if the name ``query`` is later reassigned.

    Returns
    -------
    tuple
        ``(rendered_query, response, parsed_json)``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    rendered = template.substitute(rel=rel)
    print("Running", rendered)
    response = s.post("{}/bioref/query/objects".format(BX_REST), json={
      "query": rendered,
      "class": 'VariantAssociation',
      "pagination": {
          "limit": 100
      }
    })
    # Surface HTTP failures (for example a missing or invalid token) here,
    # instead of a less informative error later when the payload is indexed.
    response.raise_for_status()
    return rendered, response, response.json()


risk_query, risk_response, risk_data = run_variant_association_query("risk of trait")
protective_query, protective_response, protective_data = run_variant_association_query("protective against")
unknown_query, unknown_response, unknown_data = run_variant_association_query("of trait")

# Each response is a list whose first element carries the records under "data";
# concatenate the three record lists into a single frame.
data = risk_data[0]["data"] + protective_data[0]["data"] + unknown_data[0]["data"]
df = pd.DataFrame(data)

Running (Trait { uuid="EFO:0000384" })<- risk of trait <-(VariantAssociation)-> has association ->(Gene { displayName="NOD2" })
Running (Trait { uuid="EFO:0000384" })<- protective against <-(VariantAssociation)-> has association ->(Gene { displayName="NOD2" })
Running (Trait { uuid="EFO:0000384" })<- of trait <-(VariantAssociation)-> has association ->(Gene { displayName="NOD2" })


In [24]:
# Preview the first five rows of the association frame.
df.head(n=5)

Unnamed: 0,pValueExponent,displayName,oddsRatioConfidenceIntervalUpper,uuid,oddsRatioConfidenceIntervalLower,variantFunctionalConsequenceId,functional_consequence,publicationFirstAuthor,score,oddsRatio,publicationYear,pValueMantissa,projectId,direction_on_trait,variantEffect
0,-17,16_50725820_T_A has association and is risk of...,3.297864,9baa05f95ccba8d940a10d74f3ba008dbab5cae3,2.10554,SO_0001627,intron_variant,UKB Neale v2,0.874949,2.635106,2018,2.57236,NEALE,risk,
1,-26,16_50342044_C_T has association and is risk of...,1.159529,cc00603cc1d1173d6ddee98986236446c962f7ad,1.106746,SO_0001628,intergenic_variant,Liu JZ,0.14824,1.13283,2015,9.253188,GCST,risk,
2,-108,16_50724938_G_A has association and is risk of...,1.520751,99621ea47e6921b2003b08971220290cac3496f1,1.420482,SO_0001627,intron_variant,de Lange KM,0.818072,1.469761,2017,2.84,GCST,risk,
3,-8,16_50729867_G_GC has association and is risk o...,2.57422,f7c3f081897f910cb4788260a28019793a57b964,1.570028,SO_0001589,frameshift_variant,FINNGEN_R6,0.603378,2.010372,2022,3.09,FINNGEN,risk,LoF
4,-10,16_50729867_G_GC has association and is risk o...,7.206691,b6d2b22667a2ef21dcc52f7d0672f77fc457a033,2.860654,SO_0001589,frameshift_variant,FINNGEN_R6,0.694576,4.540468,2022,1.37,FINNGEN,risk,LoF


For this use case — finding gene information for a set of diseases and their corresponding ontology relationship types — the diseases and the relationship (edge) types to query are set in the variables below.

In [25]:
# Diseases to query, matched against the displayName of Disease nodes.
dis = [
    "atopic eczema",
    "inflammatory bowel disease",
    "ulcerative colitis",
]
# Gene -> Disease edge types to query. Each edge type appears exactly once so
# that every (disease, edge type) pair produces a single query.
# NOTE(review): the "succeptibility" spelling presumably matches the edge labels
# stored in the database — confirm before correcting it.
dis_gene_edge_types = [
    "is implicated in severity of",
    "is marker for succeptibility to",
    "is implicated via orthology disease progression of",
    "is implicated via orthology resistance to",
    "is implicated via orthology succeptibility to",
    "is implicated in",
    "is implicated via orthology",
    "is marker via orthology disease progression of",
    "is implicated in succeptibility to",
    "is implicated in resistance to",
    "is implicated via orthology penetrance of",
    "is marker via orthology severity of",
    "is marker for resistance to",
    "is marker via orthology succeptibility to",
    "is implicated in onset of",
    # "is not marker for",
    "is marker via orthology",
    "is implicated via orthology severity of",
    "is implicated via orthology onset of",
    "is marker for severity of",
    # "is not implicated in",
    "is marker for disease progression of",
    "is implicated in disease progression of",
    # add rest
]

Using these variables, we can generate a list of all possible queries we'd like to perform. We run the first of them as a test to ensure that we do get a response.

In [26]:
# One query string per (disease, edge type) pair, diseases varying slowest.
# Built with a comprehension so no loop variable overwrites an existing
# notebook-level name such as `query`.
allQueries = [
    f"(Disease {{displayName=\"{disease}\"}})<- {edgetype} <-(Gene)"
    for disease in dis
    for edgetype in dis_gene_edge_types
]

# Testing query
test_query = s.post("{}/bioref/query/objects".format(BX_REST), json={
  "query": allQueries[0],
  "class": 'Gene',
  "pagination": {
      "limit": 100
  }
})
# Raise on a 4xx/5xx status instead of displaying an error payload as if it
# were a query result.
test_query.raise_for_status()
test_query_data = test_query.json()
test_query_data

[{'class': 'Gene',
  'variable': 'g',
  'statement': "MATCH\n(d:`Disease`)<-[_r:`is implicated in severity of`]-(g:`Gene`)\nWHERE ((d.`displayName`='atopic eczema'))RETURN DISTINCT g\nSKIP 0\nLIMIT 100",
  'countStatement': "MATCH\n(d:`Disease`)<-[_r:`is implicated in severity of`]-(g:`Gene`)\nWHERE ((d.`displayName`='atopic eczema'))RETURN count(DISTINCT g)",
  'total': 4,
  'offset': 0,
  'count': 100,
  'data': [{'biotype': 'protein_coding',
    'symbol': 'IL13',
    'level': 'manually annotated loci',
    'displayName': 'IL13',
    'start': 132656263,
    'source': 'HAVANA',
    'chr': '5',
    'uuid': 'ENSG00000169194',
    'strand': 1,
    'ensembl_gene_id_version': 'ENSG00000169194.10',
    'HGNC': 'HGNC:5973',
    'end': 132661110,
    'tag': ['']},
   {'biotype': 'protein_coding',
    'symbol': 'IL18',
    'level': 'verified loci',
    'displayName': 'IL18',
    'start': 112143253,
    'source': 'HAVANA',
    'chr': '11',
    'uuid': 'ENSG00000150782',
    'strand': -1,
    'e