## Using Wikidata (data from IUPHAR), find drugs that physically interact with proteins encoded by Fanconi anemia (FA) genes

In [1]:
import requests
import pandas as pd
pd.options.display.max_colwidth=100

In [7]:
def query_and_format_result(query, endpoint='https://query.wikidata.org/sparql', timeout=120):
    """Run a SPARQL SELECT query and return its result bindings as a DataFrame.

    Parameters
    ----------
    query : str
        SPARQL query text, sent to the endpoint as the ``query`` GET parameter.
    endpoint : str, optional
        URL of the SPARQL endpoint. Defaults to the Wikidata query service.
    timeout : float, optional
        Seconds to wait on the server before giving up, so that a stalled
        connection cannot hang the kernel indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per result binding and one column per variable listed in the
        response's ``head.vars``. Each cell holds the binding's ``value``
        string; a variable that is unbound in a row is left missing (NaN).
        A query with no matches returns an empty frame that still carries
        the expected columns.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status code.
    """
    params = {'query': query, 'format': 'json'}
    response = requests.get(endpoint, params=params, timeout=timeout)
    # Surface the HTTP status instead of failing later with an opaque JSON/KeyError.
    response.raise_for_status()
    payload = response.json()
    rows = [
        {var: cell['value'] for var, cell in binding.items()}
        for binding in payload['results']['bindings']
    ]
    # Taking the columns from head.vars keeps the schema even when there are no rows.
    return pd.DataFrame(rows, columns=payload['head']['vars'])

### Load FA genes

In [11]:
# Tab-separated gene list with no header row; the gene symbols are read from
# the column labelled 1 (the second column of the file).
url = "https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_4_all_genes.txt"
# DataFrame.from_csv was removed in pandas 1.0. read_csv with index_col=0
# reproduces its behaviour of using the first column as the index.
fa_genes = list(pd.read_csv(url, sep="\t", header=None, index_col=0)[1])
print(fa_genes)
# Space-separated, double-quoted symbols, ready to drop into a SPARQL VALUES clause.
genes_quotes = '"' + '" "'.join(fa_genes) + '"'
print(genes_quotes)

['FANCA', 'FANCB', 'FANCC', 'FANCE', 'FANCF', 'FANCG', 'FANCL', 'FANCM', 'FANCD2', 'FANCI', 'UBE2T', 'BRCA2', 'BRIP1', 'PALB2', 'RAD51C', 'SLX4', 'ERCC4', 'RAD51', 'BRCA1', 'MAD2L2', 'XRCC2', 'RFWD3', 'FAAP100', 'FAAP24', 'FAAP20', 'CENPS', 'CENPX']
"FANCA" "FANCB" "FANCC" "FANCE" "FANCF" "FANCG" "FANCL" "FANCM" "FANCD2" "FANCI" "UBE2T" "BRCA2" "BRIP1" "PALB2" "RAD51C" "SLX4" "ERCC4" "RAD51" "BRCA1" "MAD2L2" "XRCC2" "RFWD3" "FAAP100" "FAAP24" "FAAP20" "CENPS" "CENPX"


### As an example, find all drugs that physically interact with HTR3A

In [12]:
# Example query for a single gene symbol (HTR3A). The graph pattern walks:
#   HGNC gene symbol (P353) -> gene item
#   gene item "encodes" (P688) -> protein item
#   drug item "physically interacts with" (P129) -> protein item
# The label service fills in the ?drugLabel / ?protLabel columns.
query = "\n".join([
    'SELECT ?drug ?drugLabel ?prot ?protLabel WHERE {',
    '  values ?hgnc {"HTR3A"}',
    '  ?gene wdt:P353 ?hgnc .',
    '  ?gene wdt:P688 ?prot .',
    '  ?drug wdt:P129 ?prot .',
    '  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }',
    '}',
])
query_and_format_result(query)

Unnamed: 0,drug,drugLabel,prot,protLabel
0,http://www.wikidata.org/entity/Q41576,cocaine,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
1,http://www.wikidata.org/entity/Q167934,pathogen,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
2,http://www.wikidata.org/entity/Q410011,ondansetron,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
3,http://www.wikidata.org/entity/Q416463,alosetron,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
4,http://www.wikidata.org/entity/Q418459,bilobalide,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
5,http://www.wikidata.org/entity/Q421095,metoclopramide,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
6,http://www.wikidata.org/entity/Q421268,tubocurarine,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
7,http://www.wikidata.org/entity/Q422229,diltiazem,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
8,http://www.wikidata.org/entity/Q2979523,ramosetron,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A
9,http://www.wikidata.org/entity/Q3563148,vortioxetine,http://www.wikidata.org/entity/Q21108125,5-hydroxytryptamine receptor 3A


In [13]:
## Same query as above, but for all of the FA genes at once

In [14]:
# Same graph pattern as the single-gene example, but the VALUES clause is
# filled with every quoted FA gene symbol held in `genes_quotes`.
query = "\n".join([
    'SELECT ?drug ?drugLabel ?prot ?protLabel WHERE {',
    '  values ?hgnc {' + genes_quotes + '}',
    '  ?gene wdt:P353 ?hgnc .',
    '  ?gene wdt:P688 ?prot .',
    '  ?drug wdt:P129 ?prot .',
    '  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }',
    '}',
])
query_and_format_result(query)

In [None]:
## No results: the query returned no drug-protein interactions for any of the FA genes :(