In [2]:
import requests
import pandas as pd
pd.options.display.max_colwidth=100

In [3]:
def query_and_format_result(query, endpoint='https://query.wikidata.org/sparql', timeout=120):
    """Run a SPARQL query over HTTP and return the result bindings as a DataFrame.

    Parameters
    ----------
    query : str
        Complete SPARQL query text.
    endpoint : str, optional
        URL of the SPARQL endpoint; defaults to the Wikidata query service.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per result binding and one column per bound variable; each
        cell holds the binding's ``'value'`` string. Empty if nothing matched.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    """
    params = {'query': query, 'format': 'json'}
    response = requests.get(endpoint, params=params, timeout=timeout)
    # Surface HTTP failures (rate limiting, server-side query timeout, syntax
    # errors) directly instead of as an opaque JSON/KeyError further down.
    response.raise_for_status()
    bindings = response.json()['results']['bindings']
    return pd.DataFrame([{k: v['value'] for k, v in item.items()} for item in bindings])
    

### Get the gene sets

In [4]:
# Core-complex gene list: headerless, tab-separated file; the symbols are taken from column 1.
url = "https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_1_core_complex.txt"
# `DataFrame.from_csv` was removed in pandas 1.0; `read_csv` is the supported reader.
core_genes = pd.read_csv(url, sep="\t", header=None)[1].tolist()
# Space-separated, double-quoted symbols, ready to drop into a SPARQL `values` clause.
core_genes_quotes = '"' + '" "'.join(core_genes) + '"'
print(core_genes_quotes)

"FANCA" "FANCB" "FANCC" "FANCE" "FANCF" "FANCG" "FANCL" "FANCM" "FANCD2" "FANCI" "UBE2T"


In [5]:
# Effector-protein gene list: headerless, tab-separated file; the symbols are taken from column 1.
url = "https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_2_effector_proteins.txt"
# `DataFrame.from_csv` was removed in pandas 1.0; `read_csv` is the supported reader.
effector_genes = pd.read_csv(url, sep="\t", header=None)[1].tolist()
# Space-separated, double-quoted symbols, ready to drop into a SPARQL `values` clause.
effector_genes_quotes = '"' + '" "'.join(effector_genes) + '"'
print(effector_genes_quotes)

"BRCA2" "BRIP1" "PALB2" "RAD51C" "SLX4" "ERCC4" "RAD51" "BRCA1" "MAD2L2" "XRCC2" "RFWD3"


In [6]:
# Associated-protein gene list: headerless, tab-separated file; the symbols are taken from column 1.
url = "https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_3_associated_proteins.txt"
# `DataFrame.from_csv` was removed in pandas 1.0; `read_csv` is the supported reader.
ass_genes = pd.read_csv(url, sep="\t", header=None)[1].tolist()
# Space-separated, double-quoted symbols, ready to drop into a SPARQL `values` clause.
ass_genes_quotes = '"' + '" "'.join(ass_genes) + '"'
print(ass_genes_quotes)

"FAAP100" "FAAP24" "FAAP20" "CENPS" "CENPX"


In [7]:
# Union of the three gene lists; the set drops any symbol that occurs in more than one list.
all_genes = set(core_genes) | set(effector_genes) | set(ass_genes)
# Sort before joining: iteration order of a set of strings changes between
# interpreter runs (hash randomization), which would make the generated query
# text differ on every kernel restart.
all_genes_quotes = '"' + '" "'.join(sorted(all_genes)) + '"'

### Exploratory query showing all variants on these genes

In [8]:
# SPARQL template listing every variant attached to a set of genes.
# `**hgnc**` is a placeholder, not SPARQL: the cells below substitute a
# space-separated list of double-quoted gene symbols for it via str.replace.
# Genes are matched on P353 and variants are linked to a gene through P3433.
# NOTE(review): P353 / P3433 are presumably Wikidata's "HGNC gene symbol" and
# "biological variant of" properties — confirm against Wikidata.
query = """select ?geneLabel ?variantLabel ?variant where {
  values ?hgnc {**hgnc**}
  ?gene wdt:P353 ?hgnc .
  ?variant wdt:P3433 ?gene .
  SERVICE wikibase:label {  bd:serviceParam wikibase:language "en" }
}"""

In [9]:
# Variants for the core-complex FA gene symbols only
query_and_format_result(
    query=query.replace("**hgnc**", core_genes_quotes)
)

Unnamed: 0,geneLabel,variant,variantLabel
0,FANCC,http://www.wikidata.org/entity/Q28445146,FANCC LOSS-OF-FUNCTION


In [10]:
# Variants for the union of all three FA gene lists
query_and_format_result(
    query=query.replace("**hgnc**", all_genes_quotes)
)

Unnamed: 0,geneLabel,variant,variantLabel
0,BRCA1,http://www.wikidata.org/entity/Q28381200,BRCA1 P968FS
1,BRCA1,http://www.wikidata.org/entity/Q28444937,BRCA1 LOSS-OF-FUNCTION
2,BRCA1,http://www.wikidata.org/entity/Q28444960,BRCA1 MUTATION
3,BRCA1,http://www.wikidata.org/entity/Q28445085,BRCA1 EXPRESSION
4,BRCA1,http://www.wikidata.org/entity/Q28445090,BRCA1 UNDEREXPRESSION
5,BRCA1,http://www.wikidata.org/entity/Q28532483,BRCA1 TRUNCATING MUTATION
6,BRCA1,http://www.wikidata.org/entity/Q29938723,BRCA1 M1V
7,BRCA1,http://www.wikidata.org/entity/Q29938724,BRCA1 M1I
8,BRCA1,http://www.wikidata.org/entity/Q29938725,BRCA1 C61G
9,BRCA1,http://www.wikidata.org/entity/Q29938726,BRCA1 C64Y


### Get variants on these genes that are negative therapeutic predictors for drugs where the drug is used to treat any kind of cancer

In [11]:
# Rebinds `query` to a stricter SPARQL template (same `**hgnc**` placeholder).
# On top of the gene -> variant join it requires, for each variant:
#   - a P3355 statement `?s` whose value is bound to `?drug`,
#   - a reference URL on that statement (prov:wasDerivedFrom / pr:P854) -> `?ref`,
#   - a P662 value on the drug -> `?cid`,
#   - a P2175 qualifier on the statement -> `?disease`, which must reach
#     wd:Q12078 through zero or more P279 links.
# NOTE(review): per the section heading, P3355 is presumably "negative
# therapeutic predictor" and Q12078 "cancer"; P662 looks like the PubChem CID —
# confirm against Wikidata.
query = """select ?geneLabel ?variantLabel ?variant ?drugLabel ?cid ?diseaseLabel ?ref where {
  values ?hgnc {**hgnc**}
  ?gene wdt:P353 ?hgnc .
  ?variant wdt:P3433 ?gene .
  ?variant p:P3355 ?s .
  ?s ps:P3355 ?drug .
  ?s prov:wasDerivedFrom/pr:P854 ?ref .
  ?drug wdt:P662 ?cid .
  ?s pq:P2175 ?disease .
  ?disease wdt:P279* wd:Q12078 .
  SERVICE wikibase:label {  bd:serviceParam wikibase:language "en" }
}
"""

In [12]:
# Same predictor query, restricted to the core-complex FA gene symbols
query_and_format_result(
    query=query.replace("**hgnc**", core_genes_quotes)
)

In [13]:
# Same predictor query, run over the union of all three FA gene lists
query_and_format_result(
    query=query.replace("**hgnc**", all_genes_quotes)
)

Unnamed: 0,cid,diseaseLabel,drugLabel,geneLabel,ref,variant,variantLabel
0,23725625,breast cancer,olaparib,BRCA2,https://civic.genome.wustl.edu/links/evidence/1776,http://www.wikidata.org/entity/Q28444961,BRCA2 MUTATION
1,23725625,breast cancer,olaparib,BRCA1,https://civic.genome.wustl.edu/links/evidence/1775,http://www.wikidata.org/entity/Q28444960,BRCA1 MUTATION


### What about any kind of predictor for any drug?

In [14]:
# Rebinds `query` to a broader SPARQL template (same `**hgnc**` placeholder).
# `?predictor` ranges over both p:P3355 and p:P3354, and `?p wikibase:claim
# ?predictor` maps the predicate back to its property entity so that `?pLabel`
# names which kind of predictor each row is.
# Unlike the previous template there is no restriction on `?disease` and the
# drug is not required to have a P662 value.
# NOTE(review): `?cid` is still listed in the select clause but is never bound
# in the body, so no `cid` column appears in the results — drop it or add an
# OPTIONAL P662 lookup if the identifier is wanted.
query = """
select ?geneLabel ?variantLabel ?variant ?drugLabel ?cid ?diseaseLabel ?ref ?pLabel where {
  values ?hgnc {**hgnc**}
  values ?predictor {p:P3355 p:P3354}
  ?gene wdt:P353 ?hgnc .
  ?variant wdt:P3433 ?gene .
  ?variant ?predictor ?s .
  ?p wikibase:claim ?predictor .
  ?s ps:P3355|ps:P3354 ?drug .
  ?s prov:wasDerivedFrom/pr:P854 ?ref .
  ?s pq:P2175 ?disease .
  SERVICE wikibase:label {  bd:serviceParam wikibase:language "en" }
}"""

In [15]:
# Any-predictor query, restricted to the core-complex FA gene symbols
query_and_format_result(
    query=query.replace("**hgnc**", core_genes_quotes)
)

Unnamed: 0,diseaseLabel,drugLabel,geneLabel,pLabel,ref,variant,variantLabel
0,pancreatic cancer,melphalan,FANCC,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1307,http://www.wikidata.org/entity/Q28445146,FANCC LOSS-OF-FUNCTION
1,pancreatic cancer,gemcitabine,FANCC,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1307,http://www.wikidata.org/entity/Q28445146,FANCC LOSS-OF-FUNCTION
2,pancreatic cancer,mitomycin,FANCC,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1307,http://www.wikidata.org/entity/Q28445146,FANCC LOSS-OF-FUNCTION
3,pancreatic cancer,chlorambucil,FANCC,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1307,http://www.wikidata.org/entity/Q28445146,FANCC LOSS-OF-FUNCTION
4,pancreatic cancer,cisplatin,FANCC,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1307,http://www.wikidata.org/entity/Q28445146,FANCC LOSS-OF-FUNCTION


In [16]:
# Any-predictor query, run over the union of all three FA gene lists
query_and_format_result(
    query=query.replace("**hgnc**", all_genes_quotes)
)

Unnamed: 0,diseaseLabel,drugLabel,geneLabel,pLabel,ref,variant,variantLabel
0,breast cancer,olaparib,BRCA2,negative therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1776,http://www.wikidata.org/entity/Q28444961,BRCA2 MUTATION
1,breast cancer,olaparib,BRCA1,negative therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1775,http://www.wikidata.org/entity/Q28444960,BRCA1 MUTATION
2,cancer,olaparib,BRCA2,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1371,http://www.wikidata.org/entity/Q28444961,BRCA2 MUTATION
3,cancer,olaparib,BRCA2,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1678,http://www.wikidata.org/entity/Q28444961,BRCA2 MUTATION
4,cancer,olaparib,BRCA2,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1773,http://www.wikidata.org/entity/Q28444961,BRCA2 MUTATION
5,triple-receptor negative breast cancer,cisplatin,BRCA2,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1685,http://www.wikidata.org/entity/Q28444961,BRCA2 MUTATION
6,triple-receptor negative breast cancer,Carboplatin,BRCA2,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1685,http://www.wikidata.org/entity/Q28444961,BRCA2 MUTATION
7,pancreatic cancer,mitomycin,BRCA2,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/1308,http://www.wikidata.org/entity/Q28444938,BRCA2 LOSS-OF-FUNCTION
8,ovarian cancer,olaparib,BRCA1,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/211,http://www.wikidata.org/entity/Q28444937,BRCA1 LOSS-OF-FUNCTION
9,ovarian cancer,olaparib,BRCA1,positive therapeutic predictor,https://civic.genome.wustl.edu/links/evidence/845,http://www.wikidata.org/entity/Q28444937,BRCA1 LOSS-OF-FUNCTION
