In [48]:
import requests
import csv
from pprint import pprint
from io import StringIO
import pandas as pd

In [10]:
# methods for various utilities
# fetch a tab-separated gene set from GitHub and convert it to a list of dicts
def gather_gene_sets(url, timeout=60):
    """Download a tab-separated gene set and return it as a list of records.

    The first column of every row is stored under ``'curie'`` and the second
    under ``'name'``; any further columns are ignored. Blank lines in the
    file are skipped.

    Args:
        url: Location of the tab-separated text file to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list[dict]: One ``{'curie': ..., 'name': ...}`` dict per non-blank
        row, in file order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If a non-blank row has fewer than two columns.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as gene rows.
    r.raise_for_status()
    reader = csv.reader(StringIO(r.text), delimiter='\t')
    dict_rows = list()
    for row in reader:
        # csv.reader yields an empty list for a blank line; indexing it
        # would raise IndexError, so skip such rows.
        if not row:
            continue
        dict_rows.append(
            {
                'curie': row[0],
                'name': row[1]
            }
        )
    return dict_rows

In [11]:
# Core FA gene set: location of the tab-separated file, then the parsed
# list of {'curie', 'name'} records used by the cells below.
FA_1_core_complex = "https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_1_core_complex.txt"
fa_genes = gather_gene_sets(url=FA_1_core_complex)

In [51]:
#  return gene disease association data from biolink
def query_biolink_gene_disease(gene_curie, timeout=60):
    """Request the disease associations of one gene from the Monarch API.

    Args:
        gene_curie: Gene identifier inserted into the request path,
            e.g. ``'NCBIGene:2175'``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    bl_url = 'https://api.monarchinitiative.org/api/bioentity/gene/{}/diseases/'
    params = {
        'fetch_objects': True,
    }
    r = requests.get(url=bl_url.format(gene_curie), params=params, timeout=timeout)
    # Surface HTTP errors here rather than as a JSON decode failure or a
    # missing-key error in the caller.
    r.raise_for_status()
    return r.json()

In [1]:
# relation (predicate) CURIE -> human-readable label
term_map_reversed = dict([
    ("GENO:0000840", "pathogenic"),
    ("GENO:0000841", "likely pathogenic"),
    ("GENO:0000843", "benign"),
    ("GENO:0000844", "likely benign"),
    ("GENO:0000845", "uncertain significance"),
])

In [54]:
# Column order must match the order of the values in each row built below.
# Defined before the loop so the DataFrame can still be built (empty) when
# fa_genes has no entries.
column_names = ['gene_name', 'gene_curie', 'variant_name', 'variant_curie',
                'relation_label', 'relation_curie', 'disease_label', 'disease_curie']
result_set = []
# iterate through fa genes
for gene in fa_genes:
    bl_dat = query_biolink_gene_disease(gene['curie'])
    for assoc in bl_dat['associations']:
        edges = assoc['evidence_graph']['edges']
        nodes = assoc['evidence_graph']['nodes']
        # create map of node ids and labels
        node_map = {node['id']: node['lbl'] for node in nodes}
        # generate table of gene -> variant -> disease associations
        for edge in edges:
            # keep only edges whose predicate is one of the mapped relation terms
            if edge['pred'] in term_map_reversed:
                row = [
                    gene['name'],
                    gene['curie'],
                    node_map[edge['sub']],
                    edge['sub'],
                    term_map_reversed[edge['pred']],
                    edge['pred'],
                    node_map[edge['obj']],
                    edge['obj'],
                ]
                result_set.append(row)
result_frame = pd.DataFrame(data=result_set, columns=column_names)
result_frame.head(30)

Unnamed: 0,gene_name,gene_curie,variant_name,variant_curie,relation_label,relation_curie,disease_label,disease_curie
0,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.1115_1118delTTGG (p.Val37...,ClinVarVariant:3440,pathogenic,GENO:0000840,"Fanconi Anemia, Complementation Group a",OMIM:227650
1,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.1615delG (p.Asp539Thrfs),ClinVarVariant:3443,pathogenic,GENO:0000840,"Fanconi Anemia, Complementation Group a",OMIM:227650
2,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.4015delC (p.Leu1339Serfs),ClinVarVariant:208638,likely pathogenic,GENO:0000841,"Fanconi Anemia, Complementation Group a",OMIM:227650
3,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.513G>A (p.Trp171Ter),ClinVarVariant:3447,pathogenic,GENO:0000840,"Fanconi Anemia, Complementation Group a",OMIM:227650
4,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.2839dupT (p.Ser947Phefs),ClinVarVariant:188383,likely pathogenic,GENO:0000841,"Fanconi Anemia, Complementation Group a",OMIM:227650
5,FANCA,NCBIGene:2175,"FANCA, IVS7DS, G-A, +5",ClinVarVariant:192385,pathogenic,GENO:0000840,"Fanconi Anemia, Complementation Group a",OMIM:227650
6,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.894_1006del113 (p.Trp298C...,ClinVarVariant:3442,pathogenic,GENO:0000840,"Fanconi Anemia, Complementation Group a",OMIM:227650
7,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.295C>T (p.Gln99Ter),ClinVarVariant:370361,pathogenic,GENO:0000840,"Fanconi Anemia, Complementation Group a",OMIM:227650
8,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.811C>T (p.Gln271Ter),ClinVarVariant:371093,likely pathogenic,GENO:0000841,"Fanconi Anemia, Complementation Group a",OMIM:227650
9,FANCA,NCBIGene:2175,NM_000135.2(FANCA):c.154C>T (p.Arg52Ter),ClinVarVariant:371668,pathogenic,GENO:0000840,"Fanconi Anemia, Complementation Group a",OMIM:227650
