In [29]:
#setup

import urllib.request
import json 
import datetime
import pandas as pd
import numpy as np
import array
import matplotlib
import seaborn

from IPython.core.display import display, HTML

# The MONARCH mart bulk call to retrieve all gene annotations for all human genes.

Note: this call may give server errors from time to time, rerunning usually resolves.

In [4]:
print(datetime.datetime.now().time())

GO_data = []
with urllib.request.urlopen("https://api.monarchinitiative.org/api/mart/gene/function/NCBITaxon:9606") as url:
    GO_data.append(json.loads(url.read().decode()))

print(datetime.datetime.now().time())

14:43:50.148908
14:43:59.326184


In [40]:
GO_data[0][0]

{'objects': ['GO:0016020',
  'GO:0005811',
  'GO:0016192',
  'GO:0005794',
  'GO:0005515',
  'GO:0005768',
  'GO:0005737',
  'GO:0030133',
  'GO:0010008',
  'GO:0045296',
  'GO:0005829'],
 'relation': None,
 'subject': 'UniProtKB:O60664',
 'subject_label': 'PLIN3'}

# Retrieving the core, effector, associated, and all FA gene sets from github.

In [11]:
print(datetime.datetime.now().time())
FA_core = []
with urllib.request.urlopen("https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_1_core_complex.txt") as url:
    FA_core.append(url.read())
print(datetime.datetime.now().time())

print(datetime.datetime.now().time())
FA_effector = []
with urllib.request.urlopen("https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_2_effector_proteins.txt") as url:
    FA_effector.append(url.read())
print(datetime.datetime.now().time())

print(datetime.datetime.now().time())
FA_assoc = []
with urllib.request.urlopen("https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_3_associated_proteins.txt") as url:
    FA_assoc.append(url.read())
print(datetime.datetime.now().time())

print(datetime.datetime.now().time())
FA_all = []
with urllib.request.urlopen("https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_4_all_genes.txt") as url:
    FA_all.append(url.read())
print(datetime.datetime.now().time())

14:47:55.393451
14:47:55.693251
14:47:55.693410
14:47:55.990861
14:47:55.991013
14:47:56.293340
14:47:56.293490
14:47:56.583171


# Collecting GO annotations for core FA genes

In [43]:
FA_core_list= FA_core[0].splitlines()
#print( len(FA_core_list))

gene_GO_dict = dict()

for i in range(0, len(FA_core_list)):
    curlist = FA_core_list[i].split()
    curid = curlist[0].decode("utf-8") 
    print(curid)
    #for j in range(0, len(GO_data[0])):
        #print(GO_data[0][j]['subject_label'] )
        
    qurl = "https://api.monarchinitiative.org/api/bioentity/gene/"+curid+"/function/"
    print(qurl)
    with urllib.request.urlopen(qurl) as thisurl:
        getdata = json.loads(thisurl.read().decode())

        for j in range(0, len(getdata['associations'])):
            #print(getdata['associations'][j]['object']['id'])
            if curlist[1] not in gene_GO_dict:
                gene_GO_dict[curlist[1]] = getdata['associations'][j]['object']['label']
            else:
                curdata = gene_GO_dict[curlist[1]] 
                if curdata.find(getdata['associations'][j]['object']['label']) == -1:
                    gene_GO_dict[curlist[1]] = curdata+", "+getdata['associations'][j]['object']['label']
 


NCBIGene:2175
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:2175/function/
NCBIGene:2187
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:2187/function/
NCBIGene:2176
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:2176/function/
NCBIGene:2178
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:2178/function/
NCBIGene:2188
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:2188/function/
NCBIGene:2189
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:2189/function/
NCBIGene:55120
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:55120/function/
NCBIGene:57697
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:57697/function/
NCBIGene:2177
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:2177/function/
NCBIGene:55215
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:55215/function/
NCBIGene:29089
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:29089/f

# Display GO annotations for core FA genes

In [45]:
my_html = '<table><thead><tr><th>Gene name</th><th>GO term</th></tr></thead><tbody>{}</tbody></table>'
rows = []

for k, v in gene_GO_dict.items():
    rows.append('<tr><td>{}</td><td>{}</td></tr>'.format(k.decode("utf-8") , v))
result = my_html.format(''.join(rows))
display(HTML(result))

Gene name,GO term
FANCA,"protein binding, nucleus, nucleoplasm"
FANCB,"protein binding, nucleoplasm, interstrand cross-link repair, Fanconi anaemia nuclear complex"
FANCC,"myeloid cell homeostasis, protein binding, nucleus, nucleoplasm, cytoplasm, cytosol, DNA repair, nucleotide-excision repair, protein complex assembly"
FANCE,"molecular_function, nucleus, nucleoplasm, interstrand cross-link repair, Fanconi anaemia nuclear complex"
FANCF,"ovarian follicle development, molecular_function, protein binding, nucleoplasm, spermatogenesis, biological_process, protein ubiquitination, interstrand cross-link repair"
FANCG,"cell cycle checkpoint, ovarian follicle development, damaged DNA binding, protein binding, nucleoplasm"
FANCL,"ubiquitin-protein transferase activity, protein binding, nuclear envelope, nucleoplasm"
FANCM,"resolution of meiotic recombination intermediates, DNA binding, chromatin binding, helicase activity, nuclease activity, protein binding, ATP binding, nucleoplasm"
FANCD2,"condensed chromosome, protein binding, nucleus, nucleoplasm"
FANCI,"DNA binding, protein binding, nucleoplasm, cytosol, mitotic G2 DNA damage checkpoint, membrane, positive regulation of protein ubiquitination"


# Collecting GO annotations for effector FA genes

In [46]:
FA_effector_list= FA_effector[0].splitlines()
#print( len(FA_core_list))

gene_GO_dict_effectors = dict()

for i in range(0, len(FA_effector_list)):
    curlist = FA_effector_list[i].split()
    curid = curlist[0].decode("utf-8") 
    print(curid)
        
    qurl = "https://api.monarchinitiative.org/api/bioentity/gene/"+curid+"/function/"
    print(qurl)
    with urllib.request.urlopen(qurl) as thisurl:
        getdata = json.loads(thisurl.read().decode())

        for j in range(0, len(getdata['associations'])):
            #print(getdata['associations'][j]['object']['id'])
            if curlist[1] not in gene_GO_dict:
                gene_GO_dict_effectors[curlist[1]] = getdata['associations'][j]['object']['label']
            else:
                curdata = gene_GO_dict_effectors[curlist[1]] 
                if curdata.find(getdata['associations'][j]['object']['label']) == -1:
                    gene_GO_dict_effectors[curlist[1]] = curdata+", "+getdata['associations'][j]['object']['label']


NCBIGene:675
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:675/function/
NCBIGene:83990
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:83990/function/
NCBIGene:79728
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:79728/function/
NCBIGene:5889
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:5889/function/
NCBIGene:84464
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:84464/function/
NCBIGene:2072
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:2072/function/
NCBIGene:5888
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:5888/function/
NCBIGene:672
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:672/function/
NCBIGene:10459
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:10459/function/
NCBIGene:7516
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:7516/function/
NCBIGene:55159
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:55159/fun

# Display GO annotations for effector FA genes

In [47]:
my_html = '<table><thead><tr><th>Gene name</th><th>GO term</th></tr></thead><tbody>{}</tbody></table>'
rows = []

for k, v in gene_GO_dict_effectors.items():
    rows.append('<tr><td>{}</td><td>{}</td></tr>'.format(k.decode("utf-8") , v))
result = my_html.format(''.join(rows))
display(HTML(result))

Gene name,GO term
BRCA2,protein binding
BRIP1,nucleoplasm
PALB2,protein binding
RAD51C,protein binding
SLX4,protein binding
ERCC4,protein binding
RAD51,chromatin binding
BRCA1,protein binding
MAD2L2,protein binding
XRCC2,protein binding


# Collecting GO annotations for associated FA genes

In [49]:
FA_associated_list= FA_assoc[0].splitlines()
#print( len(FA_core_list))

gene_GO_dict_associated = dict()

for i in range(0, len(FA_associated_list)):
    curlist = FA_associated_list[i].split()
    curid = curlist[0].decode("utf-8") 
    print(curid)
        
    qurl = "https://api.monarchinitiative.org/api/bioentity/gene/"+curid+"/function/"
    print(qurl)
    with urllib.request.urlopen(qurl) as thisurl:
        getdata = json.loads(thisurl.read().decode())

        for j in range(0, len(getdata['associations'])):
            #print(getdata['associations'][j]['object']['id'])
            if curlist[1] not in gene_GO_dict:
                gene_GO_dict_associated[curlist[1]] = getdata['associations'][j]['object']['label']
            else:
                curdata = gene_GO_dict_associated[curlist[1]] 
                if curdata.find(getdata['associations'][j]['object']['label']) == -1:
                    gene_GO_dict_associated[curlist[1]] = curdata+", "+getdata['associations'][j]['object']['label']


NCBIGene:80233
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:80233/function/
NCBIGene:91442
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:91442/function/
NCBIGene:199990
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:199990/function/
NCBIGene:378708
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:378708/function/
NCBIGene:201254
https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene:201254/function/


# Display GO annotations for associated FA genes

In [50]:
my_html = '<table><thead><tr><th>Gene name</th><th>GO term</th></tr></thead><tbody>{}</tbody></table>'
rows = []

for k, v in gene_GO_dict_associated.items():
    rows.append('<tr><td>{}</td><td>{}</td></tr>'.format(k.decode("utf-8") , v))
result = my_html.format(''.join(rows))
display(HTML(result))

Gene name,GO term
FAAP100,intermediate filament cytoskeleton
FAAP24,Fanconi anaemia nuclear complex
FAAP20,cellular response to DNA damage stimulus
CENPS,nucleoplasm
CENPX,nucleoplasm
