## Orange Team CQ#1.7

### Query: 
What genes show high phenotypic similarity to the 11 Fanconi Anemia core complex genes (set FA-core)?

### Services:
BioLink API (Monarch) - https://api.monarchinitiative.org/api/
Simsearch - https://monarchinitiative.org/simsearch

### Approach:

### Author
Gstupp

Method:
Take all 27 human FA genes (the full `FA_4_all_genes` set loaded below, which is broader than the 11 FA-core genes named in the query). For each gene, retrieve its phenotypes and use them to find phenotypically similar genes in mouse, zebrafish, worm, and fly. Each similar gene comes with a similarity score; sum the scores for each similar gene across all 27 FA genes. Take the top X (10) phenotypically similar genes from each organism, and then get the human orthologs of those genes. 

In [1]:
# autogenerate biolink_client
# curl --insecure -X POST -H "content-type:application/json" -d '{"swaggerUrl":"https://api.monarchinitiative.org/api/swagger.json"}' https://generator.swagger.io/api/gen/clients/python
# and rename it to biolink_client

In [2]:
import os, sys
# Location of the generated biolink_client package.
# Set the BIOLINK_CLIENT_PATH environment variable to point at your own copy;
# the original author's path is kept as the fallback so existing setups still work.
sys.path.insert(0, os.environ.get("BIOLINK_CLIENT_PATH", "/home/gstupp/projects/NCATS-Tangerine/biolink_client"))

In [19]:
import biolink_client
from biolink_client.api_client import ApiClient
from biolink_client.rest import ApiException
import requests
from itertools import chain
import pandas as pd
from pprint import pprint
from tqdm import tqdm, tqdm_notebook
from collections import defaultdict

# Let pandas print long tables in full.
pd.set_option('display.max_rows', 999)

# Service endpoints.
MONARCH_API = "https://api.monarchinitiative.org/api"
SIMSEARCH_API = "https://monarchinitiative.org/simsearch/phenotype"

# Tab-separated file with one gene per line: gene id, then symbol.
gene_list = "https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_4_all_genes.txt"

# Generated BioLink client, pointed at the Monarch API.
client = ApiClient(host=MONARCH_API)
client.set_default_header('Content-Type', 'text/plain')
api_instance = biolink_client.BioentityApi(client)

# Download the gene list from GitHub; the file has no header row.
gene_list_columns = ['gene_id', 'symbol']
dataframe = pd.read_csv(gene_list, sep='\t', names=gene_list_columns)
df = dataframe.set_index('symbol')
# Unique gene ids that seed the similarity search.
human_genes = set(df['gene_id'])

In [32]:
# Taxon ids passed to simsearch as `target_species`, and the gene-id prefixes
# later used to group the summed scores per organism.
# NOTE(review): the two lists do not appear to be index-aligned (7227 is
# presumably fly and 6239 worm, while `prefixes` lists WormBase before FlyBase).
# The visible cells only iterate them independently, so do not zip them
# together without confirming the mapping.
taxids = [10090, 7955, 7227, 6239]
prefixes = ['MGI', 'ZFIN', 'WormBase', 'FlyBase']

In [5]:
def get_obj(obj_id):
    """Fetch the Monarch bioentity record for ``obj_id``.

    Issues a GET against the ``/bioentity/<obj_id>`` endpoint and returns the
    decoded JSON body.
    """
    endpoint = "https://api.monarchinitiative.org/api/bioentity/{}".format(obj_id)
    return requests.get(endpoint).json()
def get_taxon_from_gene(gene):
    """Return the taxon label stored in the bioentity record of ``gene``."""
    record = get_obj(gene)
    taxon = record['taxon']
    return taxon['label']
# Smoke test: resolve the taxon label for NCBIGene:2176.
get_taxon_from_gene('NCBIGene:2176')

'Homo sapiens'

In [None]:
def query_orthologs(gene_id, taxon=None):
    """Ask Monarch for the homologs of ``gene_id``.

    If ``taxon`` is given it is sent as the ``homolog_taxon`` query parameter;
    otherwise no parameter is sent. Returns the ``object`` id of every
    association in the response.
    """
    url = "https://api.monarchinitiative.org/api/bioentity/gene/{}/homologs/".format(gene_id)
    # A falsy taxon means "no filter": passing params=None sends no query string.
    query = {'homolog_taxon': taxon} if taxon else None
    associations = requests.get(url, params=query).json()['associations']
    return [association['object']['id'] for association in associations]
#query_orthologs('MGI:88276', taxon="NCBITaxon:9606")

In [68]:
def get_phenotype_from_gene(gene):
    """Return the ids of all entries in the ``phenotype_list`` Monarch reports for ``gene``."""
    # Example: https://monarchinitiative.org/gene/NCBIGene%3A2176/phenotype_list.json
    endpoint = "https://monarchinitiative.org/gene/{}/phenotype_list.json".format(gene)
    payload = requests.get(endpoint).json()
    return [entry['id'] for entry in payload['phenotype_list']]
def get_phenotype_from_gene_verbose(gene):
    """Return ``(id, label)`` tuples for all entries in the ``phenotype_list`` Monarch reports for ``gene``."""
    # Example: https://monarchinitiative.org/gene/NCBIGene%3A2176/phenotype_list.json
    endpoint = "https://monarchinitiative.org/gene/{}/phenotype_list.json".format(gene)
    payload = requests.get(endpoint).json()
    return [(entry['id'], entry['label']) for entry in payload['phenotype_list']]
#get_phenotype_from_gene("NCBIGene:2176")

In [82]:
def get_phenotypically_similar_genes(phenotypes, taxon, return_all=False):
    """POST a phenotype profile to simsearch and return the similar genes.

    ``phenotypes`` is an iterable of phenotype id strings; they are joined with
    spaces into the ``input_items`` form field. ``taxon`` is sent as
    ``target_species``.

    With ``return_all=True`` the decoded JSON response is returned unchanged.
    Otherwise the result is a list of ``(id, score, label)`` tuples built from
    the response's ``'b'`` entries, or an empty list when there is no ``'b'``.
    """
    response = requests.post(
        SIMSEARCH_API,
        data={'input_items': " ".join(phenotypes), "target_species": taxon},
        headers={
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.8',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
        },
    )
    result = response.json()
    if return_all:
        return result
    if 'b' not in result:
        return []
    return [(hit['id'], hit['score']['score'], hit['label']) for hit in result['b']]
#get_phenotypically_similar_genes(phenotypes, "10090")

In [21]:
# human_genes = ["NCBIGene:2176"]
# Map each seed gene to the (id, score, label) hits collected over all target taxa.
gene_genes = defaultdict(list)
for gene in tqdm_notebook(human_genes):
    # A gene's phenotype profile does not depend on the target taxon, so fetch
    # it once per gene instead of once per (taxon, gene) pair.
    phenotypes = get_phenotype_from_gene(gene)
    for taxid in tqdm_notebook(taxids, leave=False):
        # Hits are appended in `taxids` order, as before.
        gene_genes[gene].extend(get_phenotypically_similar_genes(phenotypes, taxid))

The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.





In [98]:
# Total similarity score per hit gene, summed over every seed gene,
# plus a lookup from hit id to its display label.
s = defaultdict(int)
gene_label = {}
for similar_hits in gene_genes.values():
    for hit_id, hit_score, hit_label in similar_hits:
        s[hit_id] += hit_score
        gene_label[hit_id] = hit_label

In [101]:
# For every id prefix, keep the ten genes with the highest summed score.
top10 = {}
for prefix in prefixes:
    ranked = sorted(
        ((gene_id, total) for gene_id, total in s.items() if gene_id.startswith(prefix)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top10[prefix] = ranked[:10]
# Flatten the per-prefix lists into one list of records.
ss = [{'gene': gene_id, 'score': total} for gene_id, total in chain.from_iterable(top10.values())]
ss

[{'gene': 'MGI:88276', 'score': 1359},
 {'gene': 'MGI:1347466', 'score': 1341},
 {'gene': 'MGI:88039', 'score': 1332},
 {'gene': 'MGI:99851', 'score': 1321},
 {'gene': 'MGI:105373', 'score': 1314},
 {'gene': 'MGI:88064', 'score': 1310},
 {'gene': 'MGI:95729', 'score': 1308},
 {'gene': 'MGI:1330810', 'score': 1301},
 {'gene': 'MGI:98726', 'score': 1286},
 {'gene': 'MGI:88180', 'score': 1282},
 {'gene': 'WormBase:WBGene00010556', 'score': 747},
 {'gene': 'WormBase:WBGene00004077', 'score': 731},
 {'gene': 'WormBase:WBGene00016837', 'score': 726},
 {'gene': 'WormBase:WBGene00004392', 'score': 705},
 {'gene': 'WormBase:WBGene00003829', 'score': 701},
 {'gene': 'WormBase:WBGene00004391', 'score': 699},
 {'gene': 'WormBase:WBGene00000405', 'score': 693},
 {'gene': 'WormBase:WBGene00000871', 'score': 689},
 {'gene': 'WormBase:WBGene00001005', 'score': 689},
 {'gene': 'WormBase:WBGene00004208', 'score': 677},
 {'gene': 'ZFIN:ZDB-GENE-031114-4', 'score': 1185},
 {'gene': 'ZFIN:ZDB-GENE-040827-2

In [102]:
# Attach the NCBITaxon:9606 homologs to every top-scoring gene record.
# The loop variable is deliberately not called `s`: that name holds the
# summed-score dict, and rebinding it here would make the top-10 cell
# silently produce wrong results if it were re-run afterwards.
for entry in tqdm_notebook(ss):
    entry['orthologs'] = query_orthologs(entry['gene'], "NCBITaxon:9606")
ss

The installed widget Javascript is the wrong version.





[{'gene': 'MGI:88276', 'orthologs': ['NCBIGene:1499'], 'score': 1359},
 {'gene': 'MGI:1347466', 'orthologs': ['NCBIGene:2296'], 'score': 1341},
 {'gene': 'MGI:88039', 'orthologs': ['NCBIGene:324'], 'score': 1332},
 {'gene': 'MGI:99851', 'orthologs': ['NCBIGene:865'], 'score': 1321},
 {'gene': 'MGI:105373', 'orthologs': ['NCBIGene:5727'], 'score': 1314},
 {'gene': 'MGI:88064', 'orthologs': ['NCBIGene:367'], 'score': 1310},
 {'gene': 'MGI:95729', 'orthologs': ['NCBIGene:2737'], 'score': 1308},
 {'gene': 'MGI:1330810', 'orthologs': ['NCBIGene:8626'], 'score': 1301},
 {'gene': 'MGI:98726', 'orthologs': ['NCBIGene:7042'], 'score': 1286},
 {'gene': 'MGI:88180', 'orthologs': ['NCBIGene:652'], 'score': 1282},
 {'gene': 'WormBase:WBGene00010556',
  'orthologs': ['NCBIGene:10399'],
  'score': 747},
 {'gene': 'WormBase:WBGene00004077',
  'orthologs': ['NCBIGene:6932',
   'NCBIGene:83439',
   'NCBIGene:6934',
   'NCBIGene:51176'],
  'score': 731},
 {'gene': 'WormBase:WBGene00016837',
  'orthologs'

In [109]:
# Add display labels for each gene record and for each of its orthologs.
# The loop variable is deliberately not called `s`: that name holds the
# summed-score dict, and rebinding it here would break a re-run of the
# top-10 cell.
for entry in tqdm_notebook(ss):
    entry['label'] = get_obj(entry['gene'])['label']
    entry['ortholog_labels'] = [get_obj(ortholog_id)['label'] for ortholog_id in entry['orthologs']]

The installed widget Javascript is the wrong version.





## Final output: top phenotypically similar genes, ranked by summed score

In [112]:
# Show all gene records, highest summed score first.
sorted(ss, key=lambda entry: entry['score'], reverse=True)

[{'gene': 'MGI:88276',
  'label': 'Ctnnb1',
  'ortholog_labels': ['CTNNB1'],
  'orthologs': ['NCBIGene:1499'],
  'score': 1359},
 {'gene': 'MGI:1347466',
  'label': 'Foxc1',
  'ortholog_labels': ['FOXC1'],
  'orthologs': ['NCBIGene:2296'],
  'score': 1341},
 {'gene': 'MGI:88039',
  'label': 'Apc',
  'ortholog_labels': ['APC'],
  'orthologs': ['NCBIGene:324'],
  'score': 1332},
 {'gene': 'MGI:99851',
  'label': 'Cbfb',
  'ortholog_labels': ['CBFB'],
  'orthologs': ['NCBIGene:865'],
  'score': 1321},
 {'gene': 'MGI:105373',
  'label': 'Ptch1',
  'ortholog_labels': ['PTCH1'],
  'orthologs': ['NCBIGene:5727'],
  'score': 1314},
 {'gene': 'MGI:88064',
  'label': 'Ar',
  'ortholog_labels': ['AR'],
  'orthologs': ['NCBIGene:367'],
  'score': 1310},
 {'gene': 'MGI:95729',
  'label': 'Gli3',
  'ortholog_labels': ['GLI3'],
  'orthologs': ['NCBIGene:2737'],
  'score': 1308},
 {'gene': 'MGI:1330810',
  'label': 'Trp63',
  'ortholog_labels': ['TP63'],
  'orthologs': ['NCBIGene:8626'],
  'score': 13

### Demo with one gene

In [69]:
## FANCC
# Fetch (id, label) pairs for the phenotypes of NCBIGene:2176.
# Note: this rebinds the global `phenotypes`, which the main collection loop
# also assigns (there with plain id lists).
phenotypes = get_phenotype_from_gene_verbose("NCBIGene:2176")
phenotypes

[('HP:0000601', 'Hypotelorism'),
 ('HP:0008572', 'External ear malformation'),
 ('HP:0011133', 'Increased sensitivity to ionizing radiation'),
 ('HP:0005584', 'Renal cell carcinoma'),
 ('HP:0001000', 'Abnormality of skin pigmentation'),
 ('HP:0001249', 'Intellectual disability'),
 ('HP:0001347', 'Hyperreflexia'),
 ('HP:0000978', 'Bruising susceptibility'),
 ('HP:0001562', 'Oligohydramnios'),
 ('HP:0008064', 'Ichthyosis'),
 ('HP:0001631', 'Atrial septal defect'),
 ('HP:0007874', 'Almond-shaped palpebral fissure'),
 ('HP:0002827', 'Hip dislocation'),
 ('HP:0006740', 'Transitional cell carcinoma of the bladder'),
 ('HP:0000218', 'High palate'),
 ('HP:0004209', 'Clinodactyly of the 5th finger'),
 ('HP:0000316', 'Hypertelorism'),
 ('HP:0000453', 'Choanal atresia'),
 ('HP:0100587', 'Abnormality of the preputium'),
 ('HP:0002245', 'Meckel diverticulum'),
 ('HP:0000463', 'Anteverted nares'),
 ('HP:0004820', 'Acute myelomonocytic leukemia'),
 ('HP:0000568', 'Microphthalmia'),
 ('HP:0001639', 'H

In [84]:
# Run simsearch for the phenotype ids against taxon "10090" and keep the whole
# JSON response (return_all=True) so individual matches can be inspected.
d = get_phenotypically_similar_genes([x[0] for x in phenotypes], "10090", return_all=True)

In [87]:
# Take the first entry of the response's 'b' list and show its id and label.
match = d['b'][0]
(match['id'],match['label'])

('MGI:95729', 'Gli3')

In [91]:
# Show the first two entries of this hit's 'matches' list.
match['matches'][:2]

[{'a': {'IC': 8.248732127568665,
   'id': 'HP:0100615',
   'label': 'Ovarian neoplasm'},
  'b': {'IC': 12.741287225631801,
   'id': 'MP:0011727',
   'label': 'ectopic ovary'},
  'lcs': {'IC': 6.461063581698676,
   'id': 'MP:0001126',
   'label': 'abnormal ovary morphology'}},
 {'a': {'IC': 8.751303137059177,
   'id': 'HP:0000978',
   'label': 'Bruising susceptibility'},
  'b': {'IC': 5.125135097538753,
   'id': 'HP:0001892',
   'label': 'Abnormal bleeding'},
  'lcs': {'IC': 5.125135097538753, 'id': 'MP:0001914', 'label': 'hemorrhage'}}]

In [89]:
# Look up the homologs of the matched gene, restricted to taxon NCBITaxon:9606.
human_orthologs = query_orthologs(match['id'], taxon="NCBITaxon:9606")
human_orthologs

['NCBIGene:2737']

In [108]:
# For each seed gene, print the hits (if any) for the gene id below, to show
# how each seed gene contributes to that gene's summed score.
target_gene = "MGI:88276"
for human_gene, hits in gene_genes.items():
    pgenes = [hit for hit in hits if hit[0] == target_gene]
    print(human_gene, pgenes)

NCBIGene:5888 [('MGI:88276', 65, 'Ctnnb1')]
NCBIGene:80233 []
NCBIGene:29089 [('MGI:88276', 65, 'Ctnnb1')]
NCBIGene:55215 [('MGI:88276', 65, 'Ctnnb1')]
NCBIGene:83990 [('MGI:88276', 64, 'Ctnnb1')]
NCBIGene:2176 [('MGI:88276', 66, 'Ctnnb1')]
NCBIGene:10459 [('MGI:88276', 65, 'Ctnnb1')]
NCBIGene:57697 [('MGI:88276', 65, 'Ctnnb1')]
NCBIGene:7516 [('MGI:88276', 64, 'Ctnnb1')]
NCBIGene:5889 [('MGI:88276', 64, 'Ctnnb1')]
NCBIGene:199990 []
NCBIGene:84464 [('MGI:88276', 65, 'Ctnnb1')]
NCBIGene:55120 [('MGI:88276', 66, 'Ctnnb1')]
NCBIGene:201254 []
NCBIGene:675 [('MGI:88276', 62, 'Ctnnb1')]
NCBIGene:378708 []
NCBIGene:55159 []
NCBIGene:91442 []
NCBIGene:2175 [('MGI:88276', 66, 'Ctnnb1')]
NCBIGene:2072 [('MGI:88276', 63, 'Ctnnb1')]
NCBIGene:2188 [('MGI:88276', 65, 'Ctnnb1')]
NCBIGene:2178 [('MGI:88276', 66, 'Ctnnb1')]
NCBIGene:2189 [('MGI:88276', 65, 'Ctnnb1')]
NCBIGene:79728 [('MGI:88276', 64, 'Ctnnb1')]
NCBIGene:672 [('MGI:88276', 60, 'Ctnnb1')]
NCBIGene:2177 [('MGI:88276', 66, 'Ctnnb1')]
NCB