In [1]:
# further workup of owlsim-fa-1.7 orthologs showing the details of the tgfbeta match

In [2]:
import requests
from itertools import chain
import pandas as pd
import json
with open("gene_genes_1.7_orthologs.json") as f:
    gene_genes = json.load(f)

In [3]:
n = len({k for k,v in gene_genes.items() if "MGI:98726" in [x[0] for x in v]})
print("mouse tgfb2 is above the phenotype similarity cutoff for {} of the 27 FA genes".format(n))

mouse tgfb2 is above the phenotype similarity cutoff for 21 of the 27 FA genes


In [4]:
# these are the FA genes that share similarity with mouse tgfb
tgf = {k:[y for y in v if y[0] == "MGI:98726"] for k,v in gene_genes.items()}
tgf

{'NCBIGene:10459': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:199990': [],
 'NCBIGene:201254': [],
 'NCBIGene:2072': [['MGI:98726', 56, 'Tgfb2']],
 'NCBIGene:2175': [['MGI:98726', 62, 'Tgfb2']],
 'NCBIGene:2176': [['MGI:98726', 61, 'Tgfb2']],
 'NCBIGene:2177': [['MGI:98726', 62, 'Tgfb2']],
 'NCBIGene:2178': [['MGI:98726', 61, 'Tgfb2']],
 'NCBIGene:2187': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:2188': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:2189': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:29089': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:378708': [],
 'NCBIGene:55120': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:55159': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:55215': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:57697': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:5888': [['MGI:98726', 62, 'Tgfb2']],
 'NCBIGene:5889': [['MGI:98726', 61, 'Tgfb2']],
 'NCBIGene:672': [],
 'NCBIGene:675': [['MGI:98726', 57, 'Tgfb2']],
 'NCBIGene:7516': [['MGI:98726', 63, 'Tgfb2']],
 'NCBIGene:79728': [['MGI:98726', 61, 

In [5]:
MONARCH_API = "https://api.monarchinitiative.org/api"
SIMSEARCH_API = "https://monarchinitiative.org/simsearch/phenotype"
def get_phenotype_from_gene_verbose(gene):
    # https://monarchinitiative.org/gene/NCBIGene%3A2176/phenotype_list.json
    url = "https://monarchinitiative.org/gene/{}/phenotype_list.json"
    return [(x['id'],x['label']) for x in requests.get(url.format(gene)).json()['phenotype_list']]
def get_phenotypically_similar_genes(phenotypes, taxon, return_all=False):
    headers = {
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
    }
    data = {'input_items': " ".join(phenotypes), "target_species": taxon}
    r = requests.post(SIMSEARCH_API, data=data, headers=headers)
    d = r.json()
    if return_all:
        return d
    if 'b' in d:
        scores = [(x['id'],x['score']['score'], x['label']) for x in d['b']]
    else:
        scores = []
    return scores

In [6]:
# get the single most phenotypically similar FA gene
# theres a bunch with the same highest score..... pick one... NCBIGene:2187 FANCB
phenotypes = get_phenotype_from_gene_verbose("NCBIGene:2187")
phenotypes

[('HP:0000164', 'Abnormality of the dentition'),
 ('HP:0002860', 'Squamous cell carcinoma'),
 ('HP:0004859', 'Amegakaryocytic thrombocytopenia'),
 ('HP:0000639', 'Nystagmus'),
 ('HP:0009892', 'Anotia'),
 ('HP:0000252', 'Microcephaly'),
 ('HP:0002827', 'Hip dislocation'),
 ('HP:0007400', 'Irregular hyperpigmentation'),
 ('HP:0011027', 'Abnormality of the fallopian tube'),
 ('HP:0000028', 'Cryptorchidism'),
 ('HP:0100587', 'Abnormality of the preputium'),
 ('HP:0001000', 'Abnormality of skin pigmentation'),
 ('HP:0000316', 'Hypertelorism'),
 ('HP:0012745', 'Short palpebral fissure'),
 ('HP:0000587', 'Abnormality of the optic nerve'),
 ('HP:0000824', 'Growth hormone deficiency'),
 ('HP:0100541', 'Femoral hernia'),
 ('HP:0002937', 'Hemivertebrae'),
 ('HP:0001763', 'Pes planus'),
 ('HP:0001639', 'Hypertrophic cardiomyopathy'),
 ('HP:0010469', 'Absent testis'),
 ('HP:0000369', 'Low-set ears'),
 ('HP:0001053', 'Hypopigmented skin patches'),
 ('HP:0000072', 'Hydroureter'),
 ('HP:0000175', 'Cle

In [7]:
d = get_phenotypically_similar_genes([x[0] for x in phenotypes], "10090", return_all=True)

In [8]:
# the details of the match between human FANCB and mouse tgfb
match = [x for x in d['b'] if x['id'] == "MGI:98726"][0]
print((match['id'],match['label']))

('MGI:98726', 'Tgfb2')


In [9]:
print("There are {} phenotype matches".format(len(match['matches'])))

There are 31 phenotype matches


In [10]:
# sort by IC score
matches = sorted(match['matches'], key=lambda x:x['lcs']['IC'], reverse=True)
matches[:2]

[{'a': {'IC': 9.222230105740707,
   'id': 'HP:0003022',
   'label': 'Hypoplasia of the ulna'},
  'b': {'IC': 9.222230105740707,
   'id': 'HP:0003022',
   'label': 'Hypoplasia of the ulna'},
  'lcs': {'IC': 9.222230105740707, 'id': 'MP:0004359', 'label': 'short ulna'}},
 {'a': {'IC': 9.407528756226537,
   'id': 'HP:0010650',
   'label': 'Hypoplasia of the premaxilla'},
  'b': {'IC': 12.516685989266875,
   'id': 'MP:0000430',
   'label': 'absent maxillary shelf'},
  'lcs': {'IC': 8.734293049024156,
   'id': 'UBERON:0002244PHENOTYPE',
   'label': 'premaxilla phenotype'}}]

In [11]:
# format nicely

pd.DataFrame([{'human phenotype': x['a']['label'],
  'mouse phenotype': x['b']['label'],
  'lcs': x['lcs']['label'],
  'score': x['lcs']['IC']} for x in matches])

Unnamed: 0,human phenotype,lcs,mouse phenotype,score
0,Hypoplasia of the ulna,short ulna,Hypoplasia of the ulna,9.22223
1,Hypoplasia of the premaxilla,premaxilla phenotype,absent maxillary shelf,8.734293
2,Renal agenesis,absent kidney,Renal agenesis,8.313661
3,Retrognathia,mandibular retrognathia,Retrognathia,8.286692
4,Microcornea,Microcornea,decreased corneal stroma thickness,8.076628
5,Spina bifida,spina bifida,spina bifida occulta,8.03448
6,Abnormal aortic valve morphology,Abnormal aortic valve morphology,patent aortic valve,7.934124
7,Tetralogy of Fallot,interventricular septum membranous part phenotype,Perimembranous ventricular septal defect,7.858723
8,Tracheoesophageal fistula,abnormal respiratory conducting tube morphology,dilated respiratory conducting tubes,7.819655
9,Hypospadias,male urethra phenotype,abnormal bulbourethral gland physiology,7.714192
