#### Find genes associated with Ehlers-Danlos syndrome type III via phenotypic similarity

Currently, EDS III is associated with two genes:
TNXB - https://www.ncbi.nlm.nih.gov/clinvar/variation/217018/
COL3A1 - https://www.omim.org/entry/130020

In this notebook, we look for model organism genes whose associated phenotypes are similar to those found in EDS III.

In [10]:
# Get phenotypes associated with EDS III
import copy
import requests

# Solr select endpoint that every Solr query in this notebook is sent to.
SOLR_URL = 'https://solr.monarchinitiative.org/solr/golr/select'

# Identifier of the disease whose phenotypes are fetched below
# (presumably the MONDO ID for EDS III, per the notebook title -- confirm).
disease = 'MONDO:0007523'

def get_solr_results(solr, params):
    """Yield every document matching a Solr query, one page at a time.

    ``params`` is deep-copied, so the paging offset is advanced on a private
    copy and the caller's dictionary is never modified.

    Args:
        solr: URL of the Solr select handler to query.
        params: Query parameters for the request.  Must contain integer
            ``'rows'`` (page size) and ``'start'`` (offset of the first
            document to fetch) entries.

    Yields:
        Each entry of ``response.docs`` from every fetched page, in the
        order the server returned them.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    solr_params = copy.deepcopy(params)
    page_size = solr_params['rows']
    if page_size <= 0:
        # The offset could never advance, so there is nothing to page through.
        return

    while True:
        solr_request = requests.get(solr, params=solr_params)
        # Fail with the HTTP status rather than an opaque JSON/KeyError later.
        solr_request.raise_for_status()
        response = solr_request.json()['response']
        docs = response['docs']
        for doc in docs:
            yield doc

        solr_params['start'] += page_size
        # The total is only known once a page has been fetched, so it is
        # checked after the request (a non-zero initial 'start' must still
        # trigger a fetch).  An empty page also ends paging so an
        # inconsistent numFound cannot keep the loop requesting empty pages.
        if not docs or solr_params['start'] >= response['numFound']:
            break

def get_direct_phenotypes(entity):
    """Return the phenotype identifiers associated with ``entity``.

    Queries ``SOLR_URL`` for documents whose ``subject`` field equals
    ``entity`` and whose ``object_category`` is ``"phenotype"``, requesting
    only the ``object`` field in pages of 100.

    Args:
        entity: Identifier to match against the ``subject`` field.

    Returns:
        A list holding the ``object`` value of every matching document, in
        the order the documents were returned.
    """
    filters = [
        'subject:"{0}"'.format(entity),
        'object_category:"phenotype"',
    ]
    query = {
        'wt': 'json',
        'rows': 100,
        'start': 0,
        'q': '*:*',
        'fl': 'object',
        'fq': filters,
    }
    return [hit['object'] for hit in get_solr_results(SOLR_URL, query)]


# Fetch the phenotype identifiers for the disease; the owlsim queries in the
# following cells send this list as their attribute set.
phenotypes = get_direct_phenotypes(disease)
# Expected: 56 (the count shown in this cell's recorded output)
len(phenotypes)

56

In [31]:
# Get owlsim results for mouse (the other organisms are queried in a later cell)
# Base URL of the owlsim service used for the similarity search.
OWLSIM = 'https://beta.monarchinitiative.org/owlsim/'

search_url = OWLSIM + "searchByAttributeSet"

# Send the disease's phenotype list as the attribute set, restricted to MGI.
sim_req = requests.get(search_url, params={'a': phenotypes, 'target': 'MGI'})
# Surface HTTP failures directly instead of as a JSON/KeyError further down.
sim_req.raise_for_status()
results = sim_req.json()

# Keep only [id, label, combinedScore] for each match, in response order.
mouse_genes = [[res['j']['id'], res['j']['label'], res['combinedScore']] for res in results['results']]

# Preview the first five matches.
mouse_genes[0:5]

[['MGI:95489', 'Fbn1', 68],
 ['MGI:98726', 'Tgfb2', 62],
 ['MGI:88190', 'Braf', 60],
 ['MGI:1195272', 'Eda', 59],
 ['MGI:95586', 'Fst', 59]]

In [33]:
# Get owlsim results for the remaining organisms

def get_eds_sim(target):
    """Return owlsim matches for the ``phenotypes`` list within ``target``.

    Sends the module-level ``phenotypes`` list as the ``a`` parameter to the
    module-level ``search_url``.

    Args:
        target: Value for the owlsim ``target`` parameter (this notebook
            uses ``'HGNC'``, ``'ZFIN'``, ``'WormBase'`` and ``'FlyBase'``).

    Returns:
        A list of ``[id, label, combinedScore]`` lists, one per entry of the
        response's ``results`` array, in response order.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
    """
    sim_req = requests.get(search_url, params={'a': phenotypes, 'target': target})
    # Surface HTTP failures directly instead of as a JSON/KeyError below.
    sim_req.raise_for_status()
    results = sim_req.json()
    return [[res['j']['id'], res['j']['label'], res['combinedScore']] for res in results['results']]

# Run the same similarity search once per remaining target, in this order.
human_genes, zebrafish_genes, worm_genes, fly_genes = (
    get_eds_sim(namespace)
    for namespace in ('HGNC', 'ZFIN', 'WormBase', 'FlyBase')
)

# Sanity check: report how many matches came back for each organism
print('human genes: {}'.format(len(human_genes)))
print('mouse genes: {}'.format(len(mouse_genes)))
print('zebrafish genes: {}'.format(len(zebrafish_genes)))
print('worm genes: {}'.format(len(worm_genes)))
print('fly genes: {}'.format(len(fly_genes)))


human genes: 153
mouse genes: 1000
zebrafish genes: 1000
worm genes: 281
fly genes: 175


In [43]:
# Write the first 100 human matches to a TSV file (human genes need no
# ortholog lookup, so only id, symbol and score are written)

# The with-block flushes and closes the file as soon as the rows are written,
# even if a write fails part-way through.  human_fh stays bound afterwards, so
# calling human_fh.close() again later is a harmless no-op.
with open('eds-human.tsv', 'w') as human_fh:
    human_fh.write("\t".join(['id','symbol','score']) + "\n")

    for row in human_genes[0:100]:
        human_fh.write("{}\t{}\t{}\n".format(row[0], row[1], row[2]))


In [52]:
# Generate ortholog files

def get_human_ortholog(solr, gene):
    """Yield Solr documents relating ``gene`` to objects in taxon 9606.

    The query keeps documents whose ``subject_closure`` contains ``gene``,
    whose ``relation_closure`` contains ``RO:HOM0000017`` (presumably the
    orthology relation -- confirm against the Relation Ontology) and whose
    ``object_taxon`` is ``NCBITaxon:9606``.  Only the subject/object IDs and
    labels are requested, in pages of 100.

    Args:
        solr: URL of the Solr select handler to query.
        gene: Gene identifier to match in ``subject_closure``.

    Yields:
        Each matching document as returned by ``get_solr_results``.
    """
    filters = [
        'subject_closure: "{0}"'.format(gene),
        'relation_closure: "RO:HOM0000017"',
        'object_taxon: "NCBITaxon:9606"',
    ]
    query = {
        'wt': 'json',
        'rows': 100,
        'start': 0,
        'q': '*:*',
        'fl': 'subject, subject_label,'
              'object, object_label',
        'fq': filters,
    }
    for association in get_solr_results(solr, query):
        yield association
        

def add_ortho_to_sim(owlsim_results, fh):
    """Write the human orthologs of the first 100 similarity hits as TSV.

    A header line is written first.  Then, for each of the first 100 rows of
    ``owlsim_results``, one line is written per document yielded by
    ``get_human_ortholog``; rows for which nothing is yielded produce no
    output line.

    Column layout of each data line:
        id, symbol          -- ``object`` / ``object_label`` of the ortholog
                               document (the human side of the association)
        score               -- ``row[2]`` of the similarity hit
        ortholog_id,
        ortholog_symbol     -- ``row[0]`` / ``row[1]`` of the similarity hit
                               (the model organism gene)

    Args:
        owlsim_results: Sequence of ``[id, label, score]`` rows.
        fh: Writable text file object; it is written to but not closed here.
    """
    mod_header = "\t".join(['id','symbol','score','ortholog_id', 'ortholog_symbol']) + "\n"
    fh.write(mod_header)
    for row in owlsim_results[0:100]:
        for ortholog in get_human_ortholog(SOLR_URL, row[0]):
            fh.write("{}\t{}\t{}\t{}\t{}\n".format(
                ortholog['object'],
                ortholog['object_label'],
                row[2],
                row[0],
                row[1]
            ))


# Open all four output files in one with-statement so every handle is flushed
# and closed even if one of the ortholog lookups raises part-way through.
with open('eds-mouse.tsv', 'w') as mouse_fh, \
        open('eds-zebrafish.tsv', 'w') as zfin_fh, \
        open('eds-worm.tsv', 'w') as worm_fh, \
        open('eds-fly.tsv', 'w') as fly_fh:
    add_ortho_to_sim(mouse_genes, mouse_fh)
    add_ortho_to_sim(zebrafish_genes, zfin_fh)
    add_ortho_to_sim(worm_genes, worm_fh)
    add_ortho_to_sim(fly_genes, fly_fh)

# The human file handle comes from an earlier cell; closing an already-closed
# file is a no-op, so this is safe either way.
human_fh.close()