In [1]:
import requests

# Base URL of the Ensembl REST service; handed to do_req as its `server` argument.
# NOTE(review): this is plain http — the service is presumably also reachable
# over https; confirm against the Ensembl REST documentation before switching.
ensembl_server = 'http://rest.ensembl.org'

def do_req(server, service, *args, **kwargs):
    """Send a GET request to a REST service and return the decoded JSON body.

    The URL is ``<server>/<service>`` followed by one ``/<segment>`` for each
    positional argument. Keyword arguments are sent as the query string. The
    request carries a ``Content-Type: application/json`` header.

    Parameters
    ----------
    server : str
        Base URL of the service (no trailing slash).
    service : str
        Endpoint path without a leading slash, e.g. ``'lookup/symbol'``.
    *args
        Additional path segments. Falsy values (``None``, ``''``, ``0``) are
        skipped; every other value is converted with ``str()``.
    **kwargs
        Query parameters, forwarded as ``requests.get(params=...)``.

    Returns
    -------
    object
        Whatever ``Response.json()`` yields for the response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    # str() lets callers pass non-string identifiers (e.g. integers) as
    # path segments instead of failing on '/' + arg.
    url_parameters = ''.join('/' + str(arg) for arg in args if arg)
    req = requests.get('%s/%s%s' % (server, service, url_parameters), params=kwargs,
                       headers={'Content-Type': 'application/json'},
                       # Without a timeout a stalled connection blocks the kernel forever.
                       timeout=60)
    # raise_for_status() is already a no-op for successful responses.
    req.raise_for_status()
    return req.json()

In [2]:
# Ask the service for its species catalogue and print each species' name,
# one per line.
all_species = do_req(ensembl_server, 'info/species')
for species_name in (record['name'] for record in all_species['species']):
    print(species_name)

saccharomyces_cerevisiae
ciona_savignyi
myotis_lucifugus
taeniopygia_guttata
sorex_araneus
otolemur_garnettii
macropus_eugenii
erinaceus_europaeus
anolis_carolinensis
gadus_morhua
dasypus_novemcinctus
chlorocebus_sabaeus
tursiops_truncatus
mus_musculus
bos_taurus
monodelphis_domestica
choloepus_hoffmanni
sus_scrofa
rattus_norvegicus
caenorhabditis_elegans
pteropus_vampyrus
microcebus_murinus
sarcophilus_harrisii
ovis_aries
papio_anubis
pelodiscus_sinensis
equus_caballus
xiphophorus_maculatus
macaca_mulatta
astyanax_mexicanus
latimeria_chalumnae
ficedula_albicollis
gasterosteus_aculeatus
gorilla_gorilla
oryctolagus_cuniculus
oreochromis_niloticus
echinops_telfairi
nomascus_leucogenys
homo_sapiens
dipodomys_ordii
lepisosteus_oculatus
anas_platyrhynchos
canis_familiaris
callithrix_jacchus
pongo_abelii
ornithorhynchus_anatinus
tetraodon_nigroviridis
mustela_putorius_furo
tarsius_syrichta
vicugna_pacos
meleagris_gallopavo
xenopus_tropicalis
ictidomys_tridecemlineatus
cavia_porcellus
takifug

In [3]:
# Query 'info/external_dbs' for homo_sapiens, restricted by filter='HGNC%',
# and print the returned list of database records.
# NOTE(review): the trailing '%' looks like a SQL-LIKE wildcard — confirm in the
# Ensembl REST documentation for this endpoint.
ext_dbs = do_req(ensembl_server, 'info/external_dbs', 'homo_sapiens', filter='HGNC%')

print(ext_dbs)

[{'display_name': 'HGNC Symbol', 'release': '1', 'name': 'HGNC', 'description': None}, {'display_name': 'HGNC (automatic)', 'release': '1', 'name': 'HGNC_automatic_gene', 'description': None}, {'display_name': 'HGNC (automatic)', 'release': '1', 'name': 'HGNC_automatic_transcript', 'description': None}, {'display_name': 'HGNC (curated)', 'release': '1', 'name': 'HGNC_curated_gene', 'description': None}, {'display_name': 'HGNC (curated)', 'release': '1', 'name': 'HGNC_curated_transcript', 'description': None}, {'display_name': 'HGNC transcript name', 'release': '1', 'name': 'HGNC_trans_name', 'description': 'transcript name from HGNC'}]


In [21]:
# Look up the record for the symbol 'HMGCR' in homo_sapiens, print it, and keep
# its 'id' field in hmgcr_id for the cells that query by gene ID.
ensembl = do_req(ensembl_server, 'lookup/symbol', 'homo_sapiens', 'HMGCR')
print(ensembl)
hmgcr_id = ensembl['id']

{'id': 'ENSG00000113161', 'seq_region_name': '5', 'logic_name': 'ensembl_havana_gene', 'species': 'homo_sapiens', 'end': 75362104, 'strand': 1, 'description': '3-hydroxy-3-methylglutaryl-CoA reductase [Source:HGNC Symbol;Acc:HGNC:5006]', 'start': 75336329, 'object_type': 'Gene', 'assembly_name': 'GRCh38', 'biotype': 'protein_coding', 'source': 'ensembl_havana', 'version': 15, 'display_name': 'HMGCR', 'db_type': 'core'}


'ENSG00000113161'

In [5]:
# Fetch the 'sequence/id' record for hmgcr_id and keep only its 'seq' field.
hmgcr_seq = do_req(ensembl_server,'sequence/id', hmgcr_id)
cleaned_seq = hmgcr_seq['seq']

In [6]:
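# Disabled example: transcribe and translate cleaned_seq with Biopython.
# NOTE: Bio.Alphabet was removed in Biopython 1.78; on newer releases build the
# sequence with Seq.Seq(str(cleaned_seq)) and drop the IUPAC import.
# from Bio import Seq
# from Bio.Alphabet import IUPAC
# seq = Seq.Seq(str(cleaned_seq), IUPAC.unambiguous_dna)
# seq = seq.transcribe()
# protein = seq.translate()
# protein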

In [7]:
# Cross-references: fetch the 'xrefs/id' records for hmgcr_id and, for each
# one, print the display name of its source database followed by the full record.
hmg_xrefs = do_req(ensembl_server, 'xrefs/id', hmgcr_id)
for xref in hmg_xrefs:
    print(xref['db_display_name'], xref, sep='\n')

Vega gene
{'info_type': 'NONE', 'synonyms': [], 'display_id': 'OTTHUMG00000102069', 'info_text': 'Added during ensembl-vega production', 'primary_id': 'OTTHUMG00000102069', 'dbname': 'Vega_gene', 'version': '4', 'db_display_name': 'Vega gene', 'description': None}
Vega gene
{'info_type': 'NONE', 'synonyms': [], 'display_id': 'HMGCR', 'info_text': '', 'primary_id': 'OTTHUMG00000102069', 'dbname': 'Vega_gene', 'version': '1', 'db_display_name': 'Vega gene', 'description': None}
Havana gene
{'info_type': 'NONE', 'synonyms': [], 'display_id': 'OTTHUMG00000102069', 'info_text': '', 'primary_id': 'OTTHUMG00000102069', 'dbname': 'OTTG', 'version': '0', 'db_display_name': 'Havana gene', 'description': None}
Expression Atlas
{'info_type': 'DIRECT', 'synonyms': [], 'display_id': 'ENSG00000113161', 'info_text': '', 'primary_id': 'ENSG00000113161', 'dbname': 'ArrayExpress', 'version': '0', 'db_display_name': 'Expression Atlas', 'description': ''}
EntrezGene
{'info_type': 'DEPENDENT', 'synonyms': [

# Retrieve orthologues

In [8]:
# Request the orthologues of hmgcr_id (sequence='none') and print the species
# of every match. For the rattus_norvegicus entry, also print the full record
# and its taxonomy level, and remember the target gene ID in rat_id.
# rat_id is only bound when a rattus_norvegicus entry is present.
hom_resp = do_req(
    ensembl_server, 'homology/id', hmgcr_id,
    type='orthologues', sequence='none',
)
homologies = hom_resp['data'][0]['homologies']
for homology in homologies:
    print(homology['target']['species'])
    if homology['target']['species'] == 'rattus_norvegicus':
        print(homology)
        print(homology['taxonomy_level'])
        rat_id = homology['target']['id']

pan_troglodytes
gorilla_gorilla
pongo_abelii
nomascus_leucogenys
macaca_mulatta
papio_anubis
chlorocebus_sabaeus
callithrix_jacchus
tarsius_syrichta
microcebus_murinus
otolemur_garnettii
rattus_norvegicus
{'type': 'ortholog_one2one', 'source': {'id': 'ENSG00000113161', 'perc_id': 93, 'perc_pos': 96, 'species': 'homo_sapiens', 'taxon_id': 9606, 'cigar_line': '888M', 'protein_id': 'ENSP00000287936'}, 'target': {'id': 'ENSRNOG00000016122', 'perc_id': 93, 'perc_pos': 96, 'species': 'rattus_norvegicus', 'taxon_id': 10116, 'cigar_line': '419MD468M', 'protein_id': 'ENSRNOP00000022055'}, 'method_link_type': 'ENSEMBL_ORTHOLOGUES', 'taxonomy_level': 'Euarchontoglires', 'dn_ds': 0.06787}
Euarchontoglires
dipodomys_ordii
ochotona_princeps
tupaia_belangeri
cavia_porcellus
mus_musculus
oryctolagus_cuniculus
ictidomys_tridecemlineatus
vicugna_pacos
sorex_araneus
felis_catus
equus_caballus
ovis_aries
bos_taurus
ailuropoda_melanoleuca
tursiops_truncatus
mustela_putorius_furo
erinaceus_europaeus
myotis_

In [12]:
# Look up the record for the ID stored in rat_id, print it, and display the ID
# itself as the cell's result.
rat_req = do_req(ensembl_server, 'lookup/id', rat_id)
print(rat_req)
rat_id

{'id': 'ENSRNOG00000016122', 'seq_region_name': '2', 'logic_name': 'ensembl', 'species': 'rattus_norvegicus', 'end': 27500654, 'strand': -1, 'description': '3-hydroxy-3-methylglutaryl-CoA reductase [Source:RGD Symbol;Acc:2803]', 'start': 27480226, 'object_type': 'Gene', 'assembly_name': 'Rnor_6.0', 'biotype': 'protein_coding', 'source': 'ensembl', 'version': 4, 'display_name': 'Hmgcr', 'db_type': 'core'}


'ENSRNOG00000016122'

In [22]:
# Fetch the cross-references of hmgcr_id restricted to external_db='GO'
# (all_levels='1'), then look up each term's 'ontology/id' record to print its
# namespace, description and definition.
# One extra request is issued per cross-reference (106 in the recorded output).
refs = do_req(ensembl_server, 'xrefs/id',hmgcr_id, external_db='GO', all_levels='1')
print(len(refs))
print(refs[0].keys())  # field names of one cross-reference record
for ref in refs:
    go_id = ref['primary_id']
    details = do_req(ensembl_server, 'ontology/id', go_id)
    print('%s %s %s' % (go_id,  details['namespace'], ref['description']))
    print('%s\n' % details['definition'])

106
dict_keys(['info_type', 'synonyms', 'linkage_types', 'display_id', 'info_text', 'primary_id', 'dbname', 'description', 'db_display_name', 'version'])
GO:0004420 molecular_function hydroxymethylglutaryl-CoA reductase (NADPH) activity
"Catalysis of the reaction: (R)-mevalonate + CoA + 2 NADP(+) = (S)-3-hydroxy-3-methylglutaryl-CoA + 2 H(+) + 2 NADPH." [EC:1.1.1.34, RHEA:15992]

GO:0005515 molecular_function protein binding
"Interacting selectively and non-covalently with any protein or protein complex (a complex of two or more proteins that may include other nonprotein molecules)." [GOC:go_curators]

GO:0005622 cellular_component intracellular
"The living contents of a cell; the matter contained within (but not including) the plasma membrane, usually taken to exclude large vacuoles and masses of secretory or ingested material. In eukaryotes it includes the nucleus and cytoplasm." [ISBN:0198506732]

GO:0005778 cellular_component peroxisomal membrane
"The lipid bilayer surrounding a pe

In [25]:
# Inspect one GO term: print each of its fields, print every entry under
# 'parents', then fetch and print the record of the last parent listed together
# with its number of children.
go_id = 'GO:0006695'
my_data = do_req(ensembl_server, 'ontology/id',go_id)
# Reset on every run so a term without parents cannot raise NameError or
# silently reuse a parent_id left in the kernel by an earlier execution.
parent_id = None
for k,v in my_data.items():
    if k=='parents':
        for parent in v:
            print(parent)
            parent_id = parent['accession']
    else:
        print('%s: %s' % (k, str(v)))
if parent_id is None:
    print('%s has no parent terms' % go_id)
else:
    parent_data = do_req(ensembl_server, 'ontology/id',parent_id)
    print(parent_data, len(parent_data['children']))

{'definition': '"The chemical reactions and pathways involving cholesterol, cholest-5-en-3 beta-ol, the principal sterol of vertebrates and the precursor of many steroids, including bile acids and steroid hormones. It is a component of the plasma membrane lipid bilayer and of plasma lipoproteins and can be found in all animal tissues." [ISBN:0198506732]', 'namespace': 'biological_process', 'accession': 'GO:0008203', 'synonyms': ['"cholesterol metabolism" []'], 'name': 'cholesterol metabolic process', 'ontology': 'GO', 'subsets': ['gosubset_prok']}
{'definition': '"The chemical reactions and pathways resulting in the formation of sterols, steroids with one or more hydroxyl groups and a hydrocarbon side-chain in the molecule." [GOC:go_curators]', 'namespace': 'biological_process', 'accession': 'GO:0016126', 'synonyms': ['"sterol anabolism" []', '"sterol biosynthesis" []', '"sterol formation" []', '"sterol synthesis" []'], 'name': 'sterol biosynthetic process', 'ontology': 'GO', 'subsets'

In [26]:
# Fetch the 'ontology/ancestors/chart' mapping for go_id. For every term in it
# print the accession, the term's name and definition, and the accessions
# listed under 'is_a' (empty when the entry has no 'is_a' key).
refs = do_req(ensembl_server, 'ontology/ancestors/chart', go_id)
for go, entry in refs.items():
    print(go)
    term = entry['term']
    print('%s %s' % (term['name'], term['definition']))
    is_a = entry.get('is_a', [])
    print('\t is a: %s\n' % ', '.join(ancestor['accession'] for ancestor in is_a))

GO:0008203
cholesterol metabolic process "The chemical reactions and pathways involving cholesterol, cholest-5-en-3 beta-ol, the principal sterol of vertebrates and the precursor of many steroids, including bile acids and steroid hormones. It is a component of the plasma membrane lipid bilayer and of plasma lipoproteins and can be found in all animal tissues." [ISBN:0198506732]
	 is a: GO:1902652, GO:0016125

GO:1901360
organic cyclic compound metabolic process "The chemical reactions and pathways involving organic cyclic compound." [GOC:TermGenie]
	 is a: GO:0071704

GO:0016126
sterol biosynthetic process "The chemical reactions and pathways resulting in the formation of sterols, steroids with one or more hydroxyl groups and a hydrocarbon side-chain in the molecule." [GOC:go_curators]
	 is a: GO:0006694, GO:0046165, GO:0016125

GO:1901617
organic hydroxy compound biosynthetic process "The chemical reactions and pathways resulting in the formation of organic hydroxy compound." [GOC:pr,

In [30]:
def get_upper(go_id):
    """Collect the ancestors of an ontology term and their parent links.

    Requests the 'ontology/ancestors/chart' mapping for ``go_id`` and then
    issues one 'ontology/id' request per term in that mapping to obtain the
    term's children.

    Parameters
    ----------
    go_id : str
        Accession of the term whose ancestors are wanted.

    Returns
    -------
    tuple(dict, dict)
        ``parents`` maps an accession to the list of accessions found under
        its 'is_a' entry; terms without an 'is_a' entry are omitted.
        ``node_data`` maps every accession in the chart to
        ``{'name': <term name>, 'children': <children from 'ontology/id'>}``.

    Raises
    ------
    KeyError
        If a chart entry or term record lacks an expected field (including an
        'is_a' element without 'accession').
    """
    parents = {}
    node_data = {}
    refs = do_req(ensembl_server, 'ontology/ancestors/chart', go_id)
    for ref, entry in refs.items():
        my_data = do_req(ensembl_server, 'ontology/id', ref)
        node_data[ref] = {'name': entry['term']['name'], 'children': my_data['children']}
        # Only a missing 'is_a' marks the top of the hierarchy. Checking the
        # key explicitly keeps a malformed 'is_a' element from being mistaken
        # for a root term.
        if 'is_a' in entry:
            parents[ref] = [x['accession'] for x in entry['is_a']]
    return parents, node_data

In [38]:
# Draw the ancestor hierarchy of go_id as a directed graph and save it to
# graph.png. Rendering uses the Graphviz 'dot' program.
parents, node_data = get_upper(go_id)
import pygraphviz as pgv


def _node_text(accession):
    """Caption for a term: its name (split at ', ') above its accession."""
    return '%s\n(%s)' % (node_data[accession]['name'].replace(', ', '\n'), accession)


g = pgv.AGraph(directed=True)
for ofs, ofs_parents in parents.items():
    ofs_text = _node_text(ofs)
    for parent in ofs_parents:
        parent_text = _node_text(parent)
        children = node_data[parent]['children']
        if len(children) >= 3:
            # Many children: summarise them with a single placeholder node.
            g.add_edge(parent_text, '...%d...' % (len(children) - 1))
        else:
            # Few children: link those that are not part of the ancestor set.
            for child in children:
                if child['accession'] not in node_data:
                    g.add_edge(parent_text, child['accession'])
        g.add_edge(parent_text, ofs_text)
g.graph_attr['label']='Ontology tree for HMG activity'
g.node_attr['shape']='rectangle'
g.layout(prog='dot')
g.draw('graph.png')

ValueError: Program dot not found in path.

In [None]:
# The package name is case-sensitive ("IPython", not "Ipython"); Image is
# imported from the public IPython.display module.
from IPython.display import Image