# ensembl module

## client

In [15]:
# ensembl.py

import sys
import json
import time
import requests
from urllib.parse import urlencode


class EnsemblRestClient(object):
    '''
    Small self-throttling client for the Ensembl REST API, built on `requests`.

    Adapted from https://github.com/Ensembl/ensembl-rest/wiki/Example-Python-Client

    # Example usage
    client = EnsemblRestClient()
    variants = client.get_variants("homo_sapiens", "BRCA2")
    '''
    def __init__(self, server='http://rest.ensembl.org', reqs_per_sec=15):
        '''
        server: base URL that every endpoint is appended to.
        reqs_per_sec: number of successful requests allowed before the
            client checks whether it has to pause.
        '''
        self.server = server
        self.reqs_per_sec = reqs_per_sec
        self.req_count = 0  # successful requests in the current window
        self.last_req = 0   # time.time() at which the current window started

    def perform_rest_action(self, endpoint, hdrs=None, params=None):
        '''
        GET `self.server + endpoint` and return the decoded JSON body.

        endpoint: path appended to the server URL.
        hdrs: optional dict of request headers; 'Content-Type' defaults to
            'application/json'. The caller's dict is not modified.
        params: optional mapping, urlencoded and appended as the query string.

        Returns the decoded JSON, or None when the body is empty or the
        request ended in an HTTP error (which is reported on stderr).
        A 429 response carrying 'Retry-After' is retried after that delay.
        Exceptions other than requests' HTTPError propagate to the caller.
        '''
        # Work on a copy so the caller's header dict is never mutated.
        hdrs = dict(hdrs) if hdrs else {}
        hdrs.setdefault('Content-Type', 'application/json')

        if params:
            endpoint += '?' + urlencode(params)

        data = None

        # check if we need to rate limit ourselves
        if self.req_count >= self.reqs_per_sec:
            delta = time.time() - self.last_req
            if delta < 1:
                time.sleep(1 - delta)
            self.last_req = time.time()
            self.req_count = 0

        try:
            response = requests.get(self.server + endpoint, headers=hdrs)
            response.raise_for_status()
            content = response.content
            if content:
                data = json.loads(content)
            self.req_count += 1

        except requests.exceptions.HTTPError as e:
            # check if we are being rate limited by the server
            if e.response.status_code == 429:
                if 'Retry-After' in e.response.headers:
                    retry = e.response.headers['Retry-After']
                    time.sleep(float(retry))
                    # `endpoint` already contains the query string, so params
                    # must not be passed again; keep the retry's result.
                    data = self.perform_rest_action(endpoint, hdrs)
                else:
                    sys.stderr.write(
                        'Request failed for {0}: '
                        'rate limited (429) without a Retry-After header\n'
                        .format(endpoint)
                    )
            else:
                sys.stderr.write(
                    'Request failed for {0}: '
                    'Status code: {1.response.status_code} Reason: {1.response.reason}\n'
                    .format(endpoint, e)
                )

        return data

    def get_variants(self, species, symbol):
        '''
        Return the variation features overlapping the gene named `symbol`.

        Resolves the symbol through /xrefs/symbol, takes the first hit's
        'id', then queries /overlap/id for variation features.
        Returns None when the symbol lookup yields nothing.
        '''
        genes = self.perform_rest_action(
            endpoint='/xrefs/symbol/{0}/{1}'.format(species, symbol),
            params={'object_type': 'gene'}
        )
        if genes:
            stable_id = genes[0]['id']
            variants = self.perform_rest_action(
                '/overlap/id/{0}'.format(stable_id),
                params={'feature': 'variation'}
            )
            return variants
        return None

class EnsemblAPI:
    '''Convenience wrapper that renders client results as display strings.'''

    def __init__(self):
        self.client = EnsemblRestClient()

    def get_variants(self, species, symbol):
        '''
        Return one formatted line per variant of the gene `symbol`.

        Each line reads 'region:start-end:strand ==> id (consequence)'.
        Returns None when the client reports no variants.
        '''
        records = self.client.get_variants(species, symbol)
        if not records:
            return None

        line_template = (
            '{seq_region_name}:{start}-{end}:{strand} ==> '
            '{id} ({consequence_type})'
        )
        return [line_template.format(**record) for record in records]

In [16]:
# main.py

# from ensembl import EnsemblAPI

# Look up every variant overlapping the human BRCA2 gene and print them.
api = EnsemblAPI()
species = "homo_sapiens"
symbol = "BRCA2"
variants = api.get_variants(species, symbol)

# get_variants returns None when nothing was found, so guard before looping.
if variants:
    # The position index was never used, so iterate the lines directly.
    for variant in variants:
        print(variant)

13:32315090-32315093:1 ==> rs1252827512 (5_prime_UTR_variant)
13:32315093-32315093:1 ==> rs1593879206 (5_prime_UTR_variant)
13:32315094-32315094:1 ==> rs1593879211 (5_prime_UTR_variant)
13:32315101-32315101:1 ==> rs768038550 (5_prime_UTR_variant)
13:32315106-32315106:1 ==> rs1045400508 (5_prime_UTR_variant)
13:32315109-32315109:1 ==> rs927273467 (5_prime_UTR_variant)
13:32315111-32315111:1 ==> rs1294449372 (5_prime_UTR_variant)
13:32315134-32315134:1 ==> rs750992637 (5_prime_UTR_variant)
13:32315145-32315145:1 ==> rs1246133691 (splice_region_variant)
13:32315146-32315146:1 ==> rs1478582284 (splice_donor_variant)
13:32315160-32315160:1 ==> rs1170598691 (intron_variant)
13:32315175-32315175:1 ==> rs937348137 (intron_variant)
13:32315176-32315176:1 ==> rs1305618584 (intron_variant)
13:32315182-32315182:1 ==> rs543736093 (intron_variant)
13:32315192-32315192:1 ==> rs1221594928 (intron_variant)
13:32315204-32315204:1 ==> rs1374960313 (intron_variant)
13:32315205-32315205:1 ==> rs1460434907 

In [5]:
import sys
import json
import time
import requests
from urllib.parse import urlencode


class EnsemblRestClient(object):
    '''
    Small self-throttling client for the Ensembl REST API, built on `requests`.

    Adapted from https://github.com/Ensembl/ensembl-rest/wiki/Example-Python-Client

    # Example usage
    species = "homo_sapiens"
    symbol = "BRCA2"
    run(species, symbol)
    '''
    def __init__(self, server='http://rest.ensembl.org', reqs_per_sec=15):
        '''
        server: base URL that every endpoint is appended to.
        reqs_per_sec: number of successful requests allowed before the
            client checks whether it has to pause.
        '''
        self.server = server
        self.reqs_per_sec = reqs_per_sec
        self.req_count = 0  # successful requests in the current window
        self.last_req = 0   # time.time() at which the current window started

    def perform_rest_action(self, endpoint, hdrs=None, params=None):
        '''
        GET `self.server + endpoint` and return the decoded JSON body.

        endpoint: path appended to the server URL.
        hdrs: optional dict of request headers; 'Content-Type' defaults to
            'application/json'. The caller's dict is not modified.
        params: optional mapping, urlencoded and appended as the query string.

        Returns the decoded JSON, or None when the body is empty or the
        request ended in an HTTP error (which is reported on stderr).
        A 429 response carrying 'Retry-After' is retried after that delay.
        Exceptions other than requests' HTTPError propagate to the caller.
        '''
        # Copy the headers so the dict supplied by the caller stays untouched.
        hdrs = dict(hdrs) if hdrs else {}
        hdrs.setdefault('Content-Type', 'application/json')

        if params:
            endpoint += '?' + urlencode(params)

        data = None

        # check if we need to rate limit ourselves
        if self.req_count >= self.reqs_per_sec:
            delta = time.time() - self.last_req
            if delta < 1:
                time.sleep(1 - delta)
            self.last_req = time.time()
            self.req_count = 0

        try:
            response = requests.get(self.server + endpoint, headers=hdrs)
            response.raise_for_status()
            content = response.content
            if content:
                data = json.loads(content)
            self.req_count += 1

        except requests.exceptions.HTTPError as e:
            # check if we are being rate limited by the server
            if e.response.status_code == 429:
                if 'Retry-After' in e.response.headers:
                    retry = e.response.headers['Retry-After']
                    time.sleep(float(retry))
                    # The query string is already part of `endpoint`; passing
                    # params again would append it twice. Return the retry's data.
                    data = self.perform_rest_action(endpoint, hdrs)
                else:
                    sys.stderr.write('Request failed for {0}: rate limited (429) without a Retry-After header\n'.format(endpoint))
            else:
                sys.stderr.write('Request failed for {0}: Status code: {1.response.status_code} Reason: {1.response.reason}\n'.format(endpoint, e))

        return data

    def get_variants(self, species, symbol):
        '''
        Return the variation features overlapping the gene named `symbol`.

        Resolves the symbol through /xrefs/symbol, takes the first hit's
        'id', then queries /overlap/id for variation features.
        Returns None when the symbol lookup yields nothing.
        '''
        genes = self.perform_rest_action(
            endpoint='/xrefs/symbol/{0}/{1}'.format(species, symbol),
            params={'object_type': 'gene'}
        )
        if genes:
            stable_id = genes[0]['id']
            variants = self.perform_rest_action(
                '/overlap/id/{0}'.format(stable_id),
                params={'feature': 'variation'}
            )
            return variants
        return None

def run(species, symbol):
    '''Print one line per variant overlapping gene `symbol` in `species`.'''
    records = EnsemblRestClient().get_variants(species, symbol)
    if not records:
        # Nothing found (or the lookup failed): print nothing.
        return

    line_template = '{seq_region_name}:{start}-{end}:{strand} ==> {id} ({consequence_type})'
    for record in records:
        print(line_template.format(**record))

In [6]:
# Example usage
# Query the variants of the human BRCA2 gene.
species, symbol = "homo_sapiens", "BRCA2"
run(species=species, symbol=symbol)

13:32315090-32315093:1 ==> rs1252827512 (5_prime_UTR_variant)
13:32315093-32315093:1 ==> rs1593879206 (5_prime_UTR_variant)
13:32315094-32315094:1 ==> rs1593879211 (5_prime_UTR_variant)
13:32315101-32315101:1 ==> rs768038550 (5_prime_UTR_variant)
13:32315106-32315106:1 ==> rs1045400508 (5_prime_UTR_variant)
13:32315109-32315109:1 ==> rs927273467 (5_prime_UTR_variant)
13:32315111-32315111:1 ==> rs1294449372 (5_prime_UTR_variant)
13:32315134-32315134:1 ==> rs750992637 (5_prime_UTR_variant)
13:32315145-32315145:1 ==> rs1246133691 (splice_region_variant)
13:32315146-32315146:1 ==> rs1478582284 (splice_donor_variant)
13:32315160-32315160:1 ==> rs1170598691 (intron_variant)
13:32315175-32315175:1 ==> rs937348137 (intron_variant)
13:32315176-32315176:1 ==> rs1305618584 (intron_variant)
13:32315182-32315182:1 ==> rs543736093 (intron_variant)
13:32315192-32315192:1 ==> rs1221594928 (intron_variant)
13:32315204-32315204:1 ==> rs1374960313 (intron_variant)
13:32315205-32315205:1 ==> rs1460434907 

### without `requests`

In [None]:
import sys
import json
import time

from urllib.parse import urlparse, urlencode
from urllib.request import urlopen, Request
from urllib.error import HTTPError


class EnsemblRestClient(object):
    '''
    Small self-throttling client for the Ensembl REST API using only urllib.
    '''
    def __init__(self, server='http://rest.ensembl.org', reqs_per_sec=15):
        '''
        server: base URL that every endpoint is appended to.
        reqs_per_sec: number of successful requests allowed before the
            client checks whether it has to pause.
        '''
        self.server = server
        self.reqs_per_sec = reqs_per_sec
        self.req_count = 0  # successful requests in the current window
        self.last_req = 0   # time.time() at which the current window started

    def perform_rest_action(self, endpoint, hdrs=None, params=None):
        '''
        GET `self.server + endpoint` and return the decoded JSON body.

        endpoint: path appended to the server URL.
        hdrs: optional dict of request headers; 'Content-Type' defaults to
            'application/json'. The caller's dict is not modified.
        params: optional mapping, urlencoded and appended as the query string.

        Returns the decoded JSON, or None when the body is empty or the
        request ended in an HTTPError (which is reported on stderr).
        A 429 response carrying 'Retry-After' is retried after that delay.
        Exceptions other than HTTPError propagate to the caller.
        '''
        # Work on a copy so the caller's header dict is never mutated.
        hdrs = dict(hdrs) if hdrs else {}
        hdrs.setdefault('Content-Type', 'application/json')

        if params:
            endpoint += '?' + urlencode(params)

        data = None

        # check if we need to rate limit ourselves
        if self.req_count >= self.reqs_per_sec:
            delta = time.time() - self.last_req
            if delta < 1:
                time.sleep(1 - delta)
            self.last_req = time.time()
            self.req_count = 0

        try:
            request = Request(self.server + endpoint, headers=hdrs)
            # Close the response as soon as the body has been read.
            with urlopen(request) as response:
                content = response.read()
            if content:
                data = json.loads(content)
            self.req_count += 1

        except HTTPError as e:
            # check if we are being rate limited by the server
            if e.code == 429:
                if 'Retry-After' in e.headers:
                    retry = e.headers['Retry-After']
                    time.sleep(float(retry))
                    # `endpoint` already contains the query string, so params
                    # must not be passed again; keep the retry's result.
                    data = self.perform_rest_action(endpoint, hdrs)
                else:
                    sys.stderr.write('Request failed for {0}: rate limited (429) without a Retry-After header\n'.format(endpoint))
            else:
                sys.stderr.write('Request failed for {0}: Status code: {1.code} Reason: {1.reason}\n'.format(endpoint, e))

        return data

    def get_variants(self, species, symbol):
        '''
        Return the variation features overlapping the gene named `symbol`.

        Resolves the symbol through /xrefs/symbol, takes the first hit's
        'id', then queries /overlap/id for variation features.
        Returns None when the symbol lookup yields nothing.
        '''
        genes = self.perform_rest_action(
            endpoint='/xrefs/symbol/{0}/{1}'.format(species, symbol),
            params={'object_type': 'gene'}
        )
        if genes:
            stable_id = genes[0]['id']
            variants = self.perform_rest_action(
                '/overlap/id/{0}'.format(stable_id),
                params={'feature': 'variation'}
            )
            return variants
        return None

def run(species, symbol):
    '''Look up the variants of gene `symbol` in `species` and print each one.'''
    template = '{seq_region_name}:{start}-{end}:{strand} ==> {id} ({consequence_type})'
    found = EnsemblRestClient().get_variants(species, symbol)
    # An empty or missing result simply prints nothing.
    for entry in found or ():
        print(template.format(**entry))

## example client from docs

In [None]:
#!/usr/bin/env python

import sys
import json
import time

# Python 2/3 adaptability
try:
    from urllib.parse import urlparse, urlencode
    from urllib.request import urlopen, Request
    from urllib.error import HTTPError

except ImportError:
    from urlparse import urlparse
    from urllib import urlencode
    from urllib2 import urlopen, Request, HTTPError

class EnsemblRestClient(object):
    '''
    Small self-throttling client for the Ensembl REST API.

    Written against the urllib names imported at the top of the script so it
    runs on both Python 2 and Python 3.
    '''
    def __init__(self, server='http://rest.ensembl.org', reqs_per_sec=15):
        '''
        server: base URL that every endpoint is appended to.
        reqs_per_sec: number of successful requests allowed before the
            client checks whether it has to pause.
        '''
        self.server = server
        self.reqs_per_sec = reqs_per_sec
        self.req_count = 0  # successful requests in the current window
        self.last_req = 0   # time.time() at which the current window started

    def perform_rest_action(self, endpoint, hdrs=None, params=None):
        '''
        GET `self.server + endpoint` and return the decoded JSON body.

        endpoint: path appended to the server URL.
        hdrs: optional dict of request headers; 'Content-Type' defaults to
            'application/json'. The caller's dict is not modified.
        params: optional mapping, urlencoded and appended as the query string.

        Returns the decoded JSON, or None when the body is empty or the
        request ended in an HTTPError (which is reported on stderr).
        A 429 response carrying 'Retry-After' is retried after that delay.
        Exceptions other than HTTPError propagate to the caller.
        '''
        # Copy the headers so the dict supplied by the caller stays untouched.
        hdrs = dict(hdrs) if hdrs else {}
        hdrs.setdefault('Content-Type', 'application/json')

        if params:
            endpoint += '?' + urlencode(params)

        data = None

        # check if we need to rate limit ourselves
        if self.req_count >= self.reqs_per_sec:
            delta = time.time() - self.last_req
            if delta < 1:
                time.sleep(1 - delta)
            self.last_req = time.time()
            self.req_count = 0

        try:
            request = Request(self.server + endpoint, headers=hdrs)
            response = urlopen(request)
            # try/finally rather than `with`: the Python 2 response object
            # is not a context manager.
            try:
                content = response.read()
            finally:
                response.close()
            if content:
                data = json.loads(content)
            self.req_count += 1

        except HTTPError as e:
            # check if we are being rate limited by the server
            if e.code == 429:
                if 'Retry-After' in e.headers:
                    retry = e.headers['Retry-After']
                    time.sleep(float(retry))
                    # The query string is already part of `endpoint`; passing
                    # params again would append it twice. Return the retry's data.
                    data = self.perform_rest_action(endpoint, hdrs)
                else:
                    sys.stderr.write('Request failed for {0}: rate limited (429) without a Retry-After header\n'.format(endpoint))
            else:
                sys.stderr.write('Request failed for {0}: Status code: {1.code} Reason: {1.reason}\n'.format(endpoint, e))

        return data

    def get_variants(self, species, symbol):
        '''
        Return the variation features overlapping the gene named `symbol`.

        Resolves the symbol through /xrefs/symbol, takes the first hit's
        'id', then queries /overlap/id for variation features.
        Returns None when the symbol lookup yields nothing.
        '''
        genes = self.perform_rest_action(
            endpoint='/xrefs/symbol/{0}/{1}'.format(species, symbol),
            params={'object_type': 'gene'}
        )
        if genes:
            stable_id = genes[0]['id']
            variants = self.perform_rest_action(
                '/overlap/id/{0}'.format(stable_id),
                params={'feature': 'variation'}
            )
            return variants
        return None


def run(species, symbol):
    '''Fetch the variants of gene `symbol` in `species` and print them.'''
    hits = EnsemblRestClient().get_variants(species, symbol)
    if not hits:
        return

    fmt = '{seq_region_name}:{start}-{end}:{strand} ==> {id} ({consequence_type})'
    for hit in hits:
        print(fmt.format(**hit))

if __name__ == '__main__':
    # Use the command-line arguments when exactly two were supplied;
    # otherwise fall back to the demo query.
    cli_args = sys.argv[1:]
    species, symbol = cli_args if len(cli_args) == 2 else ('human', 'BRAF')

    run(species, symbol)

## my mod

### gpt proposal

In [2]:
import requests

In [None]:
# Species and assembly used by the gene-fetching experiment.
organism, assembly = "homo_sapiens", "GRCh38"

In [3]:
def fetch_ensembl_genes(organism, assembly, kind="ENSG"):
    """Return the IDs of genes on every chromosome whose ID starts with `kind`.

    Looks up the genome description for `organism`, checks that its
    assembly name equals `assembly`, then requests the genes overlapping
    each chromosome and keeps the IDs that begin with `kind`.

    Raises ValueError when the genome lookup returns nothing or the
    assembly name differs; HTTP error statuses raise via raise_for_status().
    """
    server = "http://rest.ensembl.org"
    json_headers = {"Content-Type": "application/json"}

    # NOTE(review): assumes this endpoint's payload carries "assembly_name"
    # and "top_level_region" — confirm against the Ensembl REST docs.
    info_response = requests.get(
        f"{server}/info/genomes/{organism}?content-type=application/json",
        headers=json_headers,
    )
    info_response.raise_for_status()
    genome_info = info_response.json()

    if not genome_info:
        raise ValueError("Invalid organism or no information available.")

    if assembly != genome_info["assembly_name"]:
        raise ValueError("Invalid assembly name provided.")

    # Keep only top-level regions that are chromosomes.
    chromosome_names = [
        region["name"]
        for region in genome_info["top_level_region"]
        if region["coord_system"] == "chromosome"
    ]

    matching_ids = []
    for name in chromosome_names:
        # NOTE(review): the Ensembl docs limit the size of an overlap/region
        # query, so this whole-chromosome window may be rejected — confirm.
        region_response = requests.get(
            f"{server}/overlap/region/{organism}/{name}:1-1000000000?feature=gene;content-type=application/json",
            headers=json_headers,
        )
        region_response.raise_for_status()

        matching_ids.extend(
            gene["id"]
            for gene in region_response.json()
            if gene["id"].startswith(kind)
        )

    return matching_ids

In [None]:
# Example usage:
# Fetch and show every ENSG-prefixed gene ID for human GRCh38.
organism, assembly = "homo_sapiens", "GRCh38"
gene_ids = fetch_ensembl_genes(organism, assembly, kind="ENSG")
print(gene_ids)