In [30]:
import requests, sys, json, re

In [53]:
def get_ensembl(ids: list, timeout: float = 30):
    """POST a batch of IDs to the Ensembl REST ``lookup/id`` endpoint.

    Args:
        ids: Stable IDs to look up; they are sent together in one JSON body
            of the form ``{"ids": [...]}``.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the notebook
            cell indefinitely.

    Returns:
        Whatever ``requests.post`` returns (the HTTP response). The status
        code is not checked here; that is left to the caller.
    """
    url = "https://rest.ensembl.org/lookup/id"
    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    payload = json.dumps({"ids": ids})
    return requests.post(url, headers=headers, data=payload, timeout=timeout)

def parse_response_ensembl(response: "requests.Response"):
    """Summarise an Ensembl ``lookup/id`` batch response.

    Args:
        response: The HTTP response returned by ``get_ensembl``. Its JSON body
            is expected to be a mapping of requested ID -> record.

    Returns:
        dict mapping each resolved ID to
        ``{'gene', 'organism', 'geneInfo', 'type'}``. ``'type'`` is always the
        literal ``'gene'``. A field that is absent from a record is reported
        as ``None``. IDs whose record is ``null`` (not resolved by the
        service) are left out of the result.

    Raises:
        requests.HTTPError: if the response carries a 4xx/5xx status, instead
            of failing later on an error payload that has no gene records.
    """
    response.raise_for_status()
    records = response.json()

    output = {}
    for stable_id, record in records.items():
        # Unresolved IDs come back as null; indexing them would raise TypeError.
        if record is None:
            continue
        output[stable_id] = {
            # .get(): not every record carries a display name or description.
            'gene': record.get('display_name'),
            'organism': record.get('species'),
            'geneInfo': record.get('description'),
            'type': 'gene',
        }
    return output

# Demo: look up two Ensembl gene IDs (one mouse, one human) in a single request.
id_list =  ["ENSMUSG00000041147", "ENSG00000139618"]
response = get_ensembl(id_list)
parsed_data = parse_response_ensembl(response)
# Bare expression so the notebook renders the parsed dict as the cell output.
parsed_data

{'ENSG00000139618': {'gene': 'BRCA2',
  'organism': 'homo_sapiens',
  'geneInfo': 'BRCA2 DNA repair associated [Source:HGNC Symbol;Acc:HGNC:1101]',
  'type': 'gene'},
 'ENSMUSG00000041147': {'gene': 'Brca2',
  'organism': 'mus_musculus',
  'geneInfo': 'breast cancer 2, early onset [Source:MGI Symbol;Acc:MGI:109337]',
  'type': 'gene'}}

In [54]:
def get_uniprot(ids: list, timeout: float = 30):
    """GET a batch of entries from the UniProtKB ``accessions`` endpoint.

    Args:
        ids: Accessions to fetch; they are joined with commas into the
            ``accessions`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the notebook
            cell indefinitely.

    Returns:
        Whatever ``requests.get`` returns (the HTTP response). The status
        code is not checked here; that is left to the caller.
    """
    url = "https://rest.uniprot.org/uniprotkb/accessions"
    params = {'accessions': ','.join(ids)}
    return requests.get(url, params=params, timeout=timeout)

def parse_response_uniprot(resp: "requests.Response"):
    """Summarise a UniProtKB ``accessions`` response.

    Args:
        resp: The HTTP response returned by ``get_uniprot``. Its JSON body is
            expected to hold the entries under the ``"results"`` key.

    Returns:
        dict mapping each primary accession to
        ``{'organism', 'gene', 'lenght', 'type'}``. ``'type'`` is always the
        literal ``'protein'``. ``'gene'`` is ``None`` when the entry has no
        primary gene name.

    Raises:
        requests.HTTPError: if the response carries a 4xx/5xx status, instead
            of failing later with a ``KeyError`` on the missing ``"results"``.
    """
    resp.raise_for_status()
    entries = resp.json()["results"]

    output = {}
    for entry in entries:
        # Entries may have no 'genes' list, or a first gene without a
        # 'geneName'; report None rather than raising on them.
        genes = entry.get('genes') or [{}]
        gene = genes[0].get('geneName', {}).get('value')

        output[entry['primaryAccession']] = {
            'organism': entry['organism']['scientificName'],
            'gene': gene,
            # NOTE: 'lenght' (sic) is kept as-is; the key is part of the
            # structure this function has always returned.
            'lenght': entry['sequence']['length'],
            'type': 'protein',
        }
    return output

# Demo: fetch two UniProtKB accessions and print the parsed summary.
my_ids = ['P11473', 'Q91XI3']
resp = get_uniprot(my_ids)
# NOTE: this rebinds `parsed_data`, which the Ensembl demo cell also assigned.
parsed_data = parse_response_uniprot(resp)
print(parsed_data)

{'P11473': {'organism': 'Homo sapiens', 'gene': 'VDR', 'lenght': 427, 'type': 'protein'}, 'Q91XI3': {'organism': 'Ictidomys tridecemlineatus', 'gene': 'INS', 'lenght': 110, 'type': 'protein'}}


In [64]:
def process_ids(db_ids):
    """Route a list of IDs to UniProt or Ensembl and return the parsed result.

    The database is chosen from the FIRST ID only; the whole list is then sent
    to that database, so callers should pass lists of a single kind.

    Args:
        db_ids: Sequence of ID strings, all UniProtKB accessions or all
            Ensembl stable IDs.

    Returns:
        The dict produced by ``parse_response_uniprot`` or
        ``parse_response_ensembl``, or an error string when the list is empty
        or the first ID matches neither pattern.
    """
    # UniProtKB accession format as published by UniProt. The previous
    # pattern (O/P/Q followed by five digits) rejected valid accessions that
    # contain letters, such as 'Q91XI3', and every 10-character accession.
    uniprot_pattern = (
        r'[OPQ][0-9][A-Z0-9]{3}[0-9]'
        r'|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2}'
    )
    ensembl_pattern = r"^(ENS[A-Z0-9]+)"
    # Message kept verbatim in case callers compare against it.
    no_match = "Error: input pattern doesn't match uniport or ensembl pattern"

    # An empty list has no first ID to classify; report it like any other
    # unrecognised input instead of raising IndexError.
    if not db_ids:
        return no_match

    id_0 = db_ids[0]

    if re.fullmatch(uniprot_pattern, id_0):
        response = get_uniprot(db_ids)
        output = parse_response_uniprot(response)

    elif re.match(ensembl_pattern, id_0):
        response = get_ensembl(db_ids)
        output = parse_response_ensembl(response)

    else:
        output = no_match

    return output


# Demo: call the single entry point once with Ensembl IDs and once with
# UniProt accessions, printing each parsed result.
id_list_1 = ["ENSMUSG00000041147", "ENSG00000139618"]
id_list_2 = ['P11473', 'Q91XI3']
print(process_ids(id_list_1))
print(process_ids(id_list_2))

{'ENSG00000139618': {'gene': 'BRCA2', 'organism': 'homo_sapiens', 'geneInfo': 'BRCA2 DNA repair associated [Source:HGNC Symbol;Acc:HGNC:1101]', 'type': 'gene'}, 'ENSMUSG00000041147': {'gene': 'Brca2', 'organism': 'mus_musculus', 'geneInfo': 'breast cancer 2, early onset [Source:MGI Symbol;Acc:MGI:109337]', 'type': 'gene'}}
{'P11473': {'organism': 'Homo sapiens', 'gene': 'VDR', 'lenght': 427, 'type': 'protein'}, 'Q91XI3': {'organism': 'Ictidomys tridecemlineatus', 'gene': 'INS', 'lenght': 110, 'type': 'protein'}}
