In [5]:
from Bio import Entrez, SeqIO

def get_gene_and_protein_positions(rsid, transcript_location):
    # Step 1: Query rsid to get gene and protein information
    Entrez.email = "liweihao0401@gmail.com"
    handle = Entrez.efetch(db="snp", id=rsid, rettype="gb", retmode="text")
    record = SeqIO.read(handle, "genbank")
    gene_name = record.annotations["gene"]
    protein_accession = record.features[0].qualifiers["protein_id"][0]

    # Step 2: Query transcript location to get gene and protein positions
    transcript_accession = transcript_location.split(":")[0]
    position = int(transcript_location.split(":")[1])

    handle = Entrez.efetch(db="nucleotide", id=transcript_accession, rettype="gb", retmode="text")
    transcript_record = SeqIO.read(handle, "genbank")

    for feature in transcript_record.features:
        if feature.type == "CDS":
            cds_start = feature.location.start.position
            cds_end = feature.location.end.position
            if cds_start <= position <= cds_end:
                gene_position = feature.qualifiers["gene"][0]
                protein_position = position - cds_start + 1
                return gene_name, gene_position, protein_accession, protein_position

    return None

# Example usage
rsid = "rs10011796"
transcript_location = "NC_000003.12:183917980"
result = get_gene_and_protein_positions(rsid, transcript_location)

if result:
    gene_name, gene_position, protein_accession, protein_position = result
    print(f"Gene Name: {gene_name}")
    print(f"Gene Position: {gene_position}")
    print(f"Protein Accession: {protein_accession}")
    print(f"Protein Position: {protein_position}")
else:
    print("No matching information found.")


ValueError: No records found in handle

In [1]:
import requests

def get_gene_and_protein_positions(rsid, transcript_location):
    # Step 1: Query rsid to get gene and protein information
    url = f"https://rest.ensembl.org/variation/human/{rsid}"
    response = requests.get(url, headers={"Content-Type": "application/json"})
    
    if response.status_code == 200:
        data = response.json()
        print(data['mappings'][0])
        gene_name = data['mappings'][0]['gene_symbol']
        protein_id = data['mappings'][0]['protein_id']
    else:
        print(f"Error: Unable to fetch information for rsid {rsid}")
        return None

    # Step 2: Parse transcript location to get gene and protein positions
    transcript_accession, position = transcript_location.split(":")
    position = int(position)

    return gene_name, transcript_accession, position, protein_id

# Example usage
rsid = "rs1000002"
transcript_location = "NC_000003.12:183917980"
result = get_gene_and_protein_positions(rsid, transcript_location)

if result:
    gene_name, transcript_accession, position, protein_id = result
    print(f"Gene Name: {gene_name}")
    print(f"Transcript Accession: {transcript_accession}")
    print(f"Position: {position}")
    print(f"Protein ID: {protein_id}")


{'location': '3:183917980-183917980', 'start': 183917980, 'strand': 1, 'ancestral_allele': 'C', 'allele_string': 'C/T', 'assembly_name': 'GRCh38', 'seq_region_name': '3', 'coord_system': 'chromosome', 'end': 183917980}


KeyError: 'gene_symbol'

In [2]:
import requests

def get_gene_and_protein_positions(rsid, transcript_location):
    # Step 1: Query rsid to get gene and protein information
    url = f"https://rest.ensembl.org/variation/human/{rsid}"
    response = requests.get(url, headers={"Content-Type": "application/json"})
    
    if response.status_code == 200:
        data = response.json()
        if 'mappings' in data and len(data['mappings']) > 0:
            transcript_accession = data['mappings'][0]['transcript_id']
            position = data['mappings'][0]['start']
        else:
            print(f"Error: Unable to fetch information for rsid {rsid}")
            return None
    else:
        print(f"Error: Unable to fetch information for rsid {rsid}")
        return None

    # Step 2: Parse transcript location to get gene and protein positions
    transcript_accession, position = transcript_location.split(":")
    position = int(position)

    return transcript_accession, position

# Example usage
rsid = "rs1000002"
transcript_location = "NC_000003.12:183917980"
result = get_gene_and_protein_positions(rsid, transcript_location)

if result:
    transcript_accession, position = result
    print(f"Transcript Accession: {transcript_accession}")
    print(f"Position: {position}")


KeyError: 'transcript_id'

In [1]:
import requests

def get_gene_and_protein_positions(rsid, transcript_location):
    # Step 1: Query transcript location to get gene and protein information
    transcript_accession, position = transcript_location.split(":")
    position = int(position)

    url = f"https://rest.ensembl.org/map/human/{transcript_accession}/{position}..{position}/GRCh38"
    response = requests.get(url, headers={"Content-Type": "application/json"})
    
    if response.status_code == 200:
        data = response.json()
        if 'mappings' in data and len(data['mappings']) > 0:
            gene_name = data['mappings'][0]['external_name']
            protein_id = data['mappings'][0]['protein_id']
        else:
            print(f"Error: Unable to fetch information for rsid {rsid}")
            return None
    else:
        print(f"Error: Unable to fetch information for rsid {rsid}")
        return None

    return gene_name, transcript_accession, position, protein_id

# Example usage
rsid = "rs1000002"
transcript_location = "NC_000003.12:183917980"
result = get_gene_and_protein_positions(rsid, transcript_location)

if result:
    gene_name, transcript_accession, position, protein_id = result
    print(f"Gene Name: {gene_name}")
    print(f"Transcript Accession: {transcript_accession}")
    print(f"Position: {position}")
    print(f"Protein ID: {protein_id}")


Error: Unable to fetch information for rsid rs1000002
