In [1]:
import os
from Bio import Entrez, SeqIO

In [5]:
# Email address handed to Biopython's Entrez module for NCBI access.
# Read from the NCBI_EMAIL environment variable; "@." is used when it is unset.
Entrez.email = os.environ.get("NCBI_EMAIL", "@.")

# Accession number of the target gene (HBB)
accession = "NM_000518.5"


In [10]:
# Fetch the FASTA sequence for `accession` from the NCBI nucleotide database
handle = Entrez.efetch(db="nucleotide", id=accession, rettype="fasta", retmode="text")
try:
    # Parse the response as a single FASTA record
    seq_record = SeqIO.read(handle, "fasta")
finally:
    # Release the handle even when parsing fails, so it is never leaked
    handle.close()


In [12]:
# Summarise the fetched record: its ID, its length, and a preview of the
# first 100 bases (one line each)
print(
    f"Gene ID: \t\t{seq_record.id}",
    f"Sequence length: \t{len(seq_record.seq)} bp",
    f"First 100 bases: \t{seq_record.seq[:100]}...",
    sep="\n",
)


Gene ID: 		NM_000518.5
Sequence length: 	628 bp
First 100 bases: 	ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACCATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGG...


In [15]:
# Write the fetched record to disk in FASTA format
file_name = "hbb_gene.fasta"
with open(file_name, mode="w") as out_handle:
    SeqIO.write(seq_record, out_handle, "fasta")

# Confirm where the file was written
print(f"FASTA file saved: {file_name}")

FASTA file saved: hbb_gene.fasta
