In [1]:
# 📦 Install Biopython (if not already installed)
!pip install biopython

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


In [3]:
# 📚 Import Entrez from Biopython
import os

from Bio import Entrez

# 📧 Set your email for Entrez usage
# NCBI asks every E-utilities client to identify itself with a contact address.
# Set the ENTREZ_EMAIL environment variable to use your own address; when it is
# unset or empty, the notebook author's address below is used as before.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or "sheetal.reddy@g.austincc.edu"

In [8]:
# 🔎 List all available Entrez databases
# Calling EInfo without arguments returns the names of all Entrez databases.
# The `with` block closes the response even if parsing raises. The parsed
# result gets its own name so that re-running this cell does not overwrite the
# `record` produced by the search cell and read by the fetch cells.
with Entrez.einfo() as handle:
    db_list_record = Entrez.read(handle)
print("Available Entrez Databases: ", db_list_record["DbList"])

Available Entrez Databases:  ['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'structure', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'medgen', 'mesh', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'proteinclusters', 'pcassay', 'protfam', 'pccompound', 'pcsubstance', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'gtr']


In [7]:
# 📘 Get information about the 'nucleotide' database
# EInfo with db=... returns metadata for that single database.
# The `with` block closes the response even if parsing raises. The parsed
# result gets its own name so that re-running this cell does not overwrite the
# `record` produced by the search cell and read by the fetch cells.
with Entrez.einfo(db="nucleotide") as handle:
    nucleotide_info = Entrez.read(handle)
print("Database Description: ", nucleotide_info["DbInfo"]["Description"])

Database Description:  Core Nucleotide db


In [12]:
# 🧬 Search for TP53 gene in Homo sapiens
# The `with` block closes the response as soon as it has been parsed, and also
# if parsing raises. The result keeps the name `record` because the fetch cells
# below read record["IdList"].
# ESearch returns only the first batch of matching IDs (20 by default), so
# "IdList" is normally much shorter than "Count"; pass retmax=... to get more.
with Entrez.esearch(db="nucleotide", term="TP53[Gene] AND Homo sapiens[Organism]") as handle:
    record = Entrez.read(handle)

print("Matching Nucleotide IDs: ", record["IdList"])
print("\n")
print("Total Matching Records: ", record["Count"])

Matching Nucleotide IDs:  ['2909903186', '383209646', '2870640676', '2246031203', '2246031143', '2246031136', '2246031134', '2246031125', '2246031099', '2246031086', '2246031073', '2246031070', '2246031054', '2246031052', '1894803104', '1894803103', '1894803100', '1894803099', '1894803061', '1894803053']


Total Matching Records:  2358


In [29]:
# 🧾 Fetch sequence in FASTA format
# Uses the first ID from the search result held in `record`, so the search
# cell must have been run before this one.
# The `with` block closes the response even if reading or printing raises.
with Entrez.efetch(db="nucleotide", id=record["IdList"][0], rettype="fasta", retmode="text") as handle:
    print("FASTA Format:\n")
    print(handle.read())

FASTA Format:

>PQ741723.1 Homo sapiens isolate TWH-2033-0-1 mutant tumor protein p53 transcript variant 1 (TP53) mRNA, complete cds
ATGGAGGAGCCGCAGTCAGATCCTAGCGTCGAGCCCCCTCTGAGTCAGGAAACATTTTCAGACCTATGGA
AACTACTTCCTGAAAACAACGTTCTGTCCCCCTTGCCGTCCCAAGCAATGGATGATTTGATGCTGTCCCC
GGACGATATTGAACAATGGTTCACTGAAGACCCAGGTCCAGATGAAGCTCCCAGAATGCCAGAGGCTGCT
CCCCCCGTGGCCCCTGCACCAGCAGCTCCTACACCGGCGGCCCCTGCACCAGCCCCCTCCTGGCCCCTGT
CATCTTCTGTCCCTTCCCAGAAAACCTACCAGGGCAGCTACGGTTTCCGTCTGGGCTTCTTGCATTCTGG
GACAGCCAAGTCTGTGACTTGCACGTACTCCCCTGCCCTCAACAAGATGTTTTGCCAACTGGCCAAGACC
TGCCCTGTGCAGCTGTGGGTTGATTCCACACCCCCGCCCGGCACCCGCGTCCGCGCCATGGCCATCTACA
AGCAGTCACAGCACATGACGGAGGTTGTGAGGCGCTGCCCCCACCATGAGCGCTGCTCAGATAGCGATGG
TCTGGCCCCTCCTCAGCATCTTATCCGAGTGGAAGGAAATTTGCGTGTGGAGTATTTGGATGACAGAGAA
ACACTTTTCGACATAGTGTGGTGGTGCCCTATGAGCCGCCTGAGGTTGGCTCTGACTGTACCACCATCCA
CTACAACTACATGTGTAACAGTTCCTGCATGGGCGGCATGA




In [30]:
# 📄 Fetch full GenBank record
# Uses the first ID from the search result held in `record`, so the search
# cell must have been run before this one.
# The `with` block closes the response even if reading or printing raises.
with Entrez.efetch(db="nucleotide", id=record["IdList"][0], rettype="gb", retmode="text") as handle:
    print("GenBank Format:\n")
    print(handle.read())

GenBank Format:

LOCUS       PQ741723                 741 bp    mRNA    linear   PRI 17-FEB-2025
DEFINITION  Homo sapiens isolate TWH-2033-0-1 mutant tumor protein p53
            transcript variant 1 (TP53) mRNA, complete cds.
ACCESSION   PQ741723
VERSION     PQ741723.1
KEYWORDS    .
SOURCE      Homo sapiens (human)
  ORGANISM  Homo sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; Homo.
REFERENCE   1  (bases 1 to 741)
  AUTHORS   Kwong,A., Ho,C.Y.S., Law,F.B.F., Au,T.C.H. and Ma,E.S.K.
  TITLE     Germline mutation spectrum in Hong Kong
  JOURNAL   Unpublished
REFERENCE   2  (bases 1 to 741)
  AUTHORS   Kwong,A., Ho,C.Y.S., Law,F.B.F., Au,T.C.H. and Ma,E.S.K.
  TITLE     Direct Submission
  JOURNAL   Submitted (11-DEC-2024) Dept of Surgery, Breast Division, The
            University of Hong Kong, Rm1401, Queen Mary Hospital, Pokfulam,
      