In [9]:
# 📦 Install Biopython
!pip install biopython



In [10]:
# 📚 Import necessary modules
from Bio.PDB import PDBParser, PDBList

In [11]:
# ⬇️ Fetch PDB entry 7BWN in legacy PDB format into the local "dir" folder.
# NOTE(review): this entry was labelled "TP53 protein" here, but the header
# printed further down titles it "crystal structure of a designed protein
# heterocatenane" — confirm the intended description.
pdbl = PDBList()
# Last expression of the cell, so the returned file path is displayed.
pdbl.retrieve_pdb_file(pdb_code="7BWN", pdir="dir", file_format="pdb")

Structure exists: 'dir/pdb7bwn.ent' 


'dir/pdb7bwn.ent'

In [12]:
# 📚 Modules needed to silence parser diagnostics
import warnings
from Bio import BiopythonWarning

# 🚫 Ignore every warning in the BiopythonWarning category from here on
# (this filter is process-wide, so it also affects later cells).
warnings.simplefilter(action="ignore", category=BiopythonWarning)

# 🏗️ Build a Structure object from the file fetched in the previous cell;
# "7BWN" is the identifier given to the resulting structure.
parser = PDBParser()
structure = parser.get_structure("7BWN", "dir/pdb7bwn.ent")

In [13]:
# 📝 Report selected fields of the parsed header dictionary for '7BWN':
# the entry title ("name"), the resolution and the keyword list.
print(f"Structure Title: {structure.header['name']}")
print(f"Resolution (Å): {structure.header['resolution']}")
print(f"Keywords: {structure.header['keywords']}")

Structure Title: crystal structure of a designed protein heterocatenane
Resolution (Å): 2.4
Keywords: heterocatenane, recombination


In [14]:
# ✅ How many models does the structure hold?
model_count = len(structure)
print(f"Number of models in the structure '7BWN': {model_count}\n")

# ✅ How many chains does the first model (index 0) hold?
num_chains = len(structure[0])
print(f"Number of chains in this model: {num_chains}")

Number of models in the structure '7BWN': 1

Number of chains in this model: 16


In [16]:
# ✅ Walk over the chains of the first model and report, for each one,
# its ID and how many residue entries it contains.
print("The chain IDs and their total number of residues for the first model:\n")
for chain in structure[0]:
    # len() on the chain gives its number of children, just as len() is used
    # on the structure and the model in the previous cell.
    residue_count = len(chain)
    print(f"chain ID: {chain.id}")
    print(f"Number of residues: {residue_count}\n")

The chain IDs and their total number of residues for the first model:

chain ID: F
Number of residues: 353

chain ID: L
Number of residues: 40

chain ID: A
Number of residues: 342

chain ID: B
Number of residues: 41

chain ID: C
Number of residues: 336

chain ID: D
Number of residues: 45

chain ID: E
Number of residues: 351

chain ID: G
Number of residues: 40

chain ID: H
Number of residues: 340

chain ID: I
Number of residues: 40

chain ID: J
Number of residues: 340

chain ID: K
Number of residues: 34

chain ID: M
Number of residues: 351

chain ID: N
Number of residues: 33

chain ID: O
Number of residues: 336

chain ID: P
Number of residues: 40



In [22]:
# 🏷️ Pick the first chain of model 0 (in iteration order)
model_chains = list(structure[0])
first_chain = model_chains[0]

# 🏷️ Collect every residue entry, then keep those whose hetero flag
# (first element of the residue id tuple) is blank.
# NOTE(review): a blank flag presumably marks standard residues only, so this
# filter likely drops all hetero residues (ligands, modified residues), not
# just water — confirm that "excluding water" is the intended wording.
all_residues = list(first_chain)
non_water_residues = [entry for entry in all_residues if entry.id[0] == " "]

# ✅ Summary for the first chain: its ID and both residue counts
print(f"Chain ID of the first chain: {first_chain.id}")
print(f"Total number of residues (including water) in the first chain: {len(all_residues)}")
print(f"Total number of residues (excluding water) in the first chain: {len(non_water_residues)}\n")

# ✅ Show at most the first 20 of the filtered residues: full id tuple
# plus the three-letter residue name
print("The first 20 residues in the first chain (excluding water) with their IDs and 3-letter amino acid codes:\n")
for residue in non_water_residues[:20]:
    print(f"ID: {residue.id}, Code: {residue.resname}")

Chain ID of the first chain: F
Total number of residues (including water) in the first chain: 353
Total number of residues (excluding water) in the first chain: 280

The first 20 residues in the first chain (excluding water) with their IDs and 3-letter amino acid codes:

ID: (' ', 1, ' '), Code: MET
ID: (' ', 2, ' '), Code: SER
ID: (' ', 3, ' '), Code: LYS
ID: (' ', 4, ' '), Code: GLY
ID: (' ', 5, ' '), Code: GLU
ID: (' ', 6, ' '), Code: GLU
ID: (' ', 7, ' '), Code: LEU
ID: (' ', 8, ' '), Code: PHE
ID: (' ', 9, ' '), Code: THR
ID: (' ', 10, ' '), Code: GLY
ID: (' ', 11, ' '), Code: VAL
ID: (' ', 12, ' '), Code: VAL
ID: (' ', 13, ' '), Code: PRO
ID: (' ', 14, ' '), Code: ILE
ID: (' ', 15, ' '), Code: LEU
ID: (' ', 16, ' '), Code: VAL
ID: (' ', 17, ' '), Code: GLU
ID: (' ', 18, ' '), Code: LEU
ID: (' ', 19, ' '), Code: ASP
ID: (' ', 20, ' '), Code: GLY
