# Rebuild missing residues in a PDB structure while keeping the exact amino-acid length of the given PDB

In [1]:
#Installing biopython using pip
!pip install biopython

# Install pdb-tools if not already installed:
!pip install pdb-tools

#Installing py3Dmol using pip
!pip install py3Dmol

#And importing the py3Dmol module
import py3Dmol

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85
Collecting pdb-tools
  Downloading pdb_tools-2.5.0-py3-none-any.whl.metadata (6.6 kB)
Downloading pdb_tools-2.5.0-py3-none-any.whl (207 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pdb-tools
Successfully installed pdb-tools-2.5.0
Collecting py3Dmol
  Downloading py3Dmol-2.4.2-py2.py3-none-any.whl.metadata (1.9 kB)
Downloading py3Dmol-2.4.2-py2.py3-none-any.whl (7.0 kB)
Installing collected packages: py3Dmol
Successfully installed py3Dmol-2.4.2


In [2]:
# Step 1: Download and extract Modeller tarball
!wget https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
!tar -zxf modeller-10.4.tar.gz
!mkdir -p /content/compiled/MODELLER

# Step 2: Install Modeller (replace MODELIRANJE with your license key if needed)
%cd modeller-10.4
with open('modeller_config', 'w') as f:
    f.write("2\n")
    f.write("/content/compiled/MODELLER\n")
    f.write("MODELIRANJE\n")  # <-- Replace with your Modeller license key
!./Install < modeller_config

# Step 3: Create symbolic link
!ln -sf /content/compiled/MODELLER/bin/mod10.4 /usr/bin/

# Verify install
!mod10.4 | awk 'NR==1{if($1=="usage:") print "✅ Modeller successfully installed"; else print "❌ Installation failed"}'

%cd /content


--2025-04-07 23:32:25--  https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
Resolving salilab.org (salilab.org)... 169.230.79.19
Connecting to salilab.org (salilab.org)|169.230.79.19|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38244158 (36M) [application/x-gzip]
Saving to: ‘modeller-10.4.tar.gz’


2025-04-07 23:32:25 (100 MB/s) - ‘modeller-10.4.tar.gz’ saved [38244158/38244158]

/content/modeller-10.4
[H[2JInstallation of MODELLER 10.4

This script will install MODELLER 10.4 into a specified directory
for which you have read/write permissions.

To accept the default answers indicated in [...], press <Enter> only.

------------------------------------------------------------------------

The currently supported architectures are as follows:

   1) Linux x86 PC (e.g. RedHat, SuSe).
   2) x86_64 (Opteron/EM64T) box (Linux).
   3) Alternative x86 Linux binary (e.g. for FreeBSD).
   4) Linux on 32-bit ARM (e.g. for Raspberry Pi).
   5) Linux on 64-bit ARM

In [3]:
# Example: 4bgq and UniProt ID O76039
pdb_id = "4bgq"
uniprot_id = "O76039"

# Download PDB
!wget -q https://files.rcsb.org/download/{pdb_id}.pdb

# Download UniProt FASTA
!wget -q https://www.uniprot.org/uniprot/{uniprot_id}.fasta -O {uniprot_id}.fasta

# Create working directory
!mkdir -p /content/4bgq_fix
!mv {pdb_id}.pdb {uniprot_id}.fasta /content/4bgq_fix/
%cd /content/4bgq_fix


/content/4bgq_fix


In [4]:
from Bio.PDB import PDBParser, PDBIO, Select

class StandardResidueSelect(Select):
    """Selection filter for PDBIO that keeps residues whose hetero flag is blank."""

    def accept_residue(self, residue):
        """Return True when the first field of ``residue.id`` is a single space."""
        hetero_flag = residue.id[0]
        return hetero_flag == ' '

# Parse the downloaded entry 4bgq.pdb from the current working directory.
parser = PDBParser(QUIET=True)
structure = parser.get_structure("4bgq", "4bgq.pdb")

# Write the structure back out through StandardResidueSelect, so only residues
# accepted by that filter (blank hetero flag in residue.id[0]) end up in
# 4bgq_clean.pdb.
io = PDBIO()
io.set_structure(structure)
io.save("4bgq_clean.pdb", select=StandardResidueSelect())

print("✅ Cleaned PDB saved as 4bgq_clean.pdb")


✅ Cleaned PDB saved as 4bgq_clean.pdb


In [5]:
from Bio import SeqIO
from Bio.PDB import PDBParser
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Build a gap-aware PIR alignment for rebuilding missing residues.
#   template line : one letter per residue present in the cleaned PDB, '-' for
#                   every residue number that is missing between first and last.
#   target line   : the same letters, with each gap filled from UniProt.
# ASSUMPTION (confirm for each entry): PDB residue numbers follow UniProt
# numbering, i.e. PDB residue n corresponds to uniprot_seq[n - 1].
from Bio.Data.IUPACData import protein_letters_3to1  # cell-local import: 3-letter -> 1-letter codes

# Parse PDB for residue range
structure = PDBParser(QUIET=True).get_structure("4bgq", "4bgq_clean.pdb")
chain = next(structure[0].get_chains())
residues = [r for r in chain.get_residues() if r.id[0] == ' ']
if not residues:
    raise ValueError("4bgq_clean.pdb: the first chain contains no standard residues")
start_res, end_res = residues[0].id[1], residues[-1].id[1]

# Extract UniProt sequence
uniprot_seq = str(next(SeqIO.parse("O76039.fasta", "fasta")).seq)

# One-letter codes of the residues actually present in the structure
# (unknown residue names become 'X').
pdb_seq = "".join(
    protein_letters_3to1.get(r.resname.strip().capitalize(), "X") for r in residues
)

# Residue number -> one-letter code. The gap logic below needs unique, strictly
# increasing residue numbers; insertion codes or re-used numbers would silently
# drop residues, so refuse to continue in that case.
resolved = dict(zip((r.id[1] for r in residues), pdb_seq))
if len(resolved) != len(residues) or list(resolved) != sorted(resolved):
    raise ValueError("Residue numbering is not unique and increasing (insertion codes?)")
if start_res < 1 or end_res > len(uniprot_seq):
    raise ValueError(
        f"PDB residues {start_res}-{end_res} fall outside the UniProt sequence "
        f"(length {len(uniprot_seq)})"
    )

span = range(start_res, end_res + 1)
template_seq = "".join(resolved.get(n, "-") for n in span)
# Trim UniProt to match PDB span: keep the PDB residue where one is present and
# take the UniProt residue only where the structure has a gap.
trimmed_seq = "".join(resolved.get(n, uniprot_seq[n - 1]) for n in span)

# Cheap check of the numbering assumption above.
mismatches = sum(1 for n, aa in resolved.items() if uniprot_seq[n - 1] != aa)
if mismatches:
    print(f"⚠️ {mismatches} resolved residue(s) differ from UniProt at the same position; "
          "check that the PDB numbering follows the UniProt numbering.")

# Write PIR alignment. The file field names the cleaned PDB because that is the
# file the template sequence was derived from; chain id comes from the structure.
with open("alignment.ali", "w") as f:
    f.write(f""">P1;4bgq
structureX:4bgq_clean:{start_res}:{chain.id}:{end_res}:{chain.id}::::
{template_seq}*
>P1;target
sequence:target:{start_res}:{chain.id}:{end_res}:{chain.id}::::
{trimmed_seq}*
""")

print("✅ PIR alignment file created.")


✅ PIR alignment file created.


In [6]:
from Bio.PDB import PDBParser

def fix_alignment_ali(pdb_path, ali_path):
    """Rewrite the PIR header lines of ``ali_path`` from the structure in ``pdb_path``.

    The first chain of the first model is inspected; the residue numbers of its
    first and last residue with a blank hetero flag, together with the chain id,
    are written into every line starting with ``structureX:`` or ``sequence:``.
    All other lines are kept as they are. The file is overwritten in place.

    Returns:
        tuple: ``(start_res, end_res, chain_id)``.
    """
    first_chain = next(PDBParser(QUIET=True).get_structure("4bgq", pdb_path)[0].get_chains())
    chain_id = first_chain.id
    standard = [res for res in first_chain.get_residues() if res.id[0] == ' ']
    start_res, end_res = standard[0].id[1], standard[-1].id[1]

    with open(ali_path) as f:
        original_lines = f.readlines()

    # Header prefix -> replacement line; at most one prefix can match a line.
    headers = {
        "structureX:": f"structureX:4bgq:{start_res}:{chain_id}:{end_res}:{chain_id}::::\n",
        "sequence:": f"sequence:target:{start_res}:{chain_id}:{end_res}:{chain_id}::::\n",
    }
    rewritten = []
    for text in original_lines:
        for prefix, replacement in headers.items():
            if text.startswith(prefix):
                text = replacement
                break
        rewritten.append(text)

    with open(ali_path, "w") as f:
        f.writelines(rewritten)

    print(f"✅ Fixed alignment headers with start={start_res}, end={end_res}, chain={chain_id}")
    return start_res, end_res, chain_id

# Apply the header fix to alignment.ali using the cleaned structure. As the last
# expression of the cell, the returned (start_res, end_res, chain_id) tuple is
# also shown as the cell output.
fix_alignment_ali("4bgq_clean.pdb", "alignment.ali")


✅ Fixed alignment headers with start=9, end=302, chain=A


(9, 302, 'A')

In [7]:
from Bio.PDB import PDBParser
from Bio.Data.IUPACData import protein_letters_3to1
import os

def three_to_one(resname):
    """Map a three-letter residue name to its one-letter code.

    The name is capitalized before the lookup in ``protein_letters_3to1``;
    names not found in that table give ``'X'``.
    """
    key = resname.capitalize()
    if key in protein_letters_3to1:
        return protein_letters_3to1[key]
    return 'X'

# Build the PIR alignment that Modeller uses to rebuild the missing residues.
# Writing the same gap-free sequence for template and target gives Modeller
# nothing to rebuild. Instead:
#   template line : residues present in the cleaned PDB, '-' for each residue
#                   number missing between the first and last residue;
#   target line   : the same residues, with each gap filled from UniProt.
# The target therefore spans start_res..end_res with no gaps.
# ASSUMPTION (confirm for each entry): PDB residue numbers follow UniProt
# numbering, i.e. PDB residue n corresponds to uniprot_seq[n - 1].
from Bio import SeqIO  # cell-local import: reads the UniProt FASTA used to fill gaps

# Extract sequence from cleaned PDB
pdb_parser = PDBParser(QUIET=True)
structure = pdb_parser.get_structure("4bgq", "4bgq_clean.pdb")
chain = next(structure[0].get_chains())

residues = [r for r in chain.get_residues() if r.id[0] == ' ']
if not residues:
    raise ValueError("4bgq_clean.pdb: the first chain contains no standard residues")
pdb_seq = "".join([three_to_one(r.get_resname()) for r in residues])

start_res = residues[0].id[1]
end_res = residues[-1].id[1]
chain_id = chain.id

# Residue number -> one-letter code. The gap logic needs unique, strictly
# increasing residue numbers; insertion codes or re-used numbers would silently
# drop residues, so stop instead of writing a wrong alignment.
resolved = dict(zip((r.id[1] for r in residues), pdb_seq))
if len(resolved) != len(residues) or list(resolved) != sorted(resolved):
    raise ValueError("Residue numbering is not unique and increasing (insertion codes?)")

uniprot_seq = str(next(SeqIO.parse("O76039.fasta", "fasta")).seq)
if start_res < 1 or end_res > len(uniprot_seq):
    raise ValueError(
        f"PDB residues {start_res}-{end_res} fall outside the UniProt sequence "
        f"(length {len(uniprot_seq)})"
    )

span = range(start_res, end_res + 1)
template_seq = "".join(resolved.get(n, "-") for n in span)
target_seq = "".join(resolved.get(n, uniprot_seq[n - 1]) for n in span)
missing_count = template_seq.count("-")

# Cheap check of the numbering assumption above.
mismatches = sum(1 for n, aa in resolved.items() if uniprot_seq[n - 1] != aa)
if mismatches:
    print(f"⚠️ {mismatches} resolved residue(s) differ from UniProt at the same position; "
          "check that the PDB numbering follows the UniProt numbering.")

# Write PIR alignment. The entry code stays '4bgq'; the file field names the
# cleaned PDB because the template sequence was derived from that file.
with open("alignment.ali", "w") as f:
    f.write(f""">P1;4bgq
structureX:4bgq_clean:{start_res}:{chain_id}:{end_res}:{chain_id}::::
{template_seq}*
>P1;target
sequence:target:{start_res}:{chain_id}:{end_res}:{chain_id}::::
{target_seq}*
""")

print(f"✅ Alignment written for {len(pdb_seq)} residues from {start_res} to {end_res} on chain {chain_id}")
print(f"   {missing_count} missing residue(s) marked as gaps in the template; target length = {len(target_seq)}")


✅ Alignment written for 276 residues from 9 to 302 on chain A


In [8]:
# Modeller job, held as a string so it can be written to disk and executed with
# the `mod10.4` command instead of the notebook kernel.
# The script reads alignment.ali, uses the alignment entry '4bgq' as the known
# structure and 'target' as the sequence to model, looks for atom files in the
# current directory, enables HETATM reading, requests DOPE and GA341 assessment,
# and builds exactly one model (starting_model = ending_model = 1).
modeller_script = """
from modeller import *
from modeller.automodel import *

log.verbose()
env = environ()
env.io.hetatm = True
env.io.atom_files_directory = ['.']

a = automodel(env,
              alnfile='alignment.ali',
              knowns='4bgq',
              sequence='target',
              assess_methods=(assess.DOPE, assess.GA341))
a.starting_model = 1
a.ending_model = 1
a.make()
"""

# Write the job into the current working directory.
with open("rebuild_missing_residues.py", "w") as f:
    f.write(modeller_script)

# NOTE(review): the recorded run printed only "'import site' failed; use -v for
# traceback" after this command — confirm that a model file was actually produced.
print("✅ Modeller script written. Running it...")
!mod10.4 rebuild_missing_residues.py


✅ Modeller script written. Running it...
'import site' failed; use -v for traceback


In [11]:
# Run pdb-tools' pdb_reres on the downloaded 4bgq.pdb with the option -1
# (presumably "start numbering at 1" — confirm against the pdb_reres usage text)
# and redirect the result to 4bgq_renumbered.pdb.
# NOTE(review): the input is the original download, not a Modeller output, and
# this cell's execution count (In [11]) skips 9 and 10 — confirm this is the
# intended input and that no intermediate cells are missing.
!pdb_reres -1 4bgq.pdb > 4bgq_renumbered.pdb