In [2]:
# -------------------------------
# Dependencies and Setup
# -------------------------------
!pip install biopython pdb-tools py3Dmol

# Download and extract Modeller tarball
!wget -q https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
!tar -zxf modeller-10.4.tar.gz
!mkdir -p /content/compiled/MODELLER

# Install Modeller (replace MODELIRANJE with your license key if needed)
%cd modeller-10.4
with open('modeller_config', 'w') as f:
    f.write("2\n")
    f.write("/content/compiled/MODELLER\n")
    f.write("MODELIRANJE\n")  # <-- Replace with your Modeller license key if needed
!./Install < modeller_config

# Create symbolic link so that "mod10.4" is available in PATH
!ln -sf /content/compiled/MODELLER/bin/mod10.4 /usr/bin/

# Verify install
!mod10.4 | awk 'NR==1{if($1=="usage:") print "✅ Modeller successfully installed"; else print "❌ Installation failed"}'
%cd /content

# -------------------------------
# Download Input Files
# -------------------------------
# Example: PDB ID 4bgq and UniProt ID O76039
pdb_id = "4bgq"
uniprot_id = "O76039"

# Download the PDB file and UniProt FASTA sequence
!wget -q https://files.rcsb.org/download/{pdb_id}.pdb
!wget -q https://www.uniprot.org/uniprot/{uniprot_id}.fasta -O {uniprot_id}.fasta

# Create working directory and move files there
!mkdir -p /content/4bgq_fix
!mv {pdb_id}.pdb {uniprot_id}.fasta /content/4bgq_fix/
%cd /content/4bgq_fix

# -------------------------------
# Clean the PDB file
# -------------------------------
from Bio.PDB import PDBParser, PDBIO, Select

class StandardResidueSelect(Select):
    """Selector for ``PDBIO.save`` that keeps residues with a blank first id field.

    Only ``accept_residue`` is overridden; every other decision is left to the
    ``Select`` base class.
    """

    def accept_residue(self, residue):
        """Return True when the first element of ``residue.id`` is a single space."""
        # In Biopython the first id element is the hetero flag; a blank flag
        # marks an ordinary (non-HETATM) residue.
        hetero_field = residue.id[0]
        return hetero_field == ' '

# Load the downloaded structure and write a copy that contains only the
# residues accepted by StandardResidueSelect. File names follow pdb_id so the
# step keeps working when a different structure is selected.
parser = PDBParser(QUIET=True)
structure = parser.get_structure(pdb_id, f"{pdb_id}.pdb")
io = PDBIO()
io.set_structure(structure)
clean_pdb_path = f"{pdb_id}_clean.pdb"
io.save(clean_pdb_path, select=StandardResidueSelect())

print(f"✅ Cleaned PDB saved as {clean_pdb_path}")

# -------------------------------
# Create Corrected PIR Alignment Using Full-Length Template Sequence
# -------------------------------
from Bio.PDB import PDBParser
from Bio.Data.IUPACData import protein_letters_3to1
from Bio import SeqIO

def three_to_one(resname):
    """Translate a three-letter residue name into its one-letter code.

    The name is normalised with ``str.capitalize`` (e.g. "ALA" -> "Ala") and
    looked up in ``protein_letters_3to1``. Names missing from that table
    yield ``'X'``.
    """
    lookup_key = resname.capitalize()
    if lookup_key in protein_letters_3to1:
        return protein_letters_3to1[lookup_key]
    return 'X'

# Read the full UniProt sequence (first record of the downloaded FASTA file).
fasta_path = f"{uniprot_id}.fasta"
uniprot_record = next(SeqIO.parse(fasta_path, "fasta"), None)
if uniprot_record is None:
    # `wget ... -O file` leaves an empty file behind when the download fails;
    # report that clearly instead of surfacing a bare StopIteration.
    raise ValueError(
        f"No FASTA record found in {fasta_path}; "
        "check that the UniProt download succeeded."
    )
uniprot_seq = str(uniprot_record.seq)
# Number of alignment columns; the original note expects 960 for the example entry.
full_length = len(uniprot_seq)

# Parse the cleaned PDB and collect the residues of the first chain in the
# first model (blank hetero flag only).
pdb_parser = PDBParser(QUIET=True)
structure = pdb_parser.get_structure(pdb_id, f"{pdb_id}_clean.pdb")
chain = next(structure[0].get_chains())
residues = [r for r in chain.get_residues() if r.id[0] == ' ']

if not residues:
    raise ValueError("No observed residues found in the PDB.")

# Build a dictionary: residue number -> one-letter code for each observed residue.
# r.id[1] is the residue number from the PDB (e.g., 9, 10, …).
observed_dict = {r.id[1]: three_to_one(r.get_resname()) for r in residues}

# Determine the observed region (minimum and maximum residue number found)
pdb_first = min(observed_dict)
pdb_last = max(observed_dict)

# Build the template sequence: one column per target position 1..full_length,
# holding the observed letter where the PDB has that residue number and a
# gap '-' everywhere else.
template_seq = "".join(
    observed_dict.get(position, "-") for position in range(1, full_length + 1)
)

# Sanity check: every observed residue must have landed in its own column.
# The row length is full_length by construction, so what can actually go wrong
# is residues being lost: numbers outside 1..full_length are skipped by the
# loop above, and residues that share a number (insertion codes) overwrite
# each other in observed_dict. Either case leaves the alignment row out of
# step with the coordinate file.
placed_count = len(template_seq) - template_seq.count("-")
if placed_count != len(residues):
    raise ValueError(
        f"Only {placed_count} of {len(residues)} observed residues could be placed "
        f"in positions 1-{full_length}; PDB numbering (range {pdb_first}-{pdb_last}) "
        "does not map one-to-one onto the target sequence."
    )

# Write the PIR alignment file.
# The template header names the coordinate file and the residue range/chain
# that the template row describes:
#   * the file field points at the cleaned PDB, because template_seq was built
#     from that file and must match its residues exactly;
#   * the chain fields use the chain that was actually parsed instead of
#     assuming it is called "A".
template_header = (
    f"structureX:{pdb_id}_clean:{pdb_first}:{chain.id}:{pdb_last}:{chain.id}::::"
)
alignment_lines = [
    f">P1;{pdb_id}",
    template_header,
    f"{template_seq}*",
    ">P1;target",
    f"sequence:target:1:A:{full_length}:A::::",
    f"{uniprot_seq}*",
]
with open("alignment.ali", "w") as f:
    f.write("\n".join(alignment_lines) + "\n")
print(f"✅ Alignment written for full length {full_length} with template observed region {pdb_first}-{pdb_last} on chain {chain.id}")

# -------------------------------
# Build Missing Residues with Modeller
# -------------------------------
modeller_script = """
from modeller import *
from modeller.automodel import *

log.verbose()
env = environ()
env.io.hetatm = True
env.io.atom_files_directory = ['.']

a = automodel(env,
              alnfile='alignment.ali',
              knowns='4bgq',
              sequence='target',
              assess_methods=(assess.DOPE, assess.GA341))
a.starting_model = 1
a.ending_model = 1
a.make()
"""

with open("rebuild_missing_residues.py", "w") as f:
    f.write(modeller_script)

print("✅ Modeller script written. Running it...")
!mod10.4 rebuild_missing_residues.py


/content/4bgq_fix/modeller-10.4
[H[2JInstallation of MODELLER 10.4

This script will install MODELLER 10.4 into a specified directory
for which you have read/write permissions.

To accept the default answers indicated in [...], press <Enter> only.

------------------------------------------------------------------------

The currently supported architectures are as follows:

   1) Linux x86 PC (e.g. RedHat, SuSe).
   2) x86_64 (Opteron/EM64T) box (Linux).
   3) Alternative x86 Linux binary (e.g. for FreeBSD).
   4) Linux on 32-bit ARM (e.g. for Raspberry Pi).
   5) Linux on 64-bit ARM (e.g. for Raspberry Pi 3 or later).

Select the type of your computer from the list above [2]: 

Full directory name in which to install MODELLER 10.4
[/root/bin/modeller10.4]: 

License key, obtained from our academic license server at
https://salilab.org/modeller/registration.html: 
------------------------------------------------------------------------

The following settings will be used: 

The typ