# 00 Installation

## 0.1 Install biopython, pdb-tools, py3dmol

In [1]:
#Installing biopython using pip
!pip install biopython

# Install pdb-tools if not already installed:
!pip install pdb-tools

#Installing py3Dmol using pip
!pip install py3Dmol

#And importing the py3Dmol module
import py3Dmol

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85
Collecting pdb-tools
  Downloading pdb_tools-2.5.0-py3-none-any.whl.metadata (6.6 kB)
Downloading pdb_tools-2.5.0-py3-none-any.whl (207 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pdb-tools
Successfully installed pdb-tools-2.5.0
Collecting py3Dmol
  Downloading py3Dmol-2.4.2-py2.py3-none-any.whl.metadata (1.9 kB)
Downloading py3Dmol-2.4.2-py2.py3-none-any.whl (7.0 kB)
Installing collected packages: py3Dmol
Successfully installed py3Dmol-2.4.2


## 0.2 Install Modeller 10.4

In [2]:
# Step 1: Download and extract Modeller tarball
!wget https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
!tar -zxf modeller-10.4.tar.gz
!echo "Modeller extraction completed"

# Step 2: Install Modeller with your license key
# Change directory into the modeller folder
%cd modeller-10.4

# Create the configuration file required for installation.
# This file contains:
#   1. The number 2 (for minimal setup).
#   2. The installation directory where Modeller will be installed.
#   3. Your Modeller license key.
with open('modeller_config', 'w') as f:
    f.write("2\n")
    f.write("/content/compiled/MODELLER\n")
    # Replace the following line with your actual license key if needed.
    f.write("MODELIRANJE\n")

# Run the installer using the configuration file.
!./Install < modeller_config
!echo "Modeller set up completed"

# Change back to the base directory.
%cd /content/

# Step 3: Create a symbolic link to the mod10.4 executable for easy access
%cd modeller-10.4
!ln -sf /content/compiled/MODELLER/bin/mod10.4 /usr/bin/
%cd /content/

# Step 4: Verify that the Modeller command-line tool works.
!mod10.4 | awk 'NR==1{if($1=="usage:") print "Modeller successfully installed"; else print "Something went wrong"}'


--2025-04-07 21:53:43--  https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
Resolving salilab.org (salilab.org)... 169.230.79.19
Connecting to salilab.org (salilab.org)|169.230.79.19|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38244158 (36M) [application/x-gzip]
Saving to: ‘modeller-10.4.tar.gz’


2025-04-07 21:53:45 (23.1 MB/s) - ‘modeller-10.4.tar.gz’ saved [38244158/38244158]

Modeller extraction completed
/content/modeller-10.4
[H[2JInstallation of MODELLER 10.4

This script will install MODELLER 10.4 into a specified directory
for which you have read/write permissions.

To accept the default answers indicated in [...], press <Enter> only.

------------------------------------------------------------------------

The currently supported architectures are as follows:

   1) Linux x86 PC (e.g. RedHat, SuSe).
   2) x86_64 (Opteron/EM64T) box (Linux).
   3) Alternative x86 Linux binary (e.g. for FreeBSD).
   4) Linux on 32-bit ARM (e.g. for Raspberry

## 0.3 Configure modeller

In [3]:
# Install pdb-tools if not already installed:
# !pip install pdb-tools [alrady installed]

# Configure environment for Modeller:
import os, sys

# Set the root where Modeller was installed.
# (In our installation above, Modeller was installed into /content/compiled/MODELLER)
modeller_root = '/content/compiled/MODELLER'

# Append the "modlib" directory to the Python search path:
modlib_path = os.path.join(modeller_root, 'modlib')
if modlib_path not in sys.path:
    sys.path.append(modlib_path)
    print("Added modlib to sys.path:", modlib_path)

# Find the python module folder in modeller's lib folder and append it.
lib_base = os.path.join(modeller_root, 'lib', 'x86_64-intel8')
python_mod_dir = None
for folder in os.listdir(lib_base):
    if folder.startswith("python"):
        python_mod_dir = os.path.join(lib_base, folder)
        if python_mod_dir not in sys.path:
            sys.path.append(python_mod_dir)
            print("Added python module directory to sys.path:", python_mod_dir)
        break
if not python_mod_dir:
    print("Error: Could not find a python module folder in", lib_base)

# Set LD_LIBRARY_PATH so that shared libraries can be found.
ld_library_path = lib_base
current_ld = os.environ.get('LD_LIBRARY_PATH', '')
os.environ['LD_LIBRARY_PATH'] = ld_library_path + (':' + current_ld if current_ld else '')
print("LD_LIBRARY_PATH set to:", os.environ['LD_LIBRARY_PATH'])


Added modlib to sys.path: /content/compiled/MODELLER/modlib
Added python module directory to sys.path: /content/compiled/MODELLER/lib/x86_64-intel8/python3.3
LD_LIBRARY_PATH set to: /content/compiled/MODELLER/lib/x86_64-intel8:/usr/local/nvidia/lib:/usr/local/nvidia/lib64


## 0.4 Add modeller to python path

In [4]:
import sys
import os

# Add Modeller Python paths
sys.path.append('/content/compiled/MODELLER/modlib')
sys.path.append('/content/compiled/MODELLER/lib/x86_64-intel8/python3.3')

# Set environment variable to help Python find shared libraries
os.environ['LD_LIBRARY_PATH'] = '/content/compiled/MODELLER/lib/x86_64-intel8'

# Reload dynamic libraries (for some Colab environments)
!ldconfig /content/compiled/MODELLER/lib/x86_64-intel8

# Try importing Modeller again
try:
    from modeller import *
    from modeller.scripts import complete_pdb
    print("✅ Modeller Python module imported successfully!")
except Exception as e:
    print("❌ Still failed to import Modeller:", e)


/sbin/ldconfig.real: /usr/local/lib/libtcm.so.1 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libhwloc.so.15 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtcm_debug.so.1 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libumf.so.0 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libur_adapter_level_zero.so.0 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libur_adapter_opencl.so.0 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libur_loader.so.0 is

# 01 Make working directory

In [5]:
!pwd
!mkdir /content/4bgq_fix
%cd /content/4bgq_fix
!pwd

/content
/content/4bgq_fix
/content/4bgq_fix


# 02 Download PDB and UniProt

In [6]:
from Bio.PDB import PDBParser
from Bio import SeqIO, Entrez
import requests

# Download 4bgq PDB file
pdb_id = "4bgq"
url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
pdb_filename = f"{pdb_id}.pdb"
with open(pdb_filename, "w") as f:
    f.write(requests.get(url).text)

# Parse structure
parser = PDBParser(QUIET=True)
structure = parser.get_structure(pdb_id, pdb_filename)

# List HETATM and water
hetatm_residues = []
water_residues = []
for model in structure:
    for chain in model:
        for res in chain:
            if res.id[0].startswith("H") and res.resname != "HOH":
                hetatm_residues.append(res)
            if res.resname == "HOH":
                water_residues.append(res)

print(f"Total HETATM residues: {len(hetatm_residues)}")
print(f"Total water molecules: {len(water_residues)}")

# Check for missing residues
print("\n🧩 Residues in chain A:")
for res in structure[0]['A']:
    print(f"{res.resname} {res.id[1]}")

# Get Uniprot sequence for O76039
Entrez.email = "your_email@example.com"
handle = Entrez.efetch(db="protein", id="O76039", rettype="fasta", retmode="text")
seq_record = SeqIO.read(handle, "fasta")
handle.close()
print(f"\nUniprot sequence length: {len(seq_record.seq)}")
print(f"Uniprot sequence:\n{seq_record.seq}")


Total HETATM residues: 6
Total water molecules: 152

🧩 Residues in chain A:
VAL 9
MET 10
ASN 11
LYS 12
PHE 13
GLU 14
ILE 15
LEU 16
GLY 17
VAL 18
VAL 19
GLY 20
GLU 21
GLY 22
ALA 23
TYR 24
GLY 25
VAL 26
VAL 27
LEU 28
LYS 29
CYS 30
ARG 31
HIS 32
LYS 33
GLU 34
THR 35
HIS 36
GLU 37
ILE 38
VAL 39
ALA 40
ILE 41
LYS 42
LYS 43
PHE 44
LYS 45
VAL 53
LYS 54
GLU 55
THR 56
THR 57
LEU 58
ARG 59
GLU 60
LEU 61
LYS 62
MET 63
LEU 64
ARG 65
THR 66
LEU 67
LYS 68
GLN 69
GLU 70
ASN 71
ILE 72
VAL 73
GLU 74
LEU 75
LYS 76
GLU 77
ALA 78
PHE 79
ARG 80
ARG 81
ARG 82
GLY 83
LYS 84
LEU 85
TYR 86
LEU 87
VAL 88
PHE 89
GLU 90
TYR 91
VAL 92
GLU 93
LYS 94
ASN 95
MET 96
LEU 97
GLU 98
LEU 99
LEU 100
GLU 101
GLU 102
MET 103
PRO 104
ASN 105
GLY 106
VAL 107
PRO 108
PRO 109
GLU 110
LYS 111
VAL 112
LYS 113
SER 114
TYR 115
ILE 116
TYR 117
GLN 118
LEU 119
ILE 120
LYS 121
ALA 122
ILE 123
HIS 124
TRP 125
CYS 126
HIS 127
LYS 128
ASN 129
ASP 130
ILE 131
VAL 132
HIS 133
ARG 134
ASP 135
ILE 136
LYS 137
PRO 138
GLU 139
ASN 140
LEU 141
L

# 03 CLEAN PDB

In [9]:
!pdb_selchain -A 4bgq.pdb | pdb_delhetatm > 4bgq_clean.pdb

# 04 FIX MISSING RESIDUE

In [10]:
from Bio import SeqIO, pairwise2
from modeller import environ, automodel, log
import os
import requests

# === Setup ===
uniprot_id = "O76039"
pdb_file = "4bgq_clean.pdb"
alignment_file = "alignment.ali"
modeller_script = "model_build.py"
code = "4bgq"
chain = "A"
uniprot_fasta = f"{uniprot_id}.fasta"

# === Step 1: Download UniProt sequence ===
def download_uniprot_fasta(uniprot_id):
    url = f"https://www.uniprot.org/uniprot/{uniprot_id}.fasta"
    response = requests.get(url)
    if response.status_code == 200:
        with open(uniprot_fasta, "w") as f:
            f.write(response.text)
        print(f"✅ Downloaded UniProt FASTA: {uniprot_fasta}")
    else:
        raise Exception(f"❌ Failed to fetch UniProt sequence for {uniprot_id}")

# === Step 2: Extract sequences ===
def get_uniprot_seq(filename):
    return str(next(SeqIO.parse(filename, "fasta")).seq)

def get_pdb_seq(pdb_path):
    from Bio.PDB import PDBParser, Polypeptide
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("pdb", pdb_path)
    for model in structure:
        for chain_ in model:
            if chain_.id == chain:
                ppb = Polypeptide.PPBuilder()
                seq = ""
                for pp in ppb.build_peptides(chain_):
                    seq += str(pp.get_sequence())
                return seq
    return ""

# === Step 3: Align UniProt with PDB and make .ali file ===
def align_and_make_ali(uniprot_seq, pdb_seq):
    alignment = pairwise2.align.globalxs(uniprot_seq, pdb_seq, -10, -0.5, one_alignment_only=True)[0]
    aligned_uni = alignment.seqA.replace("-", ".")
    aligned_pdb = alignment.seqB.replace("-", "-")

    with open(alignment_file, "w") as f:
        f.write(f""">P1;{code}
structureX:{code}:1:{chain}::960:{chain}::::
{aligned_pdb}*

>P1;target
sequence:target:1:{chain}::960:{chain}::::
{aligned_uni}*
""")

# === Step 4: Write Modeller script ===
def write_modeller_script():
    with open(modeller_script, "w") as f:
        f.write(f"""
from modeller import *
from modeller.automodel import *

log.verbose()
env = environ()
env.io.atom_files_directory = ['.', './']
a = automodel(env, alnfile='{alignment_file}',
              knowns='{code}', sequence='target')
a.starting_model = 1
a.ending_model = 1
a.make()
""")

# === Step 5: Run Modeller ===
def run_modeller_script():
    os.system(f"mod9.25 {modeller_script}")

# === Full Pipeline ===
download_uniprot_fasta(uniprot_id)
uniprot_seq = get_uniprot_seq(uniprot_fasta)
pdb_seq = get_pdb_seq(pdb_file)
align_and_make_ali(uniprot_seq, pdb_seq)
write_modeller_script()
run_modeller_script()


✅ Downloaded UniProt FASTA: O76039.fasta


# 05 Create the modeller script to rebuild missing residues

In [11]:
# Step 1: Create the modeller script to rebuild missing residues
modeller_script = """
from modeller import *
from modeller.automodel import *

log.verbose()
env = environ()

env.io.atom_files_directory = ['./', '/content/4bgq_fix']

class MyModel(automodel):
    def special_patches(self, aln):
        self.rename_segments(segment_ids=['A'], chain_ids=['A'])

a = MyModel(env,
            alnfile  = '/content/4bgq_fix/alignment.ali',
            knowns   = '4bgq',
            sequence = 'target')
a.starting_model = 1
a.ending_model   = 1
a.make()
"""

# Step 2: Write the script to a file
with open("/content/4bgq_fix/rebuild_missing_residues.py", "w") as f:
    f.write(modeller_script)

# Step 3: Run the modeller script
!mod10.4 /content/4bgq_fix/rebuild_missing_residues.py


'import site' failed; use -v for traceback
Traceback (most recent call last):
  File "/content/4bgq_fix/rebuild_missing_residues.py", line 20, in ?
    a.make()
  File "/content/compiled/MODELLER/modlib/modeller/automodel/automodel.py", line 141, in make
    self.homcsr(exit_stage)
  File "/content/compiled/MODELLER/modlib/modeller/automodel/automodel.py", line 612, in homcsr
    aln = self.read_alignment()
  File "/content/compiled/MODELLER/modlib/modeller/automodel/automodel.py", line 573, in read_alignment
    aln.append(file=self.alnfile, align_codes=codes)
  File "/content/compiled/MODELLER/modlib/modeller/alignment.py", line 82, in append
    allow_alternates)
_modeller.FileFormatError: parse_pir__E> Invalid PIR file header line: structureX:4bgq:1:A::960:A::::
There should be 10 fields separated by colons, :
This line actually contains 11 fields.



# 06 Fix the alignment.ali file in-place


In [12]:
# Fix the alignment.ali file in-place
with open("/content/4bgq_fix/alignment.ali", "r") as f:
    lines = f.readlines()

# Auto-correct the structure line if it's malformed
for i, line in enumerate(lines):
    if line.startswith("structureX:"):
        parts = line.strip().split(":")
        if len(parts) > 10:
            # Fix to exactly 10 fields
            lines[i] = ":".join(parts[:10]) + '\n'

with open("/content/4bgq_fix/alignment.ali", "w") as f:
    f.writelines(lines)


# 07

In [13]:
from Bio.PDB import PDBParser

def fix_alignment_header(pdb_path, ali_path):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("4bgq", pdb_path)

    # Extract first model, first chain
    model = structure[0]
    chain = next(model.get_chains())
    chain_id = chain.id

    residues = [res for res in chain.get_residues() if res.id[0] == ' ']
    start_res = residues[0].id[1]
    end_res = residues[-1].id[1]

    # Fix the alignment.ali file
    with open(ali_path) as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        if line.startswith("structureX:4bgq:"):
            # Replace with corrected header
            lines[i] = f"structureX:4bgq:{start_res}:{chain_id}:{start_res}:{chain_id}::::\n"

    with open(ali_path, "w") as f:
        f.writelines(lines)

    return start_res, end_res, chain_id

# Apply fix
pdb_file = "/content/4bgq_fix/4bgq_clean.pdb"
ali_file = "/content/4bgq_fix/alignment.ali"
start_res, end_res, chain_id = fix_alignment_header(pdb_file, ali_file)

print(f"✅ alignment.ali header fixed with: start={start_res}, end={end_res}, chain={chain_id}")


✅ alignment.ali header fixed with: start=9, end=302, chain=A


# ✅ 08 Full Automation Script:
Parse the cleaned PDB to extract correct start_res, end_res, and chain.

Fix both structureX: and sequence: lines in alignment.ali.

Run Modeller automatically after that.

In [14]:
from Bio.PDB import PDBParser
import os

# === Step 1: Fix alignment.ali header based on PDB ===
def fix_alignment_ali(pdb_path, ali_path):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("4bgq", pdb_path)
    model = structure[0]
    chain = next(model.get_chains())
    chain_id = chain.id

    residues = [res for res in chain.get_residues() if res.id[0] == ' ']
    start_res = residues[0].id[1]
    end_res = residues[-1].id[1]

    # Fix the alignment file
    with open(ali_path) as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        if line.startswith("structureX:"):
            lines[i] = f"structureX:4bgq:{start_res}:{chain_id}:{end_res}:{chain_id}::::\n"
        elif line.startswith("sequence:"):
            lines[i] = f"sequence:target:{start_res}:{chain_id}:{end_res}:{chain_id}::::\n"

    with open(ali_path, "w") as f:
        f.writelines(lines)

    print(f"✅ Fixed alignment headers with start={start_res}, end={end_res}, chain={chain_id}")
    return start_res, end_res, chain_id

# === Step 2: Rebuild missing residues using Modeller ===
def run_modeller_script():
    modeller_script = """
from modeller import *
from modeller.automodel import *

log.verbose()
env = environ()
env.io.hetatm = True
env.io.atom_files_directory = ['.']

a = automodel(env,
              alnfile='alignment.ali',
              knowns='4bgq',
              sequence='target',
              assess_methods=(assess.DOPE, assess.GA341))
a.starting_model = 1
a.ending_model = 1
a.make()
"""

    # Write script to file
    script_path = "/content/4bgq_fix/rebuild_missing_residues.py"
    with open(script_path, "w") as f:
        f.write(modeller_script)

    print("✅ Modeller script created. Running it now...")
    os.system(f"mod10.4 {script_path}")

# === Run everything ===
pdb_file = "/content/4bgq_fix/4bgq_clean.pdb"
ali_file = "/content/4bgq_fix/alignment.ali"
fix_alignment_ali(pdb_file, ali_file)
run_modeller_script()


✅ Fixed alignment headers with start=9, end=302, chain=A
✅ Modeller script created. Running it now...
