## Modeller



In [4]:
%%bash
# -------------------------------
# Install Dependencies & Modeller (bash cell)
# -------------------------------

# 1. Install Python dependencies
pip install biopython pdb-tools py3Dmol

# 2. Download & unpack Modeller 10.4
wget -q https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
tar -zxf modeller-10.4.tar.gz

# 3. Configure installer with your license key
cat > modeller_config <<EOF
2
$(pwd)/compiled/MODELLER
MODELIRANJE
EOF

# 4. Run installer and symlink mod10.4
mkdir -p compiled/MODELLER
cd modeller-10.4
./Install < ../modeller_config
ln -sf $(pwd)/../compiled/MODELLER/bin/mod10.4 /usr/local/bin/mod10.4

# 5. Verify installation
mod10.4 | head -1   # should print "usage:" if installed correctly


Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting pdb-tools
  Downloading pdb_tools-2.5.0-py3-none-any.whl.metadata (6.6 kB)
Collecting py3Dmol
  Downloading py3dmol-2.5.1-py2.py3-none-any.whl.metadata (2.1 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.3/3.3 MB 8.8 MB/s eta 0:00:00
Downloading pdb_tools-2.5.0-py3-none-any.whl (207 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 207.3/207.3 kB 15.4 MB/s eta 0:00:00
Downloading py3dmol-2.5.1-py2.py3-none-any.whl (7.2 kB)
Installing collected packages: py3Dmol, pdb-tools, biopython
Successfully installed biopython-1.85 pdb-tools-2.5.0 py3Dmol-2.5.1
[H[2JInstallation of MODELLER 10.4

This script will install MODELLER 10.4 into a specified directory
for which you have read/write permissions.

To accept the default answers indicated in [...], press <Enter>

In [9]:
%%bash
# -------------------------------
# Full Pipeline Fix: Remodeling with Modeller & CRYST1 Preservation
# -------------------------------

# --- Customize these ---
pdb_id="1lyz"
uniprot_id="P00698"
region_len=148

# -------------------------------
# 1) Fetch input files
# -------------------------------
wget -q https://files.rcsb.org/download/${pdb_id^^}.pdb
wget -q https://www.uniprot.org/uniprot/${uniprot_id}.fasta -O ${uniprot_id}.fasta
mkdir -p work && mv ${pdb_id^^}.pdb ${uniprot_id}.fasta work/
cd work
cp ${pdb_id^^}.pdb ${pdb_id}_orig.pdb

# -------------------------------
# 2) Clean PDB & extract CRYST1
# -------------------------------
python3 << 'PYCODE'
from Bio.PDB import PDBParser, PDBIO, Select
pdb="1lyz"
# grab CRYST1
cryst1 = next((l for l in open(f"{pdb}_orig.pdb") if l.startswith("CRYST1")), "")
# strip hetero/missing
class Std(Select):
    def accept_residue(self, r): return r.id[0]==' '
parser = PDBParser(QUIET=True)
structure = parser.get_structure("S", f"{pdb}_orig.pdb")
io = PDBIO(); io.set_structure(structure)
io.save(f"{pdb}_clean.pdb", Std())
with open("cryst1.txt","w") as f: f.write(cryst1)
print("✅ Cleaned PDB and extracted CRYST1")
PYCODE

# -------------------------------
# 3) Reconcile SEQRES vs UniProt
# -------------------------------
python3 << 'PYCODE'
from Bio import SeqIO
from Bio.Data.IUPACData import protein_letters_3to1 as aa3to1
pdb, uni, L = "1lyz", "P00698", 148
# SEQRES → one-letter
seqres = ""
for l in open(f"{pdb}_orig.pdb"):
    if l.startswith("SEQRES") and l[11]=="A":
        seqres += "".join(aa3to1.get(r.capitalize(),"X") for r in l.split()[4:])
uni_seq = str(SeqIO.read(f"{uni}.fasta","fasta").seq)[:L]
if seqres != uni_seq:
    print("⚠️ Mismatch; using UniProt slice.")
    seqres = uni_seq
else:
    print("✅ SEQRES matches UniProt.")
with open("final_seq.txt","w") as f: f.write(seqres)
PYCODE

# -------------------------------
# 4) Build template sequence
# -------------------------------
python3 << 'PYCODE'
from Bio.PDB import PDBParser
from Bio.Data.IUPACData import protein_letters_3to1 as aa3to1
pdb, L = "1lyz", 148
parser = PDBParser(QUIET=True)
s = parser.get_structure("S", f"{pdb}_clean.pdb")
chain = next(s[0].get_chains())
obs = {r.id[1]: aa3to1.get(r.get_resname().capitalize(),"X")
       for r in chain if r.id[0]==' '}
template = "".join(obs.get(i, "-") for i in range(1, L+1))
with open("template_seq.txt","w") as f: f.write(template)
print("✅ Template sequence built")
PYCODE

# -------------------------------
# 5) Write PIR alignment
# -------------------------------
python3 << 'PYCODE'
pdb, L = "1lyz", 148
templ = open("template_seq.txt").read().strip()
target = open("final_seq.txt").read().strip()
start = templ.find(next(c for c in templ if c!="-")) + 1
end   = templ.rfind(next(c for c in templ[::-1] if c!="-")) + 1
with open("alignment.ali","w") as f:
    f.write(f">P1;{pdb}\n")
    f.write(f"structureX:{pdb}:{start}:A:{end}:A::::\n{templ}*\n")
    f.write(">P1;target\n")
    f.write(f"sequence:target:1:A:{L}:A::::\n{target}*\n")
print("✅ alignment.ali written")
PYCODE

# -------------------------------
# 6) Run Modeller
# -------------------------------
mod10.4 << 'EOF'
from modeller import *
from modeller.automodel import *
env = environ()
env.io.hetatm = True
a = automodel(env,
              alnfile='alignment.ali',
              knowns='1lyz',
              sequence='target',
              assess_methods=(assess.DOPE, assess.GA341))
a.starting_model = 1
a.ending_model   = 1
a.make()
EOF

# -------------------------------
# 7) Re-insert CRYST1 into models
# -------------------------------
python3 << 'PYCODE'
import glob
cryst = open("cryst1.txt").read()
pdb = "1lyz"
for fn in glob.glob("target*.pdb") + glob.glob(f"{pdb}_clean.pdb"):
    out = fn.replace(".pdb", "_with_cryst.pdb")
    with open(fn) as inp, open(out, "w") as o:
        inserted = False
        for l in inp:
            if (l.startswith("ATOM") or l.startswith("HETATM")) and not inserted:
                o.write(cryst)
                inserted = True
            if not l.startswith("CRYST1"):
                o.write(l)
        if not inserted:
            o.write(cryst)
    print(f"✔ {out}")
print("✅ All models have CRYST1 inserted.")
PYCODE


✅ Cleaned PDB and extracted CRYST1
⚠️ Mismatch; using UniProt slice.
✅ Template sequence built
✅ alignment.ali written
usage: mod10.4 script [...]

✔ 1lyz_clean_with_cryst.pdb
✅ All models have CRYST1 inserted.


In [2]:
# -------------------------------
# Full Pipeline: Modeller Remodeling with UniProt Sequence & CRYST1 Preservation
# -------------------------------

# -------------------------------
# Step 0: Install Dependencies
# -------------------------------
!pip install biopython pdb-tools py3Dmol

# -------------------------------
# Step 1: Download and Install Modeller
# -------------------------------
!wget -q https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
!tar -zxf modeller-10.4.tar.gz
!mkdir -p /content/compiled/MODELLER

%cd modeller-10.4
with open('modeller_config', 'w') as f:
    f.write("2\n")
    f.write("/content/compiled/MODELLER\n")
    f.write("MODELIRANJE\n")  # <-- Replace with your license key
!./Install < modeller_config

# Link mod10.4 to PATH
!ln -sf /content/compiled/MODELLER/bin/mod10.4 /usr/bin/
!mod10.4 | awk 'NR==1{if($1=="usage:") print "✅ Modeller installed"; else print "❌ Installation failed"}'
%cd /content

# -------------------------------
# Step 2: Download Input Files
# -------------------------------
pdb_id = '4bgq'
uniprot_id = 'O76039'
full_length = 303  # region of interest

!wget -q https://files.rcsb.org/download/{pdb_id}.pdb
!wget -q https://www.uniprot.org/uniprot/{uniprot_id}.fasta -O {uniprot_id}.fasta

!mkdir -p /content/{pdb_id}_fix
!mv {pdb_id}.pdb {uniprot_id}.fasta /content/{pdb_id}_fix/
%cd /content/{pdb_id}_fix
!cp {pdb_id}.pdb {pdb_id}_orig.pdb

# -------------------------------
# Step 3: Clean the PDB
# -------------------------------
from Bio.PDB import PDBParser, PDBIO, Select

# Preserve CRYST1 record
cryst1_line = ''
with open(f"{pdb_id}_orig.pdb") as f:
    for line in f:
        if line.startswith('CRYST1'):
            cryst1_line = line
            break

class StandardResidueSelect(Select):
    def accept_residue(self, residue):
        return residue.id[0] == ' '

parser = PDBParser(QUIET=True)
structure = parser.get_structure(pdb_id, f"{pdb_id}.pdb")
io = PDBIO()
io.set_structure(structure)
io.save(f"{pdb_id}_clean.pdb", select=StandardResidueSelect())
print(f"✅ Cleaned PDB saved as {pdb_id}_clean.pdb")
!cp {pdb_id}_clean.pdb {pdb_id}.pdb

# -------------------------------
# Step 4: Extract SEQRES Sequence & Check UniProt
# -------------------------------
from Bio.Data.IUPACData import protein_letters_3to1
from Bio import SeqIO

def three_to_one(resname):
    return protein_letters_3to1.get(resname.capitalize(), 'X')

full_seq = ''
with open(f"{pdb_id}_orig.pdb") as f:
    for line in f:
        if line.startswith('SEQRES') and line[11] == 'A':
            for res in line.split()[4:]:
                full_seq += three_to_one(res)
if len(full_seq) == full_length + 1 and full_seq[0] != full_seq[1]:
    full_seq = full_seq[1:]
print(f"➡ SEQRES raw length: {len(full_seq)}")

uniprot_seq = str(SeqIO.read(f"{uniprot_id}.fasta", 'fasta').seq)
uniprot_region = uniprot_seq[:full_length]
if full_seq != uniprot_region:
    print(f"⚠️ SEQRES vs UniProt mismatch; using UniProt 1-{full_length}.")
    full_seq = uniprot_region
else:
    print("✅ SEQRES matches UniProt.")
print(f"✔ Final sequence length: {len(full_seq)}")

# -------------------------------
# Step 5: Build Template Sequence from ATOM
# -------------------------------
from Bio.PDB import PDBParser as _PDBParser
pdb_parser2 = _PDBParser(QUIET=True)
struc = pdb_parser2.get_structure(pdb_id, f"{pdb_id}.pdb")
chain = next(struc[0].get_chains())
observed = {r.id[1]: three_to_one(r.get_resname()) for r in chain if r.id[0]==' '}
if min(observed) == 0:
    observed = {k+1:v for k,v in observed.items()}
start, end = min(observed), max(observed)
print(f"➡ Observed residues {start}-{end}")
template_seq = ''.join(observed.get(i,'-') for i in range(1, full_length+1))
assert len(template_seq)==full_length
print("✔ Template sequence built")

# -------------------------------
# Step 6: Write PIR Alignment
# -------------------------------
with open('alignment.ali','w') as f:
    f.write(f">P1;{pdb_id}\n")
    f.write(f"structureX:{pdb_id}:{start}:A:{end}:A::::\n")
    f.write(template_seq+"*\n")
    f.write(f">P1;target\nsequence:target:1:A:{full_length}:A::::\n")
    f.write(full_seq+"*\n")
print("✅ alignment.ali written")

# -------------------------------
# Step 7: Run Modeller
# -------------------------------
mod_script = f'''from modeller import *
from modeller.automodel import *
env=environ()
env.io.hetatm=True

a=automodel(env,alnfile='alignment.ali',knowns='{pdb_id}',sequence='target',assess_methods=(assess.DOPE,assess.GA341))
a.starting_model=1
a.ending_model=1
a.make()'''
with open('run_modeller.py','w') as f:
    f.write(mod_script)
print("✅ Modeller script saved. Running...")
!mod10.4 run_modeller.py
print("🎉 Modelling complete.")

# -------------------------------
# Step 8: Insert CRYST1 in Correct PDB Location
# -------------------------------
import glob

for model_file in glob.glob('target*.pdb') + glob.glob(f"{pdb_id}.B*.pdb"):
    out_file = model_file.replace('.pdb', '_with_cryst.pdb')
    with open(model_file) as orig, open(out_file, 'w') as out:
        inserted = False
        for line in orig:
            # Copy existing header lines (HEADER, TITLE, REMARK, etc.)
            if not inserted and (line.startswith('ATOM') or line.startswith('HETATM')):
                # Before writing first ATOM/HETATM, insert CRYST1 line
                if cryst1_line:
                    out.write(cryst1_line)
                inserted = True
                out.write(line)
            elif line.startswith('CRYST1'):
                # Skip any CRYST1 lines in the model file
                continue
            else:
                out.write(line)
        # If file had no ATOM lines (unlikely), append CRYST1 at end
        if not inserted and cryst1_line:
            out.write(cryst1_line)
    print(f"✔️ Wrote {out_file} with CRYST1 inserted before ATOM records")

print("✅ All done. Model PDBs now follow PDB convention with CRYST1 in the proper location.")


SyntaxError: invalid syntax (ipython-input-2-2019661028.py, line 7)

# Visualize the protein

In [None]:
# -------------------------------
# Step 9: Visualize Model and Sequence (Colorful + Numbering)
# -------------------------------
import py3Dmol
from Bio.PDB import PDBParser

# Select first modeled PDB
model_file = glob.glob('*_with_cryst.pdb')[0]

# Extract and format sequence with numbering
parser = PDBParser(QUIET=True)
structure = parser.get_structure('model', model_file)
chain_A = structure[0]['A']
seq = ''.join(three_to_one(res.get_resname()) for res in chain_A if res.id[0]==' ')
chunk_size = 60
for i in range(0, len(seq), chunk_size):
    segment = seq[i:i+chunk_size]
    # Build ruler with numbers every 10 residues
    ruler = [' ']*len(segment)
    for j in range(len(segment)):
        if (j+1) % 10 == 0:
            label = str(i + j + 1)
            start_pos = j - len(label) + 1
            ruler[start_pos:j+1] = list(label)
    print(''.join(ruler))
    print(segment)

# 3D Visualization with spectrum coloring by residue index
view = py3Dmol.view(width=800, height=500)
with open(model_file) as f:
    pdb_data = f.read()
view.addModel(pdb_data, 'pdb')
# Apply spectrum coloring: rainbow from N- to C-terminus
view.setStyle({'chain':'A'}, {'cartoon': {'color':'spectrum'}})
view.setBackgroundColor('0xFFFFFF')
view.zoomTo({'chain':'A'})
view.show()


        10        20        30        40        50        60
MKIPNIGNVMNKFEILGVVGEGAYGVVLKCRHKETHEIVAIKKFKDSEENEEVKETTLRE
        70        80        90       100       110       120
LKMLRTLKQENIVELKEAFRRRGKLYLVFEYVEKNMLELLEEMPNGVPPEKVKSYIYQLI
       130       140       150       160       170       180
KAIHWCHKNDIVHRDIKPENLLISHNDVLKLCDFGFARNLSEGNNANYTEYVATRWYRSP
       190       200       210       220       230       240
ELLLGAPYGKSVDMWSVGCILGELSDGQPLFPGESEIDQLFTIQKVLGPLPSEQMKLFYS
       250       260       270       280       290       300
NPRFHGLRFPAVNHPQSLERRYLGILNSVLLDLMKNLLKLDPADRYLTEQCLNHPTFQTQ
   
RLL
