# **IgFold**: Fast, accurate antibody structure prediction

Official notebook for [IgFold](https://www.biorxiv.org/content/10.1101/2022.04.20.488972): Fast, accurate antibody structure prediction from deep learning on massive set of natural antibodies.  The code, data, and weights for this work are made available for non-commercial use. For commercial inquiries, please contact `jruffolo[at]jhu.edu`.

**Definitions and Sequences Setup**

In [None]:
# @title Definitions and Sequences Setup
# Configuration cell: defines the run parameters and input sequences that the
# later cells read, and makes sure the output directory exists.
import sys
import os

# --- Essential Variables (Do Not Change) ---
# "major.minor" of the running interpreter; later cells use it for mamba
# installs and to build the matching site-packages path.
python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
pred_dir = "predictions"

# --- User-Defined Parameters ---
name = "chimeric_Fv"  # @param {type:"string"} Name for the output PDB file
do_refine = False # @param {type:"boolean"} Whether to perform structural refinement using OpenMM
do_renum = False # @param {type:"boolean"} Whether to renumber the structure (e.g., Chothia numbering) <-- SET TO FALSE
single_model = False # @param {type:"boolean"} If True, generates only 1 model; if False, generates 4 models

# --- Antibody Sequences (Update these with your current sequences) ---
# IMPORTANT: Use the one-letter amino acid code, no spaces or special characters.
# Keys are chain identifiers, values are the chain sequences.
sequences = {
    "H": "QVQLVQSGAEVKKPGASVKVSCKASGYTFTRYTMHWVRQAPGQGLEWMGYINPSRGYTNYNQKFKDAVLMTRKTSTSAVYMELSSLRSEDTAVYYCARYYDDHYCLDYGQGTLVTVSS",
    "L": "EIVLTQSPATLSLSPGERATLSCSASSSVSYMMWYQQKPSQASRLLIYDTSKKLSGIPARHSRSGSGTDSYLTISSLEPEDFAVYYCQQWSSNPFTFGQGTKLEIK"
}

# Surface violations of the rule above right away instead of deep inside the
# prediction cell. This only warns; it never blocks the run.
standard_residues = set("ACDEFGHIKLMNPQRSTVWY")
for chain_id, chain_seq in sequences.items():
    unexpected_chars = sorted(set(chain_seq) - standard_residues)
    if unexpected_chars:
        print(f"⚠️ Chain {chain_id} contains characters outside the 20 standard amino acids: {unexpected_chars}")

# Create the output directory (no-op when it already exists)
os.makedirs(pred_dir, exist_ok=True)

print(f"✅ Definitions set. Output directory: {pred_dir}")

**Install Dependencies and Check Environment**



In [None]:
# @title Install dependencies and check environment
# Note: Using '!' for pip commands for better Colab compatibility.

# --- Install Mamba/Conda ---
if not os.path.isfile("CONDA_READY"):
    print("installing conda...")
    # Using Colab's native !wget is safer than os.system("wget...")
    !wget -qnc https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh
    os.system("bash Mambaforge-colab-Linux-x86_64.sh -bfp /usr/local")
    os.system("mamba config --set auto_update_conda false")
    os.system("touch CONDA_READY")

# --- Install IgFold and PyTorch ---
if not os.path.isfile("CODE_READY"):
    print("installing igfold...")

    # 1. CRITICAL STEP: FORCE INSTALL PyTorch 1.11.0
    # The 'index-url' ensures we get the older, compatible PyTorch versions.
    !pip install torch==1.11.0 torchvision==0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu --force-reinstall

    # 2. Install IgFold (it will use the compatible PyTorch we just installed)
    !pip install 'igfold>=0.3.0'

    # 3. Install visualization tools
    !pip install -q --no-warn-conflicts 'py3Dmol>=2.0.1' matplotlib seaborn

    os.system("touch CODE_READY")

# --- Install Amber/OpenMM for Refinement ---
if do_refine and not os.path.isfile("AMBER_READY"):
    print("installing amber/openmm and pdbfixer...")

    # 1. Use mamba for the core OpenMM suite (best for complex dependencies)
    # Keeping the mamba command here just in case it handles hidden OpenMM requirements better.
    !mamba install -y -q -c conda-forge openmm=7.7.0 python={PYTHON_VERSION} pdbfixer 2>&1 1>/dev/null

    # 2. CRITICAL: Use !pip as a GUARANTEE to install pdbfixer and openmm
    # This registers them in the main Python environment's site-packages path.
    print("Forcing installation via pip...")
    !pip install pdbfixer openmm

    os.system("touch AMBER_READY")

# --- Install Abnumber for Renumbering ---
if do_renum and not os.path.isfile("ABNUMBER_READY"):
    print("installing abnumber...")

    # 1. Use mamba for the core AbNumber suite (keeping this as the preferred method)
    !mamba install -y -q -c bioconda abnumber python={PYTHON_VERSION} 2>&1 1>/dev/null

    # 2. CRITICAL: Use !pip as a GUARANTEE to install abnumber
    print("Forcing installation of abnumber via pip...")
    !pip install abnumber pandas --force-reinstall # Keep pandas re-install just in case

    os.system("touch ABNUMBER_READY")

# --- Post-Install Check ---
import pkg_resources
try:
    pkg_resources.get_distribution('igfold')
    print("✅ IgFold installation check: SUCCESS.")
except pkg_resources.DistributionNotFound:
    print("❌ IgFold installation check: FAILED.")

print("\n--- ATTENTION ---")
print("You MUST restart the runtime now for the Mamba/Conda and IgFold installations to fully take effect.")

**Predict Antibody Structure with IgFold**

In [None]:
# @title Predict antibody structure with IgFold
# Makes the freshly installed packages importable, loads the IgFold models and
# folds `sequences` into `pred_pdb`. Relies on `python_version`, `pred_dir`,
# `name`, `sequences`, `single_model`, `do_refine` and `do_renum` from the
# definitions cell (re-run that cell after restarting the runtime).
import contextlib
import os
import sys
from subprocess import getoutput

# Ensure the new site-packages path is used after Mamba/Conda setup
guessed_path = f"/usr/local/lib/python{python_version}/site-packages/"
if guessed_path not in sys.path:
    sys.path.insert(0, guessed_path)

# Find the directory that actually contains the installed 'igfold' package.
# Raw string: the sed expression contains '\/', which is an invalid escape
# sequence in a normal Python string literal.
igfold_path = getoutput(
    r"find /usr/local/lib/ -name 'igfold' | grep 'site-packages/igfold' | head -n 1 | sed 's/\/igfold//'"
)

# getoutput() also captures stderr, so only trust the result if it is a real directory.
if igfold_path and os.path.isdir(igfold_path) and igfold_path not in sys.path:
    sys.path.insert(0, igfold_path)
    print(f"✅ Found and inserted IgFold path: {igfold_path}")

# Now, imports should work
try:
    # Star import kept on purpose: later cells call plot_prmsd / show_pdb from it.
    from igfold.utils.visualize import *
    from igfold import IgFoldRunner
except ImportError:
    # ImportError also covers ModuleNotFoundError; re-raise so the root cause
    # stays visible in the traceback instead of being hidden by a bare exit.
    print("❌ ERROR: Could not import IgFold. Ensure all installation steps and restarts were completed.")
    raise

# `safe_globals` only exists in recent PyTorch releases. On older releases
# (such as the 1.11.0 pinned by the install cell) the import fails; in that
# case the models are loaded without the allow-list context.
# NOTE(review): assumes older torch.load does not enforce weights-only
# unpickling by default, so no allow-list is needed there — confirm.
try:
    from torch.serialization import safe_globals
except ImportError:
    safe_globals = None

if safe_globals is not None:
    # Classes that must be allow-listed for the checkpoint to unpickle.
    from transformers.models.bert.configuration_bert import BertConfig
    from transformers.models.bert.tokenization_bert import BertTokenizer
    from transformers.tokenization_utils import Trie
    from transformers.models.bert.tokenization_bert import BasicTokenizer
    from transformers.models.bert.tokenization_bert import WordpieceTokenizer

    model_load_context = safe_globals(
        [BertConfig, BertTokenizer, Trie, BasicTokenizer, WordpieceTokenizer]
    )
else:
    model_load_context = contextlib.nullcontext()

# Run the IgFold pipeline
num_models = 1 if single_model else 4

print(f"Starting IgFold prediction with {num_models} model(s)...")

# Load the models inside the allow-list context to avoid the UnpicklingError
with model_load_context:
    igfold = IgFoldRunner(num_models=num_models)

pred_pdb = os.path.join(pred_dir, f"{name}.pdb")
pred = igfold.fold(
    pred_pdb,
    sequences=sequences,
    do_refine=do_refine,
    use_openmm=True, # Use OpenMM if do_refine is True
    do_renum=do_renum,
)

print(f"✅ Prediction complete! PDB saved to: {pred_pdb}")


**Visualize the Predicted Structure**

In [None]:
# @title Visualize the Predicted Structure
# Renders the predicted PDB in an interactive py3Dmol viewer.
import py3Dmol
import os

# Build the path from the `name` and `pred_dir` set in the definitions cell.
# Hard-coding them here would silently override the user's choice for this
# cell and for every later cell that reads `name` / `pred_pdb`.
pred_pdb = os.path.join(pred_dir, f"{name}.pdb")

# Check if the file exists before attempting to load
if os.path.exists(pred_pdb):
    print(f"Loading structure from: {pred_pdb}")

    # Initialize the 3D viewer
    view = py3Dmol.view(width=800, height=450)

    # Load the PDB file content
    with open(pred_pdb, 'r') as f:
        pdb_data = f.read()

    # Add the model to the viewer
    view.addModel(pdb_data, 'pdb')

    # Apply styling: cartoon representation of the backbone using the
    # 'spectrum' color scheme
    view.setStyle({'cartoon': {'color': 'spectrum'}})

    # Center and display the molecule
    view.zoomTo()
    view.show()
else:
    print(f"❌ Error: PDB file not found at {pred_pdb}. Please ensure the prediction cell ran successfully.")

In [None]:
#@title Plot per-residue predicted RMSD

# The figure is written into the prediction directory, named after the run.
prmsd_fig_file = os.path.join(pred_dir, f"{name}_prmsd.png")

# pRMSD values are moved to the CPU before plotting; CDR shading follows the
# renumbering switch from the definitions cell.
plot_prmsd(
    sequences,
    pred.prmsd.cpu(),
    prmsd_fig_file,
    shade_cdr=do_renum,
    pdb_file=pred_pdb,
)

In [None]:
#@title Show predicted structure with predicted RMSD

#@markdown Structure is colored from low (blue) to high (red) pRMSD.

# Second argument is the number of entries in `sequences`.
show_pdb(
    pred_pdb,
    len(sequences),
    bb_sticks=False,
    sc_sticks=True,
    color="b",
)

In [None]:
#@title Download results

#@markdown Download zip file containing structure prediction and annotation results. If download fails, results are also accessible from file explorer on the left panel of the notebook.

import locale
import shutil

from google.colab import files

# NOTE(review): this override looks like the usual Colab workaround for `!`
# shell commands failing on a non-UTF-8 locale. The archive is now built in
# pure Python, so it is kept only in case later cells use shell magics — confirm.
locale.getpreferredencoding = lambda: "UTF-8"

# Build "<name>.result.zip" in the working directory, containing the prediction
# directory. Doing this in Python instead of `!zip $name...` keeps run names with
# spaces or shell metacharacters safe and raises on failure instead of
# discarding the error output.
shutil.make_archive(f"{name}.result", "zip", root_dir=".", base_dir=pred_dir)
files.download(f"{name}.result.zip")