# **IgFold**: Fast, accurate antibody structure prediction

Official notebook for [IgFold](https://www.biorxiv.org/content/10.1101/2022.04.20.488972): Fast, accurate antibody structure prediction from deep learning on massive set of natural antibodies.  The code, data, and weights for this work are made available for non-commercial use. For commercial inquiries, please contact `jruffolo[at]jhu.edu`.

In [None]:
# @title Definitions and Sequences Setup
import sys
import os

# --- Essential Variables (Do Not Change) ---
# "major.minor" of the running interpreter. Later cells use it for the mamba
# installs and to build the /usr/local site-packages path.
python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
# Directory (relative to the working directory) that receives all outputs.
pred_dir = "predictions"

# --- Define Parameters ---
name = "chimeric_Fv"  # @param {type:"string"} Name for the output PDB file
do_refine = False # @param {type:"boolean"} Whether to perform structural refinement using OpenMM
do_renum = False # @param {type:"boolean"} Whether to renumber the structure (e.g., Chothia numbering) <-- SET TO FALSE
single_model = False # @param {type:"boolean"} If True, generates only 1 model; if False, generates 4 models

# --- Antibody Sequences
# Chain identifier -> amino-acid sequence; passed as-is to the prediction cell.
sequences = {
    "H": "QVQLVQSGAEVKKPGASVKVSCKASGYTFTRYTMHWVRQAPGQGLEWMGYINPSRGYTNYNQKFKDRVTMTRDTSTSTVYMELSSLRSEDTAVYYCARYYDDHYCLDYGQGTLVTVSS",
    "L": "EIVLTQSPATLSLSPGERATLSCSASSSVSYMNWYQQKPGQAPRLLIYDTSKLASGIPARFSGSGSGTDFTLTISSLEPEDFAVYYCQQWSSNPFTFGQGTKLEIK"
}

# Create the output directory. exist_ok=True keeps the cell idempotent on
# re-runs without a separate existence check.
os.makedirs(pred_dir, exist_ok=True)

print(f"✅ Definitions set. Output directory: {pred_dir}")

✅ Definitions set. Output directory: predictions


In [None]:
# @title Install dependencies/ check environment
# Note: Using '!' for pip commands for better Colab compatibility.

# --- Install Mamba/Conda ---
if not os.path.isfile("CONDA_READY"):
    print("installing conda...")
    # Using Colab's native !wget is safer than os.system("wget...")
    !wget -qnc https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh
    os.system("bash Mambaforge-colab-Linux-x86_64.sh -bfp /usr/local")
    os.system("mamba config --set auto_update_conda false")
    os.system("touch CONDA_READY")

# --- Install IgFold and PyTorch ---
if not os.path.isfile("CODE_READY"):
    print("installing igfold...")

    # 1. CRITICAL STEP: FORCE INSTALL PyTorch 1.11.0
    # 'index-url' ensures older, compatible PyTorch versions.
    !pip install torch==1.11.0 torchvision==0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu --force-reinstall

    # 2. Install IgFold (it will use the compatible PyTorch we just installed)
    !pip install 'igfold>=0.3.0'

    # 3. Install visualization tools
    !pip install -q --no-warn-conflicts 'py3Dmol>=2.0.1' matplotlib seaborn

    os.system("touch CODE_READY")

# --- Install Amber/OpenMM for Refinement ---
if do_refine and not os.path.isfile("AMBER_READY"):
    print("installing amber/openmm and pdbfixer...")

    # 1. Use mamba for the core OpenMM suite (best for complex dependencies)
    # Keep mamba command just in case it handles hidden OpenMM requirements better.
    !mamba install -y -q -c conda-forge openmm=7.7.0 python={PYTHON_VERSION} pdbfixer 2>&1 1>/dev/null

    # 2. CRITICAL: Use !pip as a GUARANTEE to install pdbfixer and openmm
    # Note: registers them in the main Python environment's site-packages path.
    print("Forcing installation via pip...")
    !pip install pdbfixer openmm

    os.system("touch AMBER_READY")

# --- Install Abnumber for Renumbering ---
if do_renum and not os.path.isfile("ABNUMBER_READY"):
    print("installing abnumber...")

    # 1. Use mamba for the core AbNumber suite (keep as preferred method)
    !mamba install -y -q -c bioconda abnumber python={PYTHON_VERSION} 2>&1 1>/dev/null

    # 2. CRITICAL: Use !pip as a GUARANTEE to install abnumber
    print("Forcing installation of abnumber via pip...")
    !pip install abnumber pandas --force-reinstall # Keep pandas re-install just in case

    os.system("touch ABNUMBER_READY")

# --- Post-Install Check ---
import pkg_resources
try:
    pkg_resources.get_distribution('igfold')
    print("✅ IgFold installation check: SUCCESS.")
except pkg_resources.DistributionNotFound:
    print("❌ IgFold installation check: FAILED.")

print("\n--- ATTENTION ---")
print("You MUST restart the runtime now for the Mamba/Conda and IgFold installations to fully take effect.")

✅ IgFold installation check: SUCCESS.

--- ATTENTION ---
You MUST restart the runtime now for the Mamba/Conda and IgFold installations to fully take effect.


In [None]:
# @title Predict antibody structure with IgFold
#
# Loads the IgFold models, folds `sequences` into `pred_dir/<name>.pdb`,
# reports the mean predicted RMSD and renders the structure. Relies on
# `python_version`, `pred_dir`, `name`, `sequences`, `do_refine`, `do_renum`
# and `single_model` from the definitions cell.

import os
import sys
from subprocess import getoutput

# Ensure new site-packages path is used after Mamba/Conda setup
site_packages_dir = f"/usr/local/lib/python{python_version}/site-packages/"
if site_packages_dir not in sys.path:
    sys.path.insert(0, site_packages_dir)

# Locate the directory that contains the installed 'igfold' package and strip
# the trailing '/igfold' so the parent directory can be put on sys.path.
# Raw string: the sed expression contains '\/', which is an invalid escape
# sequence in a regular Python string literal.
igfold_path = getoutput(
    r"find /usr/local/lib/ -name 'igfold' | grep 'site-packages/igfold' | head -n 1 | sed 's/\/igfold//'"
)

if igfold_path and igfold_path not in sys.path:
    sys.path.insert(0, igfold_path)
    print(f"✅ Found and inserted IgFold path: {igfold_path}")

# Imports
try:
    from igfold.utils.visualize import *
    from igfold import IgFoldRunner
    # Context manager that allow-lists classes for checkpoint unpickling
    from torch.serialization import safe_globals

    # Classes that must be allow-listed while the IgFold models are loaded
    from transformers.models.bert.configuration_bert import BertConfig
    from transformers.models.bert.tokenization_bert import (
        BasicTokenizer,
        BertTokenizer,
        WordpieceTokenizer,
    )
    from transformers.tokenization_utils import Trie

except ModuleNotFoundError:
    print("❌ ERROR: Could not import IgFold. Ensure all installation steps and restarts were completed.")
    sys.exit()

# Run the IgFold pipeline
num_models = 1 if single_model else 4

print(f"Starting IgFold prediction with {num_models} model(s)...")

# Allow-list the classes above only for the duration of model loading, which
# avoids the UnpicklingError raised for classes that are not allow-listed.
with safe_globals([BertConfig, BertTokenizer, Trie, BasicTokenizer, WordpieceTokenizer]):
    igfold = IgFoldRunner(num_models=num_models)

pred_pdb = os.path.join(pred_dir, f"{name}.pdb")
pred = igfold.fold(
    pred_pdb,
    sequences=sequences,
    do_refine=do_refine,
    use_openmm=True, # Use OpenMM if do_refine is True
    do_renum=do_renum,
)

print(f"✅ Prediction complete! PDB saved to: {pred_pdb}")

# pred.prmsd holds more than one value (it is plotted per residue in a later
# cell), so it cannot be formatted as a single float directly; report the
# mean over all elements instead.
mean_prmsd = pred.prmsd.mean().item()
print(f"Mean predicted RMSD (pRMSD) confidence: {mean_prmsd:.4f}")

# Visualize the result
show_pdb(pred_pdb, len(sequences), bb_sticks=False, sc_sticks=True, color="rainbow")

  igfold_path = getoutput("find /usr/local/lib/ -name 'igfold' | grep 'site-packages/igfold' | head -n 1 | sed 's/\/igfold//'")


Starting IgFold prediction with 4 model(s)...

    The code, data, and weights for this work are made available for non-commercial use 
    (including at commercial entities) under the terms of the JHU Academic Software License 
    Agreement. For commercial inquiries, please contact awichma2[at]jhu.edu.
    License: https://github.com/Graylab/IgFold/blob/main/LICENSE.md
    
Loading 4 IgFold models...
Using device: cpu
Loading /usr/local/lib/python3.12/dist-packages/igfold/trained_models/IgFold/igfold_1.ckpt...
Loading /usr/local/lib/python3.12/dist-packages/igfold/trained_models/IgFold/igfold_2.ckpt...
Loading /usr/local/lib/python3.12/dist-packages/igfold/trained_models/IgFold/igfold_3.ckpt...
Loading /usr/local/lib/python3.12/dist-packages/igfold/trained_models/IgFold/igfold_5.ckpt...
Successfully loaded 4 IgFold models.
Loaded AntiBERTy model.
Completed folding in 48.40 seconds.
✅ Prediction complete! PDB saved to: predictions/chimeric_Fv.pdb


TypeError: unsupported format string passed to Tensor.__format__

In [None]:
#@title Plot per-residue predicted RMSD

# The figure is written into the prediction directory as <name>_prmsd.png.
prmsd_fig_file = os.path.join(pred_dir, f"{name}_prmsd.png")

# Move the predicted-RMSD tensor to the CPU before handing it to the plotter.
prmsd_on_cpu = pred.prmsd.cpu()

plot_prmsd(
    sequences,
    prmsd_on_cpu,
    prmsd_fig_file,
    shade_cdr=do_renum,  # CDR shading follows the renumbering switch
    pdb_file=pred_pdb,
)

In [None]:
#@title Show predicted structure with predicted RMSD

#@markdown Structure is colored from low (blue) to high (red) pRMSD.

# The second argument is the number of entries in `sequences`
# (presumably the chain count expected by show_pdb -- confirm).
sequence_count = len(sequences)
show_pdb(
    pred_pdb,
    sequence_count,
    bb_sticks=False,
    sc_sticks=True,
    color="b",
)

In [None]:
#@title Download results

#@markdown Download zip file containing structure prediction and annotation results. If download fails, results are also accessible from file explorer on the left panel of the notebook.

import locale
import shutil

from google.colab import files

# Force the reported preferred encoding to UTF-8.
# NOTE(review): presumably a workaround for Colab shell/magic commands that
# require a UTF-8 locale -- confirm whether it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

# Build <name>.result.zip in the working directory with the prediction
# directory as its top-level entry. Done in Python rather than through the
# shell so that names containing spaces or shell metacharacters are handled
# correctly and failures raise instead of being silently discarded.
shutil.make_archive(f"{name}.result", "zip", base_dir=pred_dir)
files.download(f"{name}.result.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>