# MutateBinder
Refine an existing binder by sampling ProteinMPNN variants and repredicting with AlphaFold.


## Prerequisites
- Activate the BindCraft conda environment before running.
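- Update the paths below to point at your target and binder files. If both paths point at the same multi-chain PDB, the last protein chain in the file is treated as the binder and is extracted to a separate `*_binder_only.pdb` file next to the input.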


In [None]:
from pathlib import Path
import json
import os
import subprocess
import shlex
from Bio.PDB import PDBIO


In [None]:
# --- BindCraft installation -------------------------------------------------
# Root of the local BindCraft checkout; every other path is derived from it.
BINDCRAFT_ROOT = Path(r'/mnt/e/Code/BindCraft').resolve()
BINDCRAFT_SCRIPT = BINDCRAFT_ROOT.joinpath('bindcraft', 'bindcraft.py')

# --- Input structures -------------------------------------------------------
TARGETS_DIR = BINDCRAFT_ROOT.joinpath('InputTargets').resolve()
TARGET_PDB = TARGETS_DIR.joinpath('5E56.pdb')  # receptor/target structure
BINDER_PDB = TARGETS_DIR.joinpath('8GAC.pdb')  # binder to mutate

# --- Output locations -------------------------------------------------------
# OUTPUT_DIR must be created first: the MPNN sub-directory is created without
# `parents=True` and therefore relies on its parent already existing.
OUTPUT_DIR = BINDCRAFT_ROOT.joinpath('Results', 'MutateBinder_5E56_8GAC').resolve()
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
MPNN_OUT = OUTPUT_DIR.joinpath('MPNN')
MPNN_OUT.mkdir(exist_ok=True)

# Number of ProteinMPNN sequences to sample for the binder.
MAX_MPNN_VARIANTS = 5

# Echo the resolved configuration so a wrong path is spotted before sampling.
for _label, _location in (
    ('Target PDB', TARGET_PDB),
    ('Binder PDB', BINDER_PDB),
    ('Output dir', OUTPUT_DIR),
):
    print(f'{_label} :', _location)


Target PDB : /mnt/e/Code/BindCraft/InputTargets/5E56.pdb
Binder PDB : /mnt/e/Code/BindCraft/InputTargets/8GAC.pdb
Output dir : /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC


## Generate ProteinMPNN variants


In [None]:
# Sample ProteinMPNN sequence variants for the binder, write each one as a
# FASTA file under MPNN_OUT, and thread it onto the binder backbone as a PDB
# file under OUTPUT_DIR.
from colabdesign.mpnn import mk_mpnn_model
from Bio.PDB import PDBParser, PDBIO

# One-letter -> three-letter codes for the 20 standard amino acids.
AA3 = {
    'A': 'ALA','C': 'CYS','D': 'ASP','E': 'GLU','F': 'PHE','G': 'GLY','H': 'HIS','I': 'ILE','K': 'LYS','L': 'LEU',
    'M': 'MET','N': 'ASN','P': 'PRO','Q': 'GLN','R': 'ARG','S': 'SER','T': 'THR','V': 'VAL','W': 'TRP','Y': 'TYR'
}

# Atoms that are kept when a residue changes identity. CB is additionally kept
# for every non-glycine replacement (see _thread_residue).
BACKBONE_ATOMS = frozenset({'N', 'CA', 'C', 'O', 'OXT'})


def _standard_residues(chain):
    """Return the residues of *chain* whose hetero flag (``res.id[0]``) is blank."""
    return [res for res in chain if res.id[0] == ' ']


def _thread_residue(res, new_resname):
    """Rename *res* to *new_resname*, dropping atoms that belong to the old side chain.

    Only renaming the residue would leave the previous side-chain atoms in the
    record (for example a GLY carrying a tryptophan side chain). When the
    identity changes, everything except the backbone atoms (plus CB for
    non-glycine replacements) is detached. Residues whose identity does not
    change are left untouched.
    """
    if res.resname.strip() == new_resname:
        return
    keep = BACKBONE_ATOMS if new_resname == 'GLY' else BACKBONE_ATOMS | {'CB'}
    # Iterate over a copy: detaching children mutates the residue's child list.
    for atom in list(res):
        if atom.get_id() not in keep:
            res.detach_child(atom.get_id())
    res.resname = new_resname


if not TARGET_PDB.exists() or not BINDER_PDB.exists():
    raise FileNotFoundError('Check TARGET_PDB and BINDER_PDB paths.')

parser = PDBParser(QUIET=True)
binder_input_path = BINDER_PDB

# Target and binder point at the same file: treat the last protein chain as the
# binder and write it to a separate binder-only PDB next to the input.
if TARGET_PDB.resolve() == BINDER_PDB.resolve():
    full_structure = parser.get_structure('FULL', str(BINDER_PDB))
    protein_chains = [chain for chain in full_structure.get_chains() if _standard_residues(chain)]
    if len(protein_chains) < 2:
        raise ValueError('Binder PDB contains only one protein chain. Please supply a binder-only file.')
    binder_chain_id = protein_chains[-1].id or 'A'
    # Detach each non-binder chain from its own model. Detaching by chain ID
    # from every model would also remove the binder whenever an earlier model
    # reuses the binder's chain ID.
    for chain in protein_chains[:-1]:
        chain.get_parent().detach_child(chain.id)
    binder_only_path = BINDER_PDB.parent / (BINDER_PDB.stem + '_binder_only.pdb')
    io = PDBIO()
    io.set_structure(full_structure)
    io.save(str(binder_only_path))
    print(f'Extracted binder chain {binder_chain_id} to {binder_only_path}')
    binder_input_path = binder_only_path

binder_structure = parser.get_structure('BINDER', str(binder_input_path))
binder_chains = [chain for chain in binder_structure.get_chains() if _standard_residues(chain)]
if not binder_chains:
    raise ValueError('No protein chains detected in binder PDB.')
chain_ids = [chain.id or 'A' for chain in binder_chains]
chain_spec = ','.join(chain_ids)

# All binder positions are redesigned (fix_pos=None).
mpnn_model = mk_mpnn_model(backbone_noise=0.0, weights='soluble')
mpnn_model.prep_inputs(pdb_filename=str(binder_input_path), chain=chain_spec, fix_pos=None)
designs = mpnn_model.sample(num=MAX_MPNN_VARIANTS, temperature=0.2)

# Accept either a dict with a 'seq' entry or a bare collection of sequences,
# and convert array-like results to a plain list when possible.
seq_entries = designs['seq'] if isinstance(designs, dict) and 'seq' in designs else designs
try:
    seq_entries = seq_entries.tolist()
except AttributeError:
    pass

variant_fastas = []
for idx, entry in enumerate(seq_entries, 1):
    # Normalise one sampled entry to a single string.
    if isinstance(entry, dict):
        seq = ''.join(entry.get(chain_id, '') for chain_id in chain_ids)
    elif isinstance(entry, (list, tuple)):
        seq = ''.join(entry)
    else:
        seq = str(entry)

    # The FASTA is written for every sample, including ones skipped below.
    fasta_path = MPNN_OUT / f'binder_variant_{idx}.fasta'
    with fasta_path.open('w') as fh:
        fh.write(f'>binder_variant_{idx}\n{seq}\n')
    variant_fastas.append(fasta_path)
    print('Wrote', fasta_path)

    # Multi-chain samples may carry '/' chain-break separators (ColabDesign
    # convention -- confirm against the installed version). They are not
    # residues, so drop them before matching the sequence to the backbone.
    threaded_seq = seq.replace('/', '')

    # Re-parse for every variant because threading mutates the structure.
    structure_copy = parser.get_structure(f'BINDER_{idx}', str(binder_input_path))
    backbone_residues = [
        res
        for chain in structure_copy.get_chains()
        if chain.id in chain_ids
        for res in _standard_residues(chain)
    ]
    total_residues = len(backbone_residues)
    if total_residues != len(threaded_seq):
        print(f'Skipping binder_variant_{idx}: sequence length {len(threaded_seq)} does not match backbone residues {total_residues}.')
        continue

    for res, aa in zip(backbone_residues, threaded_seq):
        if aa not in AA3:
            raise ValueError(f'Unsupported amino acid {aa} encountered.')
        _thread_residue(res, AA3[aa])

    pdb_path = OUTPUT_DIR / f'binder_variant_{idx}.pdb'
    io = PDBIO()
    io.set_structure(structure_copy)
    io.save(str(pdb_path))
    print('Wrote', pdb_path)


Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/MPNN/binder_variant_1.fasta
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/binder_variant_1.pdb
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/MPNN/binder_variant_2.fasta
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/binder_variant_2.pdb
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/MPNN/binder_variant_3.fasta
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/binder_variant_3.pdb
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/MPNN/binder_variant_4.fasta
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/binder_variant_4.pdb
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/MPNN/binder_variant_5.fasta
Wrote /mnt/e/Code/BindCraft/Results/MutateBinder_5E56_8GAC/binder_variant_5.pdb
