<a href="https://colab.research.google.com/github/sokrypton/ColabDesign/blob/alpha/af/examples/peptide_binder_design.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AfDesign - peptide binder design
For a given protein target and protein binder length, generate/hallucinate a protein binder sequence that AlphaFold thinks will bind to the target structure. To do this, we maximize the number of contacts at the interface and maximize the pLDDT of the binder.

**WARNING**
1.   This notebook is in active development and was designed for demonstration purposes only.
2.   Using AfDesign as the only "loss" function for design might be a bad idea; you may find adversarial sequences (i.e., sequences that trick AlphaFold).

In [None]:
#@title install
%%bash
# One-time setup: install ColabDesign and download the AlphaFold weights.
# The `params` directory doubles as the "already set up" marker.
if [ ! -d params ]; then
  pip -q install git+https://github.com/sokrypton/ColabDesign.git@alpha
  # Ask pip where the package was installed instead of hard-coding a
  # python3.7 path, which dangles on runtimes with any other Python version.
  site_dir=$(pip show colabdesign | sed -n 's/^Location: //p')
  ln -sfn "${site_dir}/colabdesign" colabdesign
  mkdir params
  # If the download/extraction fails, remove the marker directory so that
  # re-running the cell retries instead of silently skipping the download.
  curl -fsSL https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar | tar x -C params \
    || { rm -rf params; exit 1; }
fi

In [None]:
#@title import libraries
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
from colabdesign import mk_afdesign_model, clear_mem
from colabdesign.shared.utils import copy_dict
from IPython.display import HTML
from google.colab import files
import numpy as np

#########################
def get_pdb(pdb_code=""):
  """Return the local filename of a PDB file, uploading or downloading it first.

  Args:
    pdb_code: a 4-character code is fetched from files.rcsb.org; any other
      non-empty string is fetched from alphafold.ebi.ac.uk as
      AF-<code>-F1-model_v3.pdb; empty or None opens the Colab upload dialog.
      Surrounding whitespace is ignored.

  Returns:
    Filename (relative to the working directory) of the PDB file.

  Raises:
    ValueError: if the upload dialog returned no file.
    FileNotFoundError: if the expected file is not present after the download.
  """
  import subprocess  # local import: only this helper needs it

  pdb_code = (pdb_code or "").strip()
  if pdb_code == "":
    upload_dict = files.upload()
    if not upload_dict:
      raise ValueError("no PDB file was uploaded")
    # only the first uploaded file is used
    pdb_string = next(iter(upload_dict.values()))
    with open("tmp.pdb", "wb") as out:
      out.write(pdb_string)
    return "tmp.pdb"

  if len(pdb_code) == 4:
    url = f"https://files.rcsb.org/view/{pdb_code}.pdb"
    filename = f"{pdb_code}.pdb"
  else:
    url = f"https://alphafold.ebi.ac.uk/files/AF-{pdb_code}-F1-model_v3.pdb"
    filename = f"AF-{pdb_code}-F1-model_v3.pdb"

  # Argument list (no shell) so the user-supplied code cannot inject commands.
  # -nc skips the download when the file is already present, so success is
  # judged by the file existing rather than by wget's exit status.
  subprocess.run(["wget", "-qnc", url], check=False)
  if not os.path.isfile(filename):
    raise FileNotFoundError(f"could not download {url}")
  return filename

In [None]:
#@title **prep inputs**
import hashlib
import re
#@markdown ---
#@markdown **target info**
pdb = "4N5T" #@param {type:"string"}
#@markdown - enter PDB code or UniProt code (to fetch AlphaFoldDB model) or leave blank to upload your own
target_chain = "A" #@param {type:"string"}
target_hotspot = "" #@param {type:"string"}
if target_hotspot == "": target_hotspot = None
#@markdown - restrict loss to predefined positions on target (eg. "1-10,12,15")
target_flexible = False #@param {type:"boolean"}
#@markdown - allow backbone of target structure to be flexible

#@markdown ---
#@markdown **binder info**
binder_len = 14 #@param {type:"integer"}
#@markdown - length of binder to hallucinate
binder_seq = "" #@param {type:"string"}
# keep only the letters A-Z (after upper-casing); an empty result means
# no initial sequence was supplied
binder_seq = re.sub("[^A-Z]", "", binder_seq.upper())
if len(binder_seq) > 0:
  binder_len = len(binder_seq)
else:
  binder_seq = None
#@markdown - if defined, will initialize design with this sequence

binder_chain = "" #@param {type:"string"}
if binder_chain == "": binder_chain = None
#@markdown - if defined, supervised loss is used (binder_len is ignored)

#@markdown ---
#@markdown **model config**
use_multimer = False #@param {type:"boolean"}
#@markdown - use alphafold-multimer for design
num_recycles = 0 #@param ["0", "1", "3", "6"] {type:"raw"}
num_models = "all" #@param ["1", "2", "3", "4", "5", "all"] {type:"raw"}
if num_models == "all": num_models = 5
#@markdown - number of trained models to use during optimization


# Settings that determine the prepared model; `use_multimer` is also read
# below when the model is constructed.
x = {"pdb_filename":pdb,
     "chain":target_chain,
     "binder_len":binder_len,
     "binder_chain":binder_chain,
     "hotspot":target_hotspot,
     "use_multimer":use_multimer,
     "rm_target_seq":target_flexible}

x["pdb_filename"] = get_pdb(x["pdb_filename"])

# Uploaded structures are always saved as "tmp.pdb", so the filename alone
# cannot tell two different uploads apart; fingerprint the file contents too.
with open(x["pdb_filename"], "rb") as pdb_file:
  pdb_digest = hashlib.sha256(pdb_file.read()).hexdigest()

# Rebuild the model only when the settings or the structure file changed
# since the last run of this cell.
if ("x_prev" not in dir() or x != x_prev
    or "pdb_digest_prev" not in dir() or pdb_digest != pdb_digest_prev):
  clear_mem()
  model = mk_afdesign_model(protocol="binder",
                            use_multimer=x["use_multimer"],
                            num_recycles=num_recycles)
  model.prep_inputs(**x,
                    ignore_missing=False)
  x_prev = copy_dict(x)
  pdb_digest_prev = pdb_digest
  print("target length:", model._target_len)
  print("binder length:", model._binder_len)
  binder_len = model._binder_len

In [None]:
#@title **run AfDesign**

optimizer = "pssm_semigreedy" #@param ["3stage", "semigreedy", "pssm_semigreedy"]
#@markdown - `3stage` - gradient based optimization (GD) (logits → softmax → argmax)
#@markdown - `semigreedy` - tries X random mutations, accepts those that decrease loss
#@markdown - `pssm_semigreedy` - uses GD to get a sequence profile (PSSM), then uses the PSSM to bias semigreedy opt. (Recommended)

# Restart the design, passing along the optional initial binder sequence.
model.restart(seq=binder_seq)

# Restrict the run to the first `num_models` model names.
models = model._model_names[:num_models]
if optimizer == "3stage":
  model.design_3stage(120, 60, 10, num_recycles=num_recycles, models=models)
elif optimizer in ("semigreedy", "pssm_semigreedy"):
  # Both options share one entry point and differ only in the first argument:
  # 0 for plain semigreedy, 120 for pssm_semigreedy (presumably the number of
  # GD steps used to build the PSSM; confirm against ColabDesign).
  model.design_pssm_semigreedy(120 if optimizer == "pssm_semigreedy" else 0, 32,
                               num_recycles=num_recycles, models=models)


In [None]:
#@markdown ## display hallucinated protein {run: "auto"}
color = "pLDDT" #@param ["chain", "pLDDT", "rainbow"]
show_sidechains = False #@param {type:"boolean"}
show_mainchains = False #@param {type:"boolean"}
color_HP = False #@param {type:"boolean"}
animate = True #@param {type:"boolean"}
# Save the structure to "<protocol>.pdb", then render it with the options above.
model.save_pdb(f"{model.protocol}.pdb")
model.plot_pdb(
    color=color,
    color_HP=color_HP,
    animate=animate,
    show_sidechains=show_sidechains,
    show_mainchains=show_mainchains,
)

In [None]:
# Render the value returned by model.animate(dpi=100) as HTML in the cell output.
HTML(model.animate(dpi=100))

In [None]:
# Show whatever model.get_seqs() returns (presumably the designed binder
# sequence(s); confirm against ColabDesign).
model.get_seqs()

In [None]:
# log
# Shows the "log" entry stored under "aux" in model._best (presumably the
# metrics recorded for the best design so far; confirm against ColabDesign).
model._best["aux"]["log"]