<a href="https://colab.research.google.com/github/sokrypton/ColabDesign/blob/main/af/examples/2stage_binder_hallucination.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AfDesign - two-stage binder hallucination
For a given protein target and protein binder length, generate/hallucinate a protein binder sequence that AlphaFold predicts will bind to the target structure. To do this, we minimize the PAE and maximize the number of contacts at the interface and within the binder, and we maximize the pLDDT of the binder.

**WARNING**
1.   This notebook is in active development and was designed for demonstration purposes only.
2.   Using AfDesign as the only "loss" function for design might be a bad idea: you may find adversarial sequences (i.e., sequences that trick AlphaFold).

In [None]:
#@title install
%%bash
# One-time setup: everything below is skipped when a `params` directory
# already exists, so re-running this cell after setup is a no-op.
# NOTE(review): `params` is created *before* the downloads start, so a failed
# or interrupted download is not retried on re-run; delete `params` to force it.
if [ ! -d params ]; then
  # Install ColabDesign from its GitHub repository (no version/commit pinned).
  pip -q install git+https://github.com/sokrypton/ColabDesign.git
  mkdir params
  # Stream the AlphaFold parameter archive directly into tar, extracting into params/.
  curl -fsSL https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar | tar x -C params
  # Also fetch the three OpenFold weight files (.npz) into params/.
  for W in openfold_model_ptm_1 openfold_model_ptm_2 openfold_model_no_templ_ptm_1
  do wget -qnc https://files.ipd.uw.edu/krypton/openfold/${W}.npz -P params; done
fi

In [None]:
#@title #import libraries
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
from colabdesign import mk_afdesign_model, clear_mem
from IPython.display import HTML
from google.colab import files
import numpy as np

#########################
def get_pdb(pdb_code=""):
  """Return the path to a local PDB file for the requested target.

  Args:
    pdb_code: RCSB PDB identifier such as "4N5T". If empty or None, the Colab
      upload widget is shown and the first uploaded file is used instead.

  Returns:
    "tmp.pdb" when a file was uploaded, otherwise "<pdb_code>.pdb"; both are
    relative to the current working directory.

  Raises:
    ValueError: if nothing was uploaded, or if pdb_code contains anything
      other than letters and digits.
    FileNotFoundError: if the download did not produce the expected file.
  """
  if pdb_code is None or pdb_code == "":
    upload_dict = files.upload()
    if not upload_dict:
      raise ValueError("no PDB file was uploaded")
    # Use the first uploaded file; any additional uploads are ignored.
    pdb_string = next(iter(upload_dict.values()))
    with open("tmp.pdb","wb") as out: out.write(pdb_string)
    return "tmp.pdb"

  # The code ends up in a URL and a filename, so reject anything that is not
  # a plain alphanumeric identifier (blocks shell/path tricks and typos).
  if not (isinstance(pdb_code, str) and pdb_code.isalnum()):
    raise ValueError(f"invalid PDB code: {pdb_code!r}")

  pdb_path = f"{pdb_code}.pdb"
  # Reuse a previously downloaded file (same effect as wget's no-clobber flag).
  if not os.path.isfile(pdb_path):
    import subprocess
    # argv list instead of a shell string: nothing in pdb_code is shell-parsed.
    subprocess.run(["wget", "-qnc", f"https://files.rcsb.org/view/{pdb_code}.pdb"])
    if not os.path.isfile(pdb_path):
      raise FileNotFoundError(
          f"could not download PDB entry {pdb_code!r} from files.rcsb.org")
  return pdb_path

In [None]:
#@title # Prep Inputs
pdb = "4N5T" #@param {type:"string"}
chain = "A" #@param {type:"string"}
binder_len =  50#@param {type:"integer"}
hotspot = "" #@param {type:"string"}
if hotspot == "": hotspot = None

# Raw form values. They are compared with the values from the previous run
# (`x_prev`) so the expensive model setup only happens when something changed.
x = {"pdb_filename":pdb, "chain":chain, "binder_len":binder_len, "hotspot":hotspot}

# An empty `pdb` means "upload a file": always re-run in that case so a new
# upload is picked up, since the form values alone cannot reveal the change.
if "x_prev" not in dir() or x != x_prev or not pdb:
  # Resolve the PDB code to a local file in a *copy*. Writing the resolved
  # path back into `x` would make `x_prev` differ from the next run's raw
  # form values, so the comparison above would never report "unchanged".
  prep_kwargs = dict(x, pdb_filename=get_pdb(x["pdb_filename"]))

  clear_mem()
  # Stage-2 model: designs the binder against the target structure.
  model = mk_afdesign_model(protocol="binder")
  model.prep_inputs(**prep_kwargs)

  # Stage-1 model: hallucinates a free-standing scaffold of the same length.
  pre_model = mk_afdesign_model(protocol="hallucination")
  pre_model.prep_inputs(length=binder_len)

  x_prev = x
  print("target length:", model._target_len)
  print("binder length:", model._binder_len)

In [None]:
#@title #stage 1 - Pre-hallucinate binder scaffold
#@markdown ---
#@markdown ####Weights
#@markdown - Minimizing `pae` or maximizing `plddt` often results in a single helix.
#@markdown To avoid this, we start with a random sequence and instead try to optimize
#@markdown defined `num`ber of `con`tacts per position.
pae = 0.1 #@param ["0.01", "0.1", "0.5", "1.0"] {type:"raw"}
plddt = 0.1 #@param ["0.01", "0.1", "0.5", "1.0"] {type:"raw"}
helix = 0.0  # fixed at 0.0; not exposed as a form field
con = 1.0 #@param ["0.01", "0.1", "0.5", "1.0"] {type:"raw"}
#@markdown ####Contact Definition
#@markdown - The contact definition is based on Cb-Cb distance `cutoff`. To avoid
#@markdown biasing towards helical contact, only contacts with sequence separation >
#@markdown `seqsep` are considered.

seqsep = 9 #@param ["0","5","9"] {type:"raw"}
cutoff = "14" #@param ["8", "14", "max"]
num = "2" #@param ["1", "2", "3", "4", "8", "max"]
binary = True #@param {type:"boolean"}
# "max" is a form placeholder that is replaced by a concrete number here.
# NOTE(review): 21.6875 is presumably the upper edge of AlphaFold's distogram
# range - confirm against ColabDesign.
if cutoff == "max": cutoff = 21.6875
# "max" for `num` resolves to the binder length set in the Prep Inputs cell.
if num == "max": num = binder_len

# Settings for the intra-binder contact loss ("con") and the loss weights.
# Both dicts are reused by the stage-2 cell, so they must be defined first.
pre_opt = {"con":{"seqsep":int(seqsep),"cutoff":float(cutoff),"num":int(num),
           "binary":binary}}
pre_weights = {"con":float(con),"helix":float(helix),
               "pae":float(pae),"plddt":float(plddt)}

# pre-design with gumbel initialization and softmax activation
pre_model.restart(mode="gumbel", opt=pre_opt, weights=pre_weights)
pre_model.design_soft(50)
# Keep the resulting sequence representation to seed the refinement below.
save_seq = np.asarray(pre_model.aux["seq"]["pseudo"])

# refine
pre_model.restart(seq=save_seq, opt=pre_opt, weights=pre_weights, keep_history=True)
pre_model.design(50, soft=0.0, e_soft=1.0)
# Overwrite save_seq with the refined result; the stage-2 cell starts from it.
save_seq = np.asarray(pre_model.aux["seq"]["pseudo"])

In [None]:
#@markdown ## display pre-hallucinated binder scaffold {run: "auto"}
color = "pLDDT" #@param ["chain", "pLDDT", "rainbow"]
show_sidechains = False #@param {type:"boolean"}
show_mainchains = False #@param {type:"boolean"}

# Draw the stage-1 scaffold using the viewer options selected in the form.
pre_model.plot_pdb(
  color=color,
  show_sidechains=show_sidechains,
  show_mainchains=show_mainchains,
)

In [None]:
# Show the HTML returned by pre_model.animate() inline (presumably an
# animation of the stage-1 design trajectory - confirm against ColabDesign).
HTML(pre_model.animate())

In [None]:
#@title #stage 2 - binder design
#@markdown ---
#@markdown ####interface Weights
i_pae = 1.0 #@param ["0.01", "0.1", "0.5", "1.0"] {type:"raw"}
i_con = 0.5 #@param ["0.01", "0.1", "0.5", "1.0"] {type:"raw"}
# Interface weights plus every weight used in stage 1 (`pre_weights` is
# defined by the stage-1 cell, which therefore has to run first).
weights = {"i_pae":float(i_pae),
           "i_con":float(i_con),
           **pre_weights}

#@markdown ####interface Contact Definition
# NOTE: these three form fields reuse (and overwrite) the global names that
# the stage-1 cell also assigns.
cutoff = "max" #@param ["8", "14", "max"]
num = "max" #@param ["1", "2", "4", "8", "max"]
binary = True #@param {type:"boolean"}
# "max" is a form placeholder that is replaced by a concrete number here.
# NOTE(review): 21.6875 is presumably the upper edge of AlphaFold's distogram
# range - confirm against ColabDesign.
if cutoff == "max": cutoff = 21.6875
if num == "max": num = binder_len

# Interface-contact ("i_con") settings merged with the stage-1 "con" settings.
opt = {"i_con":{"cutoff":float(cutoff),"num":int(num),
                "binary":binary},
       **pre_opt}

# Start the binder design from the sequence produced by stage 1 (`save_seq`).
model.restart(seq=save_seq, opt=opt, weights=weights)
# NOTE(review): the three arguments are presumably the iteration counts of the
# three design stages - confirm against ColabDesign's design_3stage.
model.design_3stage(100,100,10)

In [None]:
#@markdown ## display hallucinated binder {run: "auto"}
color = "chain" #@param ["chain", "pLDDT", "rainbow"]
show_sidechains = False #@param {type:"boolean"}
show_mainchains = False #@param {type:"boolean"}

# Save the design under a file name derived from the model's protocol,
# then draw it using the viewer options selected in the form.
model.save_pdb(f"{model.protocol}.pdb")
model.plot_pdb(
  color=color,
  show_sidechains=show_sidechains,
  show_mainchains=show_mainchains,
)

In [None]:
# Show the HTML returned by model.animate() inline (presumably an animation
# of the stage-2 design trajectory - confirm against ColabDesign).
HTML(model.animate())

In [None]:
# Display whatever model.get_seqs() returns as the cell output (presumably
# the designed binder sequence(s) - confirm against ColabDesign).
model.get_seqs()