<a href="https://colab.research.google.com/github/sokrypton/ColabDesign/blob/beta/af/design.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AfDesign (beta version)
Backprop through AlphaFold for protein design.

**WARNING**
1.   This notebook is in active development and was designed for demonstration purposes only.
2.   Using AfDesign as the only "loss" function for design might be a bad idea: you may find adversarial sequences (i.e., sequences that trick AlphaFold).

In [None]:
#@title install
%%bash
# Make a pipeline fail when any stage fails, so a curl error is not masked by tar.
set -o pipefail

# One-time setup: fetch the af_backprop sources and install the Python dependencies.
# The directory doubles as the "already installed" marker for re-runs.
if [ ! -d af_backprop ]; then
  git clone https://github.com/sokrypton/af_backprop.git
  pip -q install biopython dm-haiku==0.0.5 ml-collections py3Dmol
fi

# One-time download of the AlphaFold parameter archive.
if [ ! -d params ]; then
  # Unpack into a scratch directory and rename it only on success. Otherwise an
  # interrupted download would leave a partial params/ behind and the guard
  # above would skip the retry on every later run.
  rm -rf params.tmp
  mkdir params.tmp
  if curl -fsSL https://storage.googleapis.com/alphafold/alphafold_params_2021-07-14.tar | tar x -C params.tmp; then
    mv params.tmp params
  else
    rm -rf params.tmp
    echo "ERROR: downloading/unpacking the AlphaFold parameters failed; re-run this cell." >&2
    exit 1
  fi
fi

# Helper module, design code and background probabilities; -nc keeps existing copies.
wget -qnc https://raw.githubusercontent.com/sokrypton/ColabFold/main/beta/colabfold.py
wget -qnc https://raw.githubusercontent.com/sokrypton/ColabDesign/beta/af/design.py
wget -qnc https://raw.githubusercontent.com/sokrypton/ColabDesign/beta/af/bkg_prob.txt

In [None]:
#@title import libraries
import sys
sys.path.append('/content/af_backprop')

import os
from google.colab import files
import numpy as np
from IPython.display import HTML
from design import mk_design_model, clear_mem

#########################
def get_pdb(pdb_code=""):
  """Return the path of a local PDB file, uploading or downloading it as needed.

  Args:
    pdb_code: PDB identifier to download from files.rcsb.org (e.g. "1TEN").
      If None or "", a Colab upload dialog is opened instead and the first
      uploaded file is used.

  Returns:
    "tmp.pdb" for an uploaded file, otherwise "<pdb_code>.pdb" in the current
    working directory.

  Raises:
    ValueError: if nothing was uploaded, or if pdb_code contains characters
      other than ASCII letters, digits and underscores.
    FileNotFoundError: if the download did not produce the expected file.
  """
  # Local imports keep this helper self-contained within the cell.
  import re
  import subprocess

  if pdb_code is None or pdb_code == "":
    upload_dict = files.upload()
    if not upload_dict:
      raise ValueError("no file was uploaded")
    # Only the first uploaded file is used; its content is written as binary.
    pdb_string = next(iter(upload_dict.values()))
    with open("tmp.pdb","wb") as out:
      out.write(pdb_string)
    return "tmp.pdb"

  pdb_code = str(pdb_code)
  # The code ends up in a URL and a local filename, so reject anything that
  # could smuggle in shell syntax, URL tricks or path separators.
  if not re.fullmatch(r"[A-Za-z0-9_]+", pdb_code):
    raise ValueError(f"invalid PDB code: {pdb_code!r}")

  pdb_filename = f"{pdb_code}.pdb"
  if not os.path.isfile(pdb_filename):
    # Argument list (no shell) so the code can never be interpreted as a command.
    # The exit status is deliberately not checked; the file test below decides.
    subprocess.run(
      ["wget", "-qnc", f"https://files.rcsb.org/view/{pdb_code}.pdb"],
      check=False)
  if not os.path.isfile(pdb_filename):
    raise FileNotFoundError(
      f"could not download '{pdb_code}' from files.rcsb.org "
      f"(expected file '{pdb_filename}')")
  return pdb_filename

# hallucination
For a given length, generate/hallucinate a protein sequence that AlphaFold thinks folds into a well-structured protein (high pLDDT, low PAE, many contacts).

In [None]:
# Start from a clean slate before building a new model
# (presumably clear_mem() frees memory held by a previous model — confirm in design.py).
clear_mem()
# Build a design model for the "hallucination" protocol and prepare its inputs
# for a sequence of length 100.
model = mk_design_model(protocol="hallucination")
model.prep_inputs(length=100)

# Show the length stored on the model and its opt["weights"] settings.
print("length",model._len)
print("weights",model.opt["weights"])

In [None]:
# Pre-design: restart with seq_init="gumbel" and run design(50, soft=True)
# (gumbel initialization and softmax activation, per the original author's note;
# presumably 50 is the number of iterations — confirm in design.py).
model.restart(seq_init="gumbel")
model.design(50, soft=True)

# Three-stage design: rebuild the model state from the "seq_pseudo" output of
# the pre-design run, then call design_3stage(50,50,10).
# NOTE(review): this relies on private attributes (_state, _init_fun, _outs);
# confirm whether design.py offers a public way to seed a run from a previous result.
model._state = model._init_fun({"seq":model._outs["seq_pseudo"]})
model.design_3stage(50,50,10)

In [None]:
# Render the value returned by model.animate() as HTML in the cell output.
HTML(model.animate())

In [None]:
# Display what get_seqs() returns (presumably the designed sequence(s) — confirm in design.py).
model.get_seqs()

In [None]:
# Visualize the model's structure (presumably the current prediction — confirm in design.py).
model.plot_pdb()

In [None]:
# Save to a file named after model.protocol (presumably "hallucination.pdb" here),
# so each section of the notebook writes to its own file.
model.save_pdb(f"{model.protocol}.pdb")

# fixed backbone design (fixbb)
For a given protein backbone, generate/design a new sequence that AlphaFold thinks folds into that conformation. 

In [None]:
# Start from a clean slate before building a new model
# (presumably clear_mem() frees memory held by the previous section's model — confirm in design.py).
clear_mem()
# Build a design model for the "fixbb" protocol and prepare its inputs from
# chain A of PDB entry 1TEN; get_pdb() returns the local path of that file.
model = mk_design_model(protocol="fixbb")
model.prep_inputs(pdb_filename=get_pdb("1TEN"), chain="A")

# Show the length stored on the model and its opt["weights"] settings.
print("length",  model._len)
print("weights", model.opt["weights"])

In [None]:
# Restart the model with its default settings, then run the three-stage design
# with its default arguments (the defaults are defined in design.py).
model.restart()
model.design_3stage()

In [None]:
# Plot the design trajectory (presumably metrics recorded during design — confirm in design.py).
model.plot_traj()  

In [None]:
# Render the value returned by model.animate() as HTML in the cell output.
HTML(model.animate())

In [None]:
# Display the model's private _best_outs attribute.
# NOTE(review): this dumps raw internal state into the cell output and looks like
# leftover debugging — consider removing it or showing a short summary instead.
model._best_outs

In [None]:
# Display what get_seqs() returns (presumably the designed sequence(s) — confirm in design.py).
model.get_seqs()

In [None]:
# Visualize the model's structure (presumably the current prediction — confirm in design.py).
model.plot_pdb()

In [None]:
# Save to a file named after model.protocol (presumably "fixbb.pdb" here),
# so each section of the notebook writes to its own file.
model.save_pdb(f"{model.protocol}.pdb")

# binder hallucination
For a given protein target and protein binder length, generate/hallucinate a protein binder sequence that AlphaFold thinks will bind to the target structure. To do this, we minimize PAE and maximize the number of contacts at the interface and within the binder, and we maximize the pLDDT of the binder.

In [None]:
# Start from a clean slate before building a new model
# (presumably clear_mem() frees memory held by the previous section's model — confirm in design.py).
clear_mem()
# Build a design model for the "binder" protocol; the target is chain A of PDB
# entry 4MZK and the binder length is set to 19.
model = mk_design_model(protocol="binder")
model.prep_inputs(pdb_filename=get_pdb("4MZK"), chain="A", binder_len=19)

# Show the target/binder lengths stored on the model and its opt["weights"] settings.
print("target_length",model._target_len)
print("binder_length",model._binder_len)
print("weights",model.opt["weights"])

In [None]:
# Restart with seq_init="soft_gumbel", then run the three-stage design with
# arguments (100,100,10) (presumably per-stage iteration counts — confirm in design.py).
model.restart(seq_init="soft_gumbel")
model.design_3stage(100,100,10)

In [None]:
# Visualize the model's structure (presumably the current prediction — confirm in design.py).
model.plot_pdb()

In [None]:
# Render the value returned by model.animate() as HTML in the cell output.
HTML(model.animate())

In [None]:
# Display what get_seqs() returns (presumably the designed sequence(s) — confirm in design.py).
model.get_seqs()

In [None]:
# Save to a file named after model.protocol (presumably "binder.pdb" here),
# so each section of the notebook writes to its own file.
model.save_pdb(f"{model.protocol}.pdb")