# BindStuff: Protein binder design

<img src="https://github.com/martinpacesa/BindStuff/blob/main/pipeline.png?raw=true">

A simple binder design pipeline using AlphaFold2 backpropagation, MPNN, and PyRosetta. Select your target and let the script do the rest of the work; it finishes once you have enough designs to order!

The designs will be saved on your Google Drive under BindStuff/[design_name]/. If the session times out, you can rerun the design pipeline and it will continue adding new designs.

In [None]:
#@title Installation
import os
import subprocess
import time

# Marker file created only after the AlphaFold2 parameters were downloaded and
# unpacked successfully; its presence is what marks the installation as done.
params_done_marker = "bindstuff/params/done.txt"


def _run_shell(command):
  """Run a shell command and warn, without aborting, when it exits non-zero.

  Returns the raw status reported by os.system so callers may inspect it.
  """
  exit_status = os.system(command)
  if exit_status != 0:
    print(f"WARNING: command exited with status {exit_status}: {command}")
  return exit_status


if not os.path.isfile(params_done_marker):
  print("Installing required BindStuff components")

  print("Pulling BindStuff code from Github")
  os.makedirs('/content/bindstuff/', exist_ok=True)
  _run_shell("git clone https://github.com/martinpacesa/BindStuff /content/bindstuff/")
  _run_shell("chmod +x /content/bindstuff/functions/dssp")
  _run_shell("chmod +x /content/bindstuff/functions/DAlphaBall.gcc")

  print("Installing ColabDesign")
  # Download and unpack the AlphaFold2 parameters in the background while the
  # Python packages install. The steps are chained with `&&` so the marker file
  # is only written when every preceding step succeeded.
  params_download = subprocess.Popen(
    "mkdir -p bindstuff/params && apt-get install aria2 -qq && "
    "aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar && "
    "tar -xf alphafold_params_2022-12-06.tar -C bindstuff/params && "
    "touch bindstuff/params/done.txt",
    shell=True,
  )
  _run_shell("pip install git+https://github.com/sokrypton/ColabDesign.git")
  # for debugging purposes
  _run_shell("ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign")

  print("Installing ProDy, and protein-design-tools")
  _run_shell("pip install prody freesasa protein-design-tools")

  # download params
  if not os.path.isfile(params_done_marker):
    print("downloading AlphaFold params")
    while not os.path.isfile(params_done_marker):
      # If the background job has already exited and the marker is still
      # missing, the download failed; stop instead of waiting forever.
      if params_download.poll() is not None:
        if os.path.isfile(params_done_marker):
          break
        raise RuntimeError(
          "AlphaFold parameter download failed "
          f"(exit status {params_download.returncode}); re-run this cell to retry."
        )
      time.sleep(5)

  print("BindStuff installation is finished, ready to run!")
else:
  print("BindStuff components already installed, ready to run!")

In [None]:
#@title Mount your Google Drive to save design results
# `os` is imported here so that this cell also works when it is run on its own,
# without the installation cell having been executed in the current kernel.
import os
from datetime import datetime

from google.colab import drive

# Mount the user's Google Drive under /content/drive and report when it happened.
drive.mount('/content/drive')
currenttime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Google drive mounted at: {currenttime}")

# Make sure the BindStuff output folder exists on the mounted drive
# (exist_ok=True: an already existing folder is left untouched).
bindstuff_google_drive = '/content/drive/My Drive/BindStuff/'
os.makedirs(bindstuff_google_drive, exist_ok=True)
print("BindStuff folder successfully created in your drive!")

In [None]:
#@title Binder design settings
import os
import json
from datetime import datetime
from functions.generic_utils import create_target_settings_from_form
# Colab form cell: each `# @param` line below defines one form field. The collected
# values are passed to create_target_settings_from_form, and the path it returns is
# stored in `target_settings_path`, which the "Run BindStuff" cell reads.
# @markdown ---
# @markdown Enter the path where your designs should be saved. We recommend saving to Google Drive so that you can continue generating at any time.
design_path = "/content/drive/MyDrive/BindStuff/PDL1/" # @param {"type":"string","placeholder":"/content/drive/MyDrive/BindStuff/PDL1/"}

# @markdown Enter the name that should be prefixed to your binders (generally target name).
binder_name = "PDL1" # @param {"type":"string","placeholder":"PDL1"}

# @markdown The path to the .pdb structure of your target. Can be an experimental or AlphaFold2 structure. We recommend trimming the structure to as small as needed, as the whole selected chains will be backpropagated through the network and can significantly increase running times.
starting_pdb = "/content/bindstuff/example/PDL1.pdb" # @param {"type":"string","placeholder":"/content/bindstuff/example/PDL1.pdb"}

# @markdown Which chains of your PDB to target? Can be one or multiple, in a comma-separated format. Other chains will be ignored during design.
chains = "A" # @param {"type":"string","placeholder":"A,C"}

# @markdown What positions to target in your protein of interest? For example `1,2-10` or chain specific `A1-10,B1-20` or entire chains `A`. If left blank, an appropriate site will be selected by the pipeline.
target_hotspot_residues = "" # @param {"type":"string","placeholder":""}

# @markdown What is the minimum and maximum size of binders you want to design? Pipeline will randomly sample different sizes between these values.
lengths = "70,150" # @param {"type":"string","placeholder":"70,150"}

# @markdown How many binder designs passing filters do you require?
number_of_final_designs = 100 # @param {"type":"integer","placeholder":"100"}
# @markdown ---
# @markdown Enter path on your Google drive (/content/drive/MyDrive/BindStuff/[binder_name].json) to previous target settings to continue design campaign. If left empty, it will use the settings above and generate a new settings json in your design output folder.
load_previous_target_settings = "" # @param {"type":"string","placeholder":""}
# @markdown ---

# Hand all form values to the helper; the returned path is kept for the run cell.
# NOTE(review): how `load_previous_target_settings` changes the result is decided
# inside create_target_settings_from_form and is not visible here - confirm there.
target_settings_path = create_target_settings_from_form(design_path, binder_name, starting_pdb, chains, target_hotspot_residues, lengths, number_of_final_designs, load_previous_target_settings)

# Timestamped confirmation so the user can see when the settings were last applied.
currenttime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Binder design settings updated at: {currenttime}")
print(f"New .json file with target settings has been generated in: {target_settings_path}")

In [None]:
#@title Advanced settings
from datetime import datetime
from functions.generic_utils import get_advanced_settings_path_from_form
# Colab form cell: the four dropdown values below are passed to
# get_advanced_settings_path_from_form, and the returned path is stored in
# `advanced_settings_path`, which the "Run BindStuff" cell reads.
# @markdown ---
# @markdown Which binder design protocol to run? Default is recommended. "Beta-sheet" promotes the design of more beta sheeted proteins, but requires more sampling. "Peptide" is optimised for helical peptide binders.
design_protocol = "Default" # @param ["Default","Beta-sheet","Peptide"]
# @markdown What prediction protocol to use? "Default" performs single sequence prediction of the binder. "HardTarget" uses initial guess to improve complex prediction for difficult targets, but might introduce some bias.
prediction_protocol = "Default" # @param ["Default","HardTarget"]
# @markdown What interface design method to use? "AlphaFold2" is the default, interface is generated by AlphaFold2. "MPNN" uses soluble MPNN to optimise the interface.
interface_protocol = "AlphaFold2" # @param ["AlphaFold2","MPNN"]
# @markdown What target template protocol to use? "Default" allows for a limited amount of flexibility. "Masked" allows for greater target flexibility on both sidechain and backbone level.
template_protocol = "Default" # @param ["Default","Masked"]
# @markdown ---

# NOTE(review): the arguments are passed positionally in the order design, interface,
# template, prediction - which differs from the order of the form fields above.
# Confirm this matches the helper's signature.
advanced_settings_path = get_advanced_settings_path_from_form(design_protocol, interface_protocol, template_protocol, prediction_protocol)

# Timestamped confirmation so the user can see when the settings were last applied.
currenttime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Advanced design settings updated at: {currenttime}")

In [None]:
#@title Filters
from datetime import datetime
from functions.generic_utils import get_filter_settings_path_from_form
# Colab form cell: the selected filter option is passed to
# get_filter_settings_path_from_form, and the returned path is stored in
# `filter_settings_path`, which the "Run BindStuff" cell reads.
# @markdown ---
# @markdown Which filters for designs to use? "Default" are recommended, "Peptide" are for the design of peptide binders, "Relaxed" are more permissive but may result in fewer experimental successes, "Peptide_Relaxed" are more permissive filters for non-helical peptides, "None" is for benchmarking.
filter_option = "Default" # @param ["Default", "Peptide", "Relaxed", "Peptide_Relaxed", "None"]
# @markdown ---

filter_settings_path = get_filter_settings_path_from_form(filter_option)

# Timestamped confirmation so the user can see when the settings were last applied.
currenttime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Filter settings updated at: {currenttime}")

# Everything is set, BindStuff is ready to run!

In [None]:
# @title Run BindStuff
import os
from functions.generic_utils import (
    check_jax_gpu,
    load_json_settings,
    load_af2_models,
    perform_advanced_settings_check,
    generate_directories,
    generate_dataframe_labels,
    create_dataframe,
    generate_filter_pass_csv,
)
from functions.pipeline import run_pipeline

# Run the GPU check helper before anything else is loaded.
check_jax_gpu()

# Load the three settings files whose paths were produced by the form cells.
(
    target_settings,
    advanced_settings,
    filters,
) = load_json_settings(target_settings_path, filter_settings_path, advanced_settings_path)

# File names of the settings files, cut at the first '.' (directory and extension removed).
settings_file = os.path.basename(target_settings_path).partition('.')[0]
filters_file = os.path.basename(filter_settings_path).partition('.')[0]
advanced_file = os.path.basename(advanced_settings_path).partition('.')[0]

# Model selection depends on the "use_multimer_design" advanced setting.
(
    design_models,
    prediction_models,
    multimer_validation,
) = load_af2_models(advanced_settings["use_multimer_design"])

# The settings check is given the fixed folder identifier "colab".
bindstuff_folder = "colab"
advanced_settings = perform_advanced_settings_check(advanced_settings, bindstuff_folder)

# Output directories and statistics files all live under the configured design path.
design_dir = target_settings["design_path"]
design_paths = generate_directories(design_dir)

trajectory_labels, design_labels, final_labels = generate_dataframe_labels()

trajectory_csv, mpnn_csv, final_csv, failure_csv = (
    os.path.join(design_dir, csv_name)
    for csv_name in (
        'trajectory_stats.csv',
        'mpnn_design_stats.csv',
        'final_design_stats.csv',
        'failure_csv.csv',
    )
)

# Set up the three statistics tables, each with its own column labels,
# followed by the filter failure table.
for csv_path, column_labels in (
    (trajectory_csv, trajectory_labels),
    (mpnn_csv, design_labels),
    (final_csv, final_labels),
):
    create_dataframe(csv_path, column_labels)
generate_filter_pass_csv(failure_csv, filter_settings_path)

# Collect everything the pipeline needs and start it.
pipeline_arguments = {
    "target_settings": target_settings,
    "advanced_settings": advanced_settings,
    "filters": filters,
    "design_models": design_models,
    "prediction_models": prediction_models,
    "multimer_validation": multimer_validation,
    "design_paths": design_paths,
    "trajectory_csv": trajectory_csv,
    "mpnn_csv": mpnn_csv,
    "final_csv": final_csv,
    "failure_csv": failure_csv,
    "settings_file": settings_file,
    "filters_file": filters_file,
    "advanced_file": advanced_file,
    "trajectory_labels": trajectory_labels,
    "design_labels": design_labels,
    "final_labels": final_labels,
}
run_pipeline(**pipeline_arguments)
