<a href="https://colab.research.google.com/github/patrickbryant1/binder_design/blob/main/EvoBind.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Connect to Google drive
#@markdown You have to allow to **connect to Google drive** in order to run EvoBind.
# Google Drive is mounted so the notebook can save its files there.
import os
import sys
from google.colab import drive

# All the output of the notebook will be written under this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
#@title Install dependencies

#@markdown Make sure your runtime is GPU. 
#@markdown In the menu above do: Runtime --> Change runtime type --> Hardware accelerator (set to GPU)

#@markdown **Press play.**

#@markdown You will have to restart the runtime after this finishes to include the new packages.
#@markdown In the menu above do: Runtime --> Restart runtime 

#@markdown **After restarting** - reconnect to Google drive.

# The three installs below are disabled (commented out), so jax, dm-haiku and
# tensorflow are taken from whatever the runtime already provides.
# NOTE(review): the jax line has an unbalanced quote (=='0.3.22) and must be
# fixed before it is re-enabled.
#!pip install "jax[cuda]=='0.3.22" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
#!pip install  dm-haiku==0.0.7
#!pip install  tensorflow-cpu==2.5.0

# Each package is installed by its own pip call with an exact version pin.
!pip install  biopython==1.79
!pip install  ml-collections==0.1.0
!pip install  chex==0.0.7
!pip install  dm-tree==0.1.6
!pip install  immutabledict==2.0.0
!pip install  numpy==1.19.5
!pip install  pandas==1.3.4
!pip install  scipy==1.7.0
# py3Dmol is the only package installed without a version pin.
!pip install  py3Dmol

In [None]:
#@title Clone the EvoBind github repository
import shutil
try:
  shutil.rmtree('/content/binder_design', ignore_errors=True)
except:
  print('')

!git clone https://github.com/patrickbryant1/binder_design.git

In [3]:
#@title #Follow all steps outlined below to design a binder.
#@markdown To try the **test case** [3SQG](https://www.rcsb.org/3d-view/3SQG), press the play button to the left.
\
#@markdown If you don't want to run the test case, **change the input parameters**.

#@markdown #Parameters
#@markdown - *PDBID* - PDB id of the receptor structure 
#@markdown - *RECEPTOR_CHAIN* - what chain in the PDB file to use as receptor
#@markdown - **Optional**: *UPLOAD_PDB* - if you prefer to upload a file instead, you can simply do this. See "Upload the MSA" below and ensure the PDBID matches the name of your uploaded file.
import sys, os
from google.colab import files
import pandas as pd
import numpy as np
import urllib.request
import py3Dmol
import matplotlib.pyplot as plt
import glob
# Make the modules of the cloned repository importable.
sys.path.insert(0,'/content/binder_design/src')
PDBID = "3SQG" #@param {type:"string"}
RECEPTOR_CHAIN = "C" #@param {type:"string"}
UPLOAD_PDB = False #@param {type:"boolean"}


# Everything for this target is written to a per-PDBID folder on Google Drive.
OUTDIR="/content/gdrive/MyDrive/"+PDBID+'/'
#Make outdir
# os.mkdir (not os.makedirs) on purpose: if Drive is not mounted this fails
# loudly instead of silently creating a local /content/gdrive tree.
if not os.path.exists(OUTDIR):
  os.mkdir(OUTDIR)
#Get structure
# Base download URL (without extension); replaced by a local path when the
# user uploads a file instead.
RECEPTOR_STRUCTURE = "https://files.rcsb.org/download/"+PDBID

#Load the PDB
if UPLOAD_PDB:
  RECEPTOR_STRUCTURE='/content/'+PDBID+'.pdb'
else:
  cif_path = OUTDIR+PDBID+".cif"
  # The structure is cached on Drive, so it is only fetched once per PDBID.
  if not os.path.exists(cif_path):
    try:
      urllib.request.urlretrieve(RECEPTOR_STRUCTURE+".cif", cif_path)
    except Exception as err:
      # `Exception` rather than a bare except, so that interrupting the kernel
      # is not swallowed. A truncated download can leave a partial file behind;
      # remove it, otherwise the exists-check above would treat it as a valid
      # cached structure on every later run.
      if os.path.exists(cif_path):
        os.remove(cif_path)
      print("Can't download file: "+RECEPTOR_STRUCTURE+'. Ensure that the PDBID is correct.')
      print('Reason: '+str(err))

#Parse the intended chain

In [None]:
#@markdown #Run *EvoBind*

#@markdown Click play to design a binder. 

#@markdown The whole process will take approximately **7 hours** (for 300 iterations). Relax and wait for your binder. 
#@markdown The run will continue where you left it if it was interrupted for some reason.

#@markdown The iteration, interface distance for the peptide, interface distance for the receptor, plDDT, delta COM, loss and best peptide sequence are displayed after each iteration.

#@markdown The AF2 params are fetched here (if they are not already downloaded).
import shutil
PARAMS="/content/gdrive/MyDrive/AF/params/"
if not os.path.exists(PARAMS):
  if not os.path.exists('/content/gdrive/MyDrive/AF/'):
    os.mkdir('/content/gdrive/MyDrive/AF/')
  os.mkdir(PARAMS)
  !wget https://storage.googleapis.com/alphafold/alphafold_params_2021-07-14.tar 
  shutil.move('/content/alphafold_params_2021-07-14.tar', PARAMS)
  #Extract
  !tar -xvf /content/gdrive/MyDrive/AF/params/alphafold_params_2021-07-14.tar -C /content/gdrive/MyDrive/AF/params/
sys.path.insert(0,'/content/EvoBind/src/AF2')


from mc_design_colab import main
MAX_RECYCLES=8 #max_recycles (default=3)
MODEL_NAME='model_1' #model_1_ptm
main(RECEPTOR_FASTA, 'design', TARGET_RESIDUES, RECEPTOR_CAs,
     RECEPTOR_MSA, BINDER_LENGTH, BINDER_COM, OUTDIR, NITER,
     [MODEL_NAME], MAX_RECYCLES, "/content/gdrive/MyDrive/AF/", START_SEQUENCE)

In [None]:
#@markdown #Analyse the results
#@markdown The TOP_FRACTION represents how many percent of the designs to select. 

#@markdown Only the best model is visualised. As a rule of thumb, a **plDDT value above 80** represents a reliable binder.

#@markdown Click the DOWNLOAD box to download the top models and their sequences.

#@markdown Click the DOWNLOAD_START box to download the start model.

TOP_FRACTION =  100#@param {type:"integer"}
RECEPTOR_STYLE = "cartoon" #@param ["cartoon", "sphere", "stick"]
BINDER_STYLE = "stick" #@param ["cartoon", "sphere", "stick"]
DOWNLOAD = False #@param {type:"boolean"}
DOWNLOAD_START = False #@param {type:"boolean"}
# The first entry of every saved array is dropped before ranking.
# NOTE(review): the model indices below therefore refer to the sliced arrays;
# this assumes unrelaxed_<i>.pdb uses the same numbering - confirm against the
# design script.
loss = np.load(OUTDIR+'loss.npy')[1:]
seqs = np.load(OUTDIR+'sequence.npy')[1:]
plddt = np.load(OUTDIR+'plddt.npy')[1:]
#Get top
sorted_models = np.argsort(loss)  # ascending: lowest loss first
# Keep at least one design whenever any exist (a small TOP_FRACTION used to
# round down to zero and crash the visualisation) and never more than exist.
n_select = min(len(loss), max(1, int(TOP_FRACTION/100*len(loss))))
top_loss = loss[sorted_models][:n_select]
top_sequence = seqs[sorted_models][:n_select]
top_plddt = plddt[sorted_models][:n_select]
top_models = sorted_models[:n_select]

#Print
print('The best sequences, losses and plDDT values are:')
for best_seq, best_loss, best_plddt in zip(top_sequence, top_loss, top_plddt):
  print(best_seq, best_loss, best_plddt)
#Vis
if n_select > 0:
  view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js',) 
  # Read through a context manager so the file handle is closed again.
  with open(OUTDIR+'unrelaxed_'+str(top_models[0])+'.pdb','r') as pdb_file:
    view.addModel(pdb_file.read(),'pdb')
  view.setStyle({'chain':'A'},{RECEPTOR_STYLE: {'color':'green'}})
  view.setStyle({'chain':'B'},{BINDER_STYLE: {'color':'cyan'}})
  view.zoomTo()
  view.show()
else:
  print('No designs found in '+OUTDIR+' - nothing to visualise.')

#@title Download the results
import shutil
if not os.path.exists(OUTDIR+'best_models'):
  os.mkdir(OUTDIR+'best_models')

#Download
if DOWNLOAD==True:
  # Remember exactly which files this run produced. Globbing rank_*.pdb would
  # also download stale files from an earlier run with a larger TOP_FRACTION.
  ranked_paths = []
  for rank, model in enumerate(top_models, start=1):
    ranked_path = OUTDIR+'best_models/rank_'+str(rank)+'.pdb'
    shutil.copy(OUTDIR+'unrelaxed_'+str(model)+'.pdb', ranked_path)
    ranked_paths.append(ranked_path)

  for ranked_path in ranked_paths:
    files.download(ranked_path)

  #Write a fasta file with the top seqs
  with open(OUTDIR+'best_models/top_seqs.fasta', 'w') as fasta:
    for rank, seq in enumerate(top_sequence, start=1):
      fasta.write('>rank_'+str(rank)+'\n')
      fasta.write(str(seq)+'\n')
  files.download(OUTDIR+'best_models/top_seqs.fasta')

if DOWNLOAD_START==True:
  files.download(OUTDIR+'unrelaxed_start.pdb')
#@markdown ### The receptor is depicted in green and the binder in cyan. Change the style above to view the design differently. 
#@markdown ### Try the sphere representation to see how all atoms fit together.

The best sequences, losses and plDDT values are:
DQQMVNMQVAE 0.00756058471263991 91.93883535398467
DIDMVNMQVAE 0.00967687181827096 92.24413755028702
DIQMVNMQVLE 0.015208505095756051 91.92278654617388
DNQMVNMQVAE 0.01626824010813077 91.11554264765529
