<a href="https://colab.research.google.com/github/patrickbryant1/binder_design/blob/main/EvoBind.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Connect to Google drive
#@markdown You have to allow to **connect to Google drive** in order to run EvoBind.
import os
import sys

from google.colab import drive

# Mount the drive so that files can be saved.
# All the output of this notebook is written below this mount point.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
#@title Install dependencies

#@markdown Make sure your runtime is GPU. 
#@markdown In the menu above do: Runtime --> Change runtime type --> Hardware accelerator (set to GPU)

#@markdown **Press play.**

#@markdown You will have to restart the runtime after this finishes to include the new packages.
#@markdown In the menu above do: Runtime --> Restart runtime 

#@markdown **After restarting** - reconnect to Google drive.
#!pip install "jax[cuda]=='0.3.22" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
#!pip install  dm-haiku==0.0.7
#!pip install  tensorflow-cpu==2.5.0
import shutil
try:
  shutil.rmtree('/content/binder_design', ignore_errors=True)
except:
  print('')

!git clone https://github.com/patrickbryant1/binder_design.git
!pip install -q condacolab
import condacolab
condacolab.install()
!conda env create -f /content/binder_design/esm_env.yml

!pip install  biopython==1.79
!pip install  ml-collections==0.1.0
!pip install  chex==0.0.7
!pip install  dm-tree==0.1.6
!pip install  immutabledict==2.0.0
!pip install  numpy==1.19.5
!pip install  pandas==1.3.4
!pip install  scipy==1.7.0
!pip install  py3Dmol
!pip install biotite
!pip install git+https://github.com/facebookresearch/esm.git
!pip install torch
!pip install torch_geometric
!pip install torch_sparse

In [2]:
#@title Clone the EvoBind github repository
import shutil
try:
  shutil.rmtree('/content/binder_design', ignore_errors=True)
except:
  print('')

!git clone https://github.com/patrickbryant1/binder_design.git

Cloning into 'binder_design'...
remote: Enumerating objects: 262, done.[K
remote: Counting objects: 100% (262/262), done.[K
remote: Compressing objects: 100% (208/208), done.[K
remote: Total 262 (delta 82), reused 215 (delta 48), pack-reused 0[K
Receiving objects: 100% (262/262), 38.55 MiB | 28.32 MiB/s, done.
Resolving deltas: 100% (82/82), done.


In [3]:
#@title #Follow all steps outlined below to design a binder.
#@markdown To try the **test case** [3SQG](https://www.rcsb.org/3d-view/3SQG), press the play button to the left.
\
#@markdown If you don't want to run the test case, **change the input parameters**.

#@markdown #Parameters
#@markdown - *PDBID* - PDB id of the receptor structure 
#@markdown - *TARGET_CHAIN* - what chain in the PDB file to design towards
#@markdown - **Optional**: *UPLOAD_PDB* - if you prefer to upload a file instead, you can simply do this. See "Upload the MSA" below and ensure the PDBID matches the name of your uploaded file.
import sys, os
import shutil
from google.colab import files
import pandas as pd
import numpy as np
import urllib.request
import py3Dmol
import matplotlib.pyplot as plt
import glob
import warnings

# Make the repository's processing scripts importable.
# Guarded so that re-running the cell does not add duplicate entries.
PROCESS_DIR = '/content/binder_design/src/process'
if PROCESS_DIR not in sys.path:
  sys.path.insert(0, PROCESS_DIR)

PDBID = "3SQG" #@param {type:"string"}
TARGET_CHAIN = "C" #@param {type:"string"}
UPLOAD_PDB = False #@param {type:"boolean"}


OUTDIR="/content/gdrive/MyDrive/"+PDBID+'/'
#Make outdir
# os.mkdir does not create parent directories, so this fails
# if the Drive mount point is missing instead of writing elsewhere.
if not os.path.exists(OUTDIR):
  os.mkdir(OUTDIR)
#Get structure
TARGET_STRUCTURE = "https://files.rcsb.org/download/"+PDBID
# Every later step reads the structure from this location in OUTDIR.
STRUCTURE_FILE = OUTDIR+PDBID+".cif"

#Load the PDB
print('Getting structure file...')
if UPLOAD_PDB:
  TARGET_STRUCTURE='/content/'+PDBID+'.cif'
  if os.path.exists(TARGET_STRUCTURE):
    # Copy the upload into OUTDIR; previously the uploaded file was never
    # moved to the path that prepare_input reads from.
    shutil.copy(TARGET_STRUCTURE, STRUCTURE_FILE)
  elif not os.path.exists(STRUCTURE_FILE):
    print("Can't find uploaded file: "+TARGET_STRUCTURE+'. Ensure that the file name matches the PDBID.')
else:
  # Only download when no copy exists from a previous run.
  if not os.path.exists(STRUCTURE_FILE):
    try:
      urllib.request.urlretrieve(TARGET_STRUCTURE+".cif", STRUCTURE_FILE)
    except Exception as err:
      # Best-effort as before, but report the reason and let
      # KeyboardInterrupt/SystemExit propagate.
      print("Can't download file: "+TARGET_STRUCTURE+'. Ensure that the PDBID is correct.')
      print(err)

#Parse the intended chain
# Imported here because it relies on the sys.path entry added above.
from generate_diverse_seeds import prepare_input
warnings.filterwarnings('ignore')
prepare_input(STRUCTURE_FILE, TARGET_CHAIN, OUTDIR, PDBID)
#Vis
view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js',)
# Read through a context manager so the file handle is closed.
with open(OUTDIR+PDBID+'_'+TARGET_CHAIN+'.pdb','r') as pdb_file:
  view.addModel(pdb_file.read(),'pdb')
view.setStyle({'chain':TARGET_CHAIN},{'cartoon': {'color':'green'}})
view.zoomTo()
view.show()
print('Ensure this is your intended target.')


Getting structure file...


Ensure this is your intended target.


In [45]:
#@markdown #Run [Foldseek](https://search.foldseek.com) 
#@markdown Press play and paste the resulting id into the next cell to download the results
INFILE=OUTDIR+PDBID+'_'+TARGET_CHAIN+'.pdb'
! curl -X POST -F q=@$INFILE -F 'mode=3diaa'  -F 'database[]=pdb100' https://search.foldseek.com/api/ticket


{"id":"uBq8mM1aExPQpG3fzxIcSPMkBr22BxGwvleNJQ","status":"COMPLETE"}


In [None]:
#@markdown #Download results
SEARCH_ID = "uBq8mM1aExPQpG3fzxIcSPMkBr22BxGwvleNJQ" #@param {type:"string"}
#Download
! wget https://search.foldseek.com/api/result/download/$SEARCH_ID -O $OUTDIR/result.tar.gz
#Unzip
! tar -xvzf $OUTDIR/result.tar.gz -C $OUTDIR

In [None]:
#@markdown #Parse the hits and select seeds for the design
from generate_diverse_seeds import parse_results, write_ids_for_download, get_interaction_seeds, write_seeds_for_design
import pandas as pd
#Parse the results
aln_seqs, pdb_ids, pdb_chains = parse_results(OUTDIR)
#Write the IDs
write_ids_for_download(pdb_ids, OUTDIR+'ids.txt')
#Download
print('Downloading hits from the PDB...')
IDS=OUTDIR+'ids.txt'
MMCIFDIR=OUTDIR+'mmcif/'
! if [ -d "$MMCIFDIR" ]; then   echo "$MMCIFDIR exists..."; else bash /content/binder_design/src/process/batch_download.sh -f $IDS -c -o $MMCIFDIR; fi

#Unzip
!gunzip $MMCIFDIR/*.gz 
#Get seeds
SEEDDIR=MMCIFDIR+'seeds/'
!mkdir $SEEDDIR
#@markdown Min and max lengths of seeds for the binders:
MIN_LENGTH = 10 #@param {type:"integer"}
MAX_LENGTH = 50 #@param {type:"integer"}
#@markdown Min contact density (contacts per position):
MIN_CONTACT_DENSITY = 1 #@param {type:"slider", min:0, max:5, step:0.1}
#@markdown Min centre of mass distance between seeds (Ångström):
DELTA_COM = 2 #@param {type:"number"}
print('Getting seeds')
seed_df = get_interaction_seeds(pdb_ids, pdb_chains, MMCIFDIR, MIN_LENGTH, MAX_LENGTH, SEEDDIR)
#Pick seeds based on contact density and COM diff (avoid repetitive seeds)
print('Picking seeds...')
write_seeds_for_design(seed_df, OUTDIR+PDBID+".cif", MMCIFDIR, SEEDDIR, MIN_CONTACT_DENSITY, DELTA_COM)

In [102]:
#@markdown #Visualize the binder seeds
#@markdown How many seeds to pick (based on highest contact density)
NUM_SEEDS = 3 #@param {type:"slider", min:1, max:10, step:1}
import glob
import py3Dmol
# glob returns files in arbitrary order; sort so that the same seeds are
# selected on every run.
# NOTE(review): the form text says seeds are picked by highest contact
# density, but this cell only takes the first NUM_SEEDS files by name -
# confirm the seed file names encode that ranking.
SEEDS=sorted(glob.glob(SEEDDIR+'*.pdb'))
if not SEEDS:
  print('No seed files found in '+SEEDDIR)
view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js',)
# Colour palette; not used by the styling below.
COLORS=["orange", "cyan","magenta","yellow","salmon","white",
        "skyblue","lime", "grey","brown"]

print('The green chains are the target backbones and the blue the seeds.')
for seed_path in SEEDS[:NUM_SEEDS]:
  # Context manager closes each seed file after it has been read.
  with open(seed_path,'r') as seed_file:
    view.addModel(seed_file.read(),'pdb')
  # Chain A is drawn green and chain B blue, matching the message above.
  view.setStyle({'chain':'A'},{'stick': {'color':'green'}})
  view.setStyle({'chain':'B'},{'stick': {'color':'blue'}})
view.zoomTo()
view.show()



The green chains are the target backbones and the blue the seeds.


In [None]:
#@markdown #Design sequences using a modified version of [ESM-IF1](https://www.biorxiv.org/content/10.1101/2022.04.10.487779v2)
#@markdown How many examples to generate per seed (using the number of seeds picked above):
NUM_DESIGNS_PER_SEED = 10 #@param {type:"integer"}
import esm

# Load the pretrained ESM-IF1 model together with its alphabet.
pretrained_model, alphabet = esm.pretrained.esm_if1_gvp4_t16_142M_UR50()
# Keep the result of eval() under the name `model`.
model = pretrained_model.eval()
# Print the model for inspection.
print(model)

In [9]:
!rm -r /content/sample_data*