In [1]:
#@title 1. Install Dependencies (~1 min)
%%capture
# Clone the SPfast sources only when the checkout is not already present,
# so re-running this cell does not fail on an existing directory.
![ ! -d SPfast ] && git clone https://github.com/tlitfin/SPfast.git
# pybind11 is installed ahead of the build below - presumably the build
# needs it; TODO confirm against the SPfast Makefile.
%pip install pybind11
# itables renders the interactive hit table in the search cell.
%pip install itables
# Build with the "gnu" make target. Later cells invoke SPfast/src/SPfast.gnu
# and SPfast/src/prepare_bin.gnu, which are presumably produced here.
!cd SPfast/src && make gnu
# Editable install of the SPfast Python package from the local checkout.
%pip install -e SPfast/
%pip install biopython
# Fetch the prebuilt mkdssp 4.4.0 binary once and mark it executable; it is
# run on the query structure in the "Prepare query structure" cell.
![ ! -f mkdssp-4.4.0-linux-x64 ] && wget https://github.com/PDB-REDO/dssp/releases/download/v4.4.0/mkdssp-4.4.0-linux-x64 && chmod +x mkdssp-4.4.0-linux-x64

In [2]:
#@title 2. Download AFDB-clusters SPfast structure files (~8 min)
%%capture
!wget https://spfast.tomlitfin.workers.dev/afdb-clu.db.tar.gz
!tar xvf afdb-clu.db.tar.gz && rm afdb-clu.db.tar.gz

In [7]:
#@title 3. Select parameters for SPfast search
#%%capture # Can't capture with file-upload
from google.colab import files
from pathlib import Path

UniProt_ID = "A0A0F2PPL2" # @param {type:"string"}
#@markdown **OR**
PDB_ID = "" # @param {type:"string"}
Chain = "" # @param {type:"string"}
#@markdown - Leave ID fields blank for custom **monomer** upload in PDB format
#@markdown  - The *first* chain will be extracted from a multi-chain query
Optimization_objective = 'SPfscore' # @param ["SPfscore", "SPscore"]
#score_cutoff = 0.4 # @param {type:"number"}
#d0 = 4.0 # @param {type:"number"}
#finalgap0 = 0.2 # @param {type:"number"}
#alpha = 0.3 # @param {type:"number"}
#coarsecut = -1.0 # @param {type:"number"}
#segcut = 5.0 # @param {type:"number"}
fast = True # @param {type:"boolean"}
trim = True # @param {type:"boolean"}

if UniProt_ID == "" and PDB_ID == "":
  uploaded = files.upload()
  fn = list(uploaded.keys())[0]
  !python SPfast/utils/extract_chain.py {fn}
  #HACKY WORKAROUND for DSSP - may not be required always
  !cat <(echo "HEADER    SPFAST-SEARCH                           01-JAN-25   1ABC") {fn} > tmpfile && mv tmpfile {fn}

  AFDB_ID = Path(fn).stem
elif UniProt_ID != "":
  AFDB_ID = f'AF-{UniProt_ID}-F1-model_v4'
  ![ ! -f {AFDB_ID}.pdb ] && wget https://alphafold.ebi.ac.uk/files/{AFDB_ID}.pdb &> /dev/null
  !grep -v "^DBREF" {AFDB_ID}.pdb > tmp.pdb && mv tmp.pdb {AFDB_ID}.pdb
elif PDB_ID != "" and Chain != "":
  AFDB_ID = f'{PDB_ID}_{Chain}'
  ![ ! -f {PDB_ID}.pdb ] && wget https://files.rcsb.org/download/{PDB_ID}.pdb &> /dev/null
  !python SPfast/utils/extract_chain.py {PDB_ID}.pdb {Chain}
  #HACKY WORKAROUND for DSSP - may not be required always
  !cat <(echo "HEADER    SPFAST-SEARCH                           01-JAN-25   {PDB_ID}") {AFDB_ID}.pdb > tmpfile && mv tmpfile {AFDB_ID}.pdb

![ ! -d {AFDB_ID} ] && mkdir {AFDB_ID}
fast_flag = ''
sp_flag = ''
if fast:
  fast_flag = '-fast'

if Optimization_objective == 'SPscore':
  sp_flag = '-SPscore'


In [8]:
#@title 4. Prepare query structure
%%capture
!./mkdssp-4.4.0-linux-x64 "{AFDB_ID}.pdb" --output-format=dssp > {AFDB_ID}/{AFDB_ID}.dssp
if trim:
  !python SPfast/utils/idealize.py <(echo {AFDB_ID}) --dssdir {AFDB_ID}/ --sdir ./ --odir {AFDB_ID}/ --af2model --trim
else:
  !python SPfast/utils/idealize.py <(echo {AFDB_ID}) --dssdir {AFDB_ID}/ --sdir ./ --odir {AFDB_ID}/ --af2model
!./SPfast/src/prepare_bin.gnu -q {AFDB_ID}/{AFDB_ID}.ideal

In [9]:
#@title 5. Search AFDB-clusters database
#@markdown Table shows **top 200** hits during search - *full results available for download*
import pandas as pd
from math import ceil
from itables import show
from IPython.display import clear_output, HTML, display

def display_dat(fn):
  """Show the hits stored in the SPfast result file ``fn`` as an itables table.

  The file is parsed as space-delimited text without a header row; its
  fields are labelled with the thirteen names in ``all_columns`` and only
  the subset in ``shown_columns`` is passed on to ``show``.
  """
  all_columns = [
      'query', 'db', 'score', 'raw', 'ss_prefilter', 'q_len', 'db_len',
      'eff_len', 'seqid', 'ali_len', 'seeds', 'pass_seeds', 'seg_score',
  ]
  shown_columns = ['query', 'db', 'score', 'seqid', 'q_len', 'db_len', 'ali_len']

  with open(fn) as handle:
    hits = pd.read_csv(handle, delimiter=' ', names=all_columns)

  table_options = dict(order=[[2]], maxBytes=0, lengthMenu=[10, 20, 100])
  show(hits[shown_columns], **table_options)

def progress(value, max=100):
    """Return an ``HTML`` object containing a full-width ``<progress>`` bar.

    Args:
        value: Current progress; used as the ``value`` attribute and as the
            element's fallback text.
        max: Value that represents completion (the ``max`` attribute).
    """
    # The attributes are separated by whitespace only: the original markup
    # had a stray comma after the max attribute.
    return HTML("""
        <progress
            value='{value}'
            max='{max}'
            style='width: 100%'
        >
            {value}
        </progress>
    """.format(value=value, max=max))

![ -f {AFDB_ID}/{AFDB_ID}.sp1 ] && rm {AFDB_ID}/{AFDB_ID}.sp1
![ -f {AFDB_ID}/display.sp1 ] && rm {AFDB_ID}/display.sp1
![ -f {AFDB_ID}/tmp1.sp1 ] && rm {AFDB_ID}/tmp1.sp1
out = display(progress(0, 100), display_id=True)

total_size = 2302899
#shard_size=20000
shard_size = ceil(total_size/200)

i0=0
!touch {AFDB_ID}/display.sp1
while i0<total_size:
  batchstart=i0
  batchend=min(i0+shard_size, total_size)

  # Run search
  !SPfast/src/SPfast.gnu -q {AFDB_ID}/{AFDB_ID}.ideal.bin -tdb afdb-clu.db -batchstart {batchstart} -batchend {batchend} {sp_flag} {fast_flag} -ssprefcut -1. | sed 's/\.ideal\.bin//g' | sed 's/\.ideal//g' > {AFDB_ID}/tmp1.sp1

  # Top 200 to display
  !cat {AFDB_ID}/tmp1.sp1 >> {AFDB_ID}/{AFDB_ID}.sp1
  !cat {AFDB_ID}/tmp1.sp1 {AFDB_ID}/display.sp1 | sort -rnk3 | head -n200 > {AFDB_ID}/tmp2.sp1
  !mv {AFDB_ID}/tmp2.sp1 {AFDB_ID}/display.sp1

  # Update display
  clear_output()
  out.update(progress(min(100, 100*(batchend)/total_size), 100))
  print(f"{batchend}/{total_size} structures searched ({round(100*batchend/total_size,2)}%)")
  display_dat(f'{AFDB_ID}/display.sp1')
  i0+=shard_size
  #break #for testing

# Return entire output
!sort -rnk3 {AFDB_ID}/{AFDB_ID}.sp1 -o {AFDB_ID}/{AFDB_ID}.txt



2302899/2302899 structures searched (100.0%)


query,db,score,seqid,q_len,db_len,ali_len
Loading ITables v2.3.0 from the internet... (need help?),,,,,,


In [11]:
#@title 6. Download result file
#%%capture
# Send the full ranked hit list written by the search cell to the browser.
result_file = f"{AFDB_ID}/{AFDB_ID}.txt"
files.download(result_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>