<a href="https://colab.research.google.com/github/rvanasa/deep-antibody/blob/master/docking_hex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Google Drive setup

from IPython.display import clear_output
from google.colab import drive
# Mount Google Drive at /gdrive; a later cell symlinks ./outputs to a
# shared-drive folder underneath this mount point.
drive.mount('/gdrive')
# Clear the cell output produced by the mount call.
clear_output()

In [None]:
#@title Workspace setup

# biopython provides the Bio.PDB modules imported below; the pdb_selchain /
# pdb_tidy / pdb_mkensemble commands that dock() shells out to are expected to
# come from pdb-tools.
# NOTE(review): versions are not pinned, so a fresh runtime may pick up newer
# releases than the ones this notebook was developed against.
!pip install -q biopython pdb-tools

from IPython.display import clear_output, display
# Discard pip's output.
clear_output()

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import Bio
import Bio.PDB
from Bio.PDB import DSSP

# Window half-width and the derived full width (centre element plus
# `contact_buffer` on each side: 2 * 4 + 1 = 9).
# NOTE(review): neither value is referenced in the visible cells.
contact_buffer = 4
contact_window_size = contact_buffer * 2 + 1

# Three-letter residue names. Position i pairs with oneletters[i] for the first
# 20 entries; the trailing '???' placeholder has NO counterpart in `oneletters`
# (21 names vs. 20 letters), so it must not be used as an index into it.
amino_acids = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', '???']
oneletters = 'ARNDCEQGHILKMFPSTWYV'
# Presumably the DSSP secondary-structure codes -- TODO confirm; not referenced
# in the visible cells.
ssletters = 'HBEGITS'

# Shared parser instance used by parse(); QUIET=True is passed to PDBParser.
parser = Bio.PDB.PDBParser(QUIET=True)

def parse(ident, cache_dir=None):
  cache_dir = cache_dir.rstrip('/') + '/' if cache_dir else ''
  filename = ident if ident.endswith('.pdb') else f'{cache_dir}{ident}.pdb'
  if '.' not in ident and not os.path.exists(filename):
    !wget -nc https://files.rcsb.org/download/{ident}.pdb
    !mv {ident}.pdb {cache_dir}
  return parser.get_structure(ident, filename)

loaded_dssp = False

def run_dssp(filename, cache_dir=None):
  global loaded_dssp
  if not loaded_dssp:
    loaded_dssp = True
    !wget -N ftp://ftp.cmbi.ru.nl/pub/software/dssp/dssp-2.0.4-linux-amd64 -O /usr/local/bin/dssp && chmod +x /usr/local/bin/dssp
  
  structure = parse(filename, cache_dir)
  adfs = []
  for model in structure.get_models():
    index_lookup = {(chain.id, res.id): i for chain in model.get_chains() for i, res in enumerate(trim_residues(chain.get_residues()))}

    dssp = DSSP(model, filename)
    rows = [(key, index_lookup[key, res_id], *v[1:]) for (key, res_id), v in dssp.property_dict.items()]

    dfs = pd.DataFrame(rows, columns=[
        'Key', 'Index', 'Residue',
        'SS', 'ASA', 'Phi', 'Psi',
        'NH->O_1_relidx', 'NH->O_1_energy',
        'O->NH_1_relidx', 'O->NH_1_energy',
        'NH->O_2_relidx', 'NH->O_2_energy',
        'O->NH_2_relidx', 'O->NH_2_energy'])
    
    dfs.insert(0, 'Model', model.id)
    adfs.append(dfs)
  return pd.concat(adfs)


def trim_residues(rs):
  """Drop waters, then strip non-amino-acid residues from both chain ends.

  Args:
    rs: Iterable of residue objects exposing a `resname` attribute.

  Returns:
    A new list without 'HOH' residues whose first and last elements (if any)
    have a `resname` listed in `amino_acids`. Residues in the interior are kept
    regardless of their name.
  """
  kept = [res for res in rs if res.resname != 'HOH']
  start, stop = 0, len(kept)
  # Advance past leading residues that are not in amino_acids.
  while start < stop and kept[start].resname not in amino_acids:
    start += 1
  # Retreat past trailing residues that are not in amino_acids.
  while stop > start and kept[stop - 1].resname not in amino_acids:
    stop -= 1
  return kept[start:stop]


def create_seq(rs):
  """Translate three-letter residue names into a one-letter sequence string.

  Args:
    rs: Iterable of three-letter residue names (e.g. 'ALA').

  Returns:
    A string with one character per input name. Names without a one-letter
    code -- unknown names and the '???' placeholder -- are rendered as 'X'.
  """
  # zip() stops at the shorter input, so the trailing '???' entry of
  # amino_acids (which has no counterpart in oneletters) is left out of the
  # mapping and falls back to 'X' instead of indexing past the end of
  # oneletters.
  letter_by_name = dict(zip(amino_acids, oneletters))
  return ''.join(letter_by_name.get(r, 'X') for r in rs)


def cmd(command, always_succeed=False):
  """Run one shell command, or a sequence of shell commands in order.

  Args:
    command: A shell command string, or an iterable of commands (each of which
      may itself be an iterable).
    always_succeed: When true, non-zero exit statuses are ignored.

  Raises:
    Exception: If a command exits with a non-zero status and `always_succeed`
      is false. Remaining commands of a sequence are then not run.
  """
  if not isinstance(command, str):
    for c in command:
      # Forward the flag so it also applies to every command of a sequence.
      cmd(c, always_succeed)
  elif os.system(command) and not always_succeed:
    raise Exception(f'Non-zero exit code in command: $ {command}')

In [None]:
#@title CUDA installation

!apt update -qq
!wget -nc https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
!dpkg -i cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
!apt-key add /var/cuda-repo-8-0-local-ga2/7fa2af80.pub
!apt update -qq
# !apt install cuda gcc-5 g++5 -y -qq
!apt install gcc-5 g++5 -y -qq ###
!ln -s /usr/bin/gcc-5 /usr/local/cuda/bin/gcc
!ln -s /usr/bin/g++5 /usr/local/cuda/bin/g++
!apt install cuda-8.0
clear_output()

In [None]:
#@title Hex installation

# Installer build to fetch; its CUDA suffix (8.0) matches the CUDA version
# installed by the CUDA installation cell.
%env HEX_VERSION=hex-8.1.1-x64-centos7-cuda-8.0
# dock() invokes the Hex binary under /root/hex/exe/, i.e. inside HEX_ROOT.
%env HEX_ROOT=/root/hex
# Presumably read by Hex as its cache location -- TODO confirm.
%env HEX_CACHE=/root/hex_cache
# -nc: skip the download when the installer is already present.
!wget -nc http://hex.loria.fr/dist800/$HEX_VERSION.run
!chmod +x $HEX_VERSION.run
# Pipe a fixed sequence of answers ('y' or an empty line) into the installer's
# stdin so it runs unattended.
# NOTE(review): the answer sequence is presumably tied to this installer
# version's prompts -- re-check it whenever HEX_VERSION changes.
!printf 'y\ny\n\ny\ny\ny\n\ny\ny\ny\n' | sudo ./$HEX_VERSION.run
clear_output()

In [None]:
#@title Hex docking macro

%%file hex_dock.mac

docking_fft_device  2
docking_fft_type    5

# moving_thing 1
# randomise_molecule
# commit_view
# moving_thing 2
# randomise_molecule
# commit_view
# moving_thing -1

# docking_correlation 1

# receptor_range_angle 180
# ligand_range_angle 30
# twist_range_angle 360

docking_main_scan   16
docking_main_search 25

activate_docking 1

# activate_matching
# view_matching_solution -1
# save_range 1 500 /content/ results_match pdb

save_range 1 100 /content/results result pdb


# open_complex /content/result.pdb
# fit_ligand
# save_both /content/result_fit.pdb


Writing hex_dock.mac


In [None]:
#@title Hex docking process

# Make sure the directory that dock() reads and writes its input PDB files in exists.
!mkdir -p inputs/

def dock(bname, bmodel, aname, amodel, keys, split_models=True, replace=True):
  """Dock one structure's H+L chains against another structure's chain with Hex.

  Relative paths (inputs/, results/, outputs/) are used next to absolute
  /content/... paths handed to Hex, so the working directory is assumed to be
  /content.

  Args:
    bname: Identifier of the structure providing chains `hk` and `lk`.
    bmodel: Model id to use from `bname`.
    aname: Identifier of the structure providing chain `ak`.
    amodel: Model id to use from `aname`.
    keys: Three chain ids `(hk, lk, ak)`.
    split_models: If true, write 'inputs/<name>_<model>.pdb' containing only
      the requested model. If false, the whole file is copied under that name.
    replace: If false, do nothing when the output file already exists.

  Side effects:
    Writes files under inputs/, recreates results/, writes hex_log.txt and
    stores the ensemble of Hex solutions in
    'outputs/<bname>_<bmodel>_<aname>_<amodel>_<hk><lk><ak>_Docked_Hex.pdb'.

  Raises:
    AssertionError: If Hex left no files in results/.
    Exception: If one of the helper shell commands fails (see cmd()).
  """
  hk, lk, ak = keys

  output_file = f'outputs/{bname}_{bmodel}_{aname}_{amodel}_{hk}{lk}{ak}_Docked_Hex.pdb'
  if not replace and os.path.exists(output_file):
    print('Skipping:', bname, bmodel, hk, lk, '<>', aname, amodel, ak)
    return

  if split_models:
    from Bio.PDB.PDBIO import PDBIO

    for name, model in [(bname, bmodel), (aname, amodel)]:
      if not os.path.exists(f'inputs/{name}_{model}.pdb'):
        structure = parse(name, 'inputs/')
        # Collect the ids before detaching: detach_child() takes a child *id*
        # (not the Model object) and modifies the list get_models() iterates.
        for model_id in [s_model.id for s_model in structure.get_models()]:
          if model_id != model:
            structure.detach_child(model_id)

        io = PDBIO()
        io.set_structure(structure)
        io.save(f'inputs/{name}_{model}.pdb')
  else:
    parse(bname, 'inputs/')
    parse(aname, 'inputs/')
    cmd([
        f'cp inputs/{bname}.pdb inputs/{bname}_{bmodel}.pdb',
        f'cp inputs/{aname}.pdb inputs/{aname}_{amodel}.pdb',
    ])

  clear_output()
  print(bname, bmodel, ''.join([hk, lk]), '<>', aname, amodel, ak)

  # Extract the requested chains and start from an empty results directory so
  # that only files from this run are collected below.
  cmd([
       f'pdb_selchain -{hk},{lk} inputs/{bname}_{bmodel}.pdb | pdb_tidy > inputs/{bname}_{bmodel}_{hk}{lk}.pdb',
       f'pdb_selchain -{ak} inputs/{aname}_{amodel}.pdb | pdb_tidy > inputs/{aname}_{amodel}_{ak}.pdb',
       f'rm -rf results/ && mkdir results/',
  ])
  # Hex's exit status is ignored; success is judged by the files it produced.
  cmd(f'sudo /root/hex/exe/hex8.1.1-cuda.x64 -nogui /content/inputs/{bname}_{bmodel}_{hk}{lk}.pdb /content/inputs/{aname}_{amodel}_{ak}.pdb -e /content/hex_dock.mac > hex_log.txt', True)

  # os.listdir() returns entries in arbitrary order; sort them so the models of
  # the ensemble are written in a deterministic, file-name order.
  result_files = ' '.join('results/' + s for s in sorted(os.listdir('results/')))
  assert len(result_files), 'No result files'
  cmd([
      f'pdb_mkensemble {result_files} > {output_file}',
  ])

In [None]:
# Point ./outputs (the directory dock() writes its results to) at the shared
# Drive folder. `rm -f` removes an existing link or file; if ./outputs is a
# real directory, `rm` fails and the `&&` chain stops before creating the link.
!rm -f ./outputs && ln -s "/gdrive/Shared drives/TA(CO)^2 Re-Epitoping/Data/Hex" ./outputs

In [None]:
!wget -N https://raw.githubusercontent.com/rvanasa/deep-antibody/master/thera_collection.zip
!unzip -nq thera_collection.zip
clear_output()

dfdx = pd.read_csv('docked_preprocessed.csv')
dfdx = dfdx[dfdx.File.isin([f'{name}.pdb' for name in '2dd8, 2ghw, 3bgf, 4k7p, 5vzr, 5zv3, 6nb7, 6w41, 6wps, 6yla, 6yor, 7bz5'.split(', ')])]
print(list(dfdx.File.unique()))

['2dd8.pdb', '2ghw.pdb', '3bgf.pdb', '5zv3.pdb', '6nb7.pdb', '6w41.pdb', '6wps.pdb', '6yla.pdb', '6yor.pdb', '7bz5.pdb']


In [None]:
# Antibodies to dock: the reference complexes followed by the prioritized set.
dft = pd.read_csv('thera_prioritized.csv')
dft = pd.concat([dfdx, dft])

# Antigen chains used as docking targets, as (PDB id, model, antigen chain).
# Further candidates, currently disabled:
# ('6yla', 0, 'E'), ('6yor', 0, 'E'), ('6wps', 0, 'E')
controls = [('6w41', 0, 'C'), ('7bz5', 0, 'A'), ('6yor', 0, 'A'), ('6yla', 0, 'A'), ('6wps', 0, 'A'), ('6nb7', 0, 'A'), ('6wps', 0, 'B')]

# Every control must be a (file, model, antigen chain) row of the reference
# table. Whole rows are compared, with the '.pdb' suffix stripped from File:
# testing a tuple with `in` against a 2-D array is true as soon as ANY single
# cell matches, which made the check pass for controls that do not exist.
known_antigens = set(zip(
    [name.replace('.pdb', '') for name in dfdx.File],
    dfdx.Model,
    dfdx.AKey,
))
for t in controls:
  assert t in known_antigens, f'{t} not found'

for aname, amodel, ak in controls:
  for _, row in dft.iterrows():
    bname, bmodel, hk, lk = row.File.replace('.pdb', ''), row.Model, row.HKey, row.LKey

    # All three chain ids must be pairwise distinct. The chained form
    # `hk != lk != ak` never compares hk with ak.
    if hk == lk or lk == ak or hk == ak:
      print('Key collision')
      continue

    # replace=False: pairs whose output file already exists are skipped.
    dock(bname, bmodel, aname, amodel, (hk, lk, ak), split_models=False, replace=False)

clear_output()
print('Done')

Done


In [None]:
# dft = pd.read_csv('thera_prioritized_0.csv')

# dfb = pd.read_csv('B_agg.csv')
# dfb = dfb.rename(columns=dict(BFile='File')).merge(dft, on='File').sort_values('Result', ascending=False)

# dfa = dft[dft.Priority == max(dft.Priority)]
# dfa = dfa[::len(dfa) // 10]
# dfa = pd.concat([dfdx, dfa])

# for _, brow in dfb.iterrows():
#   bname, bmodel, hk, lk = brow.File.replace('.pdb', ''), brow.Model, brow.HKey, brow.LKey

#   for _, arow in dfa[::-1].append(brow)[::-1].iterrows():
#     aname, amodel, ak = arow.File.replace('.pdb', ''), arow.Model, arow.AKey

#     # assert hk != lk != ak
#     if not (hk != lk != ak):
#       print('Key collision')
#       continue

#     dock(bname, bmodel, aname, amodel, (hk, lk, ak), split_models=False, replace=False)

# clear_output()
# print('Done')

In [None]:
# dock('6w41', 0, '6w41', 0, 'HLC', split_models=False, replace=False)