## Graph Attention Site Prediction (GrASP): Identifying Druggable Binding Sites Using Graph Neural Networks with Attention

Zachary Smith,<sup>1, 2, a)</sup> Michael Strobel,<sup>3, a)</sup> Bodhi P. Vani,<sup>1</sup> and Pratyush Tiwary<sup>1, 4, b)</sup>

<sup>1)</sup>Institute for Physical Science and Technology, University of Maryland, College Park 20742, USA. \
<sup>2)</sup>Biophysics Program, University of Maryland, College Park 20742, USA. \
<sup>3)</sup>Department of Computer Science, University of Maryland, College Park 20742,
USA. \
<sup>4)</sup>Department of Chemistry and Biochemistry, University of Maryland, College Park 20742, USA. \

<sup>a)</sup>These authors contributed equally. \
<sup>b)</sup>Electronic mail: ptiwary@umd.edu

For more details about the model architecture and training see below:

[Github](https://github.com/tiwarylab/GrASP/tree/colab)

[Preprint](https://www.biorxiv.org/content/10.1101/2023.07.25.550565v1.abstract)

---

This notebook contains everything you need to run GrASP. It is recommended that you run GrASP using a GPU for the best experience (see Runtime > Change Runtime Type).

Please submit all issues on GitHub.

In [None]:
# Run the following cell first: This _will_ crash (restart) your runtime.
# Wait for this to happen, then continue running the next cells.
# NOTE(review): the restart is presumably triggered by condacolab.install()
# swapping the runtime's Python environment - confirm against condacolab docs.

# Quietly (-q) install condacolab, which provides the conda/mamba commands
# used by the install cell that follows.
!pip install -q condacolab
import condacolab
condacolab.install()

In [1]:
# @title Clone the Repository and Install Packages { display-mode: "form" }
# @markdown Note that the package install will take approximately 15 minutes.
import os

if not os.path.isdir("/content/GrASP"):
  !git clone https://github.com/tiwarylab/GrASP
  %cd /content/GrASP
else:
  pass

from IPython.display import clear_output

if not ('installs_completed' in vars() or 'install_completed' in globals()):
  # Install py3Dmol in python 3.10 for colab visualization
  !pip install py3Dmol

  # Install python 3.7 for openbabel, mdtraj, and mdanalysis
  !conda install --no-pin python=3.7 # Risky risky
  !mamba install -y --no-pin  'python_abi=*=*cp*'
  !mamba install -y --no-pin openbabel=2.4.1 mdtraj==1.9.7 mdanalysis==2.1.0

  # Install the remainder of the open babel environment in python 3.7
  # (anything python scripts in the the command line will use 3.7,
  # python cells will use 3.10)
  !pip install -r /content/GrASP/envs/ob_env_colab.txt

  # Install Pytorch Geometric in a virtual environment
  !python3 -m venv ./pytorch_venv
  !source ./pytorch_venv/bin/activate; python3 -m pip install -U --force-reinstall pip; pip install -r /content/GrASP/envs/pytorch_env_colab.txt
  !source ./pytorch_venv/bin/activate;pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-1.13.0+cu117.html


  installs_completed = True
  clear_output()
else:
  pass

%cd /content/GrASP

/content/GrASP


In [2]:
import requests
import os
from glob import glob
import ipywidgets as widgets
from IPython.display import display
from google.colab import files
import shutil

#@title Enter PDB ID or Upload an Input File { display-mode: "form" }

# Directory that receives structures fetched from RCSB or uploaded by the
# user; the download, upload and clear callbacks all operate on it.
save_path = "./benchmark_data_dir/production/unprocessed_inputs"
# exist_ok=True creates the directory tree only when it is missing, in a
# single call instead of a separate check followed by a create.
os.makedirs(save_path, exist_ok=True)



def initialize_file_ui():
  """Build and display the input panel: RCSB fetch, file upload, and clear.

  The PDB-ID text box and the file-upload widget are stored in the
  module-level names ``downloader`` and ``uploader`` so that the button
  callbacks can read their current values.
  """
  global downloader, uploader

  # Text entry for the PDB ID to fetch.
  downloader = widgets.Text(
      value='',
      placeholder='PDB id (e.g., 7UXE)',
      description='',
      disabled=False,
  )
  # File picker that accepts several files at once.
  uploader = widgets.FileUpload(description="Select Files", multiple=True)

  # One button per action, each wired to its callback.
  fetch_btn = widgets.Button(description="Fetch from RCSB")
  fetch_btn.on_click(rcsb_download_func)

  store_btn = widgets.Button(description="Upload Files")
  store_btn.on_click(upload_func)

  wipe_btn = widgets.Button(description="Clear Upload Directory")
  wipe_btn.on_click(clear_dir)

  # Three fixed-width columns: fetch | upload | clear.
  columns = [
      widgets.VBox([downloader, fetch_btn]),
      widgets.VBox([uploader, store_btn]),
      widgets.VBox([wipe_btn]),
  ]
  panel_layout = widgets.Layout(grid_template_columns='repeat(3, 25em)')
  display(widgets.GridBox(columns, layout=panel_layout))


def upload_func(_):
  """Write every file selected in ``uploader`` into ``save_path``.

  Button callback; the argument (the clicked button) is ignored. After the
  files are written the cell output is cleared, the input panel is rebuilt
  and the number of stored files is printed.
  """
  uploads = uploader.value
  # ipywidgets 7 exposes a {filename: {'content': ...}} dict, whereas
  # ipywidgets 8 exposes a tuple of dicts that each carry their own 'name'.
  if isinstance(uploads, dict):
    named_contents = [(name, info['content']) for name, info in uploads.items()]
  else:
    named_contents = [(info['name'], info['content']) for info in uploads]

  for name, content in named_contents:
    # Keep only the final path component so a crafted name cannot place the
    # file outside save_path.
    target = os.path.join(save_path, os.path.basename(name))
    with open(target, "wb") as fp:
      fp.write(content)

  clear_output()
  initialize_file_ui()
  print(f"Uploaded {len(named_contents)} files")

def rcsb_download_func(_):
  """Download the PDB entry typed into ``downloader`` and store it in ``save_path``.

  Button callback; the argument (the clicked button) is ignored. The result
  of the attempt is reported with a printed message; nothing is returned.
  """
  # Surrounding whitespace is a common copy/paste artefact and would
  # otherwise end up in both the URL and the file name.
  pdb_id = downloader.value.strip()
  if not pdb_id:
    print("Please enter a PDB ID.")
    return

  url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
  try:
    # The timeout keeps the callback from hanging on a stalled connection.
    response = requests.get(url, timeout=30)
  except requests.RequestException as err:
    print(f"Failed to download PDB file. Error: {err}")
    return

  if response.status_code == 200:
    with open(f"{save_path}/{pdb_id}.pdb", "wb") as f:
      f.write(response.content)
    print("PDB file downloaded successfully.")
  else:
    print(f"Failed to download PDB file. Status code: {response.status_code}")

def clear_dir(_):
  """Delete every entry matched by ``save_path/*`` and print how many were removed.

  Button callback; the argument (the clicked button) is ignored.
  """
  stale_paths = glob(save_path + '/*')
  for stale_path in stale_paths:
    os.remove(stale_path)
  removed_count = len(stale_paths)
  print(f"Removed {removed_count} files.")

# Module-level handles to the input widgets; initialize_file_ui() rebinds
# them to the live widgets so the button callbacks can read their values.
downloader = None
uploader = None
# Draw the input panel as soon as the cell runs.
initialize_file_ui()

GridBox(children=(VBox(children=(Text(value='', placeholder='PDB id (e.g., 7UXE)'), Button(description='Fetch …

In [3]:
# @title Inference { display-mode: "form" }
from time import time

def grasp_clear_dir():
  """Remove GrASP's working data for the production set.

  Targets are every path matched by the production glob pattern plus the
  production ``probs`` directory. Files are deleted individually and
  directories are removed together with their contents.
  """
  targets = glob("/content/GrASP/benchmark_data_dir/production/**/*")
  targets.append("/content/GrASP/test_metrics/production/probs/")
  for target in targets:
    if os.path.isfile(target):
      os.remove(target)
    elif os.path.isdir(target):
      shutil.rmtree(target)

def initialize_run_ui():
  """Clear the cell output and draw the inference controls.

  The hydrogen checkbox is stored in the module-level name ``hydrogen_box``
  so that ``run_GrASP`` can read its state when the button is pressed.
  """
  global hydrogen_box
  clear_output()

  # Checked by default: strip existing hydrogens and add new ones.
  hydrogen_box = widgets.Checkbox(
      value=True,
      description='Strip and Add New Hydrogens',
  )

  run_button = widgets.Button(description="Run GrASP")
  run_button.on_click(run_GrASP)

  # Narrow column for the button, wider one for the checkbox.
  columns = [widgets.VBox([run_button]), widgets.VBox([hydrogen_box])]
  grid_layout = widgets.Layout(grid_template_columns='10% 25%')
  display(widgets.GridBox(columns, layout=grid_layout))

def run_GrASP(n):
  print("Running GrASP.")
  clear_output()
  start_time = time()
  print("Building protein graphs...")
  if hydrogen_box.value:
    !python3 /content/GrASP/parse_files.py production
  else:
    !python3 /content/GrASP/parse_files.py production --skip_hydrogen_cleanup
  !source ./pytorch_venv/bin/activate; python3 infer_test_set.py
  !python3 color_pdb.py
  initialize_run_ui()
  print(f"Done in {time() - start_time:.2f} seconds.")
  grasp_clear_dir()

# Module-level handle to the hydrogen checkbox; initialize_run_ui() rebinds
# it to the live widget that run_GrASP() reads.
hydrogen_box = None
# Draw the inference controls as soon as the cell runs.
initialize_run_ui()

# @markdown If the below box is checked, all hydrogens will be stripped from the
# @markdown molecule and new hydrogens will be added with OpenBabel. \
# @markdown This is recommended for downloaded PDB files but can be skipped when
# @markdown using previously processed structures.

GridBox(children=(VBox(children=(Button(description='Run GrASP', style=ButtonStyle()),)), VBox(children=(Check…

In [4]:
import datetime
import py3Dmol

# Directory from which colored output structures are listed, displayed and
# zipped for download.
# NOTE(review): the path hard-codes one specific trained-model run
# (timestamp, cv_0, epoch_49) - confirm it matches what color_pdb.py writes.
output_path = "./test_metrics/production/colors/train_full/trained_model_s_train_full_ag_multi_1680643832.8660116/cv_0/epoch_49/"
# Glob pattern matching every PDB file directly inside output_path.
output_glob = output_path + "*.pdb"
# @title Display and Download Structures { display-mode: "form" }
# @markdown Structures are shown in a roygb gradient with blue being low
# @markdown druggability, and red being high druggability.
def initialize_input(_):
  """Clear the cell output and draw the structure-selection menu.

  Used both directly and as a button callback; the argument is ignored.
  The dropdown and the opacity slider are stored in the module-level names
  ``file_selection`` and ``opacity_slider`` for ``display_molecule`` to read.
  """
  global file_selection, opacity_slider
  clear_output()

  # Offer only the file names (not the full paths) of the available outputs.
  available_names = [path.split('/')[-1] for path in glob(output_glob)]

  file_selection = widgets.Dropdown(
      options=available_names,
      value=None,
      description='Output File:',
      disabled=False,
  )

  # Re-running this function rebuilds the dropdown from the current files.
  refresh_button = widgets.Button(description="Refresh List")
  refresh_button.on_click(initialize_input)

  selector_row = widgets.GridBox(
      [widgets.VBox([file_selection]), widgets.VBox([refresh_button])],
      layout=widgets.Layout(grid_template_columns='repeat(2, 23em)'),
  )
  display(selector_row)

  # The slider starts at the most recently used opacity.
  opacity_slider = widgets.FloatSlider(min=0, max=1, value=surface_opacity, step=0.1, description='Opacity:')
  display(opacity_slider)

  # Action buttons, shown in this order below the slider.
  actions = (
      ("Display Molecule", display_molecule),
      ("Download All", download_all_files),
  )
  for label, handler in actions:
    action_button = widgets.Button(description=label)
    action_button.on_click(handler)
    display(action_button)

def download_all_files(b):
  """Zip the contents of ``output_path`` and hand the archive to the browser.

  Button callback; ``b`` (the clicked button) is unused. The archive is
  written to /content with a timestamped name, and archives from earlier
  downloads are deleted afterwards to save space.
  """
  # Pressing the button before any run has produced output would otherwise
  # fail while building the archive.
  if not os.path.isdir(output_path):
    print("No GrASP output found. Run GrASP before downloading.")
    return

  timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
  archive_base = "/content/GrASP_Download_" + timestamp
  shutil.make_archive(archive_base, 'zip', output_path)
  files.download(archive_base + ".zip")

  # Delete old downloads to save space
  for export_file in glob("/content/GrASP_Download_*.zip"):
    if timestamp not in export_file:
      os.remove(export_file)

def display_molecule(b):
  """Render the structure chosen in the dropdown and offer follow-up actions.

  Button callback; ``b`` (the clicked button) is unused. Does nothing when
  no file is selected. Otherwise the cell output is replaced by a py3Dmol
  view of the structure with a surface colored by the 'b' property, followed
  by a button to download that file and a button to return to the menu.
  The slider value is stored in the module-level ``surface_opacity`` so the
  menu reopens with the same opacity.
  """
  global surface_opacity

  output_selection = file_selection.get_interact_value()
  if output_selection is None:
    return

  # The dropdown lists the file names found under output_path, so the
  # selection maps straight onto a path.
  out_file = os.path.join(output_path, output_selection)
  # Check before clearing the menu: the file may have been removed since the
  # list was built.
  if not os.path.isfile(out_file):
    print(f"{output_selection} is no longer available. Refresh the list and try again.")
    return

  clear_output()
  surface_opacity = opacity_slider.get_interact_value()

  print("Showing", output_selection)
  # Only the read needs the file handle; close it before rendering.
  with open(out_file) as f:
    pdb_string = f.read()

  view = py3Dmol.view()
  view.addModel(pdb_string,'pdb')
  view.setStyle('cartoon')
  # min/max are given as 1/0; per the cell description this shows high
  # druggability in red and low druggability in blue.
  view.addSurface(py3Dmol.VDW,{'opacity':surface_opacity,'colorscheme':{'prop':'b','gradient':'roygb','min':1,'max':0}})
  view.center()
  view.zoomTo()
  view.show()

  def download_file(b):
    # Send the structure currently on display to the browser.
    files.download(out_file)

  download_button = widgets.Button(description="Download Molecule")
  download_button.on_click(download_file)
  display(download_button)

  reselect_button = widgets.Button(description="Back to Menu")
  reselect_button.on_click(initialize_input)
  display(reselect_button)

# NOTE(review): file_selection_slider is not referenced by any code visible
# in this notebook - presumably a leftover; confirm before removing.
file_selection_slider = None
# Initial surface opacity; display_molecule() overwrites it with the slider value.
surface_opacity = 0.8

# Draw the selection menu when the cell runs; the argument is ignored.
initialize_input('_')


GridBox(children=(VBox(children=(Dropdown(description='Output File:', options=(), value=None),)), VBox(childre…

FloatSlider(value=0.8, description='Opacity:', max=1.0)

Button(description='Display Molecule', style=ButtonStyle())

Button(description='Download All', style=ButtonStyle())