# Metal 1D

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lcbc-epfl/metal-site-prediction/blob/main/Metal1D/ColabMetal1D.ipynb)

Quickly find out where potential metal-binding sites are located in a protein.

*If you use Metal1D, please cite:*
>Accurate prediction of transition metal ion location via deep learning
>S.L. Dürr, A. Levy, U. Rothlisberger
>bioRxiv 2022.XXXXXXXX; doi: https://doi.org/XXXXXXXXXXXX

In [None]:
#@title Install dependencies
!pip install py3Dmol > /dev/null 
!pip install biopandas > /dev/null

In [5]:
#@title Imports
import os
import re
import shutil
import sys
import urllib.request
import warnings

import ipywidgets as widgets
import py3Dmol
from ipywidgets import interact, fixed

from utils.helpers import *

# NOTE(review): overrides the interpreter path reported by `sys.executable`.
# Presumably needed so that code which launches Python subprocesses on Colab
# picks up /usr/local/bin/python — confirm this is still required.
sys.executable = '/usr/local/bin/python'

In [6]:
#@title Input PDB or upload file, choose parameters and then run remaining cells.
from google.colab import files
import os.path

#@markdown Use a 4-letter code for entries in the RCSB Protein Data Bank (e.g. `3FQQ`), or a UniProt accession code for AlphaFold structures (e.g. `Q5VSL9`).
pdbfile = '3FQQ' #@param {type:"string"}
use_local_file = False #@param {type:"boolean"}
#@markdown If using a local file, upload it via the left toolbar into the working directory and provide its name in the `pdbfile` field.
#@markdown Only the file name is used: the file must be called `<name>.pdb` and be located in the working directory.

#@markdown ### Prediction Parameters

#@markdown    * Metal ion of interest (default ZN)
Metal_Ion = "ZN" #@param  {type:"string"}
#@markdown    * Radius used to perform the search around each amino acid reference atom(s). <br>
#@markdown    For a given metal with metal-aa distance D, SearchRadius = 2*D + eps
#@markdown    (where eps accounts for possible rearrangements/structure relaxation)
# Numeric form field: the prediction cell converts this value with float().
Search_Radius = 5.5 #@param  {type:"number"}

#@markdown     Default value (5.5) obtained from the average LINK distance (2.2+-0.2) for ZN structures.


#@markdown    * Prediction done considering residues with a score within
#@markdown    ScoreThreshold% of the highest-scored one. <br>
#@markdown    Final re-scoring excludes sites below ScoreThreshold% of the highest-scored one
# Numeric form field (was declared as "string" although the default is a float literal).
Score_Threshold = 0.75 #@param  {type:"number"}

#@markdown    Default (0.75) was found to be the best compromise
#@markdown    between sites found and false positives for the ZN test set

#@markdown ### Saving Option
save_to_google_drive = False #@param {type:"boolean"}

#@markdown Files will be saved in `Metal1D/` in your home folder

# Mount Google Drive only when requested; mounting triggers an interactive
# authorization prompt in Colab.
if save_to_google_drive:
  from google.colab import drive
  drive.mount('/content/drive')
  print('Google Drive mounted')

In [7]:
#@title Predictions

# Resolve the structure to analyse. Both branches define:
#   pdb      - identifier without extension (also used by the visualization cell)
#   pdb_file - '<pdb>.pdb', read from / written to the working directory
if use_local_file:
  # Only the base name (up to its first '.') of the supplied path is kept, so the
  # file has to be present in the working directory as '<name>.pdb'.
  pdb = os.path.basename(pdbfile).split('.')[0]
  pdb_file = pdb + '.pdb'
  if not os.path.isfile(pdb_file):
    raise Exception(f"File {pdb_file} does not exist")
  if not os.access(pdb_file, os.R_OK):
    raise Exception(f"File {pdb_file} is not readable")
else:
  # Pattern for UniProt accession codes; it has to cover the *whole* input.
  uniprot_pattern = '[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}'
  if len(pdbfile) == 4:
    # Four-character input is treated as an RCSB PDB ID; the '.pdb1' file is
    # downloaded and stored locally as '<ID>.pdb'.
    urllib.request.urlretrieve(f'https://files.rcsb.org/download/{pdbfile.lower()}.pdb1', f'{pdbfile}.pdb')
  elif re.fullmatch(uniprot_pattern, pdbfile):
    print('using alphafold structure')
    # NOTE(review): the URL pins AlphaFold DB model version 'v2' — confirm that
    # this version is still served.
    urllib.request.urlretrieve(f'https://alphafold.ebi.ac.uk/files/AF-{pdbfile}-F1-model_v2.pdb', f'{pdbfile}.pdb')
  else:
    # Previously an input that matched the UniProt pattern only as a prefix was
    # skipped silently (no download, no error); now every invalid code is rejected.
    raise ValueError('pdb code must be 4 letters or Uniprot code does not match')
  pdb = pdbfile
  pdb_file = pdb + '.pdb'

print(pdbfile)

# Convert the form values once, up front, so malformed input fails before any
# prediction work starts.
score_threshold = float(Score_Threshold)
search_radius = float(Search_Radius)

# predict binding sites for metal ions
ProbMap = ExtractProbMap('probmaps/resultsCOORD_'+Metal_Ion.lower()+'.txt') # extract probability map
ppdb_ATOM, Chains = ProteinRead(pdb_file) # read protein structure
CreateOutFile(pdb_file)

# Predictions are made chain by chain
# (considering all residues in the chain and all other chains).
for Chain in Chains:
  print('\tScan Chain '+Chain)
  SitesPredict(Chain, pdb_file, ppdb_ATOM, ProbMap, ScoreThreshold = score_threshold, SearchRadius = search_radius, ChemicalElement = Metal_Ion) # site prediction
SortPredictions(pdb_file, ScoreThreshold = score_threshold) # final sorting of predicted sites according to Metal1D score

if save_to_google_drive:
  # Copy the predicted sites and the input structure to Google Drive. Using
  # os/shutil instead of shell commands avoids interpolating user input into a
  # shell line and raises if a copy fails.
  drive_dir = f'/content/drive/MyDrive/Metal1D/{pdb}'
  os.makedirs(drive_dir, exist_ok=True)
  shutil.copy(f'/content/{pdb}_PredictedSites.xyz', drive_dir)
  shutil.copy(f'/content/{pdb_file}', drive_dir)

3FQQ
	Scan Chain A
	Scan Chain B
----------
SCAN COMPLETED
	Predicted sites can be found in:
	3FQQ_PredictedSites.xyz
----------


In [8]:
#@title Visualization 
#@markdown Metals in the structure (if present) represented as transparent spheres.

#@markdown Predicted sites as full coloured spheres (blue = higher score). Click one probe to check its number ([0] = higher score).


# Build interactive controls for `show_map`; `fixed(pdb)` passes `pdb` through
# unchanged instead of exposing it as a widget.
# `show_map` is not defined in this notebook — presumably it is provided by the
# `utils.helpers` star import (confirm).
# The trailing ';' suppresses the textual repr of the call's return value.
interact(show_map, pdb=fixed(pdb));

interactive(children=(Checkbox(value=False, description='show_sticks_all'), Checkbox(value=True, description='…