# PoseView: Molecular Interaction Patterns at a Glance
This notebook will introduce the ProteinsPlus API endpoints for PoseView. In general we will need a 3D protein and a ligand to generate 2D projections of protein-ligand interactions. The ligand does not need to be the native ligand and can be anywhere relative to the protein as long as it is meaningful to you.

[Katrin Stierand and Matthias Rarey
Journal of Cheminformatics 2010, 2, No. 1 p. P50](https://doi.org/10.1186/1758-2946-2-S1-P50)

In [1]:
# install dependencies
!pip install biopython &>> output.log
!pip install nglview &>> output.log
!pip install rdkit-pypi &>> output.log

In [2]:
# imports
import os
import io
from pathlib import Path
import requests
import sys
import time
from urllib.parse import urljoin
import warnings

from IPython.display import Image
from Bio.PDB import *
from Bio.PDB.PDBExceptions import PDBConstructionWarning
from rdkit import Chem

In [3]:
# constants: base URL of the ProteinsPlus REST API and the endpoints used below
PROTEINS_PLUS_URL = 'https://proteins.plus/api/v2/'
# molecule handler (preprocessor) endpoints
UPLOAD = f'{PROTEINS_PLUS_URL}molecule_handler/upload/'
UPLOAD_JOBS = f'{PROTEINS_PLUS_URL}molecule_handler/upload/jobs/'
PROTEINS = f'{PROTEINS_PLUS_URL}molecule_handler/proteins/'
LIGANDS = f'{PROTEINS_PLUS_URL}molecule_handler/ligands/'
# PoseView endpoints
POSEVIEW = f'{PROTEINS_PLUS_URL}poseview/'
POSEVIEW_JOBS = f'{PROTEINS_PLUS_URL}poseview/jobs/'

In [4]:
#@title Utils functions to call API (unhide if you're interested)

# check server connection
try:
    response = requests.get(PROTEINS_PLUS_URL)
except requests.ConnectionError as connection_error:
    # print a readable hint for the refused-connection case only;
    # the error itself is re-raised in every case
    if 'Connection refused' in str(connection_error):
        print(
            'WARNING: could not establish a connection to the server',
            file=sys.stderr,
        )
    raise
    
def poll_job(job_id, poll_url, poll_interval=1, max_polls=10):
    """Poll the progress of a job

    Continuously polls the server in regular intervals and updates the job information, especially the status.
    Polling stops as soon as the status is neither 'pending' nor 'running', or after max_polls polls.

    :param job_id: UUID of the job to poll
    :type job_id: str
    :param poll_url: URL to send the polling request to
    :type poll_url: str
    :param poll_interval: time interval between polls in seconds
    :type poll_interval: int
    :param max_polls: maximum number of times to poll before exiting
    :type max_polls: int
    :return: polled job (possibly still pending/running if max_polls was reached)
    :rtype: dict
    :raises requests.HTTPError: if the server answers a poll with a 4xx/5xx status code
    """
    job_url = poll_url + job_id + '/'

    def fetch_job():
        # fail with the HTTP error itself instead of a confusing KeyError on
        # 'status' (or a JSON decode error) further down
        response = requests.get(job_url)
        response.raise_for_status()
        return response.json()

    job = fetch_job()
    current_poll = 0
    while job['status'] in ('pending', 'running'):
        print(f"Job {job_id} is {job['status']}")
        current_poll += 1
        if current_poll >= max_polls:
            # give up, but hand back the last known state so the caller can inspect it
            print(f'Job {job_id} has not completed after {max_polls} polling requests'
                  f' and {poll_interval * max_polls} seconds')
            return job
        time.sleep(poll_interval)
        job = fetch_job()
    print(f"Job {job_id} completed with {job['status']}")
    return job

## Generate a PoseView picture for a PDB file 

To get a PoseView picture of your binding site you can upload the protein and the ligand defining your binding site. 



### Run PoseView with a protein and ligand file

In [5]:
#@title Download PDB file and extract ligand as SDF using biopython and rdkit


# selector to extract the ligand we want from the biopython structure
class SingleResidueSelect(Select):
  """Selector that keeps one specific residue of a biopython structure.

  The residue can be an amino acid, ligand, metal, water, etc.
  """

  def __init__(self, name, chain, identifier):
    """Store the properties a residue must have to be selected.

    :param name: residue name
    :type name: str
    :param chain: chain id
    :type chain: str
    :param identifier: ligand infile id
    :type identifier: int
    """
    self.name = name
    self.chain = chain
    self.identifier = identifier

  def accept_residue(self, residue):
    """Accept the residue or refuse it

    :param residue: residue to check
    :type residue: Bio.PDB.Residue.Residue
    :return: 1 if residue should be selected. 0 otherwise.
    :rtype: int
    """
    # compare (name, chain id, sequence number) of the candidate with the stored triple
    candidate = (
        residue.get_resname(),
        residue.get_full_id()[2],
        residue.get_id()[1],
    )
    wanted = (self.name, self.chain, self.identifier)
    return 1 if candidate == wanted else 0

# fetch the protein 4agn from the PDB
file_4agn = Path(PDBList().retrieve_pdb_file('4agn', file_format='pdb'))
# move the download into the working directory under '<code>.pdb' (the first
# three characters of the downloaded file's stem are dropped); os.replace
# overwrites a copy left by an earlier run on every platform, so the cell can
# be re-executed
os.replace(file_4agn, file_4agn.stem[3:] + '.pdb')
file_4agn = file_4agn.stem[3:] + '.pdb' # ProteinsPlus needs .pdb extension
with warnings.catch_warnings():
  # silence biopython's construction warnings while parsing
  warnings.simplefilter('ignore', PDBConstructionWarning)
  structure_4agn = PDBParser().get_structure('4agn', file_4agn)
# save ligand NXG_A_1294 to PDB file using biopython
pdbio = PDBIO()
pdbio.set_structure(structure_4agn)
pdbio.save("NXG_A_1294.pdb", SingleResidueSelect('NXG', 'A', 1294))
# read ligand again and save it as SDF with rdkit
mol_NXG_A_1294 = Chem.MolFromPDBFile("NXG_A_1294.pdb")
if mol_NXG_A_1294 is None:
  # rdkit reports a parse failure by returning None instead of raising
  raise ValueError('rdkit could not read the ligand from NXG_A_1294.pdb')
with Chem.SDWriter("NXG_A_1294.sdf") as w:
  w.write(mol_NXG_A_1294)
print(f'Saved protein to {file_4agn}')
print('Saved ligand to NXG_A_1294.sdf')

Downloading PDB structure '4agn'...
Saved protein to 4agn.pdb
Saved ligand to NXG_A_1294.sdf


Now we can access the protein structure in the 4agn.pdb file and its ligand in the NXG_A_1294.sdf file.

In [6]:
# open the uploads in binary mode: requests recommends it so that the
# Content-Length it computes matches the bytes that are actually sent
with open('NXG_A_1294.sdf', 'rb') as upload_ligand_file, \
        open('4agn.pdb', 'rb') as upload_file:
    query = {'protein_file': upload_file, 'ligand_file': upload_ligand_file}
    job_submission = requests.post(POSEVIEW, files=query).json()
poseview_job = poll_job(job_submission['job_id'], POSEVIEW_JOBS)
print(poseview_job['image'])
Image(url=poseview_job['image'], width=600, height=600)  # freely scalable SVG

Job faea490e-5d0d-4af9-bf8a-23771c9fa8b2 completed with success
https://proteins.plus/static/media/poseview/tmpw110xf_a.svg


PoseView images are freely scalable and editable SVGs. The PoseView job model contains a URL to the actual image. If you want to keep the image, please download it (e.g. you could "right click" -> "save target as" on the URL above).


### Using the ProteinsPlus preprocessor to obtain ligand file

You can also use the preprocessor to split ligands from an uploaded protein file and then use one of them to generate a PoseView image with. Here we will upload the same file but to the preprocessor:

In [7]:
# open the upload in binary mode: requests recommends it so that the
# Content-Length it computes matches the bytes that are actually sent
with open('4agn.pdb', 'rb') as upload_file:
    query = {'protein_file': upload_file}
    preprocessing_job_submission = requests.post(UPLOAD, files=query).json()
preprocessing_job = poll_job(preprocessing_job_submission['job_id'], UPLOAD_JOBS)

Job 0892fc1d-6ab3-4f58-8eb9-18b3807451bf completed with success


Then instead of taking the ligand of the A chain we will take the ligand of the B chain and generate a PoseView image for it:

In [8]:
# look up the preprocessed protein and all ligands that belong to it
protein = requests.get(PROTEINS + preprocessing_job['output_protein'] + '/').json()
ligands = [requests.get(LIGANDS + ligand_id + '/').json() for ligand_id in protein['ligand_set']]
for ligand in ligands:
    if ligand['name'] == 'NXG_B_1292':  # get the ligand of the B chain
        other_ligand = ligand
        break
else:
    # without this guard a missing ligand would go unnoticed and the last
    # ligand of the list would be submitted instead
    raise ValueError("ligand 'NXG_B_1292' was not found in the preprocessed protein")

# submit the selected ligand (not the loop variable) together with the protein
query = {'protein_id': protein['id'], 'ligand_id': other_ligand['id']}
job_submission = requests.post(POSEVIEW, data=query).json()
poseview_job = poll_job(job_submission['job_id'], POSEVIEW_JOBS)
print(poseview_job['image'])
Image(url=poseview_job['image'], width=600, height=600)  # freely scalable SVG

Job 0f9168d8-355a-4aec-ad4b-3ee59d70c871 completed with success
https://proteins.plus/static/media/poseview/tmpb2atvtvv.svg


The image is nearly the same but this time with references to B chain residues.


## Make 2D depictions with a non-native ligand

Another thing we can do is generate PoseView images for ligands that are non-native to the protein. For that we will extract a ligand from the PDB entry 4AGO and put it in our preprocessed 4AGN entry.

In [9]:
# preprocess 4AGO (submitted by its PDB code instead of uploading a file)
query = {'pdb_code': '4ago'}
preprocessing_job_submission = requests.post(UPLOAD, data=query).json()
preprocessing_job = poll_job(preprocessing_job_submission['job_id'], UPLOAD_JOBS)
other_protein = requests.get(PROTEINS + preprocessing_job['output_protein'] + '/').json()
other_ligands = [requests.get(LIGANDS + ligand_id + '/').json() for ligand_id in other_protein['ligand_set']]

# submit the preprocessed 4AGN from before and a ligand from 4AGO
# NOTE(review): other_ligands[0] is simply the first ligand in 4AGO's ligand_set,
# presumably P74 -- confirm by checking its 'name' if the order matters
query = {'protein_id': protein['id'], 'ligand_id': other_ligands[0]['id']}
job_submission = requests.post(POSEVIEW, data=query).json()
poseview_job = poll_job(job_submission['job_id'], POSEVIEW_JOBS)
print(poseview_job['image'])
Image(url=poseview_job['image'], width=600, height=600)  # freely scalable SVG

Job 20ffbd14-b57e-426e-adb5-19dd3ba400a3 completed with success
Job 7f61ab04-fda3-48b5-a499-2952546f7870 completed with success
https://proteins.plus/static/media/poseview/tmp7n42goa7.svg


The above picture is of P74 from 4AGO in 4AGN. You could do similar operations with, for example, poses from a docking.