In [1]:
# imports
import io
from pathlib import Path
import requests
import sys
import time
from urllib.parse import urljoin

from IPython.display import Image
from Bio.PDB import PDBParser
import nglview as nv
from rdkit import Chem



In [2]:
# constants
TEST_FILES = Path('../test_files/')
PROTEINS_PLUS_URL = 'http://localhost:8000/'

# molecule handler endpoints, all resolved against the server root
UPLOAD, UPLOAD_JOBS, PROTEINS, LIGANDS = (
    urljoin(PROTEINS_PLUS_URL, endpoint)
    for endpoint in (
        'molecule_handler/upload/',
        'molecule_handler/upload/jobs/',
        'molecule_handler/proteins/',
        'molecule_handler/ligands/',
    )
)

# protoss endpoints, resolved the same way
PROTOSS, PROTOSS_JOBS = (
    urljoin(PROTEINS_PLUS_URL, endpoint)
    for endpoint in ('protoss/', 'protoss/jobs/')
)

In [3]:
# utils

# check server connection
try:
    # the timeout keeps the notebook from hanging indefinitely on an unresponsive host
    response = requests.get(PROTEINS_PLUS_URL, timeout=10)
except requests.ConnectionError:
    # warn on every connection failure instead of matching the text of the underlying error,
    # then re-raise so the notebook still stops here
    print('WARNING: could not establish a connection to the server', file=sys.stderr)
    raise
    
def poll_job(job_id, poll_url, poll_interval=1, max_polls=10):
    """Poll the progress of a job

    Continuously polls the server in regular intervals and updates the job information, especially the status.
    Polling stops as soon as the status is neither 'pending' nor 'running', or once max_polls requests were sent.

    :param job_id: UUID of the job to poll
    :type job_id: str
    :param poll_url: URL to send the polling request to, the job ID is appended to it verbatim
    :type poll_url: str
    :param poll_interval: time interval between polls in seconds
    :type poll_interval: int
    :param max_polls: maximum number of times to poll before exiting
    :type max_polls: int
    :return: polled job, which may still be pending or running if max_polls was reached
    :rtype: dict
    :raises requests.HTTPError: if the server answers a polling request with an error status code
    """
    def fetch_job():
        response = requests.get(poll_url + job_id)
        # fail loudly on HTTP errors instead of tripping over an error payload without a 'status' key
        response.raise_for_status()
        return response.json()

    job = fetch_job()
    status = job['status']
    current_poll = 1  # the request above already counts as a poll
    while status in ('pending', 'running'):
        print(f'Job {job_id} is { status }')
        if current_poll >= max_polls:
            # between current_poll requests only (current_poll - 1) sleeps have happened
            print(f'Job {job_id} has not completed after {current_poll} polling requests'
                  f' and {poll_interval * (current_poll - 1)} seconds')
            return job
        time.sleep(poll_interval)
        job = fetch_job()
        status = job['status']
        current_poll += 1
    print(f'Job {job_id} completed with { status }')
    return job

Protoss calculates hydrogen positions for proteins. You can find a detailed explanation of the method in this [publication](https://doi.org/10.1186/1758-2946-6-12).

The following PDB file does not have any hydrogens:

In [4]:
# parse the PDB entry straight from the test files and render it as ball+stick
pdb_parser = PDBParser()
protein_structure = pdb_parser.get_structure('4agm', TEST_FILES / '4agm.pdb')
view = nv.show_biopython(protein_structure)
view.add_representation(selection='protein', repr_type='ball+stick')
view



NGLWidget()

We can upload a file and start a Protoss job like this:

In [5]:
# upload the protein and start a protoss job; the file is opened in binary mode,
# which is what requests recommends for multipart uploads
with open(TEST_FILES / '4agm.pdb', 'rb') as upload_file:
    query = {'protein_file': upload_file}
    job_submission = requests.post(PROTOSS, files=query).json()
# wait for the job, then fetch the protein referenced by its 'output_protein' field
protoss_job = poll_job(job_submission['job_id'], PROTOSS_JOBS)
protossed_protein = requests.get(PROTEINS + protoss_job['output_protein']).json()

Job 5f821d9b-4afa-410a-aff8-103a0a23046f completed with success


As you can see, the protein now has hydrogens:

In [6]:
# the protossed protein arrives as a string, so it is parsed from an in-memory text buffer
protein_file = io.StringIO(protossed_protein['file_string'])
pdb_parser = PDBParser()
protein_structure = pdb_parser.get_structure(protossed_protein['name'], protein_file)
view = nv.show_biopython(protein_structure)
view.add_representation(selection='protein', repr_type='ball+stick')
view



NGLWidget()

Let's optimize the hydrogens for a non-native ligand. We will take the ligand NXG_A_1294 from PDB entry 4AGN and place it into 4AGM, replacing clashing ligands in the process:

In [7]:
# upload the protein together with the custom ligand; both files are opened in binary mode,
# which is what requests recommends for multipart uploads
with open(TEST_FILES / 'NXG_A_1294.sdf', 'rb') as upload_ligand_file, \
        open(TEST_FILES / '4agm.pdb', 'rb') as upload_file:
    query = {'protein_file': upload_file, 'ligand_file': upload_ligand_file}
    other_job_submission = requests.post(PROTOSS, files=query).json()
# wait for the job, then fetch the output protein and the first entry of its ligand set
other_protoss_job = poll_job(other_job_submission['job_id'], PROTOSS_JOBS)
other_protossed_protein = requests.get(PROTEINS + other_protoss_job['output_protein']).json()
other_protossed_ligand = requests.get(LIGANDS + other_protossed_protein['ligand_set'][0]).json()

Job 4f55a95e-9220-42b0-9032-1be675368748 completed with success


In [8]:
# read the ligand mol block, keeping its explicit hydrogens (removeHs=False)
ligand_structure = Chem.MolFromMolBlock(other_protossed_ligand['file_string'], removeHs=False)

# parse the protossed protein from an in-memory text buffer
other_protein_file = io.StringIO(other_protossed_protein['file_string'])
other_protein_structure = PDBParser().get_structure(other_protossed_protein['name'], other_protein_file)

view = nv.show_biopython(other_protein_structure)
# uncomment for protein hydrogens
# view.add_representation(repr_type='ball+stick', selection='protein')
ligand_component = nv.RdkitStructure(ligand_structure)
view.add_structure(ligand_component)
view



NGLWidget()

NXG_A_1294 overwrote one of the ligands in the PDB entry, but the other was kept. If you really want to completely remove all detected ligands, you can preprocess the protein first and then submit the empty protein with a custom ligand to Protoss:

In [9]:
# preprocess the protein on its own first; files are opened in binary mode,
# which is what requests recommends for multipart uploads
with open(TEST_FILES / '4agm.pdb', 'rb') as upload_file:
    query = {'protein_file': upload_file}
    preprocessing_job_submission = requests.post(UPLOAD, files=query).json()
preprocessing_job = poll_job(preprocessing_job_submission['job_id'], UPLOAD_JOBS)

# run protoss on the preprocessed protein (referenced by ID) with the custom ligand
with open(TEST_FILES / 'NXG_A_1294.sdf', 'rb') as upload_ligand_file:
    query = {'ligand_file': upload_ligand_file}
    params = {'protein_id': preprocessing_job['output_protein']}  # remember to pass params that aren't files as data
    replacing_protoss_job_submission = requests.post(PROTOSS, data=params, files=query).json()
replacing_protoss_job = poll_job(replacing_protoss_job_submission['job_id'], PROTOSS_JOBS)
# fetch the output protein and the first entry of its ligand set
replaced_protossed_protein = requests.get(PROTEINS + replacing_protoss_job['output_protein']).json()
replaced_protossed_ligand = requests.get(LIGANDS + replaced_protossed_protein['ligand_set'][0]).json()

Job f502685a-a37e-45d2-812c-de2efceb4dfb completed with success
Job 7d3b8efe-3a27-461b-b750-aee6270ba6a9 completed with success


In [10]:
# read the ligand mol block, keeping its explicit hydrogens (removeHs=False)
replaced_ligand_structure = Chem.MolFromMolBlock(replaced_protossed_ligand['file_string'], removeHs=False)

# parse the protossed protein from an in-memory text buffer
replaced_protein_file = io.StringIO(replaced_protossed_protein['file_string'])
replaced_protein_structure = PDBParser().get_structure(replaced_protossed_protein['name'], replaced_protein_file)

view = nv.show_biopython(replaced_protein_structure)
# uncomment for protein hydrogens
# view.add_representation(repr_type='ball+stick', selection='protein')
replaced_ligand_component = nv.RdkitStructure(replaced_ligand_structure)
view.add_structure(replaced_ligand_component)
view



NGLWidget()