In [1]:
import sys
import numpy as np
sys.path.append('/home/vit/Projects/cryptoshow-analysis/src/utils')
import eval_utils
import cryptoshow_utils
import vis_utils

CBS_DATA_PATH = '/home/vit/Projects/cryptoshow-analysis/data/data-extraction/cryptobench-clustered-binding-sites.csv'
PREDICTIONS_PATH = '/home/vit/Projects/cryptoshow-analysis/data/visualizations/predictions'


def _to_auth(protein_id, residues):
    """Convert one residue collection of `protein_id` from mmCIF to auth numbering.

    The identifier is split after its fourth character; both parts are handed to
    `cryptoshow_utils.map_mmcif_numbering_to_auth` together with `residues`
    (presumably PDB code + chain ID -- verify against the helper).
    """
    return cryptoshow_utils.map_mmcif_numbering_to_auth(protein_id[:4], protein_id[4:], residues)


# Ground-truth binding residues. They come back in mmCIF numbering and are
# converted to auth labeling further down.
binding_residues_mmcifed, _ = eval_utils.read_test_binding_residues(data_path=CBS_DATA_PATH)
binding_residues_mmcifed = vis_utils.reformat_binding_residues(binding_residues_mmcifed)

# Predictions, restricted to the proteins that have ground truth. Every entry is
# a pair: the first element is a sequence of per-pocket residue collections, the
# second a single (smoothed) residue collection.
model_predictions_mmcifed = vis_utils.read_predictions(
    data_path=f'{PREDICTIONS_PATH}/cryptobench-with-clustering',
    protein_ids=binding_residues_mmcifed.keys(),
)
smoothed_model_predictions_mmcifed = {
    protein: np.array(smoothed)
    for protein, (_, smoothed) in model_predictions_mmcifed.items()
}
model_predictions_mmcifed = {
    protein: [np.array(pocket) for pocket in raw_pockets]
    for protein, (raw_pockets, _) in model_predictions_mmcifed.items()
}

# Re-label everything from mmCIF numbering to auth labeling.
binding_residues = {
    protein: [_to_auth(protein, pocket) for pocket in pocket_list]
    for protein, pocket_list in binding_residues_mmcifed.items()
}
smoothed_model_predictions = {
    protein: _to_auth(protein, smoothed)
    for protein, smoothed in smoothed_model_predictions_mmcifed.items()
}
model_predictions = {
    protein: [_to_auth(protein, pocket) for pocket in pocket_list]
    for protein, pocket_list in model_predictions_mmcifed.items()
}

In [None]:
import sys
import pymol
# Remember the notebook's current stdout/stderr so they can be put back once
# PyMOL is running (presumably finish_launching replaces them -- TODO confirm).
_stdouterr = sys.stdout, sys.stderr
# Start the PyMOL application; '-q' is PyMOL's quiet-launch command-line flag.
pymol.finish_launching(['/usr/bin/pymol', '-q'])
# Restore the streams saved above so later cell output goes where it did before.
sys.stdout, sys.stderr = _stdouterr

# `cmd` is the PyMOL command interface the visualization cells below drive
# (fetch, color, show, zoom). Nothing is loaded into the window by this cell.
from pymol import cmd

# Directory handed to PyMOL as its `fetch_path` by the visualization loops.
CIF_FILES = '/home/vit/Projects/deeplife-project/data/cif_files'

In [4]:
# Interactive walkthrough: for every protein that has both raw and smoothed
# predictions, show its surface in PyMOL with all raw predicted residues in blue
# and the smoothed predictions painted over them in green.
for protein_id in binding_residues.keys():
    if protein_id not in model_predictions or protein_id not in smoothed_model_predictions:
        continue
    # Merge the per-pocket residue collections into one flat collection.
    # np.concatenate raises ValueError on an empty sequence, so a protein with
    # no predicted pockets falls back to an empty array and is shown uncoloured.
    this_pockets = model_predictions[protein_id]
    if len(this_pockets) > 0:
        this_model_predictions = np.concatenate(this_pockets, axis=0)
    else:
        this_model_predictions = np.array([])
    this_smoothed_model_predictions = smoothed_model_predictions[protein_id]

    # Start from a clean PyMOL session and fetch the structure into CIF_FILES.
    cmd.reinitialize()
    cmd.set('fetch_path', cmd.exp_path(CIF_FILES), quiet=0)
    cmd.fetch(protein_id)
    cmd.zoom(protein_id)
    cmd.color('grey', protein_id)

    if len(this_model_predictions) != 0:
        cmd.color('blue', vis_utils.generate_pymol_algebra_selection(protein_id, this_model_predictions))
        # Smoothed predictions are only drawn when there are raw predictions;
        # green is applied second, so it overrides blue where the two overlap.
        if len(this_smoothed_model_predictions) > 0:
            cmd.color('green', vis_utils.generate_pymol_algebra_selection(protein_id, this_smoothed_model_predictions))

    cmd.show('surface', protein_id)
    cmd.zoom(protein_id)
    # Block until the user is done inspecting the structure in the PyMOL window.
    user_input = input(">Press Enter for the next protein (press 'q' to quit)...\n")
    if user_input.lower() == 'q':
        break

 Setting: fetch_path set to /home/vit/Projects/deeplife-project/data/cif_files.
 ExecutiveLoad-Detail: Detected mmCIF


In [None]:
# PyMOL colour names cycled through when painting individual predicted pockets.
COLORS = ['pink', 'red', 'blue', 'green', 'brown', 'forest', 'sand', 'skyblue', 'slate', 'smudge', 'splitpea', 'sulfur', 'teal', 'tv_blue', 'tv_green', 'tv_orange', 'tv_red', 'tv_yellow']

# Protein ID at which the walkthrough resumes; every protein before it is
# skipped. Set to None to start from the first protein.
START_FROM_PROTEIN_ID = '5yj2C'

# Interactive walkthrough: show each protein's surface in PyMOL with every
# predicted pocket painted in its own colour.
skip = START_FROM_PROTEIN_ID is not None
if skip and START_FROM_PROTEIN_ID not in binding_residues:
    # Without this notice the loop below would skip every protein silently.
    print(f'Start protein {START_FROM_PROTEIN_ID!r} not found in binding_residues; nothing will be shown.')

for protein_id in binding_residues.keys():
    if skip:
        if protein_id != START_FROM_PROTEIN_ID:
            continue
        # Resume point reached: show this protein and everything after it.
        skip = False
    if protein_id not in model_predictions:
        continue
    this_model_predictions = model_predictions[protein_id]

    # Start from a clean PyMOL session and fetch the structure into CIF_FILES.
    cmd.reinitialize()
    cmd.set('fetch_path', cmd.exp_path(CIF_FILES), quiet=0)
    cmd.fetch(protein_id)
    cmd.zoom(protein_id)
    cmd.color('grey', protein_id)

    for i, pocket in enumerate(this_model_predictions):
        if len(pocket) == 0:
            continue
        # Colours wrap around when a protein has more pockets than COLORS entries.
        cmd.color(COLORS[i % len(COLORS)], vis_utils.generate_pymol_algebra_selection(protein_id, pocket))

    cmd.show('surface', protein_id)
    cmd.zoom(protein_id)
    # Block until the user is done inspecting the structure in the PyMOL window.
    user_input = input(">Press Enter for the next protein (press 'q' to quit)...\n")
    if user_input.lower() == 'q':
        break