# Generate PyMOL visualizations
For each structure in both benchmarks, generate a PyMOL script (`.pml`) that visualizes the predicted pockets.

## CryptoBench test set
Generate visualizations for the CBS benchmark dataset - CryptoBench test set.

In [None]:
import sys
import numpy as np
sys.path.append('/home/vit/Projects/cryptoshow-analysis/src/utils')
import eval_utils
import cryptoshow_utils
import vis_utils

CBS_DATA_PATH = '/home/vit/Projects/cryptoshow-analysis/data/data-extraction/cryptobench-clustered-binding-sites.csv'
PREDICTIONS_PATH = '/home/vit/Projects/cryptoshow-analysis/data/visualizations/predictions'


def _to_auth_numbering(pockets_by_protein):
    """Remap every pocket of every protein from mmCIF numbering to auth labeling.

    Returns a new dict with the same keys; each value is the list of pockets
    after passing them through ``cryptoshow_utils.map_mmcif_numbering_to_auth``.
    """
    remapped = {}
    for protein_id, pockets in pockets_by_protein.items():
        # presumably the first four characters are the PDB code and the
        # remainder is the chain identifier -- verify against cryptoshow_utils
        head, tail = protein_id[:4], protein_id[4:]
        remapped[protein_id] = [
            cryptoshow_utils.map_mmcif_numbering_to_auth(head, tail, pocket)
            for pocket in pockets
        ]
    return remapped


# Ground-truth binding residues: loaded with mmCIF numbering, remapped to
# auth labeling further below.
binding_residues_mmcifed, _ = eval_utils.read_test_binding_residues(data_path=CBS_DATA_PATH)
binding_residues_mmcifed = vis_utils.reformat_binding_residues(binding_residues_mmcifed)

# Model predictions, restricted to the proteins that have ground truth.
model_predictions_mmcifed = vis_utils.read_predictions(
    data_path=f'{PREDICTIONS_PATH}/cryptobench-with-clustering',
    protein_ids=binding_residues_mmcifed.keys(),
)
# Keep only the first element of each (predictions, <ignored>) pair and turn
# every predicted pocket into a numpy array.
model_predictions_mmcifed = {
    pid: [np.array(i) for i in preds]
    for pid, (preds, _) in model_predictions_mmcifed.items()
}

# Map both ground truth and predictions to auth labeling.
binding_residues = _to_auth_numbering(binding_residues_mmcifed)
model_predictions = _to_auth_numbering(model_predictions_mmcifed)

In [3]:
# Color names cycled through when coloring pockets in the generated scripts.
COLORS = [
    'pink', 'red', 'blue', 'green', 'brown', 'forest',
    'sand', 'skyblue', 'slate', 'smudge', 'splitpea', 'sulfur',
    'teal', 'tv_blue', 'tv_green', 'tv_orange', 'tv_red', 'tv_yellow',
]

# Directory that receives one .pml script per protein.
PYMOL_DATA_PATH = '/home/vit/Projects/cryptoshow-analysis/data/visualizations/pymol-scripts/cryptobench'

def generate_pymol_scripts():
    """Write one PyMOL script (``<protein_id>.pml``) per protein into PYMOL_DATA_PATH.

    Iterates over the proteins in the module-level ``binding_residues`` and
    skips any protein that has no entry in ``model_predictions``. Each script
    contains, in order: ``reinitialize``, ``fetch``, ``zoom``, a grey base
    color, one ``color`` command per non-empty predicted pocket, ``show
    surface`` and a final ``zoom``.

    The output directory is created if it does not exist yet; existing
    scripts with the same name are overwritten.
    """
    import os

    # open(..., 'w') does not create missing parent directories, so make sure
    # the target directory exists before writing the first script.
    os.makedirs(PYMOL_DATA_PATH, exist_ok=True)

    for protein_id in binding_residues:
        if protein_id not in model_predictions:
            continue
        with open(f'{PYMOL_DATA_PATH}/{protein_id}.pml', 'w') as f:
            f.write('reinitialize\n')
            f.write(f'fetch {protein_id}\n')
            f.write(f'zoom {protein_id}\n')
            f.write(f'color grey, {protein_id}\n')
            for i, pocket in enumerate(model_predictions[protein_id]):
                # len() rather than truthiness: pockets may be numpy arrays,
                # whose truth value is ambiguous.
                if len(pocket) == 0:
                    continue
                selection = vis_utils.generate_pymol_algebra_selection(protein_id, pocket)
                # The color follows the pocket's position in the prediction
                # list (empty pockets still consume an index) and wraps
                # around once COLORS is exhausted.
                f.write(f'color {COLORS[i % len(COLORS)]}, {selection}\n')
            f.write(f'show surface, {protein_id}\n')
            f.write(f'zoom {protein_id}\n')

# Write the CryptoBench .pml scripts to PYMOL_DATA_PATH.
generate_pymol_scripts()

## LIGYSIS
Generate visualizations for the GBS benchmark dataset - LIGYSIS.

In [8]:
import sys
import numpy as np
sys.path.append('/home/vit/Projects/cryptoshow-analysis/src/utils')
import eval_utils
import cryptoshow_utils
import vis_utils

# NOTE(review): the name CBS_DATA_PATH is reused here although the file it
# points to is the LIGYSIS dataset.
CBS_DATA_PATH = '/home/vit/Projects/cryptoshow-analysis/data/data-extraction/ligysis_for_pocket_level_evaluation.csv'
PREDICTIONS_PATH = '/home/vit/Projects/cryptoshow-analysis/data/visualizations/predictions'


def _to_auth_numbering(pockets_by_protein):
    """Remap every pocket of every protein from mmCIF numbering to auth labeling.

    Returns a new dict with the same keys; each value is the list of pockets
    after passing them through ``cryptoshow_utils.map_mmcif_numbering_to_auth``.
    """
    remapped = {}
    for protein_id, pockets in pockets_by_protein.items():
        # presumably the first four characters are the PDB code and the
        # remainder is the chain identifier -- verify against cryptoshow_utils
        head, tail = protein_id[:4], protein_id[4:]
        remapped[protein_id] = [
            cryptoshow_utils.map_mmcif_numbering_to_auth(head, tail, pocket)
            for pocket in pockets
        ]
    return remapped


# Ground-truth binding residues (only the 'NON_CRYPTIC' pocket type is
# requested): loaded with mmCIF numbering, remapped to auth labeling below.
binding_residues_mmcifed, _ = eval_utils.read_test_binding_residues(
    data_path=CBS_DATA_PATH,
    pocket_types=['NON_CRYPTIC'],
)
binding_residues_mmcifed = vis_utils.reformat_binding_residues(binding_residues_mmcifed)

# Model predictions, restricted to the proteins that have ground truth.
model_predictions_mmcifed = vis_utils.read_predictions(
    data_path=f'{PREDICTIONS_PATH}/ligysis',
    protein_ids=binding_residues_mmcifed.keys(),
)
# Keep only the first element of each (predictions, <ignored>) pair and turn
# every predicted pocket into a numpy array.
model_predictions_mmcifed = {
    pid: [np.array(i) for i in preds]
    for pid, (preds, _) in model_predictions_mmcifed.items()
}

# Map both ground truth and predictions to auth labeling.
binding_residues = _to_auth_numbering(binding_residues_mmcifed)
model_predictions = _to_auth_numbering(model_predictions_mmcifed)

In [9]:
# Color names cycled through when coloring pockets in the generated scripts.
COLORS = [
    'pink', 'red', 'blue', 'green', 'brown', 'forest',
    'sand', 'skyblue', 'slate', 'smudge', 'splitpea', 'sulfur',
    'teal', 'tv_blue', 'tv_green', 'tv_orange', 'tv_red', 'tv_yellow',
]

# Directory that receives one .pml script per protein.
PYMOL_DATA_PATH = '/home/vit/Projects/cryptoshow-analysis/data/visualizations/pymol-scripts/ligysis'

def generate_pymol_scripts():
    """Write one PyMOL script (``<protein_id>.pml``) per protein into PYMOL_DATA_PATH.

    Iterates over the proteins in the module-level ``binding_residues`` and
    skips any protein that has no entry in ``model_predictions``. Each script
    contains, in order: ``reinitialize``, ``fetch``, ``zoom``, a grey base
    color, one ``color`` command per non-empty predicted pocket, ``show
    surface`` and a final ``zoom``.

    The output directory is created if it does not exist yet; existing
    scripts with the same name are overwritten.
    """
    import os

    # open(..., 'w') does not create missing parent directories, so make sure
    # the target directory exists before writing the first script.
    os.makedirs(PYMOL_DATA_PATH, exist_ok=True)

    for protein_id in binding_residues:
        if protein_id not in model_predictions:
            continue
        with open(f'{PYMOL_DATA_PATH}/{protein_id}.pml', 'w') as f:
            f.write('reinitialize\n')
            f.write(f'fetch {protein_id}\n')
            f.write(f'zoom {protein_id}\n')
            f.write(f'color grey, {protein_id}\n')
            for i, pocket in enumerate(model_predictions[protein_id]):
                # len() rather than truthiness: pockets may be numpy arrays,
                # whose truth value is ambiguous.
                if len(pocket) == 0:
                    continue
                selection = vis_utils.generate_pymol_algebra_selection(protein_id, pocket)
                # The color follows the pocket's position in the prediction
                # list (empty pockets still consume an index) and wraps
                # around once COLORS is exhausted.
                f.write(f'color {COLORS[i % len(COLORS)]}, {selection}\n')
            f.write(f'show surface, {protein_id}\n')
            f.write(f'zoom {protein_id}\n')

# Write the LIGYSIS .pml scripts to PYMOL_DATA_PATH.
generate_pymol_scripts()