In [15]:
import io
import prody as pr
from pathlib import Path

from pprint import pprint
from pdbecif.mmcif_io import CifFileReader
from collections import namedtuple

import sys
import subprocess
sys.path.append('LASErMPNN')
from utils.burial_calc import calc_residue_burial


# Structure to analyse. Exactly one `input_path` assignment is active; the
# commented-out lines are alternative inputs that can be swapped in by hand.
# input_path = Path('/nfs/polizzi/bfry/programs/NISE/design_campaign_adn/pose_03/input_backbones/seq_0028_model_0.pdb')
# input_path = Path('/nfs/polizzi/bfry/programs/CARPdock/apixaban_ntf2_screen/nise_round1_apixaban_ntf2_top20/seq_0543_model_0_rank_04.pdb')
# input_path = Path('/nfs/polizzi/bfry/programs/CARPdock/apixaban_ntf2_screen/nise_round1_apixaban_ntf2_top20/seq_1073_model_0_rank_05.pdb')
# input_path = Path('/nfs/polizzi/bfry/programs/NISE/design_campaign_exa_ntf2/pose_00/input_backbones/seq_1060_model_0_rank_01.pdb')
# input_path = Path('/nfs/polizzi/bfry/programs/NISE/design_campaign_exa_ntf2/pose_01/input_backbones/seq_1081_model_0_rank_02.pdb')
# input_path = Path('/nfs/polizzi/bfry/programs/NISE/design_campaign_exa_ntf2/pose_02/input_backbones/seq_1157_model_0_rank_03.pdb')
# input_path = Path('/nfs/polizzi/bfry/programs/NISE/design_campaign_apx_ntf2/pose_03/input_backbones/seq_0898_model_0_rank_01.pdb')
# input_path = Path('/nfs/polizzi/bfry/programs/NISE/design_campaign_apx_ntf2/pose_04/input_backbones/seq_0935_model_0_rank_04.pdb')
input_path = Path('/nfs/polizzi/bfry/programs/NISE/design_campaign_exa_ntf2/r2_rf3_pose_02/input_backbones/02_9735_model_0.pdb')

### Run DSSP with `mkdssp`

In [16]:
# Build a minimal PDB for mkdssp: a fixed HEADER record followed by only the
# ATOM/HETATM coordinate records of the input structure (all other record
# types are dropped).
# NOTE(review): the 'XXXX' ID code in the header is presumably what names the
# data block read back as data['XXXX'] in the next cell -- keep them in sync.
output_lines = ['HEADER    LASErMPNN GENERATED STRUCTURE           13-MAY-25   XXXX              ']
with input_path.open('r') as file:
    # Iterate the handle directly instead of materialising every line first.
    for line in file:
        if line.startswith(('ATOM', 'HETATM')):
            output_lines.append(line.strip())

final_stream = io.StringIO('\n'.join(output_lines))

with Path('dssp_input.pdb').open('w') as f:
    f.write(final_stream.getvalue())

# Run mkdssp without a shell and capture its stdout in dssp_output.cif.
# check=True raises CalledProcessError on a non-zero exit status (and a missing
# binary raises FileNotFoundError), so a failed DSSP run stops the notebook here
# instead of leaving a truncated CIF for the following cell to choke on.
with Path('dssp_output.cif').open('w') as dssp_stdout:
    subprocess.run(
        ['mkdssp', 'dssp_input.pdb', '--calculate-accessibility'],
        stdout=dssp_stdout,
        check=True,
    )

# Parse the same filtered file that DSSP saw.
protein = pr.parsePDB('dssp_input.pdb')

### Create a mask tracking which protein residues have secondary structure.

In [17]:
# Load the DSSP mmCIF output and collect the (chain, sequence id) pairs of all
# residues whose DSSP secondary-structure code is one of H, G, I or E.
reader = CifFileReader()
data = reader.read('./dssp_output.cif')
dssp_output_data = data['XXXX']['_dssp_struct_summary']

# One named-tuple field per column of the `_dssp_struct_summary` category.
DSSP_OUT = namedtuple('DSSP_OUT', tuple(dssp_output_data))
STRUCTURED_CODES = ('H', 'G', 'I', 'E')

constrain_ala_set = set()
# The category is stored column-wise; zip(*) turns it into one row per residue.
for row in zip(*dssp_output_data.values()):
    dssp_out = DSSP_OUT._make(row)

    # skip residues without one of the accepted secondary-structure codes
    if dssp_out.secondary_structure not in STRUCTURED_CODES:
        continue

    # NOTE(review): these are DSSP's label_asym_id / label_seq_id; the mask cell
    # compares them with ProDy chain IDs and residue numbers -- confirm the two
    # numbering schemes agree for this input.
    residue_key = (dssp_out.label_asym_id, int(dssp_out.label_seq_id))
    constrain_ala_set.add(residue_key)

### Create a mask tracking which protein residues are buried in a convex hull.

In [18]:
# Make the local LASErMPNN checkout importable. This repeats the path setup and
# import already performed in the first cell of the notebook.
sys.path.append('./LASErMPNN')
from utils.burial_calc import calc_residue_burial

# Compute burial for the ORIGINAL input file (not the filtered dssp_input.pdb).
# NOTE(review): `cb_buried` is indexed per residue and bitwise-inverted with `~`
# in the next cell, so it is presumably a boolean array with one entry per
# protein residue -- confirm against calc_residue_burial.
cb_buried, lig_buried, pr_protein = calc_residue_burial(input_path, num_rays=100)

In [19]:
# Flag residues that have DSSP secondary structure AND are not buried by writing
# 1 into the beta column of every atom of the residue; all other atoms get 0.
labels = []

# One boolean per protein C-alpha atom: True when its (chain, resnum) pair was
# collected in `constrain_ala_set`.
mask = [x in constrain_ala_set for x in zip(protein.ca.getChids(), protein.ca.getResnums())]
protein.setBetas(0.0)

# `idx` counts only residues that carry a full backbone (CA, C, N). It is the
# index into `mask`; enumerating ALL residues (ligands/HETATM included) would
# shift the lookup for every protein residue that follows a non-protein one.
# NOTE(review): `cb_buried` is assumed to be indexed the same way (one entry per
# backbone-bearing residue) -- confirm against calc_residue_burial.
idx = 0
for residue in protein.iterResidues():
    atom_names = residue.getNames()
    if all(x in atom_names for x in ('CA', 'C', 'N')):
        # secondary structure present and NOT buried -> 1, otherwise 0
        residue.setBetas(mask[idx] * (~cb_buried[idx]))
        labels.append(f'{residue}')
        idx += 1

In [20]:
# Gather the residue numbers of every residue whose C-alpha atom carries a
# non-zero beta value, then join them into a 'resnum A or resnum B ...' string.
# Only residue numbers are emitted; chain IDs are not part of the string.
constrained_residues = []
for residue in protein.iterResidues():
    ca_atoms = residue.ca
    if ca_atoms is None:
        # residue has no C-alpha atom
        continue
    if not ca_atoms.getBetas()[0]:
        # beta of 0 means the residue was not flagged
        continue
    constrained_residues.append(ca_atoms.getResnums()[0])

constraint_string = ' or '.join(f'resnum {x}' for x in constrained_residues)
constraint_string


'resnum 3 or resnum 4 or resnum 5 or resnum 7 or resnum 8 or resnum 11 or resnum 12 or resnum 15 or resnum 18 or resnum 19 or resnum 23 or resnum 24 or resnum 27 or resnum 28 or resnum 32 or resnum 33 or resnum 35 or resnum 37 or resnum 43 or resnum 48 or resnum 49 or resnum 52 or resnum 55 or resnum 56 or resnum 63 or resnum 65 or resnum 66 or resnum 67 or resnum 68 or resnum 70 or resnum 72 or resnum 75 or resnum 77 or resnum 79 or resnum 81 or resnum 83 or resnum 85 or resnum 86 or resnum 89 or resnum 90 or resnum 91 or resnum 92 or resnum 94 or resnum 96 or resnum 98 or resnum 100 or resnum 102 or resnum 105 or resnum 107 or resnum 108 or resnum 110 or resnum 112 or resnum 113 or resnum 114'

In [21]:
# Write `protein` to dssp_and_burial_masked.pdb; the call's return value (shown
# below as the cell output) is the output filename.
# NOTE(review): the beta column is expected to carry the 0/1 flags assigned with
# setBetas earlier -- confirm by inspecting the written file.
pr.writePDB('dssp_and_burial_masked.pdb', protein)

'dssp_and_burial_masked.pdb'