In [7]:
import io
import prody as pr
from pathlib import Path

from pprint import pprint
from pdbecif.mmcif_io import CifFileReader
from collections import namedtuple

import sys
import subprocess
sys.path.append('LASErMPNN')
from utils.burial_calc import calc_residue_burial


# Structure to annotate. It is read twice below: once to build the mkdssp input
# file and once by calc_residue_burial.
input_path = Path('./example_pdbs/16_pose26_en_-5p044_no_CG_top1_of_1_n4_00374_looped_master_6_gly_0001_trim_H_98.pdb')

### Run DSSP with `mkdssp`

In [8]:
# Build a minimal PDB for mkdssp: a synthetic HEADER record followed by only the
# ATOM/HETATM records of the input structure.
# NOTE(review): the 'XXXX' ID code in this HEADER looks like the source of the
# data-block name that is read back as data['XXXX'] from dssp_output.cif — keep
# the two in sync if either changes.
output_lines = ['HEADER    LASErMPNN GENERATED STRUCTURE           13-MAY-25   XXXX              ']
with input_path.open('r') as file:
    # Iterate the handle directly; there is no need to load every line up front.
    for line in file:
        if line.startswith(('ATOM', 'HETATM')):
            output_lines.append(line.strip())

final_stream = io.StringIO('\n'.join(output_lines))

with Path('dssp_input.pdb').open('w') as f:
    f.write(final_stream.getvalue())

# Run mkdssp without a shell and send its stdout to dssp_output.cif ourselves.
# check=True raises CalledProcessError on a non-zero exit status, so a failed
# DSSP run stops here instead of leaving an empty/partial CIF for later cells.
with Path('dssp_output.cif').open('w') as cif_handle:
    subprocess.run(
        ['mkdssp', 'dssp_input.pdb', '--calculate-accessibility'],
        stdout=cif_handle,
        check=True,
    )

# Parse the same filtered file that mkdssp saw; later cells annotate this object.
protein = pr.parsePDB('dssp_input.pdb')

### Create a mask tracking which protein residues have secondary structure.

In [9]:
# Load the mmCIF written by mkdssp and pull out the per-residue summary table
# stored under the 'XXXX' data block.
reader = CifFileReader()
data = reader.read('./dssp_output.cif')
dssp_output_data = data['XXXX']['_dssp_struct_summary']

# One namedtuple field per column of the summary table, so each row can be
# accessed by column name.
DSSP_OUT = namedtuple('DSSP_OUT', list(dssp_output_data.keys()))

# DSSP codes that count as "has secondary structure" for this mask.
STRUCTURED_CODES = ('H', 'G', 'I', 'E')

# Set of (label_asym_id, label_seq_id) keys for every residue whose DSSP
# assignment is one of the structured codes.
constrain_ala_set = {
    (row.label_asym_id, int(row.label_seq_id))
    for row in map(DSSP_OUT._make, zip(*dssp_output_data.values()))
    if row.secondary_structure in STRUCTURED_CODES
}

### Create a mask tracking which protein residues are buried in a convex hull.

In [11]:
# calc_residue_burial is imported in the notebook's first cell, right after
# 'LASErMPNN' is appended to sys.path there. It is deliberately not re-imported
# here: repeating sys.path.append would add another entry on every re-run.
#
# cb_buried is indexed per residue and inverted with ~ in the next cell;
# lig_buried and pr_protein are not used by the cells that follow.
cb_buried, lig_buried, pr_protein = calc_residue_burial(input_path, num_rays=100)

In [12]:
# Atom names a residue must contain to receive a mask value.
BACKBONE_ATOMS = ('CA', 'C', 'N')

# mask has one entry per CA atom in protein.ca: True when that residue's
# (chain ID, residue number) was flagged as having secondary structure.
# NOTE(review): this compares ProDy chain IDs / residue numbers against DSSP's
# label_asym_id / label_seq_id — confirm the two numbering schemes agree.
mask = [x in constrain_ala_set for x in zip(protein.ca.getChids(), protein.ca.getResnums())]

# Start with every atom at 0.0, then write the mask value onto each residue.
protein.setBetas(0.0)

labels = []
idx = 0  # position among backbone-bearing residues only
for residue in protein.iterResidues():
    atom_names = residue.getNames()
    if not all(name in atom_names for name in BACKBONE_ATOMS):
        # Residues without a full backbone keep beta 0.0 and must not advance
        # the index used for the per-residue arrays.
        continue
    # Index with idx rather than the position among *all* residues: mask is
    # built from CA atoms only, so counting skipped residues would shift or
    # overrun the lookup.
    # NOTE(review): assumes cb_buried uses the same per-residue ordering as
    # mask, as the original shared index implied — confirm.
    # The value is truthy only when the residue has secondary structure AND is
    # not flagged in cb_buried.
    residue.setBetas(mask[idx] * (~cb_buried[idx]))
    labels.append(f'{residue}')
    idx += 1

In [13]:
# Gather the residue number of every residue whose CA atom carries a non-zero
# beta value. Residues without a CA atom are skipped.
# Only residue numbers are collected; chain IDs are not part of the result.
constrained_residues = [
    ca_atoms.getResnums()[0]
    for ca_atoms in (res.ca for res in protein.iterResidues())
    if ca_atoms is not None and ca_atoms.getBetas()[0]
]

# Join the numbers into a single "resnum A or resnum B ..." expression and
# leave it as the cell's displayed value.
constraint_string = ' or '.join(f'resnum {resnum}' for resnum in constrained_residues)
constraint_string


'resnum 2 or resnum 5 or resnum 6 or resnum 8 or resnum 9 or resnum 12 or resnum 13 or resnum 15 or resnum 16 or resnum 19 or resnum 20 or resnum 22 or resnum 23 or resnum 26 or resnum 27 or resnum 29 or resnum 30 or resnum 33 or resnum 34 or resnum 37 or resnum 38 or resnum 39 or resnum 41 or resnum 42 or resnum 45 or resnum 46 or resnum 48 or resnum 49 or resnum 52 or resnum 53 or resnum 55 or resnum 56 or resnum 59 or resnum 60 or resnum 62 or resnum 63 or resnum 66 or resnum 67 or resnum 69 or resnum 70 or resnum 78 or resnum 79 or resnum 81 or resnum 82 or resnum 84 or resnum 85 or resnum 88 or resnum 89 or resnum 91 or resnum 92 or resnum 95 or resnum 96 or resnum 98 or resnum 99 or resnum 102 or resnum 103 or resnum 106 or resnum 109 or resnum 110 or resnum 115 or resnum 116 or resnum 118 or resnum 119 or resnum 122 or resnum 123 or resnum 125 or resnum 126 or resnum 129 or resnum 130 or resnum 132 or resnum 133 or resnum 136 or resnum 137 or resnum 139 or resnum 140 or resnum 1

In [14]:
# Save the annotated structure; the call's return value (the output filename)
# is what the cell displays.
pr.writePDB('dssp_and_burial_masked.pdb', protein)

'dssp_and_burial_masked.pdb'