In [1]:
from prody import *
import numpy as np
import matplotlib.pyplot as plt
import os
import fnmatch

In [2]:
def new_dihedral(p):
    """Signed torsion angle (degrees) defined by four points.

    Praxeolitic formula
    1 sqrt, 1 cross product
    https://stackoverflow.com/questions/20305272/dihedral-torsion-angle-from-four-points-in-cartesian-coordinates-in-python

    Parameters
    ----------
    p : sequence of four points
        ``(p0, p1, p2, p3)``. Each point may be a NumPy array or any
        array-like of three coordinates (list, tuple, ...); every point is
        converted to a float array before use.

    Returns
    -------
    float
        Angle in degrees in the range [-180, 180].

    Notes
    -----
    The first bond vector is taken as ``p1 - p0``. With this orientation a
    planar *cis* arrangement (p0 and p3 on the same side of the p1-p2 axis)
    evaluates to +/-180 and a planar *trans* arrangement to 0. Callers that
    want the cis == 0 convention must shift the result by 180 degrees and
    wrap it back into range.

    If ``p1`` and ``p2`` coincide, the central bond has zero length, the
    normalisation divides by zero and the result is NaN.
    """
    # Coerce each point so plain lists/tuples work as well as ndarrays.
    p0, p1, p2, p3 = (np.asarray(point, dtype=float) for point in p)

    b0 = p1 - p0
    b1 = p2 - p1
    b2 = p3 - p2

    # Unit vector along the central bond; projections below rely on it.
    b1 = b1 / np.linalg.norm(b1)

    # v, w: components of the outer bonds perpendicular to the central bond.
    v = b0 - np.dot(b0, b1) * b1
    w = b2 - np.dot(b2, b1) * b1

    # The angle between v and w in the plane perpendicular to b1 is the torsion.
    x = np.dot(v, w)
    y = np.dot(np.cross(b1, v), w)
    return np.degrees(np.arctan2(y, x))


def process_pdb_only(pdb_file):
    """Compute a pseudo-dihedral and a CA-CA distance for every model in a PDB file.

    For each coordinate set (one per MODEL in a multi-model file) four points
    are built from C-alpha atoms:

    1. centre of the CA atoms of residues 5-68,
    2. CA of residue 69,
    3. CA of residue 91,
    4. centre of the CA atoms of residues 92-147.

    The dihedral over points 1-2-3-4 is obtained from ``new_dihedral``,
    shifted by 180 degrees and wrapped back into (-180, 180]. The distance is
    measured between points 2 and 3. Frames matching the exclusion rule in the
    loop below are dropped; the rest are written to
    ``<basename>_distance.dat`` and ``<basename>_dihedral.dat`` in the current
    working directory, and a one-line summary is printed.

    Parameters
    ----------
    pdb_file : str
        Path of the PDB file, passed to ``parsePDB``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the file yields no structure or any of the four selections is empty.
    """
    pdb = parsePDB(pdb_file)
    # Guard against a parser result of None (assumed to mean nothing could be
    # parsed) so the failure is reported clearly instead of as an
    # AttributeError on the next line.
    if pdb is None:
        raise ValueError(f"Could not parse any atoms from {pdb_file}.")

    # Atom selections (do once)
    sel_1 = pdb.select('calpha and resnum 5 to 68')
    sel_2 = pdb.select('calpha and resnum 92 to 147')
    sel_3 = pdb.select('calpha and resnum 69')
    sel_4 = pdb.select('calpha and resnum 91')

    if any(s is None for s in [sel_1, sel_2, sel_3, sel_4]):
        raise ValueError(
            f"Selection failed in {pdb_file}. Check residue numbering and that CA atoms exist."
        )

    # Weights do not depend on the coordinate set, so fetch them once.
    weights_1 = sel_1.getMasses()
    weights_2 = sel_2.getMasses()

    distances = []
    dihedrals = []

    # Loop over all coordinate sets:
    # - single PDB: typically 1 coordset
    # - NMR PDB: multiple coordsets (one per MODEL)
    n_sets = pdb.numCoordsets()
    for i in range(n_sets):
        # A selection keeps its own active-coordinate-set index, fixed when it
        # is created; changing the index on the parent AtomGroup does not
        # change what sel.getCoords() returns. Ask for coordinate set i
        # explicitly so every model is actually evaluated.
        coord_1 = calcCenter(sel_1.getCoordsets(i), weights=weights_1)
        coord_4 = calcCenter(sel_2.getCoordsets(i), weights=weights_2)
        coord_2 = sel_3.getCoordsets(i)[0]
        coord_3 = sel_4.getCoordsets(i)[0]

        # Shift by 180 degrees and wrap back into (-180, 180].
        dihedral = new_dihedral([coord_1, coord_2, coord_3, coord_4]) + 180
        if dihedral > 180:
            dihedral -= 360

        distance = calcDistance(coord_2, coord_3)

        # Exclusion rule: drop frames whose dihedral is within 5 degrees of
        # +/-180 while the distance is below 10.75 or above 49.75.
        if not ((dihedral > 175 or dihedral < -175) and (distance < 10.75 or distance > 49.75)):
            dihedrals.append(dihedral)
            distances.append(distance)

    base_name = os.path.splitext(os.path.basename(pdb_file))[0]
    np.savetxt(f"{base_name}_distance.dat", distances)
    np.savetxt(f"{base_name}_dihedral.dat", dihedrals)
    print(f"{pdb_file}: wrote {len(distances)} entries (from {n_sets} MODEL(s)/coordset(s))")


# Batch run: analyse every *.pdb file found in the current working directory
# (multi-model NMR files are handled model by model).
pdb_files = [entry for entry in os.listdir() if fnmatch.fnmatch(entry, "*.pdb")]
for pdb_file in pdb_files:
    process_pdb_only(pdb_file)


@> 1348 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 2267 atoms and 20 coordinate set(s) were parsed in 0.14s.


1prw.pdb: wrote 1 entries (from 1 MODEL(s)/coordset(s))
6y95.pdb: wrote 20 entries (from 20 MODEL(s)/coordset(s))


@> 2352 atoms and 30 coordinate set(s) were parsed in 0.12s.


1mux.pdb: wrote 30 entries (from 30 MODEL(s)/coordset(s))


@> 2263 atoms and 160 coordinate set(s) were parsed in 0.54s.


2k0e.pdb: wrote 160 entries (from 160 MODEL(s)/coordset(s))
