In [None]:
from proteusPy import (
    DisulfideList,
    Disulfide,
    Load_PDB_SS,
    load_disulfides_from_id,
    prune_extra_ss,
    check_header_from_file,
)

from proteusPy.logger_config import get_logger
from proteusPy.ssparser import (
    extract_ssbonds_and_atoms,
    print_disulfide_bond_info_dict,
    get_phipsi_atoms_coordinates,
)
import numpy as np


_logger = get_logger("testing")

In [None]:
# Load the disulfide database through Load_PDB_SS (verbose output on, subset=False)
# and call describe() on the resulting loader. `pdb` is reused by later cells.
pdb = Load_PDB_SS(verbose=True, subset=False)
pdb.describe()

In [None]:
"""
        Return a list of Disulfides within the angular cutoff in the others list.
        This routine is used to find Disulfides having the same torsion length
        within the others list. This is used to find families of Disulfides with
        similar conformations. Assumes self is properly initialized.

        *NB* The routine will not distinguish between +/-
        dihedral angles. *i.e.* [-60, -60, -90, -60, -60] would have the same
        torsion length as [60, 60, 90, 60, 60], two clearly different structures.

        :param others: ```DisulfideList``` to search
        :param cutoff: Dihedral angle degree cutoff
        :return: DisulfideList within the cutoff

        Example:
        In this example we load the disulfide database subset, find the disulfides with
        the lowest and highest energies, and then find the nearest conformational neighbors.
        Finally, we display the neighbors overlaid against a common reference frame.

        >>> from proteusPy import Load_PDB_SS, DisulfideList, Disulfide
        >>> PDB_SS = Load_PDB_SS(verbose=False, subset=True)
        >>> ss_list = DisulfideList([], 'tmp')

        We point to the complete list to search for lowest and highest energies.
        >>> sslist = PDB_SS.SSList
        >>> ssmin_enrg, ssmax_enrg = PDB_SS.SSList.minmax_energy

        Make an empty list and find the nearest neighbors within 10 degrees avg RMS in
        sidechain dihedral angle space.

        >>> low_energy_neighbors = DisulfideList([],'Neighbors')
        >>> low_energy_neighbors = ssmin_enrg.Torsion_neighbors(sslist, 10)

        Display the number found, and then display them overlaid onto their common reference frame.

        >>> tot = low_energy_neighbors.length
        >>> print(f'Neighbors: {tot}')
        Neighbors: 2
        >>> low_energy_neighbors.display_overlay()

"""

# Disulfides at the two ends of the energy range of the loaded list.
# (No placeholder Disulfide() objects are needed: both names are bound here.)
ssmin_enrg, ssmax_enrg = pdb.SSList.minmax_energy
print(f"Min energy: {ssmin_enrg}")

# Neighbors of the lowest-energy disulfide, using a cutoff of 10. The search is
# run once; the earlier duplicate call discarded its result.
sslist = pdb.SSList
low_energy_neighbors = ssmin_enrg.Torsion_neighbors(sslist, 10)
tot = low_energy_neighbors.length

In [None]:
# Index the loader with integer 0; as the cell's last expression the result is displayed.
pdb[0]

In [None]:
# Create a default-constructed Disulfide and read an attribute from it.
# NOTE(review): `Tor` looks like an incomplete attribute name left over from
# exploration — confirm it exists on Disulfide or remove this cell.
ss1 = Disulfide()
ss1.Tor

In [None]:
# Example usage: parse one PDB file, look up the "proximal-1" phi/psi atoms for
# chain "A", then print the whole bond dictionary.
# NOTE(review): within the visible notebook `get_phipsi_atoms` is only defined in
# a later cell, so this cell raises NameError on Restart & Run All — confirm the
# intended cell order.
# NOTE(review): the input path is absolute and machine-specific.
ssbond_dict, num_ssbonds, errors = extract_ssbonds_and_atoms(
    "/Users/egs/PDB/good/pdb6f99.ent"
)

chain_id = "A"
key = "proximal-1"
phipsi_atoms = get_phipsi_atoms(ssbond_dict, chain_id, key)
print(phipsi_atoms)
print_disulfide_bond_info_dict(ssbond_dict)

In [None]:
def get_phipsi_atoms(data_dict, chain_id, key):
    """
    Look up one phi/psi atom group in the parsed SSBOND dictionary.

    The entries under ``"pairs"`` are scanned in order. The first entry whose
    ``"chains"`` contains ``chain_id`` and whose ``"phipsi"`` mapping contains
    ``key`` supplies the result.

    :param data_dict: Dictionary containing SSBOND and ATOM records.
    :param chain_id: Chain ID to look for.
    :param key: Key in the form "proximal+1", "distal-1", etc.
    :return: The value stored under ``key`` for the first matching pair, or
        None when no pair matches.
    """
    for entry in data_dict.get("pairs", []):
        # Skip pairs that do not involve the requested chain.
        if chain_id not in entry.get("chains", []):
            continue
        neighbors = entry.get("phipsi", {})
        if key in neighbors:
            return neighbors[key]
    return None


# Example usage:
# data_dict = { ... }  # Your input dictionary
# chain_id = "A"
# key = "proximal+1"
# phipsi_atoms = get_phipsi_atoms(data_dict, chain_id, key)
# print(phipsi_atoms)

In [None]:
import os


def print_disulfide_bond_info_dict(ssbond_atom_data):
    """
    Print the disulfide bond information in a readable format.

    For every entry in ``"pairs"`` the function prints a header line, the
    N, CA, C, O, CB and SG coordinates of the proximal and the distal residue
    ("Not found" for atoms missing from ``"atoms"``), and the phi/psi neighbor
    atoms stored under ``"phipsi"``.

    Args:
    - ssbond_atom_data (dict): A dictionary containing the SSBOND records and the corresponding ATOM records.
          Passing None prints "No disulfide bonds found." and returns. The dictionary
          has the following structure:
            {
                "ssbonds": list of SSBOND records (str),
                "atoms": {
                    (chain_id, res_seq_num, atom_name): {
                        "line": ATOM record line (str),
                        "x": x-coordinate (float),
                        "y": y-coordinate (float),
                        "z": z-coordinate (float)
                    },
                    ...
                },
                "pairs": [
                    {
                        "proximal": (chain_id1, res_seq_num1),
                        "distal": (chain_id2, res_seq_num2),
                        "chains": (chain_id1, chain_id2),
                        "phipsi": {
                            "proximal-1": {"N": [x, y, z], "C": [x, y, z]},
                            "proximal+1": {"N": [x, y, z], "C": [x, y, z]},
                            "distal-1": {"N": [x, y, z], "C": [x, y, z]},
                            "distal+1": {"N": [x, y, z], "C": [x, y, z]}
                        }
                    },
                    ...
                ]
            }

    Returns:
    - None. All output goes to stdout.
    """
    # The guard must test the parameter itself; the previous code tested an
    # unrelated (and normally undefined) name.
    if ssbond_atom_data is None:
        print("No disulfide bonds found.")
        return

    atoms = ssbond_atom_data.get("atoms", {})
    pairs = ssbond_atom_data.get("pairs", [])
    residue_atom_names = ("N", "CA", "C", "O", "CB", "SG")

    for pair in pairs:
        chain_id1, res_seq_num1 = pair["proximal"]
        chain_id2, res_seq_num2 = pair["distal"]

        print(
            f"Disulfide Bond between Chain {chain_id1} Residue {res_seq_num1} and Chain {chain_id2} Residue {res_seq_num2}"
        )

        # Same report for both ends of the bond.
        for label, chain_id, res_seq_num in (
            ("Proximal", chain_id1, res_seq_num1),
            ("Distal", chain_id2, res_seq_num2),
        ):
            print(f"{label} Residue (Chain {chain_id}, Residue {res_seq_num}):")
            for atom_name in residue_atom_names:
                atom_record = atoms.get((chain_id, res_seq_num, atom_name))
                if atom_record:
                    print(
                        f"  Atom {atom_name}: ({atom_record['x']:.3f}, {atom_record['y']:.3f}, {atom_record['z']:.3f})"
                    )
                else:
                    print(f"  Atom {atom_name}: Not found")

        print("Phi/Psi Atoms:")
        for key, phipsi_atoms in pair["phipsi"].items():
            print(f"  {key}:")
            for atom_name, coords in phipsi_atoms.items():
                # Residue number of the neighbor the key refers to; any key
                # that matches none of the first three is treated as "distal+1".
                if "proximal-1" in key:
                    res_seq_num = int(res_seq_num1) - 1
                elif "proximal+1" in key:
                    res_seq_num = int(res_seq_num1) + 1
                elif "distal-1" in key:
                    res_seq_num = int(res_seq_num2) - 1
                else:
                    res_seq_num = int(res_seq_num2) + 1
                print(
                    f"    Atom {atom_name} (Residue {res_seq_num}): ({coords[0]:.3f}, {coords[1]:.3f}, {coords[2]:.3f})"
                )

        print("-" * 50)


# Example usage: parse one PDB file and print its disulfide bond report with
# the function defined above.
# NOTE(review): the input path is absolute and machine-specific.
ssbond_dict, num_ssbonds, errors = extract_ssbonds_and_atoms(
    "/Users/egs/PDB/good/pdb6f99.ent"
)

print_disulfide_bond_info_dict(ssbond_dict)

In [None]:
from Bio.PDB import Vector


def get_atom_coordinates(
    ssbond_dict, chain_id, res_seq_num, atom_name
) -> "Vector | None":
    """
    Accessor function to get the coordinates of a specific atom in a residue.

    Args:
    - ssbond_dict (dict): The dictionary containing SSBOND and ATOM records;
      its "atoms" entry is keyed by (chain_id, res_seq_num, atom_name).
    - chain_id (str): The chain identifier.
    - res_seq_num (str): The residue sequence number.
    - atom_name (str): The name of the atom.

    Returns:
    - Vector: the x, y, z coordinates of the atom if found, otherwise None.
    """
    atom_record = ssbond_dict["atoms"].get((chain_id, res_seq_num, atom_name))
    if atom_record is None:
        # Vector([]) is not a valid "empty" vector: Bio.PDB's Vector rejects any
        # sequence that does not hold exactly three numbers, so the documented
        # None is returned instead.
        return None
    return Vector([atom_record["x"], atom_record["y"], atom_record["z"]])

In [None]:
def get_residue_atoms_coordinates(ssbond_dict, chain_id, res_seq_num, verbose=False):
    """
    Collect the coordinates of a residue's N, CA, C, O, CB and SG atoms, in that order.

    Args:
    - ssbond_dict (dict): The dictionary containing SSBOND and ATOM records;
      its "atoms" entry is keyed by (chain_id, res_seq_num, atom_name).
    - chain_id (str): The chain identifier.
    - res_seq_num (str): The residue sequence number.
    - verbose (bool): Whether to log an error message if an atom is not found.

    Returns:
    - list: Six Bio.PDB Vector objects, one per atom name. An atom that is not
            present in the dictionary is represented by Vector(0, 0, 0).
    """
    from Bio.PDB import Vector

    atom_table = ssbond_dict["atoms"]
    vectors = []

    for name in ("N", "CA", "C", "O", "CB", "SG"):
        lookup = (chain_id, res_seq_num, name)
        if lookup not in atom_table:
            # Missing atom: keep the slot filled with a zero vector.
            vectors.append(Vector(0, 0, 0))
            if verbose:
                _logger.error(
                    f"Atom {name} in residue {chain_id} {res_seq_num} not found."
                )
            continue

        record = atom_table[lookup]
        vectors.append(Vector([record["x"], record["y"], record["z"]]))

    return vectors


# Example usage: parse one PDB file and print the coordinate vectors returned
# for chain "A", residue "100".
# NOTE(review): the input path is absolute and machine-specific.
ssbond_dict, num_ssbonds, errors = extract_ssbonds_and_atoms(
    "/Users/egs/PDB/good/pdb6f99.ent"
)
coordinates = get_residue_atoms_coordinates(ssbond_dict, "A", "100")
print("Coordinates:", coordinates)

In [None]:
def extract_ssbonds_and_atoms(input_pdb_file, verbose=False) -> tuple:
    """
    Extract SSBOND records and the related ATOM records from a PDB file.

    The file is read once. Every SSBOND line is kept and every ATOM line is
    indexed by (chain ID, residue number, atom name). For each SSBOND record
    the N, CA, C, O, CB and SG atoms of both residues are copied into the
    result, together with the N and C atoms of the residues directly before
    and after each of them (used for phi/psi calculations).

    Args:
    - input_pdb_file (str): The path to the input PDB file.
    - verbose (bool): When True, print every ATOM record found and log every
      problem through ``_logger``.

    Returns:
    - tuple: A tuple containing:
        - dict: A dictionary containing the SSBOND records and the corresponding ATOM records. The dictionary
          has the following structure:
            {
                "ssbonds": list of SSBOND records (str),
                "atoms": {
                    (chain_id, res_seq_num, atom_name): {
                        "line": ATOM record line (str),
                        "x": x-coordinate (float),
                        "y": y-coordinate (float),
                        "z": z-coordinate (float)
                    },
                    ...
                },
                "pairs": [
                    {
                        "proximal": (chain_id1, res_seq_num1),
                        "distal": (chain_id2, res_seq_num2),
                        "chains": (chain_id1, chain_id2),
                        "phipsi": {
                            "proximal-1": {"N": [x, y, z], "C": [x, y, z]},
                            "proximal+1": {"N": [x, y, z], "C": [x, y, z]},
                            "distal-1": {"N": [x, y, z], "C": [x, y, z]},
                            "distal+1": {"N": [x, y, z], "C": [x, y, z]}
                        }
                    },
                    ...
                ]
            }
        - int: The number of SSBOND records found.
        - int: The number of problems encountered (missing file, malformed
          SSBOND record, missing atoms). Each problem is counted once.

      When the file does not exist the dictionary is returned with empty
      "ssbonds", "atoms" and "pairs", so callers can always unpack three values.
    """
    import os  # local import: the function must not depend on another cell's import

    residue_atom_names = ("N", "CA", "C", "O", "CB", "SG")

    ssbonds = []
    atom_list = {}
    errors = []
    pairs = []
    ssbond_atom_list = {"ssbonds": ssbonds, "atoms": {}, "pairs": pairs}

    def record_error(message):
        """Remember a problem and, in verbose mode, log it."""
        errors.append(message)
        if verbose:
            _logger.error(message)

    def collect_phipsi_atoms(chain_id, res_seq_num):
        """Return {-1: {...}, 1: {...}} with the N and C coordinates of the two neighbor residues."""
        neighbors = {-1: {}, 1: {}}
        try:
            base = int(res_seq_num)
        except ValueError:
            record_error(
                f"Non-numeric residue number for chain {chain_id}, residue {res_seq_num}; phi/psi atoms skipped"
            )
            return neighbors

        for offset in (-1, 1):
            neighbor_id = str(base + offset)
            for atom_name in ("N", "C"):
                atom_record = atom_list.get((chain_id, neighbor_id, atom_name))
                if atom_record:
                    neighbors[offset][atom_name] = [
                        atom_record["x"],
                        atom_record["y"],
                        atom_record["z"],
                    ]
                else:
                    record_error(
                        f"Atom record not found for chain {chain_id}, residue {neighbor_id}, atom {atom_name}"
                    )
        return neighbors

    if not os.path.exists(input_pdb_file):
        record_error(f"PDB file not found: {input_pdb_file}")
        return ssbond_atom_list, len(ssbonds), len(errors)

    # Read the PDB file and collect SSBOND and ATOM records
    with open(input_pdb_file, "r") as file:
        for line in file:
            if line.startswith("SSBOND"):
                ssbonds.append(line)
            elif line.startswith("ATOM"):
                # Fixed-column fields of the ATOM record.
                chain_id = line[21].strip()
                res_seq_num = line[22:26].strip()
                atom_name = line[12:16].strip()
                x = float(line[30:38].strip())
                y = float(line[38:46].strip())
                z = float(line[46:54].strip())
                atom_list[(chain_id, res_seq_num, atom_name)] = {
                    "line": line,
                    "x": x,
                    "y": y,
                    "z": z,
                }
                if verbose:
                    print(
                        f"Found ATOM record for chain {chain_id}, residue {res_seq_num}, atom {atom_name}"
                    )

    # Extract the ATOM records corresponding to each SSBOND record
    for ssbond in ssbonds:
        parts = ssbond.split()
        if len(parts) < 8:
            record_error(f"Malformed SSBOND record: {ssbond.rstrip()}")
            continue

        chain_id1 = parts[3]
        res_seq_num1 = parts[4]
        chain_id2 = parts[6]
        res_seq_num2 = parts[7]

        # Copy the atoms of both bonded residues into the result
        for chain_id, res_seq_num in (
            (chain_id1, res_seq_num1),
            (chain_id2, res_seq_num2),
        ):
            for atom_name in residue_atom_names:
                atom_key = (chain_id, res_seq_num, atom_name)
                atom_record = atom_list.get(atom_key)
                if atom_record:
                    ssbond_atom_list["atoms"][atom_key] = atom_record
                else:
                    record_error(
                        f"Atom record not found for chain {chain_id}, residue {res_seq_num}, atom {atom_name}"
                    )

        # Collect phi/psi related atoms: one lookup per residue, so a missing
        # neighbor atom is reported once and the "+1" neighbor is really found
        # (it used to be stored under a key that was never looked up).
        proximal_neighbors = collect_phipsi_atoms(chain_id1, res_seq_num1)
        distal_neighbors = collect_phipsi_atoms(chain_id2, res_seq_num2)
        phipsi = {
            "proximal-1": proximal_neighbors[-1],
            "proximal+1": proximal_neighbors[1],
            "distal-1": distal_neighbors[-1],
            "distal+1": distal_neighbors[1],
        }

        # Add the pair information to the pairs list
        pairs.append(
            {
                "proximal": (chain_id1, res_seq_num1),
                "distal": (chain_id2, res_seq_num2),
                "chains": (chain_id1, chain_id2),
                "phipsi": phipsi,
            }
        )

    return ssbond_atom_list, len(ssbonds), len(errors)

In [None]:
def initialize_disulfide_from_coords(
    ssbond_atom_data,
    pdb_id,
    proximal_chain_id,
    distal_chain_id,
    proximal,
    distal,
    resolution,
    quiet=True,
) -> Disulfide:
    """
    Initialize a new Disulfide object with atomic coordinates from
    the proximal and distal coordinates, typically taken from a PDB file.
    This routine is primarily used internally when building the compressed
    database.

    :param ssbond_atom_data: data passed to the ``proteusPy.ssparser``
        coordinate accessors (presumably the dictionary produced by
        ``extract_ssbonds_and_atoms`` — confirm against the accessors)
    :param pdb_id: PDB identifier, used for the Disulfide name and in messages
    :param proximal_chain_id: chain ID of the proximal residue
    :param distal_chain_id: chain ID of the distal residue
    :param proximal: proximal residue number
    :param distal: distal residue number
    :param resolution: structure resolution; None is stored as -1.0
    :param quiet: Quiet or noisy parsing, defaults to True. While quiet, the
        module logger is set to ERROR and warnings from ``Bio`` are ignored;
        both are reset (WARNING / "default") before the function returns,
        including on the early-return paths.
    :return: the initialized Disulfide, or False when the coordinates of the
        proximal or distal residue cannot be read
    """
    import logging
    import warnings

    from Bio.PDB import Vector
    from proteusPy.ssparser import (
        get_residue_atom_coordinates,
        get_phipsi_atom_coordinates,
    )

    # NOTE(review): calc_dihedral and distance3d are not imported in the visible
    # part of this notebook — confirm they are in scope before running.

    ssbond_name = f"{pdb_id}_{proximal}{proximal_chain_id}_{distal}{distal_chain_id}"
    new_ss = Disulfide(ssbond_name)

    new_ss.pdb_id = pdb_id
    new_ss.resolution = resolution if resolution is not None else -1.0

    # turn off warnings, only report errors
    if quiet:
        _logger.setLevel(logging.ERROR)
        warnings.filterwarnings("ignore", module="Bio")

    try:
        # set the objects proximal and distal values
        new_ss.set_resnum(proximal, distal)

        new_ss.proximal_chain = proximal_chain_id
        new_ss.distal_chain = distal_chain_id

        new_ss.proximal_residue_fullid = proximal
        new_ss.distal_residue_fullid = distal

        # Get the coordinates for the proximal and distal residues as vectors
        # so we can do math on them later. Trap errors here to avoid problems
        # with missing residues or atoms.

        # proximal residue; list order is N, CA, C, O, CB, SG
        try:
            n1, ca1, c1, o1, cb1, sg1 = get_residue_atom_coordinates(
                ssbond_atom_data, proximal_chain_id, proximal
            )[:6]
        except Exception:
            # There are a lot of missing coordinates; they are worth noting.
            _logger.error(
                f"Invalid/missing coordinates for: {pdb_id}, proximal: {proximal}"
            )
            return False

        # distal residue
        try:
            n2, ca2, c2, o2, cb2, sg2 = get_residue_atom_coordinates(
                ssbond_atom_data, distal_chain_id, distal
            )[:6]
        except Exception:
            _logger.error(
                f"Invalid/missing coordinates for: {pdb_id}, distal: {distal}"
            )
            return False

        # previous residue and next residue - optional, used for phi, psi calculations
        try:
            prevprox_atom_list = get_phipsi_atom_coordinates(
                ssbond_atom_data, proximal_chain_id, "proximal-1"
            )
            nextprox_atom_list = get_phipsi_atom_coordinates(
                ssbond_atom_data, proximal_chain_id, "proximal+1"
            )
            prevdist_atom_list = get_phipsi_atom_coordinates(
                ssbond_atom_data, distal_chain_id, "distal-1"
            )
            nextdist_atom_list = get_phipsi_atom_coordinates(
                ssbond_atom_data, distal_chain_id, "distal+1"
            )

            # list is N, C
            cprev_prox = prevprox_atom_list[1]
            nnext_prox = nextprox_atom_list[0]

            cprev_dist = prevdist_atom_list[1]
            nnext_dist = nextdist_atom_list[0]

            # compute phi, psi for prox and distal
            new_ss.phiprox = np.degrees(calc_dihedral(cprev_prox, n1, ca1, c1))
            new_ss.psiprox = np.degrees(calc_dihedral(n1, ca1, c1, nnext_prox))
            new_ss.phidist = np.degrees(calc_dihedral(cprev_dist, n2, ca2, c2))
            new_ss.psidist = np.degrees(calc_dihedral(n2, ca2, c2, nnext_dist))

        except Exception:
            # No arithmetic on the residue numbers here: the message must never
            # fail itself, whatever type the caller passed.
            _logger.warning(
                f"Missing coords for: {pdb_id} neighbors of {proximal} or {distal}, SS {proximal}-{distal}, phi/psi not computed."
            )
            cprev_prox = nnext_prox = cprev_dist = nnext_dist = Vector(
                -1.0, -1.0, -1.0
            )
            new_ss.missing_atoms = True

        # update the positions and conformation
        new_ss.set_positions(
            n1,
            ca1,
            c1,
            o1,
            cb1,
            sg1,
            n2,
            ca2,
            c2,
            o2,
            cb2,
            sg2,
            cprev_prox,
            nnext_prox,
            cprev_dist,
            nnext_dist,
        )

        # calculate and set the disulfide dihedral angles
        new_ss.chi1 = np.degrees(calc_dihedral(n1, ca1, cb1, sg1))
        new_ss.chi2 = np.degrees(calc_dihedral(ca1, cb1, sg1, sg2))
        new_ss.chi3 = np.degrees(calc_dihedral(cb1, sg1, sg2, cb2))
        new_ss.chi4 = np.degrees(calc_dihedral(sg1, sg2, cb2, ca2))
        new_ss.chi5 = np.degrees(calc_dihedral(sg2, cb2, ca2, n2))
        new_ss.rho = np.degrees(calc_dihedral(n1, ca1, ca2, n2))

        new_ss.ca_distance = distance3d(new_ss.ca_prox, new_ss.ca_dist)
        new_ss.cb_distance = distance3d(new_ss.cb_prox, new_ss.cb_dist)
        new_ss.torsion_array = np.array(
            (new_ss.chi1, new_ss.chi2, new_ss.chi3, new_ss.chi4, new_ss.chi5)
        )
        new_ss.compute_torsion_length()

        # calculate and set the SS bond torsional energy
        new_ss.compute_torsional_energy()

        # compute and set the local coordinates
        new_ss.compute_local_coords()

        return new_ss

    finally:
        # turn warnings back on, also when returning early or raising
        if quiet:
            _logger.setLevel(logging.WARNING)
            warnings.filterwarnings("default", module="Bio")

In [None]:
from proteusPy import MODEL_DIR
from proteusPy.logger_config import get_logger

_logger = get_logger("Testing")


def Nload_disulfides_from_id(
    pdb_id: str,
    pdb_dir=MODEL_DIR,
    model_numb=0,
    verbose=False,
    quiet=True,
    dbg=False,
    cutoff=-1.0,
) -> DisulfideList:
    """
    Loads the Disulfides by PDB ID and returns a ```DisulfideList``` of Disulfide objects.
    Assumes the file is downloaded in the pdb_dir path.

    *NB:* Requires EGS-Modified BIO.parse_pdb_header.py from https://github.com/suchanek/biopython

    :param pdb_id: the name of the PDB entry.
    :param pdb_dir: path to the PDB files, defaults to MODEL_DIR - this is: PDB_DIR/good and are
    the pre-parsed PDB files that have been scanned by the DisulfideDownloader program.
    :param model_numb: model number to use, defaults to 0 for single structure files.
    :param verbose: print info while parsing
    :param quiet: when True, BiopythonWarning is ignored while the disulfides
    are built; the previous warning filters are restored afterwards.
    :param dbg: when True, print the structure being parsed.
    :param cutoff: when positive, the result is passed through
    ``DisulfideList.filter_by_distance(cutoff)``.
    :return: a list of Disulfide objects initialized from the file.

    Example:

    PDB_DIR defaults to os.getenv('PDB').
    To load the Disulfides from the PDB ID 5rsa we'd use the following:

    >>> from proteusPy import DisulfideList, load_disulfides_from_id
    >>> from proteusPy.ProteusGlobals import DATA_DIR
    >>> SSlist = DisulfideList([],'5rsa')
    >>> SSlist = load_disulfides_from_id('5rsa', pdb_dir=DATA_DIR, verbose=False)
    >>> SSlist
    [<Disulfide 5rsa_26A_84A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_40A_95A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_58A_110A, Source: 5rsa, Resolution: 2.0 Å>, <Disulfide 5rsa_65A_72A, Source: 5rsa, Resolution: 2.0 Å>]
    """
    import copy
    import os
    import warnings

    from Bio import BiopythonWarning
    from Bio.PDB import PDBParser
    from proteusPy import DisulfideList, extract_ssbonds_and_atoms

    i = 1

    # NOTE(review): the resolution is never read from the parsed structure, so
    # every Disulfide built here carries -1.0 — confirm whether that is intended.
    resolution = -1.0

    parser = PDBParser(PERMISSIVE=True)

    # Biopython uses the Structure -> Model -> Chain hierarchy to organize
    # structures. All are iterable.
    structure_fname = os.path.join(pdb_dir, f"pdb{pdb_id}.ent")
    structure = parser.get_structure(pdb_id, file=structure_fname)
    # The model itself is not used below; the lookup is kept from the original code.
    model = structure[model_numb]

    if dbg:
        print(f"-> load_disulfide_from_id() - Parsing structure: {pdb_id}:")

    SSList = DisulfideList([], pdb_id, resolution)

    ssbond_atom_list, num_ssbonds, errors = extract_ssbonds_and_atoms(
        structure_fname, verbose=verbose
    )

    # catch_warnings() restores the filters on exit, so the "ignore" rule
    # cannot leak into the rest of the session.
    with warnings.catch_warnings():
        if quiet:
            warnings.filterwarnings("ignore", category=BiopythonWarning)

        for pair in ssbond_atom_list["pairs"]:
            # Each entry is stored as (chain_id, res_seq_num).
            chain1_id, proximal = pair["proximal"]
            chain2_id, distal = pair["distal"]

            if not proximal.isnumeric() or not distal.isnumeric():
                mess = (
                    " -> load_disulfides_from_id(): Cannot parse SSBond record (non-numeric IDs):"
                    f"            {pdb_id} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}, ignoring."
                )
                _logger.error(mess)
                continue

            proximal = int(proximal)
            distal = int(distal)

            if proximal == distal and verbose:
                mess = (
                    " -> load_disulfides_from_id(): SSBond record has (proximal == distal):"
                    f"                {pdb_id} Prox: {proximal} {chain1_id} Dist: {distal} {chain2_id}."
                )
                _logger.info(mess)

            if verbose:
                _logger.info(
                    f"SSBond: {i}: {pdb_id}: {proximal} {chain1_id} - {distal} {chain2_id}"
                )

            res = initialize_disulfide_from_coords(
                ssbond_atom_list,
                pdb_id,
                chain1_id,
                chain2_id,
                proximal,
                distal,
                resolution,
                quiet=quiet,
            )
            # A falsy result means the coordinates could not be read.
            if res:
                SSList.append(res)
            i += 1

    if cutoff > 0:
        SSList = SSList.filter_by_distance(cutoff)

    return copy.deepcopy(SSList)

In [None]:
def extract_coordinates_from_pairs(ssbond_atom_list):
    """
    Gather the atom coordinates of both residues of every disulfide pair.

    Args:
    - ssbond_atom_list (dict): The dictionary containing SSBOND records, ATOM records, and pairs.

    Returns:
    - list: One ``(proximal_coords, distal_coords)`` tuple per pair. Each element
      is a list of ``(x, y, z)`` tuples for those of N, CA, C, O, CB, SG (in
      that order) that are present under "atoms"; absent atoms are skipped.
    """
    atom_order = ("N", "CA", "C", "O", "CB", "SG")

    def residue_xyz(chain, resnum):
        """Coordinates of the residue's atoms that exist in the atom table."""
        found = []
        for name in atom_order:
            lookup = (chain, resnum, name)
            if lookup in ssbond_atom_list["atoms"]:
                record = ssbond_atom_list["atoms"][lookup]
                found.append((record["x"], record["y"], record["z"]))
        return found

    result = []
    for entry in ssbond_atom_list["pairs"]:
        prox_chain, prox_res = entry["proximal"]
        dist_chain, dist_res = entry["distal"]
        result.append(
            (residue_xyz(prox_chain, prox_res), residue_xyz(dist_chain, dist_res))
        )
    return result


# Example usage: parse one PDB file and print the proximal/distal coordinate
# lists of every disulfide pair found in it.
# NOTE(review): the input path is absolute and machine-specific.
ssbond_atom_list, num_ssbonds, errors = extract_ssbonds_and_atoms(
    "/Users/egs/PDB/pdb2qpn.ent"
)
coordinates = extract_coordinates_from_pairs(ssbond_atom_list)
for proximal_coords, distal_coords in coordinates:
    print("Proximal Coordinates:", proximal_coords)
    print("Distal Coordinates:", distal_coords)

In [None]:
def get_atom_coordinates(ssbond_atom_list, chain_id, res_id, atom_id):
    """
    Return the x, y, z coordinates stored for one atom of one residue.

    NOTE(review): an earlier cell of this notebook defines a function with the
    same name that returns a Bio.PDB Vector; running this cell replaces it.

    Args:
    - ssbond_atom_list (dict): The dictionary containing SSBOND records, ATOM records, and pairs.
    - chain_id (str): The chain ID of the residue.
    - res_id (str): The residue ID.
    - atom_id (str): The atom ID.

    Returns:
    - tuple: ``(x, y, z)`` for the atom, or None if the atom is not found.
    """
    atom_table = ssbond_atom_list["atoms"]
    lookup = (chain_id, res_id, atom_id)
    if lookup not in atom_table:
        return None
    record = atom_table[lookup]
    return record["x"], record["y"], record["z"]

In [None]:
# Example usage with verbose logging: parse one PDB file, print the SSBOND
# count, the third return value and the extracted dictionary, then look up the
# CA atom of chain "A", residue "63".
# NOTE(review): the input path is absolute and machine-specific.
input_pdb_file = "/Users/egs/PDB/pdb2qpn.ent"
ssbond_atom_data, num_ssbonds, errors = extract_ssbonds_and_atoms(
    input_pdb_file, verbose=True
)
print(f"Number of SSBOND records found: {num_ssbonds}, errors: {errors}")
print("Extracted data:")
print(ssbond_atom_data)
coordinates = get_atom_coordinates(ssbond_atom_data, "A", "63", "CA")
if coordinates:
    print("Coordinates:", coordinates)
else:
    print("Atom not found.")

In [None]:
def get_residue_coordinates(ssbond_atom_list, chain_id, res_id):
    """
    Return the coordinates of every stored atom that belongs to one residue.

    Args:
    - ssbond_atom_list (dict): The dictionary containing SSBOND records, ATOM records, and pairs.
    - chain_id (str): The chain ID of the residue.
    - res_id (str): The residue ID.

    Returns:
    - list: ``(x, y, z)`` tuples, in the iteration order of the "atoms"
      dictionary, for the entries whose key starts with ``(chain_id, res_id)``;
      an empty list if no atoms are found.
    """
    return [
        (record["x"], record["y"], record["z"])
        for lookup, record in ssbond_atom_list["atoms"].items()
        if lookup[0] == chain_id and lookup[1] == res_id
    ]

In [None]:
# Example usage: parse one PDB file, print the SSBOND count, the third return
# value and the pairs, then list the stored coordinates of chain "A", residue "22".
# NOTE(review): the input path is absolute and machine-specific.
ssbond_atom_list, num_ssbonds, errors = extract_ssbonds_and_atoms(
    "/Users/egs/PDB/good/pdb4yys.ent", verbose=False
)
print(f"Number of SSBOND records found: {num_ssbonds}, errors: {errors}")
print(f"Pairs: {ssbond_atom_list['pairs']}")

coordinates = get_residue_coordinates(ssbond_atom_list, "A", "22")


if coordinates:
    print("Coordinates:", coordinates)
else:
    print("No atoms found for the given residue.")

In [None]:
# Run check_header_from_file on a single file with verbose and debug output,
# then display the first returned value.
# NOTE(review): the input path is absolute and machine-specific.
found, errors = check_header_from_file(
    "/Users/egs/PDB/bad/pdb4ywb.ent", verbose=True, dbg=True
)
found

In [None]:
# Load the disulfides of entry "4k3i" with the library loader and print each
# one together with its ca_distance attribute.
# NOTE(review): pdb_dir is an absolute, machine-specific path.
ss = load_disulfides_from_id("4k3i", pdb_dir="/Users/egs/PDB/bad", verbose=True)


for ssbond in ss:
    print(f"Disulfide: {ssbond}, Ca: {ssbond.ca_distance}")

In [None]:
def find_disulfides(pdb, id) -> DisulfideList:
    """
    Collect the disulfides stored for one ID, printing each lookup step.

    The indices found in ``pdb.SSDict[id]`` are used to pick entries from
    ``pdb.SSList``; the picked entries are returned in a new DisulfideList
    named after ``id``.
    """

    positions = pdb.SSDict[id]
    print(f"indices: {positions}")
    matches = DisulfideList([], id)
    all_ss = pdb.SSList
    for pos in positions:
        entry = all_ss[pos]
        print(f"ind: {pos} sslist[ind]: {entry}")
        matches.append(entry)
    return matches

In [None]:
# Collect the disulfides stored for "4yys" with find_disulfides and display them.
# NOTE: this rebinds `sslist`, which earlier cells used for other values.

sslist = find_disulfides(pdb, "4yys")
sslist

In [None]:
def find_null_pdb_indices(pdb, limit=1000):
    """
    List every ID in ``pdb.IDList`` whose lookup ``pdb[id]`` has length zero.

    All IDs are checked. ``limit`` is accepted but is not used by this function.

    :param pdb: Database object exposing ``IDList`` and supporting ``pdb[id]``
    :param limit: Unused
    :return: List of IDs with empty lookups, in ``IDList`` order
    """
    return [entry_id for entry_id in pdb.IDList if len(pdb[entry_id]) == 0]


def find_null_pdb_keys(pdb, limit=1000):
    """
    List every key of ``pdb.SSDict`` whose stored value has length zero.

    All keys are checked. ``limit`` is accepted but is not used by this function.

    :param pdb: Database object exposing an ``SSDict`` mapping
    :param limit: Unused
    :return: List of keys with empty values, in the mapping's iteration order
    """
    table = pdb.SSDict
    return [key for key in table if len(table[key]) == 0]

In [None]:
# Collect the IDs whose database lookup is empty, then display them.
missing = find_null_pdb_indices(pdb)
# Only a cell's last expression is displayed, so this length is computed but not shown.
len(missing)
missing

In [None]:
# Re-load every ID that came back empty from the database and record the names of
# disulfides whose `ca_distance` exceeds the threshold below.
MAX_CA_DISTANCE = 8.0  # `ca_distance` values above this are reported as "long"

bad = []

# The loop variable is `pdb_id` rather than `id` so the built-in id() is not
# shadowed for the rest of the notebook.
for pdb_id in missing:
    res = load_disulfides_from_id(pdb_id, verbose=True)
    if len(res) == 0:
        print(f"ID {pdb_id} is missing disulfides")
        continue
    for ss in res:
        if ss.ca_distance > MAX_CA_DISTANCE:
            print(f"ID {pdb_id} has a long disulfide: {ss}")
            bad.append(ss.name)


bad

In [None]:
# Index the database with the string "6vkk_845A_845C" and display the result.
# NOTE(review): the key looks like a single disulfide's name -- confirm.
pdb["6vkk_845A_845C"]

In [None]:
# Membership test: is "6vkk" among the IDs listed by the database?
idlist = pdb.IDList
"6vkk" in idlist

In [None]:
def find_disulfides(pdb, id) -> DisulfideList:
    """
    Collect the disulfides recorded for one ID (quiet variant, no printing).

    ``pdb.SSDict[id]`` supplies the positions to read from ``pdb.SSList``.
    NOTE(review): an earlier cell defines a function with this same name;
    running this cell replaces it.

    :param pdb: Loaded database object exposing ``SSDict`` and ``SSList``
    :param id: Key to look up in ``pdb.SSDict``
    :return: A new DisulfideList, named after ``id``, holding the selected entries
    """
    ss_positions = pdb.SSDict[id]
    found = DisulfideList([], id)
    every_ss = pdb.SSList
    for pos in ss_positions:
        found.append(every_ss[pos])
    return found

In [None]:
# Look up the disulfides recorded for entry "4wmy" with find_disulfides() and
# display the resulting list.

sslist = find_disulfides(pdb, "4wmy")
sslist

In [None]:
# Index the database directly with the ID "4wmy" and display the result.
pdb["4wmy"]

In [None]:
# Display the IDs whose database lookup returns an empty result.
find_null_pdb_indices(pdb)

In [None]:
# NOTE(review): `wym` is not assigned in any nearby cell; a later cell assigns
# `wmy = pdb["4wmy"]`, so this may be a misspelling or a leftover from a deleted
# cell. Confirm before relying on Restart & Run All.
sslist, xchain = prune_extra_ss(wym)
sslist

In [None]:
# NOTE(review): this import sits mid-notebook; a later cell defines a local
# function with the same name, which replaces the imported one once it runs.
from proteusPy import remove_duplicate_ss

# NOTE(review): `wym` is not assigned in any nearby cell (a later cell assigns
# `wmy`) -- confirm the intended variable.
pruned = remove_duplicate_ss(wym)
pruned

In [None]:
def find_string_in_list(target_string, list_of_strings):
    """
    Locate ``target_string`` inside ``list_of_strings``.

    :param target_string: Value to search for
    :param list_of_strings: Sequence searched with its ``index`` method
    :return: Position of the first match, or -1 when there is no match
    """
    try:
        position = list_of_strings.index(target_string)
    except ValueError:
        # .index() signals "not present" with ValueError; map that to -1.
        position = -1
    return position

In [None]:
# Only the last expression of a cell is displayed, so the index returned by
# find_string_in_list() is discarded here; the cell shows pdb["4wmy"].
find_string_in_list("4wmy", pdb.IDList)
pdb["4wmy"]

In [None]:
# Keep the lookup result for entry "4wmy" in a variable and display it.
wmy = pdb["4wmy"]
wmy

In [None]:
# Fetch two entries by key and display whether they compare equal with `==`.
# NOTE(review): the keys suggest the same residue pair (22-65) on chains A and B
# of entry "4yys" -- confirm.
ss1 = pdb["4yys_22A_65A"]
ss2 = pdb["4yys_22B_65B"]
ss1 == ss2

In [None]:
# Call pprint_all() on the first of the two entries being compared.
ss1.pprint_all()

In [None]:
# Call pprint_all() on the second of the two entries being compared.
ss2.pprint_all()

In [None]:
def remove_duplicate_ss(sslist: "DisulfideList") -> list:
    """
    Return the distinct items of ``sslist``, keeping the first occurrence of each.

    Items are compared with ``==`` (via ``not in``), so they do not need to be
    hashable. Each item is checked against everything kept so far, which makes
    the cost grow quadratically with the input length.

    NOTE(review): an earlier cell imports ``remove_duplicate_ss`` from proteusPy;
    running this cell rebinds that name to this local version.

    :param sslist: Iterable of items to de-duplicate
    :return: A plain ``list`` (not a DisulfideList) of the distinct items, in
        first-seen order
    """
    pruned = []
    for ss in sslist:
        if ss not in pruned:
            pruned.append(ss)
    return pruned

In [None]:
# NOTE(review): the variable is named `yys` but holds the lookup for "4wmy" --
# confirm which entry was intended.
yys = pdb["4wmy"]
yys

In [None]:
# De-duplicate the entries held in `yys` and display what remains.
pruned = remove_duplicate_ss(yys)
pruned

In [None]:
def compare_dihedrals(self, other) -> float:
    """
    Compare the Disulfide object's dihedrals to another Disulfide object's dihedrals.

    The result is the Euclidean length of the element-wise difference between
    the two ``torsion_array`` sequences. Differences are taken as-is, with no
    wrap-around adjustment for angles (179 and -179 count as 358 apart).

    :param other: Disulfide object to compare to
    :return: The length of the difference of the two sets of dihedral angles
    :raises TypeError: If the input is not a Disulfide object
    """
    import numpy as np

    if not isinstance(other, Disulfide):
        raise TypeError("Input must be a Disulfide object.")

    # Plain arrays are used instead of Bio.PDB's Vector: that class only accepts
    # exactly three components, while the dihedral sets used in this notebook
    # have five, so constructing a Vector from them raises ValueError.
    delta = np.asarray(other.torsion_array, dtype=float) - np.asarray(
        self.torsion_array, dtype=float
    )
    return float(np.linalg.norm(delta))

In [None]:
def Torsion_RMS(first, other) -> float:
    """
    Root-mean-square difference between two objects' ``torsion_array`` values.

    Elements are paired positionally and subtracted directly (no wrap-around
    adjustment). The squared differences are summed, divided by the length of
    ``first.torsion_array``, and the square root of that mean is returned.

    :param first: Object providing the reference ``torsion_array``
    :param other: Comparison Disulfide
    :return: RMS distance (deg)
    """
    from math import sqrt

    angles_a = first.torsion_array
    angles_b = other.torsion_array

    # Mean of the squared element-wise differences.
    mean_sq = sum((a - b) ** 2 for a, b in zip(angles_a, angles_b)) / len(angles_a)
    return sqrt(mean_sq)

In [None]:
# Take the entry at integer index 0 of the database and display it.
# This rebinds `ss1`, which an earlier cell set to pdb["4yys_22A_65A"].
ss1 = pdb[0]
ss1

In [None]:
# Sanity check: comparing an entry with itself should give an RMS of 0.
Torsion_RMS(ss1, ss1)