This notebook is a general scratchpad for me, so a lot of it might not run correctly. -egs-

In [None]:
import proteusPy

from proteusPy import (
    DisulfideList,
    Disulfide,
    Load_PDB_SS,
    load_disulfides_from_id,
    prune_extra_ss,
    check_header_from_file,
    Vector3D,
)

from proteusPy.logger_config import get_logger
from proteusPy.ssparser import (
    extract_ssbonds_and_atoms,
    print_disulfide_bond_info_dict,
    get_phipsi_atoms_coordinates,
)
import numpy as np


_logger = get_logger("testing")

In [None]:
# Load the PDB file
# Creates the loader object `pdb` that the rest of this notebook indexes into,
# then calls its describe() report.
# NOTE(review): subset=False presumably selects the full database rather than
# a reduced subset — confirm against Load_PDB_SS.
pdb = Load_PDB_SS(verbose=True, subset=False)
pdb.describe()

In [None]:
def create_disulfide_dict(disulfide_list):
    """
    Group the positions of the disulfides in a list by their pdb_id.

    Parameters:
    disulfide_list (list): Disulfide objects, each exposing a ``pdb_id`` attribute.

    Returns:
    dict: Maps every pdb_id to the list of positions (ascending) at which a
    disulfide with that id occurs in ``disulfide_list``. Keys are inserted in
    order of first occurrence.
    """
    positions_by_id = {}
    for position, ss in enumerate(disulfide_list):
        # setdefault creates the bucket the first time an id is seen.
        positions_by_id.setdefault(ss.pdb_id, []).append(position)
    return positions_by_id

In [None]:
# Build the pdb_id -> list-of-indices mapping from the loader's SSList and
# display it.
ssdict = create_disulfide_dict(pdb.SSList)
ssdict

In [None]:
# Display the loader's own SSDict attribute (presumably for comparison with
# the dictionary produced by create_disulfide_dict).
pdb.SSDict

In [None]:
# Poke at the loader: index it by position, then take its SSList and look at
# the list's length and its minmax_energy attribute.
pdb[0]
sslist = pdb.SSList
len(sslist)
sslist.minmax_energy

In [None]:
# Build the torsion table for the whole list (presumably a pandas DataFrame —
# confirm against DisulfideList.build_torsion_df).
tor_df = sslist.build_torsion_df()

In [None]:
# Summarize the torsion table built from sslist.build_torsion_df().
tor_df.describe()

In [None]:
import logging

# Fetch the logger named "proteusPy.Disulfide", set its level to INFO and emit
# a test message through it.
logger = logging.getLogger("proteusPy.Disulfide")
logger.setLevel(logging.INFO)
logger.info("test")

In [None]:
import logging


def set_logger_level_for_module(pkg_name, level=""):
    """
    Collect the registered loggers whose names begin with ``pkg_name`` and,
    when a level is supplied, apply that level to each of them.

    :param pkg_name: Name prefix matched against the registered logger names.
    :param level: Level handed to ``Logger.setLevel``. A falsy value (the
        default ``""``) leaves every logger's level untouched.
    :return: Names of the matching loggers, in registry order.
    """
    registry = logging.Logger.manager.loggerDict

    matching = []
    for name, entry in registry.items():
        # Skip registry entries that are not actual Logger instances.
        if not isinstance(entry, logging.Logger):
            continue
        if name.startswith(pkg_name):
            matching.append(name)

    if level:
        for name in matching:
            logging.getLogger(name).setLevel(level)

    return matching


# Example usage
# Set every registered logger whose name starts with "proteusPy" to DEBUG and
# print the names that matched.
pkg_name = "proteusPy"
registered_loggers = set_logger_level_for_module(pkg_name, level=logging.DEBUG)
print(f"Registered loggers for '{pkg_name}':", registered_loggers)

In [None]:
import proteusPy.vector3D

In [None]:
# Look up item 0 in the loader using integer indexing.
pdb[0]

In [None]:
# Construct a Disulfide with no arguments and inspect its Tor attribute.
ss1 = Disulfide()
ss1.Tor

In [None]:
# Example usage:
# Parse the SSBOND records and atoms from a local PDB file, look up the phi/psi
# atoms for one chain and neighbor key, then print everything.
ssbond_dict, num_ssbonds, errors = extract_ssbonds_and_atoms(
    "/Users/egs/PDB/good/pdb6f99.ent"
)

chain_id = "A"
key = "proximal-1"
# The helper imported at the top of this notebook is
# get_phipsi_atoms_coordinates; the name get_phipsi_atoms is not defined here.
# NOTE(review): assumes it takes (data_dict, chain_id, key) — confirm.
phipsi_atoms = get_phipsi_atoms_coordinates(ssbond_dict, chain_id, key)
print(phipsi_atoms)
print_disulfide_bond_info_dict(ssbond_dict)

In [None]:
import os


def print_disulfide_bond_info_dict(ssbond_atom_data):
    """
    Prints the disulfide bond information in a pretty format.

    For every entry in ``ssbond_atom_data["pairs"]`` this prints a header line,
    the N/CA/C/O/CB/SG coordinates of the proximal and distal residues (or
    "Not found" when an atom is missing from ``"atoms"``), the phi/psi neighbor
    atoms, and a separator line. Prints "No disulfide bonds found." and returns
    when the argument is None.

    Args:
    - ssbond_atom_data (dict): A dictionary containing the SSBOND records and the corresponding ATOM records. The dictionary
          has the following structure:
            {
                "ssbonds": list of SSBOND records (str),
                "atoms": {
                    (chain_id, res_seq_num, atom_name): {
                        "x": x-coordinate (float),
                        "y": y-coordinate (float),
                        "z": z-coordinate (float)
                    },
                    ...
                },
                "pairs": [
                    {
                        "proximal": (chain_id1, res_seq_num1),
                        "distal": (chain_id2, res_seq_num2),
                        "chains": (chain_id1, chain_id2),
                        "phipsi": {
                            "proximal-1": {"N": [x, y, z], "C": [x, y, z]},
                            "proximal+1": {"N": [x, y, z], "C": [x, y, z]},
                            "distal-1": {"N": [x, y, z], "C": [x, y, z]},
                            "distal+1": {"N": [x, y, z], "C": [x, y, z]}
                        }
                    },
                    ...
                ]
            }

    Returns:
    - None. All output goes to stdout.
    """
    # The guard must test the actual parameter; the earlier version referenced
    # an undefined name here and raised NameError on every call.
    if ssbond_atom_data is None:
        print("No disulfide bonds found.")
        return

    atoms = ssbond_atom_data.get("atoms", {})
    pairs = ssbond_atom_data.get("pairs", [])

    for pair in pairs:
        chain_id1, res_seq_num1 = pair["proximal"]
        chain_id2, res_seq_num2 = pair["distal"]

        print(
            f"Disulfide Bond between Chain {chain_id1} Residue {res_seq_num1} and Chain {chain_id2} Residue {res_seq_num2}"
        )
        _print_residue_atoms("Proximal", atoms, chain_id1, res_seq_num1)
        _print_residue_atoms("Distal", atoms, chain_id2, res_seq_num2)

        print("Phi/Psi Atoms:")
        for key, phipsi_atoms in pair["phipsi"].items():
            print(f"  {key}:")
            # The neighbor residue number depends only on the key, so it is
            # computed once per key rather than once per atom.
            res_seq_num = _phipsi_residue_number(key, res_seq_num1, res_seq_num2)
            for atom_name, coords in phipsi_atoms.items():
                print(
                    f"    Atom {atom_name} (Residue {res_seq_num}): ({coords[0]:.3f}, {coords[1]:.3f}, {coords[2]:.3f})"
                )

        print("-" * 50)


def _print_residue_atoms(label, atoms, chain_id, res_seq_num):
    """Print the N/CA/C/O/CB/SG coordinates of one residue of a disulfide."""
    print(f"{label} Residue (Chain {chain_id}, Residue {res_seq_num}):")
    for atom_name in ["N", "CA", "C", "O", "CB", "SG"]:
        atom_record = atoms.get((chain_id, res_seq_num, atom_name))
        if atom_record:
            print(
                f"  Atom {atom_name}: ({atom_record['x']:.3f}, {atom_record['y']:.3f}, {atom_record['z']:.3f})"
            )
        else:
            print(f"  Atom {atom_name}: Not found")


def _phipsi_residue_number(key, proximal_res, distal_res):
    """Return the residue number of the neighbor residue named by a phi/psi key."""
    if "proximal-1" in key:
        return int(proximal_res) - 1
    if "proximal+1" in key:
        return int(proximal_res) + 1
    if "distal-1" in key:
        return int(distal_res) - 1
    # Any other key is treated as "distal+1", the same fallback as before.
    return int(distal_res) + 1


# Example usage:
# Parse the SSBOND records and atoms from a local PDB file and pretty-print them.
ssbond_dict, num_ssbonds, errors = extract_ssbonds_and_atoms(
    "/Users/egs/PDB/good/pdb6f99.ent"
)

# Call the printer under the name it is defined with; the shorter
# print_disulfide_bond_info does not exist in this notebook.
print_disulfide_bond_info_dict(ssbond_dict)

In [None]:
# Load the disulfides for entry 7o6v from the local PDB directory, then print
# each one together with its ca_distance attribute.
ss = load_disulfides_from_id("7o6v", pdb_dir="/Users/egs/PDB/good", verbose=True)


for ssbond in ss:
    print(f"Disulfide: {ssbond}, Ca: {ssbond.ca_distance}")

In [None]:
def find_disulfides(pdb, id) -> DisulfideList:
    """
    Collect the disulfides recorded for one id, printing each step.

    :param pdb: Loader object exposing ``SSDict`` (id -> indices) and ``SSList``.
    :param id: Key looked up in ``pdb.SSDict``.
    :return: A ``DisulfideList([], id)`` filled with ``pdb.SSList[i]`` for every
        index ``i`` stored under ``id``.
    """
    positions = pdb.SSDict[id]
    print(f"indices: {positions}")

    found = DisulfideList([], id)
    all_ss = pdb.SSList
    for pos in positions:
        member = all_ss[pos]
        print(f"ind: {pos} sslist[ind]: {member}")
        found.append(member)
    return found

In [None]:
# PDB_SS['4yys'] return a list of SS
# Collect the disulfides stored under id 4yys with find_disulfides and display
# the resulting list.

sslist = find_disulfides(pdb, "4yys")
sslist

In [None]:
def find_null_pdb_indices(pdb, limit=1000):
    """
    Return the ids from ``pdb.IDList`` whose lookup ``pdb[id]`` is empty.

    :param pdb: Loader object exposing ``IDList`` and supporting ``pdb[id]``.
    :param limit: Accepted but not used by the implementation; every id in
        ``IDList`` is checked.
    :return: List of ids, in ``IDList`` order, for which ``len(pdb[id]) == 0``.
    """
    return [entry_id for entry_id in pdb.IDList if len(pdb[entry_id]) == 0]


def find_null_pdb_keys(pdb, limit=1000):
    """
    Return the keys of ``pdb.SSDict`` whose stored value is empty.

    :param pdb: Loader object exposing an ``SSDict`` mapping.
    :param limit: Accepted but not used by the implementation; every key in
        ``SSDict`` is checked.
    :return: List of keys, in iteration order, for which
        ``len(pdb.SSDict[key]) == 0``.
    """
    by_id = pdb.SSDict
    return [key for key in by_id if len(by_id[key]) == 0]

In [None]:
# Collect the ids whose loader lookup comes back empty, then look at how many
# there are and which ones.
missing = find_null_pdb_indices(pdb)
len(missing)
missing

In [None]:
# Re-load every id flagged in `missing` and record the names of any disulfides
# whose ca_distance exceeds 8.0.
bad = []

# The loop variable is pdb_id rather than id so the builtin id() is not
# shadowed for the rest of the notebook session.
for pdb_id in missing:
    res = load_disulfides_from_id(pdb_id, verbose=True)
    if len(res) == 0:
        print(f"ID {pdb_id} is missing disulfides")
    else:
        for ss in res:
            if ss.ca_distance > 8.0:
                print(f"ID {pdb_id} has a long disulfide: {ss}")
                bad.append(ss.name)


bad

In [None]:
# Look up the loader with a longer string key (presumably the name of a single
# disulfide rather than a whole-structure id — confirm against the loader).
pdb["6vkk_845A_845C"]

In [None]:
# Check whether the id 6vkk is present in the loader's IDList.
idlist = pdb.IDList
"6vkk" in idlist

In [None]:
def find_disulfides(pdb, id) -> DisulfideList:
    """
    Collect the disulfides recorded for one id (quiet variant, no printing).

    :param pdb: Loader object exposing ``SSDict`` (id -> indices) and ``SSList``.
    :param id: Key looked up in ``pdb.SSDict``.
    :return: A ``DisulfideList([], id)`` filled with ``pdb.SSList[i]`` for every
        index ``i`` stored under ``id``.
    """
    positions = pdb.SSDict[id]
    found = DisulfideList([], id)
    all_ss = pdb.SSList
    for pos in positions:
        found.append(all_ss[pos])
    return found

In [None]:
# Collect the disulfides stored under id 4wmy with find_disulfides and display
# the resulting list.

sslist = find_disulfides(pdb, "4wmy")
sslist

In [None]:
# Look up id 4wmy directly through the loader's own indexing.
pdb["4wmy"]

In [None]:
# List the ids whose loader lookup is empty.
find_null_pdb_indices(pdb)

In [None]:
# Run prune_extra_ss on the 4wmy disulfides. The name `wym` used here before
# was never defined, so the list is fetched from the loader directly.
sslist, xchain = prune_extra_ss(pdb["4wmy"])
sslist

In [None]:
from proteusPy import remove_duplicate_ss

# De-duplicate the 4wmy disulfides. The name `wym` used here before was never
# defined, so the list is fetched from the loader directly.
pruned = remove_duplicate_ss(pdb["4wmy"])
pruned

In [None]:
def find_string_in_list(target_string, list_of_strings):
    """
    Locate the first occurrence of a string in a list.

    :param target_string: Value to look for.
    :param list_of_strings: Sequence searched with its ``index`` method.
    :return: Position of the first match, or -1 when there is none.
    """
    try:
        position = list_of_strings.index(target_string)
    except ValueError:
        # index() signals "not present" with ValueError; map that to -1.
        position = -1
    return position

In [None]:
# Find the position of id 4wmy in the loader's IDList, then look the same id
# up through the loader.
find_string_in_list("4wmy", pdb.IDList)
pdb["4wmy"]

In [None]:
# Keep the result of looking up id 4wmy in `wmy` and display it.
wmy = pdb["4wmy"]
wmy

In [None]:
# Fetch two 4yys entries whose keys differ only in the chain letter (A vs B)
# and test them for equality.
ss1 = pdb["4yys_22A_65A"]
ss2 = pdb["4yys_22B_65B"]
ss1 == ss2

In [None]:
# Call pprint_all() on ss1 to inspect it in full.
ss1.pprint_all()

In [None]:
# Call pprint_all() on ss2 to inspect it in full.
ss2.pprint_all()

In [None]:
def remove_duplicate_ss(sslist: DisulfideList) -> list:
    """
    Return the disulfides of ``sslist`` with duplicates removed.

    Duplicates are detected with ``in`` (i.e. the elements' equality), and the
    first occurrence of each disulfide is kept in its original order.

    :param sslist: Disulfides to filter.
    :return: A plain Python ``list`` (not a DisulfideList) of the unique items.
    """
    pruned = []
    for ss in sslist:
        # Membership is tested against the items kept so far.
        if ss not in pruned:
            pruned.append(ss)
    return pruned

In [None]:
# NOTE(review): despite its name, `yys` holds the lookup for id 4wmy, not 4yys.
yys = pdb["4wmy"]
yys

In [None]:
# De-duplicate the disulfides held in `yys` and display the result.
pruned = remove_duplicate_ss(yys)
pruned

In [None]:
def compare_dihedrals(self, other) -> float:
    """
    Compare the Disulfide object's dihedrals to another Disulfide object's dihedrals.

    :param other: Disulfide object to compare to
    :return: The Euclidean length of the difference between the two objects'
        ``torsion_array`` values
    :raises TypeError: If the input is not a Disulfide object
    """
    import numpy as np

    if not isinstance(other, Disulfide):
        raise TypeError("Input must be a Disulfide object.")

    # Plain numpy arithmetic works for torsion arrays of any length, whereas
    # Bio.PDB's Vector is a three-component vector type.
    own = np.asarray(self.torsion_array, dtype=float)
    theirs = np.asarray(other.torsion_array, dtype=float)
    return float(np.linalg.norm(theirs - own))

In [None]:
def Torsion_RMS(first, other) -> float:
    """
    Root-mean-square difference between the torsion arrays of two Disulfides.

    :param first: Reference Disulfide
    :param other: Comparison Disulfide
    :return: RMS distance, in the same units as the ``torsion_array`` values
    """
    import math

    angles_a = first.torsion_array
    angles_b = other.torsion_array

    # Element-wise squared differences, paired up in order.
    squared_diffs = ((a - b) ** 2 for a, b in zip(angles_a, angles_b))

    # The mean is taken over the length of the first array.
    mean_square = sum(squared_diffs) / len(angles_a)
    return math.sqrt(mean_square)

In [None]:
# Rebind ss1 to item 0 of the loader and display it.
ss1 = pdb[0]
ss1

In [None]:
# Sanity check: comparing a disulfide with itself makes every difference zero,
# so the RMS should be 0.
Torsion_RMS(ss1, ss1)