In [19]:
# ZZ_scratch.ipynb - This is a general scratchpad used for code development and testing. Don't expect it to work as written
# Eric G. Suchanek, PhD. 2024

In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
from proteusPy import (
    Load_PDB_SS,
    Disulfide,
    DisulfideList,
    DisulfideLoader,
    torsion_to_class_string,
)

from proteusPy.ProteusGlobals import *

# Filesystem roots. The PDB tree is taken from the ``PDB`` environment
# variable when it is set, otherwise it falls back to ``~/pdb``.
HOME = Path.home()
PDB = Path(os.environ.get("PDB", HOME / "pdb"))

MODEL_DIR = PDB / "good"
PDB_DATA_DIR = PDB / "data"

SAVE_DIR = HOME / "Documents" / "proteusPyDocs" / "classes"
REPO_DIR = HOME / "repos" / "proteusPy" / "data"

# One output directory per class scheme; each is created on cell execution
# (parents included) and left alone when it already exists.
OCTANT = SAVE_DIR / "octant"
BINARY = SAVE_DIR / "binary"
SEXTANT = SAVE_DIR / "sextant"
for _class_dir in (OCTANT, BINARY, SEXTANT):
    _class_dir.mkdir(parents=True, exist_ok=True)

PBAR_COLS = 78

In [None]:
# Load the disulfide database through Load_PDB_SS (subset=False, 8.0 cutoff,
# no forced refresh) and bind it to `pdb`, which later cells index into.
pdb = Load_PDB_SS(verbose=True, subset=False, cutoff=8.0, force=False)

In [13]:
import math
import pyvista as pv
import numpy as np

# Define global constants used as default arguments by DisulfideBondRenderer.
# NOTE(review): `from proteusPy.ProteusGlobals import *` earlier in the notebook
# may already provide names like these; the assignments here take precedence
# once this cell has run -- confirm that is intended.
BOND_COLOR = "grey"  # fallback color for bonds whose atom type has no entry
BS_SCALE = 0.3  # factor applied to atom radii in ball-and-stick style
SPECULARITY = 0.5  # default `specular` value passed to add_mesh
SPEC_POWER = 30  # default `specular_power` value passed to add_mesh
BOND_RADIUS = 0.1  # default cylinder radius for bonds

# Example atom colors and radii (extend as needed).
# Keys are the atom labels used in DisulfideBondRenderer.ATOMS.
ATOM_COLORS = {
    "N": "blue",
    "C": "black",
    "O": "red",
    "SG": "yellow",
    # Add more atom types if necessary
}
# Sphere radii used for the 'cpk' style and, scaled by BS_SCALE, for 'bs'.
ATOM_RADII_CPK = {
    "N": 0.75,
    "C": 0.70,
    "O": 0.66,
    "SG": 1.05,
    # Add more atom types if necessary
}
# Sphere radii used for the 'cov' style.
# NOTE(review): the values are currently identical to ATOM_RADII_CPK -- confirm
# whether distinct covalent radii were intended.
ATOM_RADII_COVALENT = {
    "N": 0.75,
    "C": 0.70,
    "O": 0.66,
    "SG": 1.05,
    # Add more atom types if necessary
}


class DisulfideBondRenderer:
    """
    A renderer for visualizing disulfide bonds using PyVista.

    A copy of the disulfide's coordinate array is taken at construction time.
    Row ``i`` of that array is drawn as the atom named by ``ATOMS[i]``; atoms
    are drawn as spheres and bonds as cylinders on a ``pv.Plotter``.
    """

    # Static atoms array corresponding to each coordinate row
    ATOMS = [
        "N",  # 0
        "C",  # 1
        "C",  # 2
        "O",  # 3
        "C",  # 4
        "SG",  # 5
        "N",  # 6
        "C",  # 7
        "C",  # 8
        "O",  # 9
        "C",  # 10
        "SG",  # 11
        "C",  # 12
        "N",  # 13
        "C",  # 14
        "N",  # 15
    ]

    # Static bonds array for disulfide bonds (indices correspond to ATOMS).
    # Entries from index _NEIGHBOR_BOND_START onward join to the previous/next
    # residues.
    BONDS = [
        (0, 1),  # N-Ca
        (1, 2),  # Ca-C
        (2, 3),  # C-O
        (1, 4),  # Ca-Cb
        (4, 5),  # Cb-SG
        (6, 7),  # N-Ca (next residue)
        (7, 8),  # Ca-C
        (8, 9),  # C-O
        (7, 10),  # Ca-Cb
        (10, 11),  # Cb-SG
        (5, 11),  # SG-SG (disulfide bond)
        (12, 0),  # C_prev_prox-N
        (2, 13),  # C-N_next_prox
        (14, 6),  # C_prev_dist-N_dist
        (8, 15),  # C-N_next_dist
    ]

    # Reduced connection table used when ``all_atoms`` is False: the C-O bonds
    # and the bonds to neighbouring residues are left out.
    BONDS_BACKBONE = [
        (0, 1),  # N-Ca
        (1, 2),  # Ca-C
        (1, 4),  # Ca-Cb
        (4, 5),  # Cb-SG
        (6, 7),  # N-Ca
        (7, 8),  # Ca-C
        (7, 10),  # Ca-Cb
        (10, 11),  # Cb-SG
        (5, 11),  # SG-SG
    ]

    # Index of the first BONDS entry that joins to a previous/next residue.
    _NEIGHBOR_BOND_START = 11

    def __init__(self, ss):
        """
        Initialize the DisulfideBondRenderer.

        Parameters
        ----------
        ss : Disulfide
            A Disulfide object containing atom coordinates and metadata.
            The Disulfide class must have the following attributes:
                - internal_coords_array (np.ndarray): Shape (16, 3)
                - modelled (bool)
                - missing_atoms (bool)
                - cofmass (np.ndarray): Shape (3,)

        Raises
        ------
        AttributeError
            If ``ss`` lacks any of the attributes listed above.
        """

        # Validate the input object up front so a wrong type fails here rather
        # than deep inside a render call.
        required_attributes = [
            "internal_coords_array",
            "modelled",
            "missing_atoms",
            "cofmass",
        ]
        for attr in required_attributes:
            if not hasattr(ss, attr):
                raise AttributeError(
                    f"The Disulfide object must have '{attr}' attribute."
                )

        # Copy the coordinates so rendering never touches the caller's array.
        self._internal_coords = ss.internal_coords_array.copy()
        self.modelled = ss.modelled
        self.missing_atoms = ss.missing_atoms
        self.cofmass = ss.cofmass

    def _draw_bonds(
        self,
        pvp,
        bradius=BOND_RADIUS,
        style="sb",
        bcolor=BOND_COLOR,
        missing=True,
        all_atoms=True,
        res=100,
        coords=None,
        spec=SPECULARITY,
        specpow=SPEC_POWER,
    ):
        """
        Generate the appropriate PyVista cylinder objects to represent
        a particular disulfide bond. This utilizes a connection table
        for the starting and ending atoms; each half of a split bond is
        colored by the atom it is attached to. Used internally.

        Parameters
        ----------
        pvp : pv.Plotter
            Input plotter object to be updated.
        bradius : float, optional
            Bond radius, by default BOND_RADIUS.
        style : str, optional
            Bond style. ``'plain'`` draws one cylinder per bond in ``bcolor``;
            any other value draws two half-length cylinders, one per atom
            color. ``'pd'`` is currently drawn the same way as ``'sb'``.
        bcolor : str, optional
            Bond color for simple bonds, by default BOND_COLOR. Also used as
            the fallback when an atom label has no entry in ATOM_COLORS.
        missing : bool, optional
            True if atoms are missing, False otherwise, by default True.
            When True (and ``all_atoms`` is True) the bonds to the previous
            and next residues are skipped.
        all_atoms : bool, optional
            True to use the full BONDS table, False to use BONDS_BACKBONE,
            by default True.
        res : int, optional
            Resolution for cylinders, by default 100. The end-cap spheres
            use ``res // 2``.
        coords : np.ndarray, optional
            Atom coordinates indexed like ATOMS. Defaults to the renderer's
            own (untranslated) copy of the coordinates.
        spec : float, optional
            Specularity passed to ``add_mesh``, by default SPECULARITY.
        specpow : int, optional
            Specular power passed to ``add_mesh``, by default SPEC_POWER.

        Returns
        -------
        pv.Plotter
            Updated Plotter object.
        """
        if coords is None:
            coords = self._internal_coords

        bond_table = self.BONDS if all_atoms else self.BONDS_BACKBONE
        shading = {
            "smooth_shading": True,
            "specular": spec,
            "specular_power": specpow,
        }
        cap_res = res // 2

        for i, (orig, dest) in enumerate(bond_table):
            # Only the full table contains neighbour-residue bonds.
            is_neighbor_bond = all_atoms and i >= self._NEIGHBOR_BOND_START

            # Skip bonds involving missing atoms if necessary
            if is_neighbor_bond and missing:
                continue

            # Bond colors follow the atom types at either end.
            orig_col = ATOM_COLORS.get(self.ATOMS[orig], bcolor)
            dest_col = ATOM_COLORS.get(self.ATOMS[dest], bcolor)

            prox_pos = coords[orig]
            distal_pos = coords[dest]

            direction = distal_pos - prox_pos
            length = math.dist(prox_pos, distal_pos)

            # Previous/next residue bonds are drawn thinner to distinguish them.
            current_bradius = bradius * 0.5 if is_neighbor_bond else bradius

            if style == "plain":
                # Single full-length cylinder centered on the bond midpoint.
                cyl = pv.Cylinder(
                    center=prox_pos + 0.5 * direction,
                    direction=direction,
                    radius=current_bradius,
                    height=length,
                    resolution=res,
                    capping=True,
                )
                pvp.add_mesh(cyl, color=bcolor, **shading)
            else:
                # Two half-length cylinders centered at 1/4 and 3/4 of the
                # bond, so each half takes the color of its own atom.
                halves = (
                    (prox_pos + 0.25 * direction, orig_col),
                    (prox_pos + 0.75 * direction, dest_col),
                )
                for center, color in halves:
                    half = pv.Cylinder(
                        center=center,
                        direction=direction,
                        radius=current_bradius,
                        height=length / 2.0,
                        resolution=res,
                        capping=False,
                    )
                    pvp.add_mesh(half, color=color, **shading)

            # Round off both bond ends with spheres of the bond radius.
            for center, color in ((prox_pos, orig_col), (distal_pos, dest_col)):
                cap = pv.Sphere(
                    center=center,
                    radius=current_bradius,
                    theta_resolution=cap_res,
                    phi_resolution=cap_res,
                )
                pvp.add_mesh(cap, color=color, **shading)

        return pvp  # end draw_bonds

    def _render_atoms(
        self,
        pvp: pv.Plotter,
        coords: np.ndarray,
        style: str,
        bs_scale: float,
        spec: float,
        specpow: int,
        res: int,
    ):
        """
        Render the atoms as spheres based on the selected style.

        Parameters
        ----------
        pvp : pv.Plotter
            PyVista Plotter object.
        coords : np.ndarray
            Coordinates of the atoms, indexed like ATOMS.
        style : str
            Rendering style. Only 'cpk', 'cov' and 'bs' draw atoms; any other
            value leaves the plotter untouched.
        bs_scale : float
            Scale factor for ball-and-stick.
        spec : float
            Specularity.
        specpow : int
            Specular power.
        res : int
            Resolution for spheres (``res // 2`` is used for both angles).

        Returns
        -------
        None
        """
        if style not in ("cpk", "cov", "bs"):
            return

        sphere_res = res // 2
        for i, atom in enumerate(self.ATOMS):
            if style == "cpk":
                rad = ATOM_RADII_CPK.get(atom, 0.5)
            elif style == "cov":
                rad = ATOM_RADII_COVALENT.get(atom, 0.5)
            else:  # "bs"
                rad = ATOM_RADII_CPK.get(atom, 0.5) * bs_scale
                # Atoms from the neighbouring residues are drawn smaller.
                if i > 11:
                    rad *= 0.75

            sphere = pv.Sphere(
                center=coords[i],
                radius=rad,
                theta_resolution=sphere_res,
                phi_resolution=sphere_res,
            )
            pvp.add_mesh(
                sphere,
                color=ATOM_COLORS.get(atom, "white"),
                smooth_shading=True,
                specular=spec,
                specular_power=specpow,
            )

    def _render(
        self,
        pvplot: pv.Plotter,
        style="bs",
        plain=False,
        bondcolor=BOND_COLOR,
        bs_scale=BS_SCALE,
        spec=SPECULARITY,
        specpow=SPEC_POWER,
        translate=True,
        bond_radius=BOND_RADIUS,
        res=100,
    ):
        """
        Update the passed PyVista Plotter object with the mesh data for the
        input Disulfide Bond. Used internally

        Parameters
        ----------
        pvplot : pv.Plotter
            PyVista Plotter object.

        style : str, optional
            Rendering style, by default 'bs'. One of 'bs', 'cpk', 'cov', 'sb', 'pd', 'plain'.
            - 'bs': Ball-and-Stick (atoms and bonds)
            - 'cpk': CPK (Corey-Pauling-Koltun) (atoms only)
            - 'cov': Covalent (atoms only)
            - 'sb': Split Bonds (bonds only)
            - 'pd': Proximal-Distal (bonds only; currently drawn like 'sb')
            - 'plain': Plain Stick (bonds only)
        plain : bool, optional
            Accepted for interface compatibility; not used by this method.
        bondcolor : str, optional
            Bond color for simple bonds, by default BOND_COLOR.
        bs_scale : float, optional
            Scale factor (0-1) to reduce the atom sizes for ball and stick, by default BS_SCALE
        spec : float, optional
            Specularity (0-1), where 1 is totally smooth and 0 is rough, by default SPECULARITY
        specpow : int, optional
            Exponent used for specularity calculations, by default SPEC_POWER
        translate : bool, optional
            If True, subtract the stored center of mass from every coordinate
            before drawing, by default True.
        bond_radius : float, optional
            Bond radius, by default BOND_RADIUS
        res : int, optional
            Resolution for spheres and cylinders, by default 100.

        Returns
        -------
        pv.Plotter
            Updated PyVista Plotter object with atoms and bonds.
        """
        coords = self._internal_coords.copy()
        if translate:
            coords = coords - self.cofmass

        # Modelled disulfides are drawn with the reduced bond table.
        all_atoms = not self.modelled
        pvp = pvplot

        # Render atoms based on the selected style
        if style in ["cpk", "cov", "bs"]:
            self._render_atoms(pvp, coords, style, bs_scale, spec, specpow, res)

        # Render bonds based on the selected style, using the same (possibly
        # translated) coordinates and shading settings as the atoms.
        if style in ["bs", "sb", "pd", "plain"]:
            pvp = self._draw_bonds(
                pvp,
                bradius=bond_radius,
                style=style,
                bcolor=bondcolor,
                missing=self.missing_atoms,
                all_atoms=all_atoms,
                res=res,
                coords=coords,
                spec=spec,
                specpow=specpow,
            )

        return pvp  # end _render

    def display(
        self,
        WINSIZE=(800, 600),
        background_color="white",
        style="bs",
        plain=False,
        bondcolor=BOND_COLOR,
        bs_scale=BS_SCALE,
        spec=SPECULARITY,
        specpow=SPEC_POWER,
        translate=True,
        bond_radius=BOND_RADIUS,
        res=100,
    ):
        """
        Create a PyVista Plotter with specified window size, render the disulfide bond,
        and display the visualization.

        Parameters
        ----------
        WINSIZE : tuple, optional
            Window size as (width, height), by default (800, 600).
        background_color : str or list, optional
            Background color of the plotter, by default 'white'.
        style : str, optional
            Rendering style, by default "bs". One of 'bs', 'cpk', 'cov', 'sb', 'pd', 'plain'.
            - 'bs': Ball-and-Stick
            - 'cpk': CPK (Corey-Pauling-Koltun)
            - 'cov': Covalent
            - 'sb': Split Bonds
            - 'pd': Proximal-Distal (currently drawn like 'sb')
            - 'plain': Plain Stick
        plain : bool, optional
            Forwarded to ``_render``, which does not use it; by default False.
        bondcolor : str, optional
            Bond color for simple bonds, by default BOND_COLOR.
        bs_scale : float, optional
            Scale factor (0-1) to reduce the atom sizes for ball and stick, by default BS_SCALE.
        spec : float, optional
            Specularity (0-1), where 1 is totally smooth and 0 is rough, by default SPECULARITY.
        specpow : int, optional
            Exponent used for specularity calculations, by default SPEC_POWER.
        translate : bool, optional
            If True, the disulfide is shifted by its stored center of mass
            before drawing, by default True.
        bond_radius : float, optional
            Bond radius, by default BOND_RADIUS.
        res : int, optional
            Resolution for spheres and cylinders, by default 100.

        Returns
        -------
        None
            Displays the plot using PyVista's show().
        """
        # Initialize the PyVista Plotter with specified window size
        plotter = pv.Plotter(window_size=WINSIZE)
        plotter.set_background(background_color)

        # Render the disulfide bond
        plotter = self._render(
            pvplot=plotter,
            style=style,
            plain=plain,
            bondcolor=bondcolor,
            bs_scale=bs_scale,
            spec=spec,
            specpow=specpow,
            translate=translate,
            bond_radius=bond_radius,
            res=res,
        )

        # No explicit camera position is set; PyVista's default view is used.
        plotter.show()

In [None]:
# Take the first entry of the loaded database as a sample and show its type.
ss1 = pdb[0]
type(ss1)

In [15]:
# Build a renderer around the sample held in `ss1`.
renderer = DisulfideBondRenderer(ss1)

In [None]:
# Show the sample with the 'sb' style in a 600x600 window on a white background.
renderer.display(style="sb", WINSIZE=(600, 600), background_color="white")

In [3]:
import timeit
import numpy as np
import matplotlib.pyplot as plt
from proteusPy import Disulfide


def add_to_list(n, pdb_sslist):
    """
    Benchmark body: collect the first ``n`` entries of ``pdb_sslist`` in a
    Python list, wrap them in a ``DisulfideList`` named "tmp" and read its
    ``average_conformation``. Nothing is returned; only the elapsed time of
    the call is of interest.

    :param n: Number of leading entries to copy.
    :param pdb_sslist: Indexable collection the entries are read from.
    """
    first_n = [pdb_sslist[idx] for idx in range(n)]
    wrapped = DisulfideList(first_n, "tmp")
    avg = wrapped.average_conformation


def add_to_ndarray(n, pdb_sslist):
    """
    Benchmark body: copy the first ``n`` entries of ``pdb_sslist`` into a
    pre-allocated object ndarray, wrap it in a ``DisulfideList`` named "tmp"
    and read its ``average_conformation``. Nothing is returned; only the
    elapsed time of the call is of interest.

    :param n: Number of leading entries to copy.
    :param pdb_sslist: Indexable collection the entries are read from.
    """
    slots = np.empty(n, dtype=object)
    idx = 0
    while idx < n:
        slots[idx] = pdb_sslist[idx]
        idx += 1
    wrapped = DisulfideList(slots, "tmp")
    avg = wrapped.average_conformation


def profile_sslist(sslist, num_objects=None, number=10):
    """
    Time ``add_to_list`` against ``add_to_ndarray`` for a range of sizes and
    plot both timing curves.

    All list timings are taken first, then all ndarray timings, so the two
    series are measured in separate passes.

    :param sslist: Indexable collection of disulfides; it must hold at least
        ``max(num_objects)`` entries because both helpers index ``0..n-1``.
    :param num_objects: Iterable of sizes to benchmark. Defaults to the ten
        sizes from 1,000 to 80,000 used originally.
    :param number: Repetitions passed to ``timeit.timeit`` for every size,
        defaults to 10. Each plotted value is the total over all repetitions.
    :return: None. The plot is shown with ``plt.show()``.
    """
    if num_objects is None:
        num_objects = [
            1000,
            5000,
            10000,
            20000,
            30000,
            40000,
            50000,
            60000,
            70000,
            80000,
        ]
    sizes = list(num_objects)

    # Measure time for adding to list
    list_times = [
        timeit.timeit(lambda: add_to_list(size, sslist), number=number)
        for size in sizes
    ]

    # Measure time for adding to ndarray
    ndarray_times = [
        timeit.timeit(lambda: add_to_ndarray(size, sslist), number=number)
        for size in sizes
    ]

    # Plot the results
    plt.plot(sizes, list_times, label="DisulfideList (list)")
    plt.plot(sizes, ndarray_times, label="ndarray")
    plt.xlabel("Number of Disulfide Objects")
    plt.ylabel("Time (seconds)")
    plt.title("Comparison of Adding Disulfide Objects to List vs. ndarray")
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
# Run the list-vs-ndarray timing comparison on the loader's SSList.
profile_sslist(pdb.SSList)

In [32]:
# NOTE: rebinds `ss1` (set to the single entry pdb[0] in an earlier cell) to the
# loader's whole SSList.
ss1 = pdb.SSList

In [None]:
# Pull the torsion table from the list and show its summary statistics
# (presumably a pandas DataFrame, given the .describe() call -- confirm).
tors_df2 = ss1.torsion_df
tors_df2.describe()

In [None]:
# Rebind `pdb` to a DisulfideLoader constructed directly with subset=True.
pdb = DisulfideLoader(verbose=True, subset=True, cutoff=8.0)

In [None]:
# Rebind `pdb` again, this time through Load_PDB_SS with subset=False.
pdb = Load_PDB_SS(verbose=True, subset=False, cutoff=8.0)

In [None]:
# Display the first entry of whatever `pdb` currently refers to.
pdb[0]

In [2]:
import proteusPy
import pickle
from proteusPy.logger_config import get_logger

# Logger named "bootstrap".
# NOTE(review): NBootstrap_PDB_SS below reports via print() and does not use it.
_logger = get_logger("bootstrap")


def NBootstrap_PDB_SS(
    loadpath=DATA_DIR, cutoff=8.0, verbose=False, subset=False, force=False
):
    """
    Download the disulfide database if needed, then build and save a loader.

    The file ``SS_PICKLE_FILE`` is downloaded from ``SS_LIST_URL`` into
    ``loadpath`` when it is not already there or when ``force`` is set. A
    ``DisulfideLoader`` is then created from ``loadpath``, saved back to
    ``loadpath`` and returned.

    :param loadpath: Directory the file is downloaded to and the loader reads
        from and saves to, defaults to DATA_DIR
    :type loadpath: str
    :param cutoff: Cutoff value for disulfide loading, defaults to 8.0
    :type cutoff: float
    :param verbose: Flag to enable progress messages, defaults to False
    :type verbose: bool
    :param subset: Flag to indicate whether to load a subset of the data, defaults to False
    :type subset: bool
    :param force: Flag to force download even if the file exists, defaults to False
    :type force: bool
    :return: An instance of DisulfideLoader initialized with the loaded data
    :rtype: DisulfideLoader

    No exceptions are caught here; anything raised by ``gdown`` or
    ``DisulfideLoader`` propagates to the caller.
    """
    # Imported lazily so the notebook only needs gdown when this is called.
    import gdown

    pickle_path = os.path.join(loadpath, SS_PICKLE_FILE)
    if verbose:
        print(pickle_path)

    if force or not os.path.exists(pickle_path):
        if verbose:
            print("Downloading Disulfide Database from Drive...")
        gdown.download(SS_LIST_URL, str(pickle_path), quiet=False)

    # Read from and save to the same directory the file was downloaded into,
    # so a non-default loadpath is honored throughout.
    loader = DisulfideLoader(
        datadir=loadpath, subset=subset, verbose=verbose, cutoff=cutoff
    )
    loader.save(savepath=loadpath, subset=subset, cutoff=cutoff)
    return loader

In [None]:
# Bootstrap the subset database (subset=True) without forcing a re-download.
loader = NBootstrap_PDB_SS(loadpath=DATA_DIR, verbose=True, subset=True, force=False)

In [None]:
# Same bootstrap with subset=False; the result replaces `pdb`.
pdb = NBootstrap_PDB_SS(loadpath=DATA_DIR, verbose=True, subset=False, force=False)

In [None]:
# Load with subset=False into `PDB_SS` and call its describe() method.
PDB_SS = Load_PDB_SS(verbose=True, subset=False)
PDB_SS.describe()

In [None]:
# Display the first entry of PDB_SS.
PDB_SS[0]

In [None]:
# NOTE(review): same call as the earlier PDB_SS load cell, with the keyword
# arguments in a different order.
PDB_SS = Load_PDB_SS(subset=False, verbose=True)
PDB_SS.describe()

In [None]:
import os
from pathlib import Path
import importlib.resources as pkg_resources

# Determine the base directory of the installed proteusPy package through
# importlib.resources, wrap it in a Path and display it.
this_dir = Path(pkg_resources.files("proteusPy"))
this_dir

In [None]:
# Rebind `ss1` to the first entry of PDB_SS and display its `rho` attribute.
ss1 = PDB_SS[0]
ss1.rho

In [5]:
from proteusPy import (
    torsion_to_sixclass,
    torsion_to_eightclass,
    torsion_to_class_string,
)

# Two sample five-value torsion lists; rhs is lhs with every sign flipped.
lhs = [-60, -60, -90, -60, -60]
rhs = [60, 60, 90, 60, 60]
# Classify lhs with the six-class and the eight-class functions.
six = torsion_to_sixclass(lhs)
eight = torsion_to_eightclass(lhs)

In [None]:
# Class string for lhs with base=6, then display it.
cls = torsion_to_class_string(lhs, base=6)
cls

In [None]:
# Display the six-class result computed for lhs.
six

In [None]:
# Display the eight-class result computed for lhs.
eight

In [9]:
# Repeat both classifications for the sign-flipped list rhs.
sixr = torsion_to_sixclass(rhs)
eightr = torsion_to_eightclass(rhs)

In [None]:
# Display the six-class result computed for rhs.
sixr

In [None]:
# Display the eight-class result computed for rhs.
eightr

In [None]:
# Class string for lhs, passing 6 as the second positional argument.
torsion_to_class_string(lhs, 6)

In [None]:
# Class string for lhs, passing 8 as the second positional argument.
torsion_to_class_string(lhs, 8)

In [None]:
def plot_disulfide_secondary_structures(pdb_ss):
    """
    Bar-plot how often each secondary structure label occurs at either end of
    the disulfides in ``pdb_ss.SSList``.

    Every disulfide contributes up to two counts: one for its
    ``proximal_secondary`` label and one for its ``distal_secondary`` label.
    Labels are matched exactly (case-sensitive) against "helix", "sheet",
    "turn" and "nosecondary"; any other label is ignored.

    :param pdb_ss: The PDB_SS object containing SSList with disulfide bonds.
    :type pdb_ss: object
    """
    # Insertion order fixes the left-to-right order of the bars.
    tally = {"helix": 0, "sheet": 0, "turn": 0, "nosecondary": 0}

    for bond in pdb_ss.SSList:
        for label in (bond.proximal_secondary, bond.distal_secondary):
            if label in tally:
                tally[label] += 1

    bar_labels = ["Helix", "Sheet", "Turn", "No Secondary"]
    bar_heights = list(tally.values())

    plt.figure(figsize=(10, 6))
    plt.bar(bar_labels, bar_heights, color=["blue", "green", "red", "gray"])
    plt.xlabel("Secondary Structure")
    plt.ylabel("Count of Disulfide Bonds")
    plt.title("Disulfide Bonds Segmented by Secondary Structure")
    plt.show()


import matplotlib.pyplot as plt


def plot_disulfide_secondary_structure_cooccurrence(pdb_ss):
    """
    Bar-plot how often each (proximal, distal) secondary structure pair
    occurs among the disulfides in ``pdb_ss.SSList``.

    A disulfide is left out when either end is labelled "nosecondary"; of the
    remainder, it is left out when either end is labelled "turn". The number
    dropped for each reason and the number kept are printed before plotting.

    :param pdb_ss: The PDB_SS object containing SSList with disulfide bonds.
    :type pdb_ss: object
    """
    pair_counts = {}
    n_unstructured = 0
    n_turn = 0
    n_used = 0

    for bond in pdb_ss.SSList:
        pair = (bond.proximal_secondary, bond.distal_secondary)

        # The "nosecondary" test comes first, so a nosecondary/turn pair is
        # counted as unstructured rather than as a turn.
        if "nosecondary" in pair:
            n_unstructured += 1
        elif "turn" in pair:
            n_turn += 1
        else:
            pair_counts[pair] = pair_counts.get(pair, 0) + 1
            n_used += 1

    print(
        f"Skipped {n_unstructured} disulfide bonds with no secondary structure and {n_turn} turn secondary structure.\n"
        f"Disulfides used: {n_used}"
    )

    # Bars appear in the order each pair was first seen.
    bar_labels = [f"{prox}-{dist}" for prox, dist in pair_counts]
    bar_heights = list(pair_counts.values())

    plt.figure(figsize=(12, 8))
    plt.bar(bar_labels, bar_heights, color="skyblue")
    plt.xlabel("Proximal-Distal Secondary Structure")
    plt.ylabel("Count of Disulfide Bonds")
    plt.title("Co-occurrence of Secondary Structures by Proximal-Distal Type")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def plot_disulfide_secondary_structure_and_rho(pdb_ss):
    """
    Plot a graph of disulfide secondary structure co-occurrence by proximal-distal type
    and the disulfide parameter rho with error bars.

    Two figures are shown: a bar chart of the number of disulfides per
    (proximal, distal) pairing, and the mean of ``ss.rho`` per pairing with its
    standard deviation as error bars. Labels are stripped and lower-cased before
    comparison. Disulfides with an unassigned or ``"turn"`` end are excluded from
    both figures.

    :param pdb_ss: The PDB_SS object containing SSList with disulfide bonds. Each
        item must expose ``proximal_secondary``, ``distal_secondary`` and ``rho``.
    :type pdb_ss: object
    """
    # Both spellings are accepted: the co-occurrence plot elsewhere in this
    # notebook tests "nosecondary" while this function used "no_secondary".
    # NOTE(review): confirm which spelling the loader actually emits.
    unassigned_labels = {"nosecondary", "no_secondary"}

    # One list of rho values per pairing; its length doubles as the count.
    rho_values = {}

    for ss in pdb_ss.SSList:
        proximal_secondary = ss.proximal_secondary.strip().lower()
        distal_secondary = ss.distal_secondary.strip().lower()

        if (
            proximal_secondary in unassigned_labels
            or distal_secondary in unassigned_labels
        ):
            continue

        if proximal_secondary == "turn" or distal_secondary == "turn":
            continue

        # Always collect rho. The previous code stored rho for the first bond of
        # a pairing but ca_distance for every later one, mixing two quantities
        # in the statistics plotted below.
        rho_values.setdefault((proximal_secondary, distal_secondary), []).append(
            ss.rho
        )

    # Data for plotting (all lists share the insertion order of rho_values)
    labels = [f"{prox}-{dist}" for prox, dist in rho_values]
    counts = [len(values) for values in rho_values.values()]
    rho_means = [np.mean(values) for values in rho_values.values()]
    rho_stds = [np.std(values) for values in rho_values.values()]

    # Plotting the secondary structure co-occurrence
    plt.figure(figsize=(12, 8))
    plt.bar(labels, counts, color="skyblue")
    plt.xlabel("Proximal-Distal Secondary Structure")
    plt.ylabel("Count of Disulfide Bonds")
    plt.title("Co-occurrence of Secondary Structures by Proximal-Distal Type")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()

    # Plotting the rho values with error bars
    plt.figure(figsize=(12, 8))
    plt.errorbar(
        labels,
        rho_means,
        yerr=rho_stds,
        fmt="o",
        color="red",
        ecolor="black",
        capsize=5,
    )
    plt.xlabel("Proximal-Distal Secondary Structure")
    plt.ylabel("Mean Rho Value")
    plt.title("Mean Rho Value by Proximal-Distal Secondary Structure with Error Bars")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()

In [None]:
# Show the pairing-count bar chart and the per-pairing rho plot for PDB_SS.
plot_disulfide_secondary_structure_and_rho(PDB_SS)

In [None]:
# Run plot_disulfide_secondary_structures (presumably defined in an earlier cell) on PDB_SS.
plot_disulfide_secondary_structures(PDB_SS)

In [None]:
# Print the skip summary and show the proximal-distal co-occurrence bar chart for PDB_SS.
plot_disulfide_secondary_structure_cooccurrence(PDB_SS)

In [None]:
from proteusPy import extract_ssbonds_and_atoms
from pathlib import Path
import os

# Run extract_ssbonds_and_atoms on the single file pdb5rsa.ent under MODEL_DIR.
# The first return value is indexed by string keys ("helices", "sheets", ...)
# in the cells that follow.
verbose = True
structure_fname = str(MODEL_DIR / "pdb5rsa.ent")
ssbond_atom_list, num_ssbonds, errors = extract_ssbonds_and_atoms(
    structure_fname, verbose=verbose
)

In [None]:
# Inspect the "helices" entry of the extraction result.
ssbond_atom_list["helices"]

In [None]:
# Inspect the "sheets" entry of the extraction result.
ssbond_atom_list["sheets"]

In [None]:
# Inspect the "turns" entry of the extraction result.
ssbond_atom_list["turns"]

In [None]:
# Inspect the "ssbonds" entry of the extraction result.
ssbond_atom_list["ssbonds"]

In [None]:
# Inspect the "pairs" entry of the extraction result.
ssbond_atom_list["pairs"]

In [None]:
# NOTE(review): same expression as the previous cell; this cell looks redundant.
ssbond_atom_list["pairs"]

In [None]:
# NOTE(review): df is an empty DataFrame at this point, so the plot call below
# receives no class data — presumably a placeholder; confirm the intended input.
df = pd.DataFrame()
# save=True with savedir=BINARY; base=2 is passed for the binary classes.
fig = PDB_SS.plot_count_vs_class_df(
    df,
    title="Binary",
    save=True,
    savedir=BINARY,
    verbose=True,
    base=2,
)
fig.show()

In [None]:
# Eight-class incidence plots, light theme, saved under the OCTANT directory.
PDB_SS.plot_binary_to_eightclass_incidence(
    theme="light", save=True, verbose=True, savedir=OCTANT
)

In [None]:
# Six-class incidence plots, light theme, saved under the SEXTANT directory.
PDB_SS.plot_binary_to_sixclass_incidence(
    theme="light", save=True, verbose=True, savedir=SEXTANT
)

In [None]:
# Look up the members of class ID "55555" with base=6 and display the result.
clslist6 = PDB_SS.tclass.sslist_from_classid("55555", base=6)
clslist6

In [None]:
# Look up the members of class ID "77778" with base=8 and display the result.
clslist8 = PDB_SS.tclass.sslist_from_classid("77778", base=8)
clslist8

In [None]:
# Short aliases for the three class tables held by PDB_SS.tclass
# (numeric suffix mirrors the attribute: classdf, sixclass_df, eightclass_df).
df2 = PDB_SS.tclass.classdf
df6 = PDB_SS.tclass.sixclass_df
df8 = PDB_SS.tclass.eightclass_df

In [None]:
# First 32 rows of the classdf table.
df2.head(32)

In [None]:
# Display the eightclass_df table.
df8

In [None]:
# Display the sixclass_df table.
df6

In [None]:
# The annotation is quoted so that defining this function does not require
# DisulfideLoader to be imported in the kernel first.
def enumerate_class_fromlist(loader: "DisulfideLoader", sslist, base=None):
    """
    Count the disulfides that belong to each class ID in ``sslist``.

    :param loader: Object whose ``tclass.sslist_from_classid`` resolves a class
        ID to its member disulfides.
    :type loader: DisulfideLoader
    :param sslist: Iterable of class ID strings (despite the name, not
        disulfides); ``None`` entries are ignored.
    :param base: Optional class base forwarded to ``sslist_from_classid`` as
        ``base=``. With the default ``None`` the lookup is called without a
        ``base`` argument, exactly as before.
    :return: DataFrame with columns ``class_id`` and ``count``, one row for every
        class ID whose lookup returned something other than ``None``.
    :rtype: pd.DataFrame
    """
    import pandas as pd

    # Only pass ``base`` when the caller asked for one, so existing calls keep
    # relying on the lookup's own default.
    lookup_kwargs = {} if base is None else {"base": base}

    class_ids = []
    counts = []

    for cls in sslist:
        if cls is None:
            continue
        members = loader.tclass.sslist_from_classid(cls, **lookup_kwargs)
        # A None result is skipped; an empty (zero-length) result is kept
        # with a count of 0.
        if members is not None:
            class_ids.append(cls)
            counts.append(len(members))

    sslist_df = pd.DataFrame(columns=["class_id", "count"])
    sslist_df["class_id"] = class_ids
    sslist_df["count"] = counts
    return sslist_df

In [None]:
# Expand binary class "00000" into its six-class IDs and count members per ID.
# NOTE(review): enumerate_class_fromlist calls sslist_from_classid without a
# base argument — confirm the default base is right for six-class IDs.
cls = "00000"
sixcls = PDB_SS.tclass.binary_to_six_class(cls)
df = enumerate_class_fromlist(PDB_SS, sixcls)
# Assuming 'count' is the column by which we want to sort
sorted_df = df.sort_values(by="count", ascending=False)

# Display the sorted DataFrame
print(sorted_df)

In [None]:
# Expand binary class "00000" into its eight-class IDs and count members per ID.
# This rebinds the globals cls, df and sorted_df set by the previous cell.
cls = "00000"
eightcls = PDB_SS.tclass.binary_to_eight_class(cls)
# eightcls

df = enumerate_class_fromlist(PDB_SS, eightcls)
# Assuming 'count' is the column by which we want to sort
sorted_df = df.sort_values(by="count", ascending=False)

# Display the sorted DataFrame
print(sorted_df)

In [None]:
# Same eight-class incidence plot as earlier, here without the save/savedir arguments.
PDB_SS.plot_binary_to_eightclass_incidence(theme="light")

In [None]:
# Uses the globals df and cls left behind by the class-count cells above;
# cls is passed positionally in the slot the earlier call filled with title=.
PDB_SS.plot_count_vs_class_df(df, cls, theme="light")

In [None]:
# Every other incidence-plot call in this notebook selects the look with
# `theme=`; the stale `light=False` flag is expressed as the dark theme.
# NOTE(review): confirm "dark" is an accepted theme name for this method.
PDB_SS.plot_binary_to_sixclass_incidence(theme="dark")

In [None]:
eight = PDB_SS.tclass.eightclass_df
# NOTE(review): if eight is a pandas DataFrame, head(-10) returns every row
# except the last 10, not the first 10 — confirm that is what was wanted.
eight.head(-10)

In [None]:
# Extract the members of class "87784" and display them.
sslist2 = PDB_SS.extract_class("87784")
sslist2

In [None]:
# NOTE(review): the meaning of the two positional arguments (0.1, 20) is not
# visible in this notebook — check plot_classes_vs_cutoff's signature.
PDB_SS.plot_classes_vs_cutoff(0.1, 20)

In [None]:
# SSDict is iterated elsewhere in this notebook as PDB ID -> indices into SSList.
# NOTE(review): echoing the whole mapping may produce very large output.
ssdict = PDB_SS.SSDict
ssdict

In [None]:
def build_ss_from_idlist(loader, idlist):
    """
    Collect every Disulfide that belongs to one of the given PDB IDs.

    :param loader: Object providing ``SSDict`` (PDB ID -> indices) and ``SSList``
        (indexable by those indices).
    :param idlist: List of PDBIDs, e.g. ['4yys', '2q7q']
    :return: DisulfideList named "RCSB_list"; entries follow the iteration order
        of ``loader.SSDict``, not the order of ``idlist``.
    """
    selected = DisulfideList([], "RCSB_list")

    for pdb_id, indices in loader.SSDict.items():
        if pdb_id not in idlist:
            continue
        # Each stored index points at one disulfide in the flat SSList.
        for position in indices:
            selected.append(loader.SSList[position])

    return selected

In [None]:
# Gather the disulfides of entries 4yys and 2q7q and display them.
sslist = build_ss_from_idlist(PDB_SS, ["4yys", "2q7q"])
sslist

In [None]:
# Named ss_slice rather than `slice` so the builtin slice type is not shadowed
# for the rest of the kernel session.
ss_slice = PDB_SS[10:20]
ss_slice.pdb_id

In [None]:
# Rebinds the global sslist (previously the two-entry selection) to the full SSList.
sslist = PDB_SS.SSList
slice2 = sslist[100:200]
slice2.pdb_id

In [None]:
# Build a Disulfide from five torsion values and pretty-print it.
tors = [-60, -60, 90, -60, -60]
ss1 = Disulfide(torsions=tors)
ss1.pprint_all()

In [None]:
# Rebinds ss1 to a disulfide looked up by name in PDB_SS.
ss1 = PDB_SS["2q7q_75D_140D"]
ss1.quiet = False
# Only the last expression of a cell is echoed by the notebook, so the
# bond_length_ideality value on the next line is evaluated but not displayed.
ss1.bond_length_ideality
ss1.bond_angle_ideality

In [None]:
# Build a model Disulfide from ss1's dihedrals and show its bond_length_ideality.
dihed = ss1.dihedrals
modelled_min = Disulfide("model", quiet=False)
modelled_min.dihedrals = dihed
modelled_min.build_yourself()
modelled_min.bond_length_ideality

In [None]:
# investigate the distal N->Ca distance. 8/15/24 -egs-

# Show bond_angle_ideality for the model built in the previous cell.
modelled_min.bond_angle_ideality

In [None]:
# Bind sslist to the full SSList again and show how many disulfides it holds.
sslist = PDB_SS.SSList
len(sslist)

In [None]:
import pandas as pd
from tqdm import tqdm


def create_disulfide_dataframe(disulfide_list):
    """
    Tabulate per-disulfide quality metrics.

    The result has one row per disulfide and the columns PDB_ID, SS_Name,
    Angle_Deviation, Distance_Deviation and Ca_Distance, read from the
    ``pdb_id``, ``name``, ``bond_angle_ideality``, ``bond_length_ideality`` and
    ``ca_distance`` attributes respectively. A tqdm progress bar is shown while
    the list is traversed.

    :param disulfide_list: List of disulfide objects.
    :type disulfide_list: proteusPy.DisulfideList
    :return: DataFrame containing the disulfide information.
    :rtype: pd.DataFrame
    """
    column_names = (
        "PDB_ID",
        "SS_Name",
        "Angle_Deviation",
        "Distance_Deviation",
        "Ca_Distance",
    )

    rows = []
    for bond in tqdm(disulfide_list, desc="Processing Disulfides"):
        pdb_id, ca_distance = bond.pdb_id, bond.ca_distance
        angle_deviation = bond.bond_angle_ideality
        distance_deviation = bond.bond_length_ideality
        # Tuple layout follows column_names.
        rows.append(
            (pdb_id, bond.name, angle_deviation, distance_deviation, ca_distance)
        )

    # Pivot the row tuples into one list per column, keeping the column order.
    columns = {
        name: [row[position] for row in rows]
        for position, name in enumerate(column_names)
    }
    return pd.DataFrame(columns)


# Example usage
# Assuming you have a list of disulfide objects called disulfide_list
# df = create_disulfide_dataframe(disulfide_list)
# print(df)

In [None]:
# Build the deviation table for whatever sslist currently holds (the full SSList after the cell above).
dev_df = create_disulfide_dataframe(sslist)

In [None]:
# Summary statistics of the deviation table.
dev_df.describe()

In [None]:
# First rows of the deviation table.
dev_df.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


def plot_histograms(df):
    """
    Show three side-by-side histograms (with KDE overlay, 30 bins each) of the
    ``Distance_Deviation``, ``Angle_Deviation`` and ``Ca_Distance`` columns.

    :param df: DataFrame containing the disulfide information.
    :type df: pd.DataFrame
    """
    # (column name, human-readable label) for each panel, left to right
    panels = (
        ("Distance_Deviation", "Distance Deviation"),
        ("Angle_Deviation", "Angle Deviation"),
        ("Ca_Distance", "Ca Distance"),
    )

    plt.figure(figsize=(21, 6))

    for position, (column, label) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        sns.histplot(df[column], kde=True, bins=30)
        plt.title(f"{label} Distribution")
        plt.xlabel(label)
        plt.ylabel("Frequency")

    plt.tight_layout()
    plt.show()

In [None]:
# plot_histograms(dev_df)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


def highlight_worst_structures(df, top_n=10):
    """
    Scatter distance deviation against angle deviation, mark and name the worst
    disulfides, and show how many of them each PDB entry contributes.

    The "worst" set is the union of the ``top_n`` rows with the largest
    ``Distance_Deviation`` and the ``top_n`` rows with the largest
    ``Angle_Deviation`` (duplicate rows removed). The left panel holds the
    scatter plot with the worst points annotated by ``SS_Name``; the right panel
    is a bar chart of worst-set size per ``PDB_ID``.

    :param df: DataFrame containing the disulfide information.
    :type df: pd.DataFrame
    :param top_n: Number of worst structures to highlight.
    :type top_n: int
    """
    x_col, y_col = "Distance_Deviation", "Angle_Deviation"

    # Union of the top_n offenders for each metric.
    worst = pd.concat(
        [df.nlargest(top_n, metric) for metric in (x_col, y_col)]
    ).drop_duplicates()
    worst_per_pdb = worst.groupby("PDB_ID").size().reset_index(name="Count")

    _, (scatter_ax, bar_ax) = plt.subplots(1, 2, figsize=(20, 6))

    # Left panel: everything, then the worst set drawn on top.
    sns.scatterplot(x=x_col, y=y_col, data=df, label="All Structures", ax=scatter_ax)
    sns.scatterplot(
        x=x_col,
        y=y_col,
        data=worst,
        color="red",
        label="Worst Structures",
        marker="X",
        s=100,
        ax=scatter_ax,
    )

    for ss_name, x_value, y_value in zip(worst["SS_Name"], worst[x_col], worst[y_col]):
        scatter_ax.annotate(
            ss_name,
            (x_value, y_value),
            textcoords="offset points",
            xytext=(5, 5),
            ha="right",
        )

    scatter_ax.set_title("Distance Deviation vs. Angle Deviation")
    scatter_ax.set_xlabel("Distance Deviation")
    scatter_ax.set_ylabel("Angle Deviation")
    scatter_ax.legend()

    # Right panel: number of worst-set disulfides per PDB entry.
    sns.barplot(x="PDB_ID", y="Count", data=worst_per_pdb, ax=bar_ax)
    bar_ax.set_title("Worst Structures Aggregated by PDB_ID")
    bar_ax.set_xlabel("PDB_ID")
    bar_ax.set_ylabel("Count")
    bar_ax.tick_params(axis="x", rotation=90)

    plt.tight_layout()
    plt.show()


# Example usage
# Assuming you have a DataFrame named dev_df
# highlight_worst_structures(dev_df, top_n=10)

In [None]:
# Scatter plot plus per-PDB bar chart for the 10 worst disulfides per metric.
highlight_worst_structures(dev_df, top_n=10)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


def highlight_worst_structures2(df, top_n=10):
    """
    Show the distance- and angle-deviation distributions next to a bar chart of
    which PDB entries contribute the worst disulfides for each metric.

    Panels, left to right:

    1. Histogram (with KDE) of ``Distance_Deviation``.
    2. Histogram (with KDE) of ``Angle_Deviation``.
    3. For the ``top_n`` rows with the largest distance deviation and the
       ``top_n`` rows with the largest angle deviation, the number of rows per
       ``PDB_ID``, drawn as grouped bars (one colour per metric).

    :param df: DataFrame containing the disulfide information. Must provide the
        columns ``PDB_ID``, ``Distance_Deviation`` and ``Angle_Deviation``.
    :type df: pd.DataFrame
    :param top_n: Number of worst structures to highlight.
    :type top_n: int
    """
    distance_label = "Distance Deviation"
    angle_label = "Angle Deviation"

    worst_distance = df.nlargest(top_n, "Distance_Deviation")
    worst_angle = df.nlargest(top_n, "Angle_Deviation")

    # One long-form table for both metrics. Plotting it in a single barplot call
    # gives both bar groups the same PDB_ID axis; two separate barplot calls on
    # one axes each lay out their own category set, so bars and tick labels do
    # not reliably line up.
    worst_counts = (
        pd.concat(
            [
                worst_distance.assign(Deviation=distance_label),
                worst_angle.assign(Deviation=angle_label),
            ]
        )
        .groupby(["PDB_ID", "Deviation"])
        .size()
        .reset_index(name="Count")
    )

    fig, axes = plt.subplots(1, 3, figsize=(24, 6))

    # Histogram for distance deviation
    sns.histplot(df["Distance_Deviation"], kde=True, bins=30, ax=axes[0])
    axes[0].set_title("Distance Deviation Distribution")
    axes[0].set_xlabel("Distance Deviation")
    axes[0].set_ylabel("Frequency")

    # Histogram for angle deviation
    sns.histplot(df["Angle_Deviation"], kde=True, bins=30, ax=axes[1])
    axes[1].set_title("Angle Deviation Distribution")
    axes[1].set_xlabel("Angle Deviation")
    axes[1].set_ylabel("Frequency")

    # Grouped bars: worst structures per PDB_ID, split by metric
    sns.barplot(
        x="PDB_ID",
        y="Count",
        hue="Deviation",
        hue_order=[distance_label, angle_label],
        palette={distance_label: "blue", angle_label: "green"},
        data=worst_counts,
        ax=axes[2],
    )
    axes[2].set_title("Worst Structures Aggregated by PDB_ID")
    axes[2].set_xlabel("PDB_ID")
    axes[2].set_ylabel("Count")
    axes[2].tick_params(axis="x", rotation=90)
    axes[2].legend()

    plt.tight_layout()
    plt.show()


# Example usage
# Assuming you have a DataFrame named dev_df
# highlight_worst_structures2(dev_df, top_n=10)

In [None]:
# Histograms plus per-PDB bar chart for the 10 worst disulfides per metric.
highlight_worst_structures2(dev_df, top_n=10)

In [None]:
import pandas as pd
import numpy as np


def calculate_std_cutoff(df, column, num_std=2):
    """
    Return the threshold ``mean + num_std * std`` for one column.

    :param df: DataFrame containing the deviations.
    :type df: pd.DataFrame
    :param column: Column name for which to calculate the cutoff.
    :type column: str
    :param num_std: Number of standard deviations to add to the mean.
    :type num_std: int
    :return: Cutoff value.
    :rtype: float
    """
    values = df[column]
    return values.mean() + num_std * values.std()


def calculate_percentile_cutoff(df, column, percentile=95):
    """
    Return the value at the given percentile of one column, ignoring missing
    entries.

    :param df: DataFrame containing the deviations.
    :type df: pd.DataFrame
    :param column: Column name for which to calculate the cutoff.
    :type column: str
    :param percentile: Percentile to use for the cutoff.
    :type percentile: int
    :return: Cutoff value.
    :rtype: float
    """
    # Missing values are dropped before the percentile is taken.
    observed = df[column].dropna()
    return np.percentile(observed, percentile)


# Example usage
# distance_cutoff = calculate_percentile_cutoff(dev_df, 'Distance_Deviation', percentile=95)
# angle_cutoff = calculate_percentile_cutoff(dev_df, 'Angle_Deviation', percentile=95)

In [None]:
import pandas as pd
import numpy as np

# Assumes dev_df carries the 'Distance_Deviation', 'Angle_Deviation' and
# 'Ca_Distance' columns used below.

# Standard Deviation Method
distance_cutoff_std = calculate_std_cutoff(dev_df, "Distance_Deviation", num_std=3)
angle_cutoff_std = calculate_std_cutoff(dev_df, "Angle_Deviation", num_std=3)
ca_cutoff_std = calculate_std_cutoff(dev_df, "Ca_Distance", num_std=3)

# Percentile Method
distance_cutoff_percentile = calculate_percentile_cutoff(
    dev_df, "Distance_Deviation", percentile=98
)
angle_cutoff_percentile = calculate_percentile_cutoff(
    dev_df, "Angle_Deviation", percentile=98
)
ca_cutoff_percentile = calculate_percentile_cutoff(dev_df, "Ca_Distance", percentile=98)

# Report both families of cutoffs; the labels hard-code the 3-sigma and
# 98th-percentile settings passed above.
print(f"Distance Deviation Cutoff (3 Std Dev): {distance_cutoff_std}")
print(f"Angle Deviation Cutoff (3 Std Dev): {angle_cutoff_std}")
print(f"Ca Distance Cutoff (3 Std Dev): {ca_cutoff_std}\n")

print(f"Distance Deviation Cutoff (98th Percentile): {distance_cutoff_percentile}")
print(f"Angle Deviation Cutoff (98th Percentile): {angle_cutoff_percentile}")
print(f"Ca Distance Cutoff (98th Percentile): {ca_cutoff_percentile}")

In [None]:
import pandas as pd


def filter_by_cutoffs(df, distance_cutoff, angle_cutoff):
    """
    Keep only the rows whose distance and angle deviations are both at or below
    their respective cutoffs.

    :param df: DataFrame containing the deviations.
    :type df: pd.DataFrame
    :param distance_cutoff: Largest ``Distance_Deviation`` to keep (inclusive).
    :type distance_cutoff: float
    :param angle_cutoff: Largest ``Angle_Deviation`` to keep (inclusive).
    :type angle_cutoff: float
    :return: Filtered DataFrame.
    :rtype: pd.DataFrame
    """
    within_distance = df["Distance_Deviation"] <= distance_cutoff
    within_angle = df["Angle_Deviation"] <= angle_cutoff
    return df[within_distance & within_angle]


# Example usage
# Assuming you have a DataFrame named dev_df
# distance_cutoff = 5.0
# angle_cutoff = 10.0
# filtered_df = filter_by_cutoffs(dev_df, distance_cutoff, angle_cutoff)
# print(filtered_df)

In [None]:
# Row count of dev_df before filtering.
dev_df.shape[0]

In [None]:
# Rows with Distance_Deviation <= 1.0 and Angle_Deviation <= 10.0; show how many remain.
filt = filter_by_cutoffs(dev_df, 1.0, 10.0)
filt.shape[0]

In [None]:
# filter_by_cutoffs returns a new frame, so dev_df's row count is unchanged here.
dev_df.shape[0]

In [None]:
# Number of missing Angle_Deviation values in dev_df.
print(dev_df["Angle_Deviation"].isna().sum())

In [None]:
import pandas as pd
import numpy as np


def calculate_std_cutoff(df, column, num_std=2):
    """
    Return ``mean + num_std * std`` of ``df[column]``.

    Redefines the helper of the same name from an earlier cell using the same
    formula.

    :param df: DataFrame containing the deviations.
    :type df: pd.DataFrame
    :param column: Column name for which to calculate the cutoff.
    :type column: str
    :param num_std: Number of standard deviations to add to the mean.
    :type num_std: int
    :return: Cutoff value.
    :rtype: float
    """
    center, spread = df[column].mean(), df[column].std()
    return center + num_std * spread


def calculate_percentile_cutoff(df, column, percentile=95):
    """
    Return the ``percentile``-th percentile of ``df[column]`` after dropping
    missing values.

    Redefines the helper of the same name from an earlier cell using the same
    computation.

    :param df: DataFrame containing the deviations.
    :type df: pd.DataFrame
    :param column: Column name for which to calculate the cutoff.
    :type column: str
    :param percentile: Percentile to use for the cutoff.
    :type percentile: int
    :return: Cutoff value.
    :rtype: float
    """
    present = df[column].dropna()
    return np.percentile(present, percentile)


# Assuming you have a DataFrame named dev_df with columns 'Distance_Deviation' and 'Angle_Deviation'

# Check for missing values
print(f"Missing values in 'Angle_Deviation': {dev_df['Angle_Deviation'].isna().sum()}")

# Drop missing values for the calculation
# NOTE(review): this rebinds the global dev_df, so any cell run after this one
# sees the NaN-free frame rather than the one create_disulfide_dataframe built.
dev_df = dev_df.dropna(subset=["Angle_Deviation", "Distance_Deviation"])

# Standard Deviation Method
distance_cutoff_std = calculate_std_cutoff(dev_df, "Distance_Deviation", num_std=3)
angle_cutoff_std = calculate_std_cutoff(dev_df, "Angle_Deviation", num_std=3)

# Percentile Method
distance_cutoff_percentile = calculate_percentile_cutoff(
    dev_df, "Distance_Deviation", percentile=98
)
angle_cutoff_percentile = calculate_percentile_cutoff(
    dev_df, "Angle_Deviation", percentile=98
)

# Same report as the earlier cutoff cell, minus the Ca_Distance lines.
print(f"Distance Deviation Cutoff (3 Std Dev): {distance_cutoff_std}")
print(f"Angle Deviation Cutoff (3 Std Dev): {angle_cutoff_std}")
print(f"Distance Deviation Cutoff (98th Percentile): {distance_cutoff_percentile}")
print(f"Angle Deviation Cutoff (98th Percentile): {angle_cutoff_percentile}")