In [None]:
# proteusPy Disulfide Database Analysis
# Author: Eric G. Suchanek, PhD.

In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import pyvista as pv
import logging

import os
import pandas as pd
import numpy as np
from proteusPy.logger_config import (
    list_all_loggers,
    set_logger_level,
    toggle_stream_handler,
    configure_master_logger,
    list_handlers,
    disable_stream_handlers_for_namespace,
    set_logging_level_for_all_handlers,
)


from proteusPy import (
    Load_PDB_SS,
    Disulfide,
    DisulfideList,
    DisulfideLoader,
    torsion_to_class_string,
    Bootstrap_PDB_SS,
    load_disulfides_from_id,
    create_logger,
    configure_master_logger,
    Vector3D,
)

from proteusPy.atoms import (
    ATOM_COLORS,
    ATOM_RADII_COVALENT,
    ATOM_RADII_CPK,
    BOND_COLOR,
    BOND_RADIUS,
    BS_SCALE,
    FONTSIZE,
    SPEC_POWER,
    SPECULARITY,
)

from proteusPy.ProteusGlobals import *

# --- Notebook-wide paths and settings --------------------------------------
HOME = Path.home()

# Root of the local PDB tree; the PDB environment variable, when set,
# overrides the default of ~/pdb.
PDB = Path(os.getenv("PDB", HOME / "pdb"))

MODEL_DIR = PDB / "good"
PDB_DATA_DIR = PDB / "data"

SAVE_DIR = HOME / "Documents" / "proteusPyDocs" / "classes"
REPO_DIR = HOME / "repos" / "proteusPy" / "data"

# One output directory per class scheme, all located under SAVE_DIR.
OCTANT = SAVE_DIR / "octant"
BINARY = SAVE_DIR / "binary"
SEXTANT = SAVE_DIR / "sextant"

# Create the output directories up front. exist_ok=True keeps this cell
# idempotent, so re-running it on an existing tree does not raise.
for _class_dir in (OCTANT, BINARY, SEXTANT):
    _class_dir.mkdir(parents=True, exist_ok=True)

PBAR_COLS = 78

# Fixed: the log file name used a comma ("database_analysis,log") where the
# extension separator belongs.
configure_master_logger("database_analysis.log")

In [2]:
# Load the complete database (subset=False) with progress messages enabled.
pdb_full = Load_PDB_SS(
    subset=False,
    verbose=True,
)

# Keep the total disulfide count and display it as the cell result.
tot_ss = pdb_full.TotalDisulfides
tot_ss

-> load_PDB_SS(): Reading /Users/egs/miniforge3/envs/ppydev/lib/python3.11/site-packages/proteusPy/data/PDB_SS_ALL_LOADER.pkl... 
-> load_PDB_SS(): Done Reading /Users/egs/miniforge3/envs/ppydev/lib/python3.11/site-packages/proteusPy/data/PDB_SS_ALL_LOADER.pkl... 


175277

In [3]:
# Print the loader's summary: PDB IDs present, disulfides loaded, average
# resolution, lowest/highest energy disulfide, distance cutoffs and RAM used.
pdb_full.describe()

PDB IDs present:                    36968
Disulfides loaded:                  175277
Average structure resolution:       2.19 Å
Lowest Energy Disulfide:            2q7q_75D_140D
Highest Energy Disulfide:           6vxk_801B_806B
Cα distance cutoff:                 -1.00 Å
Sg distance cutoff:                 -1.00 Å
Total RAM Used:                     45.60 MB.


We can filter the database by instantiating `DisulfideLoader` directly with distance cutoffs: `cutoff` sets the Cα distance cutoff and `sg_cutoff` sets the Sg distance cutoff (both in Å):

In [None]:
# Build a loader over the full database with an 8.0 Å Cα distance cutoff.
# NOTE(review): sg_cutoff=-1.0 appears to leave the Sg distance unfiltered
# (the unfiltered load reports -1.00 Å for both cutoffs) — confirm.
pdb_ca_filtered = DisulfideLoader(
    subset=False,
    verbose=True,
    cutoff=8.0,
    sg_cutoff=-1.0,
)

# Keep the resulting disulfide count and display it as the cell result.
tot_ss_ca = pdb_ca_filtered.TotalDisulfides
tot_ss_ca

In [None]:
# Print the summary report for the Cα-filtered loader (counts, resolution,
# energy extremes, cutoffs in effect, RAM used).
pdb_ca_filtered.describe()

In [None]:
# Build a loader over the full database with a 2.1 Å Sg distance cutoff.
# NOTE(review): cutoff=-1.0 appears to leave the Cα distance unfiltered
# (the unfiltered load reports -1.00 Å for both cutoffs) — confirm.
pdb_sg_filtered = DisulfideLoader(
    subset=False,
    verbose=True,
    cutoff=-1.0,
    sg_cutoff=2.1,
)

# Keep the resulting disulfide count and display it as the cell result.
tot_ss_sg = pdb_sg_filtered.TotalDisulfides
tot_ss_sg

In [None]:
# Print the summary report for the Sg-filtered loader (counts, resolution,
# energy extremes, cutoffs in effect, RAM used).
pdb_sg_filtered.describe()

In [4]:
# Build a loader over the full database with both distance cutoffs applied:
# Cα distance cutoff of 8.0 Å and Sg distance cutoff of 2.1 Å.
pdb_filtered = DisulfideLoader(
    subset=False,
    verbose=True,
    cutoff=8.0,
    sg_cutoff=2.1,
)

# Keep the resulting disulfide count and display it as the cell result.
tot_ss_filt = pdb_filtered.TotalDisulfides
tot_ss_filt

proteusPy: INFO 2024-11-19 15:03:34,504 - proteusPy.DisulfideClass_Constructor.__init__ - Building SS classes...
proteusPy: INFO 2024-11-19 15:03:34,510 - proteusPy.DisulfideClass_Constructor.build_yourself - Creating binary SS classes...
proteusPy: INFO 2024-11-19 15:03:37,028 - proteusPy.DisulfideClass_Constructor.build_yourself - Creating sixfold SS classes...
proteusPy: INFO 2024-11-19 15:03:37,613 - proteusPy.DisulfideClass_Constructor.build_yourself - Creating eightfold SS classes...
proteusPy: INFO 2024-11-19 15:03:38,286 - proteusPy.DisulfideClass_Constructor.build_yourself - Initialization complete.


163489

In [5]:
# Print the summary report for the loader built with both cutoffs; the
# "cutoff" lines of the report echo the values passed to DisulfideLoader.
pdb_filtered.describe()

PDB IDs present:                    35347
Disulfides loaded:                  163489
Average structure resolution:       2.21 Å
Lowest Energy Disulfide:            2q7q_75D_140D
Highest Energy Disulfide:           6vxk_801B_806B
Cα distance cutoff:                 8.00 Å
Sg distance cutoff:                 2.10 Å
Total RAM Used:                     45.60 MB.


In [9]:
# Take the disulfide list held by the distance-filtered loader.
filtered_ss = pdb_filtered.SSList
# NOTE(review): 5 is a magic number — presumably a tolerance on deviation from
# ideal bond geometry; confirm its meaning and units against the
# filter_by_bond_ideality documentation and consider a named constant.
filtered2 = filtered_ss.filter_by_bond_ideality(5)
# Display how many disulfides remain after the ideality filter.
len(filtered2)

151163