# Disulfide Binary Class Breakdown

|   class_id | SS_Classname   | FXN        |   count |   incidence |   percentage |
|-----------:|:---------------|:-----------|--------:|------------:|-------------:|
|      00000 | -LHSpiral      | UNK        |   40943 |  0.23359    |    23.359    |
|      00002 | 00002          | UNK        |    9391 |  0.0535781  |     5.35781  |
|      00020 | -LHHook        | UNK        |    4844 |  0.0276363  |     2.76363  |
|      00022 | 00022          | UNK        |    2426 |  0.0138409  |     1.38409  |
|      00200 | -RHStaple      | Allosteric |   16146 |  0.092117   |     9.2117   |
|      00202 | 00202          | UNK        |    1396 |  0.00796454 |     0.796454 |
|      00220 | 00220          | UNK        |    7238 |  0.0412946  |     4.12946  |
|      00222 | 00222          | UNK        |    6658 |  0.0379856  |     3.79856  |
|      02000 | 02000          | UNK        |    7104 |  0.0405301  |     4.05301  |
|      02002 | 02002          | UNK        |    8044 |  0.0458931  |     4.58931  |
|      02020 | -LHStaple      | UNK        |    3154 |  0.0179944  |     1.79944  |
|      02022 | 02022          | UNK        |    1146 |  0.00653822 |     0.653822 |
|      02200 | -RHHook        | UNK        |    7115 |  0.0405929  |     4.05929  |
|      02202 | 02202          | UNK        |    1021 |  0.00582507 |     0.582507 |
|      02220 | -RHSpiral      | UNK        |    8989 |  0.0512845  |     5.12845  |
|      02222 | 02222          | UNK        |    7641 |  0.0435939  |     4.35939  |
|      20000 | ±LHSpiral      | UNK        |    5007 |  0.0285662  |     2.85662  |
|      20002 | +LHSpiral      | UNK        |    1611 |  0.00919117 |     0.919117 |
|      20020 | ±LHHook        | UNK        |    1258 |  0.00717721 |     0.717721 |
|      20022 | +LHHook        | UNK        |     823 |  0.00469542 |     0.469542 |
|      20200 | ±RHStaple      | UNK        |     745 |  0.00425042 |     0.425042 |
|      20202 | +RHStaple      | UNK        |     538 |  0.00306943 |     0.306943 |
|      20220 | ±RHHook        | Catalytic  |    1907 |  0.0108799  |     1.08799  |
|      20222 | 20222          | UNK        |    1159 |  0.00661239 |     0.661239 |
|      22000 | -/+LHHook      | UNK        |    3652 |  0.0208356  |     2.08356  |
|      22002 | 22002          | UNK        |    2052 |  0.0117072  |     1.17072  |
|      22020 | ±LHStaple      | UNK        |    1791 |  0.0102181  |     1.02181  |
|      22022 | +LHStaple      | UNK        |     579 |  0.00330334 |     0.330334 |
|      22200 | -/+RHHook      | UNK        |    8169 |  0.0466062  |     4.66062  |
|      22202 | +RHHook        | UNK        |     895 |  0.0051062  |     0.51062  |
|      22220 | ±RHSpiral      | UNK        |    3581 |  0.0204305  |     2.04305  |
|      22222 | +RHSpiral      | UNK        |    8254 |  0.0470912  |     4.70912  |


In [1]:
# DisulfideBond Class Analysis Dictionary creation
# Author: Eric G. Suchanek, PhD.
# (c) 2025 Eric G. Suchanek, PhD., All Rights Reserved
# Last Modification: 2025-01-16 15:27:59 -egs-

import pyvista as pv
from pyvista import set_plot_theme

import proteusPy as pp

# pyvista setup for notebooks
pv.set_jupyter_backend("trame")


THEME = "auto"

INFO:proteusPy:ProteusPy 0.99.1.dev0 initialized.
INFO:proteusPy:Plotly theme set to: plotly_dark


In [2]:
PDB_SS = pp.Load_PDB_SS(verbose=True, subset=False)
pp.print_memory_used()

proteusPy: INFO 2025-02-08 00:52:13,017 - proteusPy.DisulfideLoader.Load_PDB_SS - Reading disulfides from: /Users/egs/miniforge3/envs/ppydev/lib/python3.12/site-packages/proteusPy/data/PDB_SS_ALL_LOADER.pkl...
INFO:proteusPy.DisulfideLoader:Reading disulfides from: /Users/egs/miniforge3/envs/ppydev/lib/python3.12/site-packages/proteusPy/data/PDB_SS_ALL_LOADER.pkl...
proteusPy: INFO 2025-02-08 00:52:18,745 - proteusPy.DisulfideLoader.Load_PDB_SS - Done reading disulfides from: /Users/egs/miniforge3/envs/ppydev/lib/python3.12/site-packages/proteusPy/data/PDB_SS_ALL_LOADER.pkl...
INFO:proteusPy.DisulfideLoader:Done reading disulfides from: /Users/egs/miniforge3/envs/ppydev/lib/python3.12/site-packages/proteusPy/data/PDB_SS_ALL_LOADER.pkl...


PDB IDs present:                 35361
Disulfides loaded:               158935
Average structure resolution:    2.18 Å
Lowest Energy Disulfide:         2q7q_75D_140D
Highest Energy Disulfide:        6vxk_801B_806B
Cα distance cutoff:              6.71 Å
Sγ distance cutoff:              2.12 Å
               ===== proteusPy: 0.99.1.dev0 =====
proteusPy 0.99.1.dev0: Memory Used: 0.00 GB


In [3]:
# plot the sextant class incidence as a function of binary class input. Generates 32 graphs

PDB_SS.plot_binary_to_eightclass_incidence(
    theme="auto", save=False, savedir="/Users/egs/Documents/proteusPy"
)

In [4]:
# the average structures for each class, returned as a
# list of disulfides
PDB_SS.plot_classes_vs_cutoff(0.5, 50)

In [5]:
PDB_SS.tclass.eightclass_df

Unnamed: 0,class_id,ss_id,count,incidence,percentage
0,11212,"[3c34_202B_256B, 3c36_202B_256B, 4uip_195A_207...",5,0.000031,0.003146
1,11221,[2g6z_197A_219A],1,0.000006,0.000629
2,11222,"[7s1b_534A_587A, 6snw_151E_169E, 5uk5_253B_262...",17,0.000107,0.010696
3,11223,[2crd_13A_33A],1,0.000006,0.000629
4,11224,[2kd3_84A_142A],1,0.000006,0.000629
...,...,...,...,...,...
8509,88647,"[4okr_308A_311A, 4oku_308A_311A]",2,0.000013,0.001258
8510,88664,[6p48_352A_356A],1,0.000006,0.000629
8511,88738,[3cu7_1654A_1657A],1,0.000006,0.000629
8512,88745,[2ifi_3A_8A],1,0.000006,0.000629


In [6]:
PDB_SS.plot_count_vs_classid(base=8)

In [7]:
PDB_SS.plot_classes_vs_cutoff(0.2, 50)

In [8]:
# Remapping binary classes into eight-space
# the tclass.binary_to_classes() function returns
# all possible combinations as a list of octant-based
# strings. These variable names correspond to the names given by
# Hogg et al.

# most prevelent
LHSpiral_neg = PDB_SS.tclass.binary_to_class("00000")
RHSpiral_neg = PDB_SS.tclass.binary_to_class("02220")

LHSpiral_pminus = PDB_SS.tclass.binary_to_class("20000")
LHSpiral_plus = PDB_SS.tclass.binary_to_class("20002")

LHHook_neg = PDB_SS.tclass.binary_to_class("00020")
LHHook_pminus = PDB_SS.tclass.binary_to_class("20020")
LHHook_plus = PDB_SS.tclass.binary_to_class("20022")
LHHook_minus_plus = PDB_SS.tclass.binary_to_class("22000")

# Catalytic
RHHook_pminus = PDB_SS.tclass.binary_to_class("20220")

RHHook_minus = PDB_SS.tclass.binary_to_class("02200")

# Allosteric
RHStaple_neg = PDB_SS.tclass.binary_to_class("00200")

RHStaple_pminus = PDB_SS.tclass.binary_to_class("20200")
RHStaple_plus = PDB_SS.tclass.binary_to_class("20202")

LHStaple_neg = PDB_SS.tclass.binary_to_class("02020")
LHStaple_pminus = PDB_SS.tclass.binary_to_class("22020")
LHStaple_plus = PDB_SS.tclass.binary_to_class("22022")

RHSpiral_pminus = PDB_SS.tclass.binary_to_class("22220")
RHSpiral_plus = PDB_SS.tclass.binary_to_class("22222")

# Examining a Catalytic Disulfide Class
RHHook_pminus is considered to be 'catalytic', so we can
look more closely in six-space by examining the number of disulfides within
each of the possible six-space strings computed above:

For each six-class ID in the combo list, return the sslist represented by
that class and calculate the number of SS in it. 
Plot the results.

In [9]:
df2 = PDB_SS.enumerate_class_fromlist(RHStaple_neg, 8)
PDB_SS.plot_count_vs_class_df(df2, "RHStaple_neg (Allosteric)", theme="auto", log=False)

In [10]:
df = PDB_SS.enumerate_class_fromlist(LHSpiral_neg, 8)
PDB_SS.plot_count_vs_class_df(df, title="LHSpiral_neg", theme="auto")

In [11]:
df4 = PDB_SS.enumerate_class_fromlist(RHSpiral_plus, 8)
PDB_SS.plot_count_vs_class_df(df4, title="RHSpiral_plus", theme=THEME)

In [12]:
from scipy.optimize import minimize
import numpy as np


def energy_function(x):
    chi1, chi2, chi3, chi4, chi5 = x
    energy = 2.0 * (np.cos(np.deg2rad(3.0 * chi1)) + np.cos(np.deg2rad(3.0 * chi5)))
    energy += np.cos(np.deg2rad(3.0 * chi2)) + np.cos(np.deg2rad(3.0 * chi4))
    energy += (
        3.5 * np.cos(np.deg2rad(2.0 * chi3))
        + 0.6 * np.cos(np.deg2rad(3.0 * chi3))
        + 10.1
    )
    return energy


initial_guess = [
    -60.0,
    -60.0,
    90.0,
    -60.0,
    -60.0,
]  # initial guess for chi1, chi2, chi3, chi4, chi5
result = minimize(energy_function, initial_guess, method="Nelder-Mead")
minimum_energy = result.fun
inputs = result.x
inputs[0]

np.float64(-59.99997570059405)

In [13]:
best_ss = Disulfide("BestDisulfide")
best_ss.build_model(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4])
best_ss.pprint()

NameError: name 'Disulfide' is not defined

In [None]:
def download_and_save_binary(url, filename):
    import requests

    response = requests.get(url)
    content = response.content
    open(filename, "wb+").write(content)


# download_and_save_binary("https://github.com/suchanek/proteusPy/blob/0adcd3185604f87b5f366232473a961fb67e6ac9/proteusPy/data/PDB_SS_ALL_LOADER.pkl", "SS.pkl")