In [2]:
# DisulfideBond Class Analysis
# Author: Eric G. Suchanek, PhD.
# (c) 2023 Eric G. Suchanek, PhD., All Rights Reserved
# License: MIT
# Last Modification: 2/18/23
# Cα Cβ Sγ

import pandas as pd
import numpy

import pyvista as pv
from pyvista import set_plot_theme

from Bio.PDB import *

# proteusPy imports (the package must be installed or otherwise importable, e.g. from a repo checkout)
import proteusPy
from proteusPy import *
from proteusPy.data import *
from proteusPy.Disulfide import *
from proteusPy.DisulfideList import DisulfideList, load_disulfides_from_id
from proteusPy.utility import Create_classes

# Notebook-local values for the PDB path globals. These are plain assignments
# in the notebook namespace, made after the proteusPy star imports above.
# NOTE(review): the loader output for this cell reports reading from the
# proteusPy package data directory, so these paths may not influence
# Load_PDB_SS() -- confirm whether they are still needed.
# NOTE(review): absolute, user-specific paths; adjust for your own machine.

# Root of the local PDB repository.
PDB_ROOT = '/Users/egs/PDB/'

# Location of cleaned PDB files - these are not stored in the repo.
PDB_GOOD = '/Users/egs/PDB/good/'

# Location of the compressed Disulfide .pkl files (derived from PDB_ROOT).
MODELS = f'{PDB_ROOT}data/'

# pyvista setup for notebooks: select the 'trame' Jupyter backend.
pv.set_jupyter_backend('trame')
#set_plot_theme('dark')

# Load the disulfide database with verbose progress output, then print its
# summary. With subset=False the recorded run read PDB_SS_ALL_LOADER.pkl and
# reported 29.26 GB of RAM in use, so expect this cell to be memory-hungry.
PDB_SS = Load_PDB_SS(verbose=True, subset=False)
PDB_SS.describe()

-> load_PDB_SS(): Reading /Users/egs/repos/proteusPy/proteusPy/data/PDB_SS_ALL_LOADER.pkl... done.
PDB IDs present:                    35818
Disulfides loaded:                  120697
Average structure resolution:       2.34 Å
Lowest Energy Disulfide:            2q7q_75D_140D
Highest Energy Disulfide:           1toz_456A_467A
Total RAM Used:                     29.26 GB.


In [3]:
# Extract the torsion table from the loaded disulfide database. In the
# recorded output it has 120697 rows, matching the loader's "Disulfides
# loaded" count, with chi1-chi5, energy, ca_distance, phi/psi and
# torsion_length columns. tors_df is reused by the class-building cell below.
tors_df = PDB_SS.getTorsions()
# Last expression of the cell: display summary statistics for every column.
tors_df.describe()


Unnamed: 0,proximal,distal,chi1,chi2,chi3,chi4,chi5,energy,ca_distance,phi_prox,psi_prox,phi_dist,psi_dist,torsion_length
count,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0,120697.0
mean,224.952352,273.166616,-46.576729,-15.271499,-2.472293,-27.620642,-38.68503,3.547099,5.55874,-97.818195,62.190902,-96.178772,63.919627,224.176713
std,272.58512,276.640523,97.018984,102.465838,93.910914,103.712065,94.162959,2.361169,1.514885,43.803698,99.029196,43.531611,96.442547,52.598704
min,1.0,1.0,-179.998217,-179.999554,-179.981786,-179.99951,-179.999305,0.491737,2.831762,-180.0,-180.0,-180.0,-180.0,77.51608
25%,47.0,96.0,-83.383209,-87.688664,-87.401377,-92.805867,-75.113684,1.910619,5.077294,-128.635741,-27.963951,-123.289748,-24.254751,181.152368
50%,136.0,193.0,-63.664056,-58.572699,-64.468336,-66.48871,-61.045404,3.02299,5.593961,-96.109404,112.91855,-97.429163,115.186341,225.205972
75%,305.0,361.0,-45.09902,81.012589,94.710376,73.630486,-14.139138,4.35081,6.074565,-68.9236,143.684511,-70.590516,143.207727,261.539869
max,4374.0,8774.0,179.999749,179.996783,179.99507,179.999222,179.999693,18.12453,101.701154,179.912372,179.999324,179.849134,179.993389,381.310941


In [7]:
# Build the disulfide class table from the torsion DataFrame and save it.
# NOTE(review): DATA_DIR is not assigned in this notebook; presumably it is
# provided by one of the proteusPy star imports -- confirm.
grouped = Create_classes(tors_df)
grouped.to_csv(f'{DATA_DIR}PDB_ss_classes.csv')

# Summary table: the same class table without the 'ss_id' column.
# (`columns=` already selects the column axis, so no `axis` argument is needed.)
grouped_summary = grouped.drop(columns=['ss_id'])

# Write the summary frame. The original cell wrote `grouped` again here, so the
# "_summary" file was just a duplicate of the full class table.
grouped_summary.to_csv(f'{DATA_DIR}PDB_ss_classes_summary.csv')
