In [1]:
# DisulfideBond Playground
# Playing with the DisulfideBond class
# Author: Eric G. Suchanek, PhD.
# (c) 2022 Eric G. Suchanek, PhD., All Rights Reserved
# License: BSD
# Last Modification: 12/12/22
# Cα Cβ Sγ

# important preamble

import pandas as pd

import pyvista as pv
from pyvista import set_plot_theme

from Bio.PDB import *

# proteusPy imports, for use from within the repo
from proteusPy import *
from proteusPy.data import *
from proteusPy.Disulfide import *


# Override any default PDB globals (presumably those brought in by the
# wildcard imports in this cell).
# NOTE(review): PDB_ROOT is a machine-specific absolute path; adjust it for
# your environment. The other local paths below are derived from it.

# location for PDB repository
PDB_ROOT = '/Users/egs/PDB/'

# location of cleaned PDB files - these are not stored in the repo
PDB_GOOD = f'{PDB_ROOT}good/'

# from within the repo (relative path)
PDB_REPO = '../pdb/'

# location of the compressed Disulfide .pkl files
MODELS = f'{PDB_ROOT}models/'

# Configure pyvista for notebook rendering: choose the Jupyter backend, then
# apply the 'document' plot theme through the same `pv` namespace.
# NOTE(review): whether the 'ipyvtklink' backend is available depends on the
# installed PyVista version - confirm.
pv.set_jupyter_backend('ipyvtklink')
pv.set_plot_theme('document')


In [2]:
# The download/extract steps below are commented out since they take so long.
# NOTE(review): PDB_ORIG is not defined in any visible cell - confirm it is
# provided by the wildcard imports before re-enabling the call.
# Download_Disulfides(pdb_home=PDB_ORIG, model_home=MODELS, reset=False)

#Extract_Disulfides(numb=1000, pdbdir=PDB_GOOD, datadir=MODELS, verbose=False, quiet=False)

# Clear any previous binding first (presumably so a stale loader is released
# when the cell is re-run), then load the disulfide database.
PDB_SS = None
PDB_SS = DisulfideLoader(verbose=True)

# empty DisulfideList named 'tmp'; ss_list is reassigned before its next use
ss_list = DisulfideList([], 'tmp')
# last expression: displayed as the cell output
PDB_SS.TotalDisulfides


Reading disulfides from: /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_ss.pkl
Disulfides Read: 8210
Reading disulfide dict from: /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_ss_dict.pkl
Reading Torsion DF /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_SS_torsions.csv.
Read torsions DF.
PDB IDs parsed: 1000
Total Space Used: 1969317 bytes.


8210

In [4]:
# NOTE(review): `new` is not referenced by any other visible cell.
new = DisulfideList([], 'tmp')
# list of disulfides obtained from the loader (presumably all of them)
sslist = PDB_SS.getlist()
# display whatever the list's min() returns; the ordering criterion is
# defined by the class and is not visible here
sslist.min()


<Disulfide 1xr9_203A_259A SourceID: 1xr9 Proximal: 203 A Distal: 259 A>

In [None]:
# NOTE(review): sslist2 is not used by any other visible cell.
sslist2 = PDB_SS.SSList
# `sslist` was bound in an earlier cell; show the value its get_id() returns
name = sslist.get_id()
name

In [None]:

# one disulfide from the database, fetched by integer index
# (no placeholder object is needed first - the lookup result is bound directly)
ss = PDB_SS[0]
# NOTE(review): if pprint_all() prints its report itself and returns None,
# this line will additionally emit 'SS: None' - confirm against the class.
print(f'SS: {ss.pprint_all()}')


# get all disulfides for one structure by indexing the loader with its PDB ID
ss4yys = PDB_SS['4yys']

#ss4crn = PDB_SS['1crn']

tot_ss = len(ss4yys) # number of ssbonds
print(f'tot {tot_ss}')


In [None]:
# Rebind sslist to the loader's SSList attribute and pass it to
# build_torsion_df(); only the head of the result is displayed.
# (presumably a pandas DataFrame, given the .head() call - confirm)
sslist = PDB_SS.SSList
tors_df = build_torsion_df(sslist)
tors_df.head()

In [None]:
# look up the disulfides for PDB entry 6fuf by indexing the loader with its ID
ss6fuf = PDB_SS['6fuf']


In [None]:
# Torsion table from the loader; the bare name displays it as the cell output.
# NOTE(review): another cell calls getTorsions() with no argument - confirm
# that passing '' selects the same table.
tors = PDB_SS.getTorsions('')
tors

In [None]:
# look up a single entry in the 4yys list by its name string and display it
ss1 = ss4yys.get_by_name('4yys_22A_65A')
ss1

In [None]:
# call Check_chains for entry 4yys, pointing it at the PDB_GOOD directory
Check_chains('4yys', PDB_GOOD)

In [None]:
# subset of the 4yys disulfides selected with by_chain('A'), then displayed
ss4yys_a = ss4yys.by_chain('A')
ss4yys_a


In [None]:
# subset of the 4yys disulfides selected with by_chain('B'), then displayed
ss4yys_b = ss4yys.by_chain('B')
ss4yys_b

In [None]:
# first entry of the chain-A subset; print the string from repr_ss_coords()
ss4yys_a1 = ss4yys_a[0]
print(ss4yys_a1.repr_ss_coords())


In [None]:
# first entry of the chain-B subset, displayed as the cell output
ss4yys_b1 = ss4yys_b[0]
ss4yys_b1

In [None]:
# compare the first chain-A and first chain-B disulfides with Distance_RMS
# (presumably an RMS over distances - confirm against the library)
Distance_RMS(ss4yys_a1, ss4yys_b1)

In [None]:
# compare the same two disulfides with Torsion_RMS
# (presumably an RMS over dihedral angles - confirm against the library)
Torsion_RMS(ss4yys_a1, ss4yys_b1)

In [None]:
# equality test between the two objects; what counts as "equal" is decided by
# the class's own comparison, which is not visible here
ss4yys_a1 == ss4yys_b1

In [None]:
# chains reported by the 4yys list
chns = ss4yys.get_chains()
# NOTE(review): the result of has_chain('yyy') is discarded - only the last
# expression of a cell (chns) is displayed.
ss4yys.has_chain('yyy')
chns

In [None]:
# load SS bonds by PDB ID
# NOTE: this rebinds ss1, which an earlier cell bound to the result of
# get_by_name(); here it holds the result of indexing the loader by ID.
ss1 = PDB_SS['4yys']
print(ss1)
# full id reported by the first entry
print(ss1[0].get_full_id())


In [None]:
# you can loop over the IDList list and extract by ID
#
# The loop variable is named pdb_id (not `id`) so the builtin id() is not
# shadowed in the notebook namespace after this cell runs.
for pdb_id in PDB_SS.IDList[:5]:    # just show the first 5
    # get the SS bonds for the given ID
    ssb = PDB_SS[pdb_id]
    numb_ss = len(ssb)
    print(f'ID: {pdb_id} has {numb_ss} Disulfides:')
    for bond in ssb:
        print(bond)
    print('\n')
    

In [None]:
# Fetch the list of disulfides from the loader and inspect its first element.
ss_list = PDB_SS.getlist()
ss0 = ss_list[0]

print(ss0.proximal_residue_fullid)
print(ss0.chi3)
# last expression: number of entries in the list, shown as the cell output
len(ss_list)


In [None]:
# Split the disulfides into two lists by the sign of chi3:
# negative -> left_handed; everything else (including exactly 0) -> right_handed.
ss_list = PDB_SS.getlist()
left_handed = DisulfideList([], 'left_handed')
right_handed = DisulfideList([], 'right_handed')

# iterate the list directly rather than indexing through range(len(...))
for ss in ss_list:
    if ss.chi3 < 0:
        left_handed.append(ss)
    else:
        right_handed.append(ss)


print(f'Left Handed: {len(left_handed)}, Right Handed: {len(right_handed)}')



In [None]:
# NOTE(review): mid-notebook import; earlier cells already call Disulfide(),
# so the name appears to be in scope via the wildcard imports at the top.
from proteusPy.Disulfide import Disulfide

# make some empty disulfides
ss1 = Disulfide('ss1')
ss2 = Disulfide('ss2')

# make a DisulfideList containing ss1, named 'tmp'
# NOTE: rebinds sslist, which earlier cells bound to the loader's list
sslist = DisulfideList([ss1], 'tmp')
sslist.append(ss2)

# load the PDB Disulfide database
# NOTE: this loads the database again and rebinds PDB_SS; an earlier cell
# already created a loader the same way.
PDB_SS = None
PDB_SS = DisulfideLoader(verbose=True)

# extract a disulfide with typical index
ss1 = PDB_SS[0]
# NOTE(review): if pprint_all() prints and returns None, this also prints
# 'None' - confirm against the class.
print(f'{ss1.pprint_all()}')

# grab a subset via slicing
subset = DisulfideList(PDB_SS[0:10],'subset')

In [None]:
# Torsion table sorted by descending energy.
# sort_values() without inplace returns a new, sorted frame, so the object
# handed back by getTorsions() is never mutated. That matters if
# getTorsions() returns a reference to the loader's own table rather than a
# copy (not visible here), and it keeps this cell idempotent on re-run.
torsions = PDB_SS.getTorsions().sort_values(by=['energy'], ascending=False)

torsions.head(10)

In [None]:
# idxmax() returns the index *label* of the row with the largest 'energy'.
# NOTE(review): using that label to index PDB_SS assumes the torsion table's
# index labels line up with positions in the loader - confirm.
idx_max = int(torsions['energy'].idxmax())
print(f'IDMAX: {idx_max}')
ssmax = PDB_SS[idx_max]
ssmax

In [None]:
# Collect the N_WORST highest-energy disulfides into their own list.
# Relies on `torsions` already being sorted by descending 'energy'
# (an earlier cell does this).
N_WORST = 30

badlist = torsions['ss_id']

# label the list with the number actually collected, so name and contents agree
bad_SS_list = DisulfideList([], f'{N_WORST} top high energy')

# slicing (rather than indexing 0..N_WORST-1) also copes with a table that
# has fewer than N_WORST rows
for ssid in badlist.iloc[:N_WORST]:
    ss = PDB_SS.get_by_name(ssid)
    print(f'ID: {ss.name}:{ss.energy:.2f} CA: {ss.ca_distance:.2f}')
    bad_SS_list.append(ss)
