In [1]:
# DisulfideBond Playground
# Playing with the DisulfideBond class
# Author: Eric G. Suchanek, PhD.
# (c) 2022 Eric G. Suchanek, PhD., All Rights Reserved
# License: BSD
# Last Modification: 12/12/22
# Cα Cβ Sγ

# important preamble

import pandas as pd

import pyvista as pv
from pyvista import set_plot_theme

from Bio.PDB import *

# proteusPy imports (for use from within the repo)
from proteusPy import *
from proteusPy.data import *
from proteusPy.Disulfide import *


import os

# override any default PDB globals

# Root of the local PDB repository. Set the PDB_ROOT environment variable to
# relocate it; the default is the original author's location.
# os.path.join(..., '') guarantees the trailing separator that the f-strings
# below rely on.
PDB_ROOT = os.path.join(os.environ.get('PDB_ROOT', '/Users/egs/PDB/'), '')

# location of cleaned PDB files - these are not stored in the repo
PDB_GOOD = f'{PDB_ROOT}good/'

# PDB files from within the repo (relative path)
PDB_REPO = '../pdb/'

# location of the compressed Disulfide .pkl files
MODELS = f'{PDB_ROOT}models/'

# pyvista setup for notebooks
pv.set_jupyter_backend('ipyvtklink')
set_plot_theme('document')


In [3]:
# One-time database build steps, left commented out because they take so long.
# NOTE(review): PDB_ORIG is not defined in the visible cells of this notebook --
# define it (or substitute PDB_ROOT) before re-enabling the download step.
# Download_Disulfides(pdb_home=PDB_ORIG, model_home=MODELS, reset=False)

#Extract_Disulfides(numb=1000, pdbdir=PDB_GOOD, datadir=MODELS, verbose=False, quiet=False)

# Clear any previously bound loader, then load the disulfide database.
# verbose=True prints the files being read and the summary counts.
PDB_SS = None
PDB_SS = DisulfideLoader(verbose=True)

# Empty DisulfideList named 'tmp'; ss_list is rebound by a later cell before
# it is read in the cells shown.
ss_list = DisulfideList([], 'tmp')
# Last expression of the cell: displayed as the cell output.
PDB_SS.TotalDisulfides


Reading disulfides from: /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_ss.pkl
Disulfides Read: 8210
Reading disulfide dict from: /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_ss_dict.pkl
Reading Torsion DF /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_SS_torsions.csv.
Read torsions DF.
PDB IDs parsed: 1000
Total Space Used: 2035005 bytes.


8210

In [4]:

# One disulfide from the database: integer indexing on the loader yields a
# single Disulfide, so no placeholder object has to be created first.
ss = PDB_SS[0]
print(f'SS: {ss.pprint_all()}')

# All disulfides for one structure: indexing the loader with a PDB ID yields
# the collection of disulfides for that entry.
ss4yys = PDB_SS['4yys']

tot_ss = len(ss4yys)  # number of SS bonds in 4yys
print(f'tot {tot_ss}')


<Disulfide 4yys_22A_65A SourceID: 4yys Proximal: 22 A Distal: 65 A
 Proximal Chain fullID: <('4yys', 0, 'A', (' ', 22, ' '))> Distal Chain fullID: <('4yys', 0, 'A', (' ', 65, ' '))> 
Proximal Coordinates:
   N: <Vector -2.36, -20.48, 5.21>
   Cα: <Vector -2.10, -19.89, 3.90>
   C: <Vector -1.12, -18.78, 4.12>
   O: <Vector -1.30, -17.96, 5.03>
   Cβ: <Vector -3.38, -19.31, 3.32>
   Sγ: <Vector -3.24, -18.40, 1.76>
   Cprev <Vector -2.67, -21.75, 5.36>
   Nnext: <Vector -0.02, -18.76, 3.36>
 Distal Coordinates:
   N: <Vector -0.60, -18.71, -1.62>
   Cα: <Vector -0.48, -19.10, -0.22>
   C: <Vector 0.92, -19.52, 0.18>
   O: <Vector 1.10, -20.09, 1.25>
   Cβ: <Vector -1.48, -20.23, 0.08>
   Sγ: <Vector -3.22, -19.69, 0.18>
   Cprev <Vector -0.73, -17.44, -2.01>
   Nnext: <Vector 1.92, -19.18, -0.63>

 Proximal Internal Coordinates:
   N: <Vector -0.41, 1.40, -0.00>
   Cα: <Vector 0.00, 0.00, 0.00>
   C: <Vector 1.50, 0.00, 0.00>
   O: <Vector 2.12, 0.71, -0.80>
   Cβ: <Vector -0.50, -0.70,

In [6]:
# Rebuild the torsion table directly from the loader's full disulfide list
# (PDB_SS.SSList) and preview the first rows.
# NOTE(review): in the recorded output phi_prox and psi_prox are identical on
# every row, whereas PDB_SS.getTorsions() shows distinct phi_prox values for
# the same disulfides -- build_torsion_df looks suspect; verify.
sslist = PDB_SS.SSList
tors_df = build_torsion_df(sslist)
tors_df.head()

100%|█████████████████████████████████████████████████████████| 8210/8210 [00:06<00:00, 1283.67it/s]


Unnamed: 0,source,ss_id,proximal,distal,chi1,chi2,chi3,chi4,chi5,energy,ca_distance,phi_prox,psi_prox,phi_dist,psi_dist
0,4yys,4yys_22A_65A,22,65,174.629233,82.51771,-83.322249,-62.523644,-73.827286,1.696237,4.502086,128.68679,128.68679,-105.731172,16.431694
1,4yys,4yys_56A_98A,56,98,-50.239063,-85.583916,97.275447,70.535692,179.046592,2.112566,4.967417,-30.489936,-30.489936,-59.378573,125.462589
2,4yys,4yys_156A_207A,156,207,62.598713,172.940042,-95.352637,-23.070934,-55.15848,2.331733,5.292317,150.741801,150.741801,60.457994,22.170381
3,4yys,4yys_22B_65B,22,65,173.666078,88.297996,-82.387276,-65.997032,-72.289506,1.958823,4.532387,130.848015,130.848015,-103.903213,16.298008
4,4yys,4yys_56B_98B,56,98,-56.410909,-81.401941,94.310784,67.035993,178.852441,1.444608,4.768629,-30.319989,-30.319989,-53.571262,120.853837


In [4]:
# Fetch the disulfides for PDB entry 6fuf by ID (not read again in the cells shown).
ss6fuf = PDB_SS['6fuf']


In [5]:
# Torsion table as returned by the loader; the bare name on the last line
# displays it (pandas abbreviates the middle rows of a long frame).
# NOTE(review): the meaning of the '' argument is not visible here; a later
# cell calls getTorsions() with no argument -- confirm they are equivalent.
tors = PDB_SS.getTorsions('')
tors

Unnamed: 0,source,ss_id,proximal,distal,chi1,chi2,chi3,chi4,chi5,energy,ca_distance,phi_prox,psi_prox,phi_dist,psi_dist
0,4yys,4yys_22A_65A,22,65,174.629233,82.517710,-83.322249,-62.523644,-73.827286,1.696237,4.502086,-149.776903,128.686790,-105.731172,16.431694
1,4yys,4yys_56A_98A,56,98,-50.239063,-85.583916,97.275447,70.535692,179.046592,2.112566,4.967417,-113.982916,-30.489936,-59.378573,125.462589
2,4yys,4yys_156A_207A,156,207,62.598713,172.940042,-95.352637,-23.070934,-55.158480,2.331733,5.292317,-156.930659,150.741801,60.457994,22.170381
3,4yys,4yys_22B_65B,22,65,173.666078,88.297996,-82.387276,-65.997032,-72.289506,1.958823,4.532387,-154.247515,130.848015,-103.903213,16.298008
4,4yys,4yys_56B_98B,56,98,-56.410909,-81.401941,94.310784,67.035993,178.852441,1.444608,4.768629,-113.329915,-30.319989,-53.571262,120.853837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8205,2p5r,2p5r_44B_92B,44,92,-124.400230,14.097260,87.928807,51.580778,-164.615423,6.941233,4.936201,-103.960336,93.039551,-52.651014,115.486532
8206,4qyz,4qyz_143A_253A,143,253,-69.050704,48.503674,150.647932,71.508814,-78.969552,7.378103,6.269383,-65.493849,-49.756331,-33.643753,-34.724290
8207,4qyz,4qyz_143A_250A,143,250,-69.050704,161.401761,-123.410979,-178.756423,170.219368,4.229833,7.803253,-65.493849,-49.756331,-75.614403,152.676285
8208,3tdj,3tdj_206A_261A,206,261,-43.122049,-78.811213,-89.985262,-53.676273,-50.840338,2.058045,5.446051,46.516562,39.500316,-114.399870,102.881614


In [6]:
# Look up a single disulfide in the 4yys collection by its name; the bare name
# on the last line displays its repr.
ss1 = ss4yys.get_by_name('4yys_22A_65A')
ss1

<Disulfide 4yys_22A_65A SourceID: 4yys Proximal: 22 A Distal: 65 A>

In [7]:
# Run Check_chains on entry 4yys using the cleaned-PDB directory. In the
# recorded run it prints the SSBOND dictionary and the per-chain lengths, and
# evaluates to False after reporting that the chain lengths are unequal.
Check_chains('4yys', PDB_GOOD)

ssbond dict: {1: ('22', '65', 'A', 'A'), 2: ('56', '98', 'A', 'A'), 3: ('156', '207', 'A', 'A'), 4: ('22', '65', 'B', 'B'), 5: ('56', '98', 'B', 'B'), 6: ('156', '207', 'B', 'B')}
multiple chains. [<Chain id=A>, <Chain id=B>]
Chain: A, length: 504
Chain: B, length: 454
chain lengths are unequal: [504, 454]


False

In [8]:
# Restrict the 4yys disulfides to chain A and display the resulting list.
ss4yys_a = ss4yys.by_chain('A')
ss4yys_a


DisulfideList([<Disulfide 4yys_22A_65A SourceID: 4yys Proximal: 22 A Distal: 65 A>,
               <Disulfide 4yys_56A_98A SourceID: 4yys Proximal: 56 A Distal: 98 A>,
               <Disulfide 4yys_156A_207A SourceID: 4yys Proximal: 156 A Distal: 207 A>])

In [9]:
# Restrict the 4yys disulfides to chain B and display the resulting list.
ss4yys_b = ss4yys.by_chain('B')
ss4yys_b

DisulfideList([<Disulfide 4yys_22B_65B SourceID: 4yys Proximal: 22 B Distal: 65 B>,
               <Disulfide 4yys_56B_98B SourceID: 4yys Proximal: 56 B Distal: 98 B>,
               <Disulfide 4yys_156B_207B SourceID: 4yys Proximal: 156 B Distal: 207 B>])

In [10]:
# First chain-A disulfide; print its proximal and distal atom coordinates.
ss4yys_a1 = ss4yys_a[0]
print(ss4yys_a1.repr_ss_coords())



Proximal Coordinates:
   N: <Vector -2.36, -20.48, 5.21>
   Cα: <Vector -2.10, -19.89, 3.90>
   C: <Vector -1.12, -18.78, 4.12>
   O: <Vector -1.30, -17.96, 5.03>
   Cβ: <Vector -3.38, -19.31, 3.32>
   Sγ: <Vector -3.24, -18.40, 1.76>
   Cprev <Vector -2.67, -21.75, 5.36>
   Nnext: <Vector -0.02, -18.76, 3.36>
 Distal Coordinates:
   N: <Vector -0.60, -18.71, -1.62>
   Cα: <Vector -0.48, -19.10, -0.22>
   C: <Vector 0.92, -19.52, 0.18>
   O: <Vector 1.10, -20.09, 1.25>
   Cβ: <Vector -1.48, -20.23, 0.08>
   Sγ: <Vector -3.22, -19.69, 0.18>
   Cprev <Vector -0.73, -17.44, -2.01>
   Nnext: <Vector 1.92, -19.18, -0.63>




In [11]:
# First chain-B disulfide (the 22-65 bond, the chain-B counterpart of ss4yys_a1).
ss4yys_b1 = ss4yys_b[0]
ss4yys_b1

<Disulfide 4yys_22B_65B SourceID: 4yys Proximal: 22 B Distal: 65 B>

In [12]:
# Compare the 22-65 disulfide of chain A with its chain-B counterpart using Distance_RMS.
Distance_RMS(ss4yys_a1, ss4yys_b1)

0.1145026461405659

In [13]:
# Same pair of disulfides, compared with Torsion_RMS.
Torsion_RMS(ss4yys_a1, ss4yys_b1)

7.045760800877229

In [14]:
# Equality test between the chain-A and chain-B disulfides (False in the recorded run).
ss4yys_a1 == ss4yys_b1

False

In [15]:
# Chain IDs present among the 4yys disulfides.
chns = ss4yys.get_chains()
# Membership test for a chain ID that is not in that set. A bare expression in
# the middle of a cell is evaluated and then discarded, so print the result to
# make it visible.
print(f"has_chain('yyy'): {ss4yys.has_chain('yyy')}")
chns

{'A', 'B'}

In [16]:
# load SS bonds by PDB ID
# NOTE: this rebinds ss1 -- a single Disulfide in an earlier cell -- to the
# whole collection of disulfides for 4yys.
ss1 = PDB_SS['4yys']
print(ss1)
# Full ID of the first bond; the recorded output is a pair of residue full-IDs
# (proximal, distal).
print(ss1[0].get_full_id())


[<Disulfide 4yys_22A_65A SourceID: 4yys Proximal: 22 A Distal: 65 A>, <Disulfide 4yys_56A_98A SourceID: 4yys Proximal: 56 A Distal: 98 A>, <Disulfide 4yys_156A_207A SourceID: 4yys Proximal: 156 A Distal: 207 A>, <Disulfide 4yys_22B_65B SourceID: 4yys Proximal: 22 B Distal: 65 B>, <Disulfide 4yys_56B_98B SourceID: 4yys Proximal: 56 B Distal: 98 B>, <Disulfide 4yys_156B_207B SourceID: 4yys Proximal: 156 B Distal: 207 B>]
(('4yys', 0, 'A', (' ', 22, ' ')), ('4yys', 0, 'A', (' ', 65, ' ')))


In [17]:
# You can loop over the loader's IDList and extract the disulfides by PDB ID.
# The slice [:5] takes the FIRST five IDs. The loop variable is named pdb_id
# so that the builtin id() is not shadowed for the rest of the notebook.
for pdb_id in PDB_SS.IDList[:5]:
    # get the SS bonds for the given ID
    ssb = PDB_SS[pdb_id]
    print(f'ID: {pdb_id} has {len(ssb)} Disulfides:')
    for bond in ssb:
        print(bond)
    print('\n')
    

ID: 4yys has 6 Disulfides:
<Disulfide 4yys_22A_65A SourceID: 4yys Proximal: 22 A Distal: 65 A>
<Disulfide 4yys_56A_98A SourceID: 4yys Proximal: 56 A Distal: 98 A>
<Disulfide 4yys_156A_207A SourceID: 4yys Proximal: 156 A Distal: 207 A>
<Disulfide 4yys_22B_65B SourceID: 4yys Proximal: 22 B Distal: 65 B>
<Disulfide 4yys_56B_98B SourceID: 4yys Proximal: 56 B Distal: 98 B>
<Disulfide 4yys_156B_207B SourceID: 4yys Proximal: 156 B Distal: 207 B>


ID: 1j5h has 2 Disulfides:
<Disulfide 1j5h_37A_47A SourceID: 1j5h Proximal: 37 A Distal: 47 A>
<Disulfide 1j5h_88A_93A SourceID: 1j5h Proximal: 88 A Distal: 93 A>


ID: 1mfe has 1 Disulfides:
<Disulfide 1mfe_137L_196L SourceID: 1mfe Proximal: 137 L Distal: 196 L>


ID: 1chv has 4 Disulfides:
<Disulfide 1chv_3S_21S SourceID: 1chv Proximal: 3 S Distal: 21 S>
<Disulfide 1chv_14S_38S SourceID: 1chv Proximal: 14 S Distal: 38 S>
<Disulfide 1chv_42S_53S SourceID: 1chv Proximal: 42 S Distal: 53 S>
<Disulfide 1chv_54S_59S SourceID: 1chv Proximal: 54 S Distal

In [18]:
# Pull the complete disulfide list from the loader and inspect its first entry.
ss_list = PDB_SS.getlist()
ss0 = ss_list[0]

# Full ID of the proximal residue, then the chi3 dihedral, one per line.
print(ss0.proximal_residue_fullid, ss0.chi3, sep='\n')

# Cell output: number of disulfides in the list.
len(ss_list)


('4yys', 0, 'A', (' ', 22, ' '))
-83.32224872066772


8210

In [19]:
# Split the database into two lists by the sign of chi3:
# chi3 < 0 goes to 'left_handed', everything else to 'right_handed'.
ss_list = PDB_SS.getlist()
left_handed = DisulfideList([], 'left_handed')
right_handed = DisulfideList([], 'right_handed')

# Iterate over the bonds directly (no index bookkeeping). The loop variable is
# deliberately not called `ss`, so the notebook-level `ss` is left alone.
for bond in ss_list:
    target = left_handed if bond.chi3 < 0 else right_handed
    target.append(bond)

print(f'Left Handed: {len(left_handed)}, Right Handed: {len(right_handed)}')



Left Handed: 4397, Right Handed: 3813


In [21]:
# Self-contained quick-start example: build objects by hand, reload the
# database, then index and slice it.
from proteusPy.Disulfide import Disulfide

# make some empty disulfides, named 'ss1' and 'ss2'
ss1 = Disulfide('ss1')
ss2 = Disulfide('ss2')

# make a DisulfideList containing ss1, named 'tmp', then append ss2 to it
sslist = DisulfideList([ss1], 'tmp')
sslist.append(ss2)

# load the PDB Disulfide database (rebinding PDB_SS; verbose=True prints the
# files read and the summary counts)
PDB_SS = None
PDB_SS = DisulfideLoader(verbose=True)

# extract a single disulfide by integer index; this rebinds ss1 from the empty
# disulfide created above to the first database entry
ss1 = PDB_SS[0]
print(f'{ss1.pprint_all()}')

# grab the first ten entries via slicing and wrap them in a list named 'subset'
subset = DisulfideList(PDB_SS[0:10],'subset')

Reading disulfides from: /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_ss.pkl
Disulfides Read: 8210
Reading disulfide dict from: /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_ss_dict.pkl
Reading Torsion DF /Users/egs/repos/proteusPy/proteusPy/data/PDB_all_SS_torsions.csv.
Read torsions DF.
PDB IDs parsed: 1000
Total Space Used: 2035005 bytes.
<Disulfide 4yys_22A_65A SourceID: 4yys Proximal: 22 A Distal: 65 A
 Proximal Chain fullID: <('4yys', 0, 'A', (' ', 22, ' '))> Distal Chain fullID: <('4yys', 0, 'A', (' ', 65, ' '))> 
Proximal Coordinates:
   N: <Vector -2.36, -20.48, 5.21>
   Cα: <Vector -2.10, -19.89, 3.90>
   C: <Vector -1.12, -18.78, 4.12>
   O: <Vector -1.30, -17.96, 5.03>
   Cβ: <Vector -3.38, -19.31, 3.32>
   Sγ: <Vector -3.24, -18.40, 1.76>
   Cprev <Vector -2.67, -21.75, 5.36>
   Nnext: <Vector -0.02, -18.76, 3.36>
 Distal Coordinates:
   N: <Vector -0.60, -18.71, -1.62>
   Cα: <Vector -0.48, -19.10, -0.22>
   C: <Vector 0.92, -19.52, 0.18>
   O: <Vector 1.10, -20.

In [69]:
# Torsion table ordered from highest to lowest energy. sort_values() without
# inplace returns a new frame, so whatever frame getTorsions() hands back is
# left untouched (an in-place sort could reorder data the loader still holds)
# and re-running this cell always starts from the same input.
torsions = PDB_SS.getTorsions().sort_values(by=['energy'], ascending=False)

torsions.head(10)

Unnamed: 0,source,ss_id,proximal,distal,chi1,chi2,chi3,chi4,chi5,energy,ca_distance,phi_prox,psi_prox,phi_dist,psi_dist
1858,1zjk,1zjk_629A_660A,629,660,-119.404445,115.451877,-156.958041,-27.389791,-113.040397,17.289549,5.848614,-180.0,-180.0,-180.0,-180.0
5983,5ix5,5ix5_28A_49A,28,49,-125.198921,85.810593,165.177044,-110.014736,-109.484554,16.993266,6.445898,-72.579873,-42.872608,-115.16897,166.069531
7142,5i70,5i70_249B_285B,249,285,-118.572818,17.63204,27.714934,103.400599,-105.519889,16.852911,4.762222,-72.765441,-41.35128,-131.509368,146.384733
7,1j5h,1j5h_88A_93A,88,93,-130.028193,-138.597573,27.09182,101.727177,-109.684946,16.823423,5.291536,-94.998658,-22.831376,-51.633735,-179.647589
7743,7vn9,7vn9_369E_396E,369,396,-1.307355,-110.315764,35.017574,105.456355,114.05783,16.636493,3.985665,-68.656248,138.354646,-65.497012,149.767509
1360,2mtm,2mtm_4A_18A,4,18,-145.62164,100.815883,-11.727692,87.895308,-109.13406,16.367176,4.795191,-105.157431,-39.053441,-88.013097,129.32238
5982,5ix5,5ix5_24A_47A,24,47,-120.552249,81.976395,-168.373887,-60.690634,-138.385992,14.558078,6.201594,-121.287702,-52.569386,-104.576661,119.777325
3694,7lfs,7lfs_305C_309C,305,309,-121.368685,-34.955346,-166.201944,99.814757,-35.352797,14.428365,6.12367,-88.21344,95.513576,-68.695803,-40.604028
4432,2m96,2m96_8A_21A,8,21,-144.956128,104.80275,153.936652,-117.600915,-89.623184,14.300415,6.59087,-140.41327,163.464552,-126.424594,124.185942
5742,1igr,1igr_425A_458A,425,458,-97.434381,-82.004776,-152.7775,32.773367,-115.644343,14.204259,5.743282,-53.767106,137.688874,-103.019429,-111.890077


In [85]:
# idxmax() returns the index LABEL of the highest-energy row, not its position
# in the (sorted) frame.
idx_max = int(torsions['energy'].idxmax())
print(f'IDMAX: {idx_max}')
# The label is then used as an integer position into the loader.
# NOTE(review): this assumes the torsion frame's index labels line up with the
# loader's ordering -- it does for the recorded run (1858 -> 1zjk_629A_660A);
# confirm in general.
ssmax = PDB_SS[idx_max]
ssmax

IDMAX: 1858


<Disulfide 1zjk_629A_660A SourceID: 1zjk Proximal: 629 A Distal: 660 A>

<Disulfide 1zjk_629A_660A SourceID: 1zjk Proximal: 629 A Distal: 660 A>

In [92]:
# Collect the highest-energy disulfides into their own list.
# Relies on `torsions` already being sorted by descending energy.
TOP_N = 30  # single source of truth for both the list name and the row count

# Names (ss_id) of every disulfide, in the frame's current order.
badlist = torsions['ss_id']

bad_SS_list = DisulfideList([], f'{TOP_N} top high energy')

# head() stops early if the frame has fewer than TOP_N rows instead of raising.
for ssid in badlist.head(TOP_N):
    ss = PDB_SS.get_by_name(ssid)
    print(f'ID: {ss.name}:{ss.energy:.2f} CA: {ss.ca_distance:.2f}')
    bad_SS_list.append(ss)


ID: 1zjk_629A_660A:17.29 CA: 5.85
ID: 5ix5_28A_49A:16.99 CA: 6.45
ID: 5i70_249B_285B:16.85 CA: 4.76
ID: 1j5h_88A_93A:16.82 CA: 5.29
ID: 7vn9_369E_396E:16.64 CA: 3.99
ID: 2mtm_4A_18A:16.37 CA: 4.80
ID: 5ix5_24A_47A:14.56 CA: 6.20
ID: 7lfs_305C_309C:14.43 CA: 6.12
ID: 2m96_8A_21A:14.30 CA: 6.59
ID: 1igr_425A_458A:14.20 CA: 5.74
ID: 6l5g_6A_167A:14.20 CA: 6.17
ID: 7czr_391B_525B:14.04 CA: 3.91
ID: 1ejo_2646H_2701H:13.93 CA: 5.94
ID: 4jun_144D_148D:13.89 CA: 5.61
ID: 5k9o_52I_277I:13.76 CA: 5.82
ID: 4hzc_227A_259A:13.71 CA: 4.84
ID: 2abz_18D_58D:13.55 CA: 4.06
ID: 5wkf_23J_104J:13.52 CA: 6.90
ID: 4uao_214B_215C:13.44 CA: 5.99
ID: 7e4u_52I_173J:13.42 CA: 5.49
ID: 4zg6_149A_195A:13.30 CA: 5.98
ID: 7djn_124A_169A:13.26 CA: 6.14
ID: 7e50_558B_566B:13.23 CA: 6.37
ID: 5wkf_158D_187E:13.18 CA: 6.41
ID: 6n5e_22F_90F:13.04 CA: 6.54
ID: 2rng_52A_70A:12.88 CA: 6.11
ID: 6uoa_108B_135B:12.86 CA: 5.59
ID: 7t6v_98R_176R:12.76 CA: 7.09
ID: 2m96_15A_34A:12.74 CA: 6.47
ID: 5l1b_718D_773D:12.66 CA: 5.23


DisulfideList([<Disulfide 1zjk_629A_660A SourceID: 1zjk Proximal: 629 A Distal: 660 A>,
               <Disulfide 5ix5_28A_49A SourceID: 5ix5 Proximal: 28 A Distal: 49 A>,
               <Disulfide 5i70_249B_285B SourceID: 5i70 Proximal: 249 B Distal: 285 B>,
               <Disulfide 1j5h_88A_93A SourceID: 1j5h Proximal: 88 A Distal: 93 A>,
               <Disulfide 7vn9_369E_396E SourceID: 7vn9 Proximal: 369 E Distal: 396 E>,
               <Disulfide 2mtm_4A_18A SourceID: 2mtm Proximal: 4 A Distal: 18 A>,
               <Disulfide 5ix5_24A_47A SourceID: 5ix5 Proximal: 24 A Distal: 47 A>,
               <Disulfide 7lfs_305C_309C SourceID: 7lfs Proximal: 305 C Distal: 309 C>,
               <Disulfide 2m96_8A_21A SourceID: 2m96 Proximal: 8 A Distal: 21 A>,
               <Disulfide 1igr_425A_458A SourceID: 1igr Proximal: 425 A Distal: 458 A>,
               <Disulfide 6l5g_6A_167A SourceID: 6l5g Proximal: 6 A Distal: 167 A>,
               <Disulfide 7czr_391B_525B SourceID: 7czr Prox