In [1]:
# Playing with the Disulfide Class
# Author: Eric G. Suchanek, PhD.
# Third-party imports (pandas, Biopython)

import pandas as pd

from Bio.PDB import *
from Bio.PDB import PDBParser

# Eric's modules

from proteusPy import *
from proteusPy.disulfide import *
from proteusPy.turtle3D import *

# Global directory for PDB files.
# The location can be overridden with the PDB_DIR environment variable; the
# original hard-coded path is kept as the default so existing setups still work.
# os.path.join(..., '') guarantees a single trailing separator, which the
# original value already had.
import os

PDB_DIR = os.path.join(os.environ.get('PDB_DIR', '/Users/egs/PDB/good/'), '')

PDBlist = PDBList(pdb=PDB_DIR, verbose=False)
parser = PDBParser(PERMISSIVE=True, QUIET=True)

# Entry used for the single-structure experiments in the cells below.
PDB_ID = '5rsa'

# Build the file path with os.path.join so no doubled '/' is produced.
structure = parser.get_structure(PDB_ID, file=os.path.join(PDB_DIR, f'pdb{PDB_ID}.ent'))

# First model of the structure.
model = structure[0]

# NB (author's note): the 'ssbond' header entry requires the modified parser
# code; this lookup raises KeyError if the entry is absent.
ssbond_dict = structure.header['ssbond']
print(ssbond_dict)

# Convert the header record into a list of disulfide descriptions.
disulfide_list = parse_ssbond_header_rec(ssbond_dict)
print(f'found: {disulfide_list}')



{1: ('26', '84', 'A', 'A'), 2: ('40', '95', 'A', 'A'), 3: ('58', '110', 'A', 'A'), 4: ('65', '72', 'A', 'A')}
found: [('26', '84', 'A', 'A'), ('40', '95', 'A', 'A'), ('58', '110', 'A', 'A'), ('65', '72', 'A', 'A')]


In [None]:
# List every residue in chain A of the model, then keep the children of the
# last residue for inspection.
chain = model['A']

# Materialise the residues once so the collection can be reused after the loop
# (iterating get_residues() directly leaves nothing behind to index).
residues = list(chain.get_residues())
for res in residues:
    print(f'Residue: {res.get_id()} {res.get_resname()}')

# Children of the final residue; an empty chain yields an empty list instead of
# failing on an undefined loop variable.
if residues:
    reslist = residues[-1].get_list()
else:
    reslist = []

In [None]:

# Load the disulfides of PDB_ID (model 0) from PDB_DIR and report how many
# were found.

# Not used in this cell; retained in case other cells reference it.
empty = []

SSList = load_disulfides_from_id(PDB_ID, model_numb=0,
                                 verbose=True, pdb_dir=PDB_DIR)

# To dump each entry individually, uncomment:
# for ss in SSList:
#     print(f'{ss.print_compact()}')

print(f'found: {len(SSList)} disulfides')




In [None]:
# Read the disulfides of one PDB entry and print them in sorted order.
SSList = load_disulfides_from_id(
    PDB_ID,
    model_numb=0,
    pdb_dir=PDB_DIR,
    verbose=True,
)

# Per the author's note, the class defines comparison operators that order by
# energy, so a plain sort of a copy gives the energy ranking.
sortedSS = list(SSList)
sortedSS.sort()

for ss in sortedSS:
    print(f'SS: {ss.name} {ss.proximal} - {ss.distal}. Energy: {ss.energy:.4f}')


In [None]:
# Scan the PDB files in PDB_DIR and tabulate the disulfides of one batch of
# entries. Per the author's note, the files were downloaded via the RCSB web
# query interface for structures containing >= 1 SS bond.
#
# Produces:
#   SS_df       -- one row per disulfide (columns listed in df_cols)
#   problem_ids -- entries for which no disulfides were returned

# Imported here as well so this cell does not depend on which names the
# wildcard imports at the top of the notebook happen to export.
import glob
import os
from tqdm import tqdm
import pandas as pd

# Half-open slice of the entry list processed by this run.
BATCH_START = 12000
BATCH_STOP = 16000

# The working directory is changed on purpose: the glob below and any relative
# paths used afterwards resolve inside PDB_DIR.
os.chdir(PDB_DIR)

# NOTE(review): glob.glob() does not guarantee any ordering, so the files that
# fall inside the batch slice may differ between machines -- confirm whether a
# sorted listing is wanted.
ss_filelist = glob.glob('*.ent')
tot = len(ss_filelist)
print(f'first level file scan: {tot} files')

# Translate file names into entry ids.
entrylist = [name_to_id(entry) for entry in ss_filelist]

# Loop over the selected entries, create disulfides and collect their fields.
pbar = tqdm(entrylist[BATCH_START:BATCH_STOP], ncols=80)

df_cols = ['source', 'ss_id', 'proximal', 'distal', 'chi1', 'chi2', 'chi3', 'chi4',
           'chi5', 'energy']

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame one .loc row at a time gets slower as it grows.
# If the loop is interrupted, the rows gathered so far are still in ss_records.
ss_records = []
problem_ids = []

for entry in pbar:
    pbar.set_postfix({'File': entry})
    sslist = load_disulfides_from_id(entry, model_numb=0, verbose=False, pdb_dir=PDB_DIR)
    if len(sslist) == 0:
        # Nothing came back for this entry; remember it for later inspection.
        problem_ids.append(entry)
        continue
    for ss in sslist:
        ss_records.append([entry, ss.name, ss.proximal, ss.distal, ss.chi1, ss.chi2,
                           ss.chi3, ss.chi4, ss.chi5, ss.energy])

# Single construction: default 0..n-1 index, column dtypes inferred from the data.
SS_df = pd.DataFrame(ss_records, columns=df_cols)

print(f'Found: {len(problem_ids)} problem structures.')

In [None]:
# Write the disulfide table to CSV (the relative path resolves against the
# current working directory), then display summary statistics as the cell output.
SS_df.to_csv(path_or_buf='disulfides.csv')
SS_df.describe()


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Histogram of the chi1 column of the disulfide table.
# NOTE(review): when SS_df is filled row by row its columns may be stored as
# object dtype on some pandas versions; pd.to_numeric makes sure the histogram
# receives numeric data -- confirm whether the cast is still needed.
chi1_data = pd.to_numeric(SS_df['chi1'])
ax = chi1_data.plot(kind='hist', title='Distribution of chi1')
ax.set_xlabel('chi1');  # trailing ';' keeps the Axes repr out of the cell output

In [None]:
# Histogram of the chi2 column of the disulfide table.
# NOTE(review): when SS_df is filled row by row its columns may be stored as
# object dtype on some pandas versions; pd.to_numeric makes sure the histogram
# receives numeric data -- confirm whether the cast is still needed.
chi2_data = pd.to_numeric(SS_df['chi2'])
ax = chi2_data.plot(kind='hist', title='Distribution of chi2')
ax.set_xlabel('chi2');  # trailing ';' keeps the Axes repr out of the cell output

In [None]:
# Histogram of the chi3 column of the disulfide table.
# NOTE(review): when SS_df is filled row by row its columns may be stored as
# object dtype on some pandas versions; pd.to_numeric makes sure the histogram
# receives numeric data -- confirm whether the cast is still needed.
chi3_data = pd.to_numeric(SS_df['chi3'])
ax = chi3_data.plot(kind='hist', title='Distribution of chi3')
ax.set_xlabel('chi3');  # trailing ';' keeps the Axes repr out of the cell output

In [None]:
# Histogram of the chi4 column of the disulfide table.
# NOTE(review): when SS_df is filled row by row its columns may be stored as
# object dtype on some pandas versions; pd.to_numeric makes sure the histogram
# receives numeric data -- confirm whether the cast is still needed.
chi4_data = pd.to_numeric(SS_df['chi4'])
ax = chi4_data.plot(kind='hist', title='Distribution of chi4')
ax.set_xlabel('chi4');  # trailing ';' keeps the Axes repr out of the cell output

In [None]:
# Histogram of the chi5 column of the disulfide table.
# NOTE(review): when SS_df is filled row by row its columns may be stored as
# object dtype on some pandas versions; pd.to_numeric makes sure the histogram
# receives numeric data -- confirm whether the cast is still needed.
chi5_data = pd.to_numeric(SS_df['chi5'])
ax = chi5_data.plot(kind='hist', title='Distribution of chi5')
ax.set_xlabel('chi5');  # trailing ';' keeps the Axes repr out of the cell output