In [1]:
# DisulfideBond Playground
# Playing with the DisulfideBond class
# Author: Eric G. Suchanek, PhD.
# (c) 2023 Eric G. Suchanek, PhD., All Rights Reserved
# License: MIT
# Last Modification: 1/30/23
# Cα Cβ Sγ

import pandas as pd
import numpy

import pyvista as pv
from pyvista import set_plot_theme

from Bio.PDB import *

# proteusPy imports (the package is used here from the local repo checkout)
import proteusPy
from proteusPy import *
from proteusPy.data import *
from proteusPy.Disulfide import *
from proteusPy.DisulfideList import DisulfideList, load_disulfides_from_id
from proteusPy.DisulfideLoader import Load_PDB_SS

# pyvista setup for notebooks: select the 'trame' Jupyter backend for rendering.
pv.set_jupyter_backend('trame')
# Optional dark plot theme, currently disabled.
#set_plot_theme('dark')


In [2]:
# Load the disulfide database through the proteusPy loader. With verbose=True
# the loader reports the pickle file it reads (see the output below).
PDB_SS = Load_PDB_SS(verbose=True)

-> load_PDB_SS(): Reading /Users/egs/repos/proteusPy/proteusPy/data/PDB_SS_ALL_LOADER.pkl... done.


In [3]:
# Print a summary of the loaded database: number of PDB IDs and disulfides,
# average resolution, lowest/highest energy disulfide and RAM used.
PDB_SS.describe()

PDB IDs present:                    35818
Disulfides loaded:                  120697
Average resolution:                 2.23 Å
Lowest Energy Disulfide:            2q7q_75D_140D
Highest Energy Disulfide:           1toz_456A_467A
Total RAM Used:                     29.26 GB.


In [4]:
# Read the per-entity metadata table from the CSV in the working directory.
# NOTE(review): the file name suggests these are sequence-similarity hits for
# PDB entry 2q7q -- confirm how the file was generated.
ss_df = pd.read_csv('2q7q_seqsim.csv')
# Preview the first rows of the table.
ss_df.head()

Unnamed: 0,pdb_id,organism_scientific_name,tax_id,organism_synonyms,rank,genus,superkingdom,journal,journal_volume,journal_first_page,...,molecule_name,all_molecule_name,modified_residue_flag,molecule_type,mutation_type,entry_uniprot_accession,uniprot_id,molecule_synonym,gene_name,entity_id
0,2q7q,Paracoccus denitrificans,266,"Parde,Paracoccus Denitrificans,Micrococcus Den...","species,genus,family,order,class,phylum,superk...",Paracoccus,Bacteria,J. Mol. Biol.,276.0,,...,Methylamine dehydrogenase heavy chain,,N,Protein,Conflict,"P29894,P22619",DHMH_PARDE,"Methylamine dehydrogenase (amicyanin),Methylam...",mauB,1
1,2bbk,Paracoccus denitrificans,266,"Parde,Paracoccus Denitrificans,Micrococcus Den...","species,genus,family,order,class,phylum,superk...",Paracoccus,Bacteria,J. Mol. Biol.,276.0,,...,Methylamine dehydrogenase light chain,,Y,Protein,,"P29894,P22619",DHML_PARDE,"Methylamine dehydrogenase (amicyanin),MADH,Met...",mauA,2
2,2agy,Alcaligenes faecalis,511,"Achromobacter Sp. Atcc8750,Alcaligenes Sp. Bp1...","species,genus,family,order,class,phylum,superk...",Alcaligenes,Bacteria,Science,312.0,,...,Aralkylamine dehydrogenase light chain,,Y,Protein,,"P84887,P84888",AAUA_ALCFA,"Aromatic amine dehydrogenase,AADH,Aralkylamine...",aauA,1
3,2agy,Alcaligenes faecalis,511,"Achromobacter Sp. Atcc8750,Alcaligenes Sp. Bp1...","species,genus,family,order,class,phylum,superk...",Alcaligenes,Bacteria,Science,312.0,,...,Aralkylamine dehydrogenase heavy chain,,N,Protein,,"P84887,P84888",AAUB_ALCFA,"Aromatic amine dehydrogenase,Aralkylamine dehy...",aauB,2
4,2ah1,Alcaligenes faecalis,511,"Achromobacter Sp. Atcc8750,Alcaligenes Sp. Bp1...","species,genus,family,order,class,phylum,superk...",Alcaligenes,Bacteria,Science,312.0,,...,Aralkylamine dehydrogenase light chain,,Y,Protein,,"P84888,P84887",AAUA_ALCFA,"Aromatic amine dehydrogenase,AADH,Aralkylamine...",aauA,1


In [5]:
# Distinct PDB IDs from the table; a PDB ID can occur on several rows
# (one row per entity), so duplicates are removed here.
relative_list = ss_df['pdb_id'].unique()
relative_list

array(['2q7q', '2bbk', '2agy', '2ah1', '2ah0', '2agl', '2agx', '2hjb',
       '1mae', '2oiz', '2ojy', '2i0s', '2iup', '2iur', '2agw', '2hxc',
       '2i0r', '2iuv', '2i0t', '2mad', '2agz', '2hkr', '2hj4', '2ok4',
       '2hkm', '1maf', '2ok6', '2iuq', '3orv', '2h47', '2h3x', '3l4m',
       '3l4o', '2j57', '2j55', '2j56', '3pxt', '3sle', '3c75', '3rn0',
       '3sjl', '3pxw', '3pxs', '3rlm', '3rmz', '4fa1', '4fa9', '3sxt',
       '4l3h', '3rn1', '3sws', '4o1q', '4l3g', '4k3i', '4l1q', '4fan',
       '4fa5', '4fav', '4fb1', '3svw', '4y5r', '4fa4', '2iaa', '1mg3',
       '1mg2', '2gc4', '2gc7', '2mta'], dtype=object)

In [6]:
def find_ss(sslist, id):
    """Collect the disulfides in ``sslist`` that belong to one PDB entry.

    Parameters
    ----------
    sslist : iterable
        Disulfide objects exposing a ``pdb_id`` attribute.
    id : str
        PDB identifier to match against each element's ``pdb_id``.

    Returns
    -------
    DisulfideList
        A new list, constructed with ``id`` as its second argument, holding
        every matching disulfide in the order encountered. It is empty when
        nothing matches.
    """
    matches = DisulfideList([], id)
    for candidate in sslist:
        # Guard clause: skip anything from a different structure.
        if candidate.pdb_id != id:
            continue
        matches.append(candidate)
    return matches



In [9]:
# Gather every disulfide that belongs to any structure in relative_list into
# a single DisulfideList.
relatives = DisulfideList([], 'relatives')

# The loop variable is pdb_id rather than id: at notebook (module) scope a
# variable called id would rebind the builtin id() for every later cell.
for pdb_id in relative_list:
    # find_ss walks the whole PDB_SS.SSList once per PDB ID.
    rel = find_ss(PDB_SS.SSList, pdb_id)
    for ss in rel:
        relatives.append(ss)


In [11]:
# Number of disulfides collected across the related structures.
relatives.length

317

In [12]:
# Average energy over the collected disulfides (units are defined by
# DisulfideList, which is not shown here).
relatives.Average_Energy

2.242235480740762

In [13]:
# Average distance over the collected disulfides.
# NOTE(review): which atom pair this distance refers to is defined in DisulfideList -- confirm.
relatives.Average_Distance

3.1539267109112834

In [14]:
# Average resolution of the collected disulfides
# (presumably in Angstroms, as in PDB_SS.describe() -- confirm).
relatives.Average_Resolution

1.830283018867925

In [15]:
# Average torsion distance over the collected disulfides (the metric is
# defined by DisulfideList, which is not shown here).
relatives.Average_Torsion_Distance

192.41984936964994