In [2]:
from proteusPy import (
    DisulfideList,
    Disulfide,
    Load_PDB_SS,
    load_disulfides_from_id,
    prune_extra_ss,
)


# Load the disulfide database with subset=False; verbose=True makes the loader
# report the file it reads.
pdb = Load_PDB_SS(verbose=True, subset=False)
# Print the loader's summary of what was loaded.
pdb.describe()

-> load_PDB_SS(): Reading /Users/egs/repos/proteusPy/proteusPy/data/PDB_SS_ALL_LOADER.pkl... 
-> load_PDB_SS(): Done reading /Users/egs/repos/proteusPy/proteusPy/data/PDB_SS_ALL_LOADER.pkl... 
PDB IDs present:                    35881
Disulfides loaded:                  169257
Average structure resolution:       2.55 Å
Lowest Energy Disulfide:            2q7q_75D_140D
Highest Energy Disulfide:           6vxk_801B_806B
Cα distance cutoff:                 -1.00 Å
Total RAM Used:                     42.77 GB.


In [3]:
def find_disulfides(pdb, id) -> DisulfideList:
    """
    Collect every disulfide filed under one ID into a new DisulfideList.

    The positions stored in ``pdb.SSDict[id]`` are looked up one at a time in
    ``pdb.SSList``. The position list and each looked-up entry are printed
    along the way.

    :param pdb: Loader object exposing ``SSDict`` and ``SSList``
    :param id: Key into ``pdb.SSDict``
    :return: DisulfideList created with ``id`` as its second constructor argument
    """

    positions = pdb.SSDict[id]
    print(f"indices: {positions}")
    found = DisulfideList([], id)
    all_ss = pdb.SSList
    for pos in positions:
        entry = all_ss[pos]
        print(f"ind: {pos} sslist[ind]: {entry}")
        found.append(entry)
    return found

In [4]:
# Look up every disulfide recorded for entry "4mwy" with find_disulfides.

sslist = find_disulfides(pdb, "4mwy")
sslist

indices: [123392, 123393, 123394, 123395, 123396, 123397, 123398, 123399, 123400]
ind: 123392 sslist[ind]: <Disulfide 4mwy_93A_419A, Source: 4mwy, Resolution: 1.8 Å>
ind: 123393 sslist[ind]: <Disulfide 4mwy_125A_130A, Source: 4mwy, Resolution: 1.8 Å>
ind: 123394 sslist[ind]: <Disulfide 4mwy_177A_195A, Source: 4mwy, Resolution: 1.8 Å>
ind: 123395 sslist[ind]: <Disulfide 4mwy_185A_232A, Source: 4mwy, Resolution: 1.8 Å>
ind: 123396 sslist[ind]: <Disulfide 4mwy_234A_239A, Source: 4mwy, Resolution: 1.8 Å>
ind: 123397 sslist[ind]: <Disulfide 4mwy_280A_293A, Source: 4mwy, Resolution: 1.8 Å>
ind: 123398 sslist[ind]: <Disulfide 4mwy_282A_291A, Source: 4mwy, Resolution: 1.8 Å>
ind: 123399 sslist[ind]: <Disulfide 4mwy_320A_338A, Source: 4mwy, Resolution: 1.8 Å>
ind: 123400 sslist[ind]: <Disulfide 4mwy_423A_449A, Source: 4mwy, Resolution: 1.8 Å>


DisulfideList([<Disulfide 4mwy_93A_419A, Source: 4mwy, Resolution: 1.8 Å>,
               <Disulfide 4mwy_125A_130A, Source: 4mwy, Resolution: 1.8 Å>,
               <Disulfide 4mwy_177A_195A, Source: 4mwy, Resolution: 1.8 Å>,
               <Disulfide 4mwy_185A_232A, Source: 4mwy, Resolution: 1.8 Å>,
               <Disulfide 4mwy_234A_239A, Source: 4mwy, Resolution: 1.8 Å>,
               <Disulfide 4mwy_280A_293A, Source: 4mwy, Resolution: 1.8 Å>,
               <Disulfide 4mwy_282A_291A, Source: 4mwy, Resolution: 1.8 Å>,
               <Disulfide 4mwy_320A_338A, Source: 4mwy, Resolution: 1.8 Å>,
               <Disulfide 4mwy_423A_449A, Source: 4mwy, Resolution: 1.8 Å>])

In [5]:
def find_null_pdb_indices(pdb, limit=1000):
    """
    Return the IDs in ``pdb.IDList`` whose entry ``pdb[id]`` is empty.

    Every ID in ``IDList`` is checked; the scan is not capped.

    :param pdb: Object exposing ``IDList`` and supporting ``pdb[id]`` lookups
    :param limit: Unused. Retained only so existing calls that pass it keep working.
    :return: List of IDs, in ``IDList`` order, for which ``len(pdb[id]) == 0``
    """
    return [entry_id for entry_id in pdb.IDList if len(pdb[entry_id]) == 0]


def find_null_pdb_keys(pdb, limit=1000):
    """
    Return the keys of ``pdb.SSDict`` whose stored value is empty.

    Every key in ``SSDict`` is checked; the scan is not capped.

    :param pdb: Object exposing an ``SSDict`` mapping
    :param limit: Unused. Retained only so existing calls that pass it keep working.
    :return: List of keys, in ``SSDict`` iteration order, for which
        ``len(pdb.SSDict[key]) == 0``
    """
    ssdict = pdb.SSDict
    return [key for key in ssdict if len(ssdict[key]) == 0]

In [6]:
# IDs whose entry in the loaded database is empty.
missing = find_null_pdb_indices(pdb)
len(missing)  # not the cell's last expression, so this value is not displayed
missing

[]

In [None]:
# Load the disulfides for each ID in `missing` and keep the IDs that come back empty.
bad = []

# The loop variable is `pdb_id` rather than `id`, so the built-in id() is not
# rebound in the kernel namespace.
for pdb_id in missing:
    res = load_disulfides_from_id(pdb_id, verbose=True)
    if len(res) == 0:
        print(f"ID {pdb_id} is missing disulfides")
        bad.append(pdb_id)
bad

In [7]:
# Membership check: is "4mwy" among the IDs held by the loader?
idlist = pdb.IDList
"4mwy" in idlist

True

In [12]:
def find_disulfides(pdb, id) -> DisulfideList:
    """
    Gather the disulfides filed under ``id`` into a fresh DisulfideList.

    Positions come from ``pdb.SSDict[id]`` and are resolved against
    ``pdb.SSList``. Nothing is printed.

    :param pdb: Loader object exposing ``SSDict`` and ``SSList``
    :param id: Key into ``pdb.SSDict``
    :return: DisulfideList created with ``id`` as its second constructor argument
    """

    positions = pdb.SSDict[id]
    matches = DisulfideList([], id)
    source = pdb.SSList
    for pos in positions:
        matches.append(source[pos])
    return matches

In [13]:
# Look up every disulfide recorded for entry "4wmy" with find_disulfides.

sslist = find_disulfides(pdb, "4wmy")
sslist

DisulfideList([<Disulfide 4wmy_31A_48A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_41A_70A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_94A_280A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_199A_259A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_251A_265A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_31B_48B, Source: 4wmy, Resolution: 1.6 Å>])

In [15]:
# Same entry fetched through the loader's own indexing, for comparison with
# find_disulfides(pdb, "4wmy").
pdb["4wmy"]

DisulfideList([<Disulfide 4wmy_31A_48A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_41A_70A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_94A_280A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_199A_259A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_251A_265A, Source: 4wmy, Resolution: 1.6 Å>,
               <Disulfide 4wmy_31B_48B, Source: 4wmy, Resolution: 1.6 Å>])

In [None]:
# Inline version of find_disulfides for entry "4mwy".
item = "4mwy"
indices = pdb.SSDict[item]
res = DisulfideList([], item)
# Read the list from `pdb`; there is no `self` at notebook top level.
sslist = pdb.SSList
for ind in indices:
    res.append(sslist[ind])

In [None]:
# Inspect the loader's flat list of disulfides.
pdblist = pdb.SSList
len(pdblist)  # not the cell's last expression, so this value is not displayed
# NOTE(review): the describe() output recorded earlier in this notebook reports
# 169257 disulfides loaded; if SSList has that many entries, index 211508 is out
# of range. Confirm which dataset this cell was written against.
pdblist[211508]

In [None]:
# Fetch the "4mwy" entry through the loader's own indexing.
pdb["4mwy"]

In [None]:
# Re-run the empty-entry scan over the loaded database.
find_null_pdb_indices(pdb)

In [None]:
# Load the disulfides for ID "4wym" directly by ID.
# NOTE(review): other cells use "4mwy" and "4wmy"; confirm "4wym" is the intended ID.
wym = load_disulfides_from_id("4wym", verbose=False)
wym

In [None]:
# prune_extra_ss returns two values; only the first is displayed here.
# NOTE(review): this rebinds `sslist`, which earlier cells also assign.
sslist, xchain = prune_extra_ss(wym)
sslist

In [None]:
# NOTE(review): a later cell defines a function that is also named
# remove_duplicate_ss; once that cell runs, it replaces this imported name.
from proteusPy import remove_duplicate_ss

# Apply the imported remove_duplicate_ss to the disulfides loaded for "4wym".
pruned = remove_duplicate_ss(wym)
pruned

In [None]:
def find_string_in_list(target_string, list_of_strings):
    """
    Locate ``target_string`` within ``list_of_strings``.

    :param target_string: Value to look for
    :param list_of_strings: Sequence searched with its ``index`` method
    :return: Position of the first match, or -1 when ``index`` raises ValueError
    """
    try:
        position = list_of_strings.index(target_string)
    except ValueError:
        # No match: report the sentinel instead of propagating the error.
        position = -1
    return position

In [None]:
find_string_in_list("4wmy", pdb.IDList)  # result is discarded: not the cell's last expression
pdb["4wmy"]

In [None]:
# Keep the "4wmy" entry in a variable and display it.
wmy = pdb["4wmy"]
wmy

In [None]:
# Fetch two disulfides by name (the names differ only in the A/B suffix after
# each residue number) and compare them with ==.
ss1 = pdb["4yys_22A_65A"]
ss2 = pdb["4yys_22B_65B"]
ss1 == ss2

In [None]:
# Call pprint_all() on the first of the two disulfides being compared.
ss1.pprint_all()

In [None]:
# Call pprint_all() on the second of the two disulfides being compared.
ss2.pprint_all()

In [None]:
def remove_duplicate_ss(sslist: DisulfideList) -> list:
    """
    Return the items of ``sslist`` with repeats dropped, keeping first occurrences in order.

    An item counts as a repeat when ``in`` finds it among the items already kept.
    The result is a plain ``list``, not a DisulfideList.

    NOTE(review): an earlier cell imports ``remove_duplicate_ss`` from proteusPy;
    running this cell rebinds that name to this function.

    :param sslist: Disulfides to de-duplicate
    :return: New list holding the retained items
    """
    pruned = []
    for ss in sslist:
        # The membership test scans the kept list on every iteration.
        if ss not in pruned:
            pruned.append(ss)
    return pruned

In [None]:
# NOTE(review): the variable is named `yys` but holds the "4wmy" entry; confirm
# which ID is intended.
yys = pdb["4wmy"]
yys

In [None]:
# De-duplicate the list held in `yys` and display the result.
pruned = remove_duplicate_ss(yys)
pruned

In [None]:
# NOTE(review): `minss` is not defined in any cell visible in this part of the
# notebook; this cell relies on kernel state from elsewhere and would raise
# NameError on a fresh run unless it is defined outside this view.
ssmax = minss.max

In [None]:
# NOTE(review): `ssmin` is not defined in any cell visible in this part of the
# notebook; confirm where it is meant to come from.
ssmin == ssmax

In [None]:
def compare_dihedrals(self, other) -> float:
    """
    Compare the Disulfide object's dihedrals to another Disulfide object's dihedrals.

    The result is the Euclidean length of the element-wise difference between
    the two ``torsion_array`` values.

    :param self: Disulfide whose ``torsion_array`` is the reference
    :param other: Disulfide object to compare to
    :return: The length of the difference of the two sets of dihedral angles
    :raises TypeError: If the input is not a Disulfide object
    """
    import numpy

    if not isinstance(other, Disulfide):
        raise TypeError("Input must be a Disulfide object.")

    # Bio.PDB's Vector only represents 3-component vectors, so the difference is
    # taken with numpy instead, which works for a torsion array of any length.
    diff = numpy.asarray(other.torsion_array, dtype=float) - numpy.asarray(
        self.torsion_array, dtype=float
    )
    return float(numpy.linalg.norm(diff))

In [None]:
def Torsion_RMS(first, other) -> float:
    """
    Root-mean-square difference between the ``torsion_array`` values of two objects.

    Elements are paired positionally with ``zip``; the summed squared
    differences are divided by the length of ``first.torsion_array``.

    :param first: Object providing the reference ``torsion_array``
    :param other: Object providing the ``torsion_array`` to compare against
    :return: Square root of the mean squared element-wise difference
    """
    import math

    angles_a = first.torsion_array
    angles_b = other.torsion_array

    # Accumulate the squared element-wise differences, left to right.
    squared_total = 0
    for a, b in zip(angles_a, angles_b):
        squared_total += (a - b) ** 2

    # Mean over the number of entries in the first array, then the root.
    mean_square = squared_total / len(angles_a)
    return math.sqrt(mean_square)

In [None]:
# Index the loader with an integer: take the entry at position 0.
ss1 = pdb[0]
ss1

In [None]:
# Self-comparison: both arguments are the same object, so every element-wise
# difference is zero and the result is expected to be 0.0.
Torsion_RMS(ss1, ss1)