# Analysis of the Protein Data Bank

In [1]:
import numpy as np
import gzip
import glob,io
from sys import stdout
from shutil import copyfile
import os
import json
import mdtraj as md
from pdbfixer import PDBFixer
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *
print(md.version.version)

pdbs = np.loadtxt('cullpdb_pc25_res2.5_R0.25_d170705_entries10388.18833',comments={'IDs'},usecols=(0,),dtype=str)

try:
    wdir
except NameError:
    wdir=%pwd
else:
    %cd $wdir
    
%mkdir pdbs

1.7.2


#### rsync is used to download the whole PDB into the `pdbs` subfolder

%rsync -rlpt -v -z --partial --delete --port=33444 rsync.rcsb.org::ftp_data/structures/divided/pdb/ ./pdbs

### Create a folder containing the subset of sequences culled from the entire PDB using PISCES

In [3]:
# Copy every PISCES-selected entry from the local PDB mirror (./pdbs) into
# ./pisces. `ids` collects the unique 4-character PDB IDs that were found,
# `missing` the unique IDs that were not.
pisces_dir = wdir+'/pisces'
if not os.path.isdir(pisces_dir):
    os.makedirs(pisces_dir)

print('PISCES selected:',pdbs.size,'entries')
# atleast_1d: a list with a single entry would otherwise load as a 0-d array
PDBlist = np.atleast_1d(pdbs).tolist()

found_ids = []
missing_ids = []
for i in PDBlist:
    pdb_id = i[0:4]
    if pdb_id in found_ids or pdb_id in missing_ids:
        continue  # another chain of an entry that was already handled

    # The rsync'ed RCSB archive uses lower-case names
    # (pdbs/<2 middle characters>/pdb<id>.ent.gz); the path spelled exactly
    # like the PISCES ID is kept as a fallback.
    candidates = [wdir+'/pdbs/'+pdb_id[1:3].lower()+'/pdb'+pdb_id.lower()+'.ent.gz',
                  wdir+'/pdbs/'+pdb_id[1:3]+'/pdb'+pdb_id+'.ent.gz']
    source = next((path for path in candidates if os.path.isfile(path)), None)

    if source is not None:
        # The copy keeps the ID as written because later cells build the
        # file name from `ids`.
        copyfile(source, pisces_dir+'/pdb'+pdb_id+'.ent.gz')
        found_ids.append(pdb_id)
    else:
        # Record the full 4-character ID (not just its middle two characters)
        # so the report names the entry and duplicates are filtered.
        missing_ids.append(pdb_id)

ids = np.array(found_ids,dtype=str)
missing = np.array(missing_ids,dtype=str)
print(missing.size,'entries have not been found in the RCSB PDB')
print(missing)

('PISCES selected:', 10388, 'entries')


  


(0, 'entries have not been found in the RCSB PDB')
[]


### Extract K, R, D, E residues from the structures

In [19]:
# Decompress every PISCES structure and keep only the ATOM records of
# Lys, Arg, Glu and Asp, stopping at the first ENDMDL record.
dirname = wdir+'/pisces_gunzip'
if not os.path.isdir(dirname):
    os.makedirs(dirname)

charged_resnames = ('LYS','ARG','GLU','ASP')

for name in ids:
    outfile = dirname+'/pdb'+name+'.pdb'
    if os.path.isfile(outfile):
        continue  # already extracted by a previous run

    original = wdir+'/pisces/pdb'+name+'.ent.gz'

    # Filter while decompressing and write the result once. Writing the
    # unfiltered file first could leave it behind after an interruption,
    # and the isfile() guard above would then never filter it.
    kept = []
    with io.TextIOWrapper(gzip.open(original, 'rb')) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # blank line: nothing to classify
            if fields[0] == 'ENDMDL':
                kept.append('END')
                break
            # Residue name is taken as the 4th whitespace-separated field,
            # as before; records whose field differs from the plain
            # three-letter name are not kept.
            if fields[0] == 'ATOM' and len(fields) > 3 and fields[3] in charged_resnames:
                kept.append(line)

    with open(outfile, 'w') as text_file:
        text_file.write(''.join(kept))

### Find structures containing stacked pairs of R or K residues

In [4]:
def _has_close_pair(traj, atom_indices, cutoff=.4):
    """Return True if any two of `atom_indices` are within `cutoff` of each other.

    Distances come from md.compute_distances (non-periodic); the 0.4 default
    corresponds to the 4 Angstrom criterion used in this notebook.
    """
    if atom_indices.size < 2:
        return False
    pairs = traj.topology.select_pairs(atom_indices,atom_indices)
    distances = md.compute_distances(traj,pairs,periodic=False)
    return bool(np.any(distances <= cutoff))


stacked_k = []     # entries with stacked K
stacked_r = []     # entries with stacked R
stacked_k_wh = []  # entries with stacked K and no hydrogen atoms
stacked_r_wh = []  # entries with stacked R and no hydrogen atoms

dirname = wdir+'/stacked'
if not os.path.isdir(dirname):
    os.makedirs(dirname)

for name in ids:

    filename = wdir+'/pisces_gunzip/pdb'+name+'.pdb'
    outfile = wdir+'/stacked/pdb'+name+'.pdb'

    # Keep ATOM/END records; an ENDMDL record ends the file with 'END'.
    kept = []
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # blank line
            if fields[0] in ('ATOM','END'):
                kept.append(line)
            elif fields[0] == 'ENDMDL':
                kept.append('END')
                break
    pdb = ''.join(kept)

    # Nothing to analyse: empty file, or no ATOM record before END.
    if len(pdb) == 0 or pdb[0:3] == 'END':
        continue

    with open(outfile, 'w') as text_file:
        text_file.write(pdb)

    t = md.load_pdb(outfile)

    cz = t.topology.select('name CZ and resname ARG')
    ce = t.topology.select('name CE and resname LYS')

    # CE atoms closer than 4 Å
    if _has_close_pair(t,ce) and name not in stacked_k:
        stacked_k.append(name)
        if t.topology.select('type H').size == 0:
            stacked_k_wh.append(name)

    # CZ atoms closer than 4 Å
    if _has_close_pair(t,cz) and name not in stacked_r:
        stacked_r.append(name)
        if t.topology.select('type H').size == 0:
            stacked_r_wh.append(name)

# Later cells use these as numpy string arrays (.size, iteration, membership).
lyslys = np.array(stacked_k,dtype=str)
gdmgdm = np.array(stacked_r,dtype=str)
lyslys_wh = np.array(stacked_k_wh,dtype=str)
gdmgdm_wh = np.array(stacked_r_wh,dtype=str)
print(gdmgdm.size,gdmgdm_wh.size)
print(lyslys.size,lyslys_wh.size)

(1697, 1594)
(423, 395)


### Extract R, E, D and add missing hydrogen atoms with PDBFixer

In [5]:
# For every entry with stacked Arg side chains, write a copy containing only
# the ATOM records of Arg, Glu and Asp to <wdir>/argarg; entries flagged as
# having no hydrogen atoms get hydrogens added by PDBFixer.
dirname = 'argarg'
# Create the folder under wdir, the same location that is tested and written to.
if not os.path.isdir(wdir+'/'+dirname):
    os.makedirs(wdir+'/'+dirname)

for name in gdmgdm:

    oldfile = wdir+'/stacked/pdb'+name+'.pdb'
    newfile = wdir+'/argarg/pdb'+name+'.pdb'

    kept = []
    with open(oldfile, 'r') as f:
        for line in f:
            fields = line.split()
            # the len() guard skips blank or truncated lines instead of raising
            if len(fields) > 3 and fields[0] == 'ATOM' and fields[3] in ('ARG','GLU','ASP'):
                kept.append(line)

    with open(newfile, 'w') as text_file:
        text_file.write(''.join(kept))

    if name in gdmgdm_wh:
        fixer = PDBFixer(filename=newfile)
        fixer.addMissingHydrogens(7.0)
        # Close the output explicitly so the file is complete on disk before
        # the next cell loads it.
        with open(newfile, 'w') as fixed_file:
            PDBFile.writeFile(fixer.topology, fixer.positions, fixed_file)

### Find stacked R side chains interacting with two carboxyl groups

In [6]:
# For every entry with stacked Arg side chains, take the first Arg-Arg pair
# (CZ-CZ distance <= 0.4) whose NE/NH1/NH2 atoms donate hydrogen bonds
# (Wernet-Nilsson criterion) to exactly two distinct Glu/Asp residues, then:
#   * classify it as 'single' (only one Arg donates), 'double' (each Arg
#     binds its own carboxylate) or 'bridge' (a carboxylate is shared),
#   * save the selected atoms to <wdir>/argarg/<category>/<name>.pdb,
#   * append one line to results_r.dat:
#       name res1 res2 intra|inter category r1 r2 c1 c2 distance
#     where r1, r2, c1, c2 are MDTraj residue indices in the filtered
#     argarg file (residue.index), not PDB residue numbers.
results_file = wdir+'/results_r.dat'
result_lines = []
print('Number of entries with stacked R residues:',gdmgdm.size)

names_r = "(name NH1 or name NH2 or name NE)"
names_e = "(name OE1 or name OE2)"
names_d = "(name OD1 or name OD2)"

for name in gdmgdm:

    pdb_path = wdir+'/argarg/pdb'+name+'.pdb'
    t = md.load_pdb(pdb_path)

    cz = t.topology.select('name CZ and resname ARG')
    if cz.size < 2:
        continue

    cz_pairs = t.topology.select_pairs(cz,cz)
    cz_dist = md.compute_distances(t,cz_pairs,periodic=False)
    cz_dist = cz_dist.reshape(cz_dist.size)

    # The hydrogen bonds and carboxylate oxygens depend only on the
    # structure, so they are computed once, on the first close pair,
    # instead of once per pair.
    hb = None
    atoms_coo = None

    for i in range(0,len(cz_dist)):
        if not (cz_dist[i] <= .4):
            continue

        if hb is None:
            hb = md.wernet_nilsson(t,periodic=False)
            coo = t.topology.select("(resname GLU) and "+names_e)
            coo = np.append(coo, t.topology.select("(resname ASP) and "+names_d))
            atoms_coo = set(coo.tolist())

        r1 = t.topology.atom(cz_pairs[i][0]).residue.index
        r2 = t.topology.atom(cz_pairs[i][1]).residue.index
        atoms_r1 = set(t.topology.select("(resid "+str(r1)+") and "+names_r).tolist())
        atoms_r2 = set(t.topology.select("(resid "+str(r2)+") and "+names_r).tolist())

        acc_r1 = []    # residue indices of carboxylates accepting from r1
        acc_r2 = []    # residue indices of carboxylates accepting from r2
        carboxyl = []  # distinct acceptor residues, in order of appearance
        for k in hb[0]:
            # k = (donor, hydrogen, acceptor) atom indices
            donor_atom = int(k[0])
            acceptor_atom = int(k[2])
            if acceptor_atom not in atoms_coo:
                continue
            acc = t.topology.atom(acceptor_atom).residue.index
            if donor_atom in atoms_r1:
                acc_r1.append(acc)
                if acc not in carboxyl:
                    carboxyl.append(acc)
            if donor_atom in atoms_r2:
                acc_r2.append(acc)
                if acc not in carboxyl:
                    carboxyl.append(acc)

        if len(carboxyl) != 2:
            continue

        res_names = [t.topology.residue(carboxyl[0]).name,t.topology.residue(carboxyl[1]).name]

        # 'intra' only when all four residues belong to the same chain
        chains = set(t.top.residue(r).chain.index for r in (r1,r2,carboxyl[0],carboxyl[1]))
        same_chain = 'intra' if len(chains) == 1 else 'inter'

        if (len(acc_r1)==0 or len(acc_r2)==0):
            catename = 'single'
        elif len(set(acc_r1) & set(acc_r2)) == 0:
            catename = 'double'
        else:
            catename = 'bridge'
        dirname = wdir+'/argarg/'+catename
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

        final = dirname+'/'+name+'.pdb'

        # Atoms of the four residues except the listed backbone/CB/hydrogen
        # names; CB is added back for Asp.
        allat="((resid " + str(r1) + " or resid " + str(r2)
        allat=allat+" or resid " + str(carboxyl[0]) + " or resid " + str(carboxyl[1]) + ")"
        allat=allat+" and not (name C or name CA or name O or name N or name CB or name H"
        allat=allat+" or name HA3 or name HB3 or name HG3 or name HD3 or name H2 or name H3"
        allat=allat+" or name HA2 or name HB2 or name HG2 or name HD2 or name HA))"
        for c in carboxyl:
            if t.topology.residue(c).name == "ASP":
                allat=allat+" or (resid " + str(c) + " and name CB)"

        traj = t.atom_slice(t.topology.select(allat))
        traj.save_pdb(final)

        line = ' '.join([str(name),str(res_names[0]),str(res_names[1]),same_chain,catename,
                         str(r1),str(r2),str(carboxyl[0]),str(carboxyl[1]),str(cz_dist[i])])
        result_lines.append(line)
        print(line)
        break  # only the first matching pair of each entry is reported

results = ''.join(line+'\n' for line in result_lines)
with open(results_file, 'w') as text_file:
    text_file.write(results)

('Number of entries with stacked R residues:', 1697)
2I71 ASP GLU inter double 44 67 43 51 0.366248
5HZU ASP ASP inter single 20 135 21 136 0.363093
1V4E GLU GLU inter double 25 84 82 23 0.354867
4YJ1 ASP ASP intra single 38 48 47 40 0.377139
1K32 ASP GLU intra single 64 203 70 137 0.343541
1WB9 ASP ASP intra single 187 227 184 186 0.357506
4OPM ASP GLU intra single 38 41 44 45 0.379949
4M85 GLU ASP intra single 108 109 123 111 0.35399
2NXF ASP ASP intra double 30 38 36 37 0.374988
3TPD ASP ASP intra bridge 33 35 17 34 0.356259
5CWG GLU GLU intra single 25 36 26 34 0.39944
1YLL GLU GLU inter double 102 138 149 113 0.352558
2ZDS ASP ASP inter double 385 466 454 373 0.372875
3D3S GLU ASP intra single 90 91 96 92 0.376258
5A6W ASP GLU inter single 25 29 14 24 0.393773
3DTZ GLU GLU inter single 37 40 81 41 0.384087
4HTP GLU ASP intra double 13 17 14 26 0.356107
3N0U GLU GLU inter double 20 148 21 147 0.379427
5IKJ ASP GLU intra double 27 65 52 28 0.35913
1V7Z ASP ASP inter double 58 158 57

  mask = np.logical_and(distances < cutoffs, angles < angle_cutoff)


In [13]:
# Summarise results_r.dat: number and fraction of entries per interaction
# mode (I = double, II = single, III = bridge) and per chain relationship.
results_file = wdir+'/results_r.dat'

# Read the table once; ndmin=2 keeps a single-row file two-dimensional.
table = np.loadtxt(results_file,dtype=str,ndmin=2)
# NOTE: `ids` now holds the entries of the results file and replaces the
# PISCES ID list built earlier in the notebook.
ids = table[:,0]
conn = table[:,3]
cate = table[:,4]
res = table[:,5:9].astype(int)

# Entry names per category (row i of every column describes entry ids[i]).
d = ids[cate == 'double']
s = ids[cate == 'single']
b = ids[cate == 'bridge']
double = d.size
single = s.size
bridge = b.size
inter = int(np.count_nonzero(conn == 'inter'))
intra = int(np.count_nonzero(conn == 'intra'))

# NOTE(review): the original cell added one 'double' and one 'single' entry
# by hand (hard-coded +1/+1 with denominators 231 and 229). The offsets are
# kept; the reason is not recorded in this notebook - confirm.
extra_double = 1
extra_single = 1
classified = double + single + bridge

print('Entries featuring the mode of interaction')
occur = float(classified + extra_double + extra_single)
print(occur)
print('fraction of I, II, III')
print('%s %s %s' % ((double+extra_double)/occur,(single+extra_single)/occur,bridge/occur))
print('fraction of inter-chain, intra-chain')
# chain fractions are relative to the entries present in the file only
print('%s %s' % (inter/float(classified),intra/float(classified)))
print('Number of I, II, III')
print(double,single,bridge)
print('check that there is no overlap between categories')
print(np.unique(d).size,d.size)
print(np.intersect1d(s,b).size)
print(np.unique(s).size,s.size)
print(np.unique(b).size,b.size)
print('PDB entries in each category')
print('--------')
print(r', '.join(i for i in d.astype(str)))
print('--------')
print(r', '.join(i for i in s.astype(str)))
print('--------')
print(r', '.join(i for i in b.astype(str)))

Entries featuring the mode of interaction
231.0
fraction of I, II, III
0.558441558442 0.329004329004 0.112554112554
fraction of inter-chain, intra-chain
0.454148471616 0.545851528384
Number of I, II, III
(128, 75, 26)
check that there is no overlap between categories
(128, 128)
0
(75, 75)
(26, 26)
PDB entries in each category
--------
2I71, 1V4E, 2NXF, 1YLL, 2ZDS, 4HTP, 3N0U, 5IKJ, 1V7Z, 3EKH, 1UI5, 2QCU, 5LUS, 5ELN, 2FIQ, 4ZBO, 4X2P, 5IPY, 5I0G, 2XXP, 1GQI, 5HHJ, 1M5Q, 3P7L, 2GMH, 3AHN, 3OXP, 4EU9, 1C8U, 4ML9, 5G3Q, 3H14, 5HNO, 1JIX, 4PE6, 5EJ8, 5L7A, 3ESL, 1M55, 4OGE, 5B2R, 5BP8, 1R2J, 2AHU, 4PZ3, 2APJ, 1PN0, 2WFW, 1F5V, 5A0N, 1PIX, 2CAY, 5UAM, 2AL6, 4ZFL, 3E3M, 3AK5, 4KPP, 3HKA, 5D6O, 2DQB, 3KKI, 4MUV, 3PPL, 3E57, 5FVN, 4BHU, 5J41, 5NGD, 5K8P, 3T6S, 4TW5, 1YLX, 4QXL, 5CZL, 3I3W, 3OOQ, 3M33, 3R44, 1CS0, 5HT2, 5H3Z, 1VJV, 3PF7, 1T0B, 2I00, 2JE6, 5LX8, 3BLZ, 5KLP, 5CYW, 1KAE, 4CPC, 2Z0J, 3NG7, 2HA8, 2RH0, 2OIZ, 3C8D, 2VS7, 5CQG, 3MEM, 1VJU, 5G23, 2ZGY, 5K3W, 3LVY, 3CK1, 2WBM, 4PW2, 1Z4

### Find stacked K side chains interacting with two carboxyl groups

In [8]:
# For every entry with stacked Lys side chains, write a copy containing only
# the ATOM records of Lys, Glu and Asp to <wdir>/lyslys; entries flagged as
# having no hydrogen atoms get hydrogens added by PDBFixer.
dirname = 'lyslys'
# Create the folder under wdir, the same location that is tested and written to.
if not os.path.isdir(wdir+'/'+dirname):
    os.makedirs(wdir+'/'+dirname)

for name in lyslys:

    oldfile = wdir+'/stacked/pdb'+name+'.pdb'
    newfile = wdir+'/lyslys/pdb'+name+'.pdb'

    kept = []
    with open(oldfile, 'r') as f:
        for line in f:
            fields = line.split()
            # the len() guard skips blank or truncated lines instead of raising
            if len(fields) > 3 and fields[0] == 'ATOM' and fields[3] in ('LYS','GLU','ASP'):
                kept.append(line)

    with open(newfile, 'w') as text_file:
        text_file.write(''.join(kept))

    if name in lyslys_wh:
        fixer = PDBFixer(filename=newfile)
        fixer.addMissingHydrogens(7.0)
        # Close the output explicitly so the file is complete on disk before
        # the next cell loads it.
        with open(newfile, 'w') as fixed_file:
            PDBFile.writeFile(fixer.topology, fixer.positions, fixed_file)

In [9]:
# For every entry with stacked Lys side chains, take the first Lys-Lys pair
# (CE-CE distance <= 0.4) whose NZ atoms donate hydrogen bonds
# (Wernet-Nilsson criterion) to exactly two distinct Glu/Asp residues, then:
#   * classify it as 'single' (only one Lys donates), 'double' (each Lys
#     binds its own carboxylate) or 'bridge' (a carboxylate is shared),
#   * save the four residues to <wdir>/lyslys/<category>/<name>.pdb,
#   * append one line to results_k.dat:
#       name res1 res2 intra|inter category r1 r2 c1 c2 distance
#     where r1, r2, c1, c2 are MDTraj residue indices in the filtered
#     lyslys file (residue.index), not PDB residue numbers.
results_file = wdir+'/results_k.dat'
result_lines = []
print('Number of entries with stacked K residues:',lyslys.size)

names_r = "(name NZ)"
names_e = "(name OE1 or name OE2)"
names_d = "(name OD1 or name OD2)"

for name in lyslys:

    pdb_path = wdir+'/lyslys/pdb'+name+'.pdb'
    t = md.load_pdb(pdb_path)

    ce = t.topology.select('name CE and resname LYS')
    if ce.size < 2:
        continue

    ce_pairs = t.topology.select_pairs(ce,ce)
    ce_dist = md.compute_distances(t,ce_pairs,periodic=False)
    ce_dist = ce_dist.reshape(ce_dist.size)

    # The hydrogen bonds and carboxylate oxygens depend only on the
    # structure, so they are computed once, on the first close pair,
    # instead of once per pair.
    hb = None
    atoms_coo = None

    for i in range(0,len(ce_dist)):
        if not (ce_dist[i] <= .4):
            continue

        if hb is None:
            hb = md.wernet_nilsson(t,periodic=False)
            coo = t.topology.select("(resname GLU) and "+names_e)
            coo = np.append(coo, t.topology.select("(resname ASP) and "+names_d))
            atoms_coo = set(coo.tolist())

        r1 = t.topology.atom(ce_pairs[i][0]).residue.index
        r2 = t.topology.atom(ce_pairs[i][1]).residue.index
        atoms_r1 = set(t.topology.select("(resid "+str(r1)+") and "+names_r).tolist())
        atoms_r2 = set(t.topology.select("(resid "+str(r2)+") and "+names_r).tolist())

        acc_r1 = []    # residue indices of carboxylates accepting from r1
        acc_r2 = []    # residue indices of carboxylates accepting from r2
        carboxyl = []  # distinct acceptor residues, in order of appearance
        for k in hb[0]:
            # k = (donor, hydrogen, acceptor) atom indices
            donor_atom = int(k[0])
            acceptor_atom = int(k[2])
            if acceptor_atom not in atoms_coo:
                continue
            acc = t.topology.atom(acceptor_atom).residue.index
            if donor_atom in atoms_r1:
                acc_r1.append(acc)
                if acc not in carboxyl:
                    carboxyl.append(acc)
            if donor_atom in atoms_r2:
                acc_r2.append(acc)
                if acc not in carboxyl:
                    carboxyl.append(acc)

        if len(carboxyl) != 2:
            continue

        res_names = [t.topology.residue(carboxyl[0]).name,t.topology.residue(carboxyl[1]).name]

        # 'intra' only when all four residues belong to the same chain
        chains = set(t.top.residue(r).chain.index for r in (r1,r2,carboxyl[0],carboxyl[1]))
        same_chain = 'intra' if len(chains) == 1 else 'inter'

        if (len(acc_r1)==0 or len(acc_r2)==0):
            catename = 'single'
        elif len(set(acc_r1) & set(acc_r2)) == 0:
            catename = 'double'
        else:
            catename = 'bridge'
        dirname = wdir+'/lyslys/'+catename
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

        final = dirname+'/'+name+'.pdb'

        # All atoms of the two Lys and the two carboxylate residues
        allat="(resid "+str(r1)+" or resid "+str(r2)
        allat=allat+" or resid "+str(carboxyl[0])+" or resid "+str(carboxyl[1])+")"

        traj = t.atom_slice(t.topology.select(allat))
        traj.save_pdb(final)

        line = ' '.join([str(name),str(res_names[0]),str(res_names[1]),same_chain,catename,
                         str(r1),str(r2),str(carboxyl[0]),str(carboxyl[1]),str(ce_dist[i])])
        result_lines.append(line)
        print(line)
        break  # only the first matching pair of each entry is reported

results = ''.join(line+'\n' for line in result_lines)
with open(results_file, 'w') as text_file:
    text_file.write(results)

('Number of entries with stacked K residues:', 423)
2XMP ASP GLU intra single 11 13 97 96 0.387812
3M1R GLU GLU intra double 232 238 231 193 0.384207
5FOE GLU GLU inter double 26 27 24 122 0.377795
1O88 ASP GLU intra double 16 17 22 14 0.345613
2SQC GLU GLU inter double 44 147 148 45 0.323039
5JRH ASP ASP intra single 0 76 89 87 0.366404
4N13 GLU GLU intra double 39 49 47 40 0.380001
3IX3 ASP ASP inter double 7 34 35 8 0.398684
2W42 GLU GLU intra single 103 104 121 119 0.396204
4EQL ASP ASP intra single 13 15 66 6 0.360326
5JJA GLU ASP inter double 31 132 37 30 0.37975
1WWJ ASP ASP intra double 2 7 21 12 0.377612
4G5H ASP ASP intra double 1 26 0 20 0.384661


In [16]:
# Summarise results_k.dat: number and fraction of entries per interaction
# mode (I = double, II = single, III = bridge) and per chain relationship.
results_file = wdir+'/results_k.dat'

# Read the table once; ndmin=2 keeps a single-row file two-dimensional.
table = np.loadtxt(results_file,dtype=str,ndmin=2)
# NOTE: `ids` now holds the entries of the results file and replaces any
# earlier list of IDs with the same name.
ids = table[:,0]
conn = table[:,3]
cate = table[:,4]
res = table[:,5:9].astype(int)

# Entry names per category (row i of every column describes entry ids[i]).
d = ids[cate == 'double']
s = ids[cate == 'single']
b = ids[cate == 'bridge']
double = d.size
single = s.size
bridge = b.size
inter = int(np.count_nonzero(conn == 'inter'))
intra = int(np.count_nonzero(conn == 'intra'))

print('Entries featuring the mode of interaction')
# The denominator is derived from the file instead of the hard-coded 13.
occur = float(double + single + bridge)
print(occur)
print('fraction of I, II, III')
print('%s %s %s' % (double/occur,single/occur,bridge/occur))
print('fraction of inter-chain, intra-chain')
print('%s %s' % (inter/occur,intra/occur))
print('Number of I, II, III')
print(double,single,bridge)
print('check that there is no overlap between categories')
print(np.unique(d).size,d.size)
print(np.intersect1d(s,b).size)
print(np.unique(s).size,s.size)
print(np.unique(b).size,b.size)
print('PDB entries in each category')
print('--------')
print(r', '.join(i for i in d.astype(str)))
print('--------')
print(r', '.join(i for i in s.astype(str)))
print('--------')
print(r', '.join(i for i in b.astype(str)))

Entries featuring the mode of interaction
13.0
fraction of I, II, III
0.692307692308 0.307692307692 0.0
fraction of inter-chain, intra-chain
0.307692307692 0.692307692308
Number of I, II, III
(9, 4, 0)
check that there is no overlap between categories
(9, 9)
0
(4, 4)
(0, 0)
PDB entries in each category
--------
3M1R, 5FOE, 1O88, 2SQC, 4N13, 3IX3, 5JJA, 1WWJ, 4G5H
--------
2XMP, 5JRH, 2W42, 4EQL
--------

