In [1]:
cd /home/drew/Projects/Serotonin/NBOHvNapAnalysis/CASTp/6WHA

/scr/drew/Serotonin/NBOHvNapAnalysis/CASTp/6WHA


In [2]:
import bioinf as bi

receptor_chain = "A"
pdb = "6WHA"
alpha_5_chain = "B"
alpha_helix_lowest_resSeq = 224
receptor_pocket_residues_dict = {}
cutoff = 5

class Pocket(object):
    def __init__(self, pseudoresidue, pocket_id):
        self._pseudoresidue = pseudoresidue
        self._pocket_id = pocket_id
        
    def is_closer_than(self, d, residue):
        return self._pseudoresidue.is_closer_than(d, residue)
    
    def get_resSeqs(self):
        resSeqs_set = {atom.resSeq for atom in self._pseudoresidue.atoms}
        sorted_resSeqs = [int(resSeq) for resSeq in resSeqs_set]
        sorted_resSeqs.sort()
        return [f'{resSeq}' for resSeq in sorted_resSeqs]
    
    def get_serials(self):
        serials_set = {atom.serial for atom in self._pseudoresidue.atoms}
        sorted_serials = [int(serial) for serial in serials_set]
        sorted_serials.sort()
        return [f'{serial}' for serial in sorted_serials]
    
    pocket_id = property(lambda self: self._pocket_id)
    atoms = property(lambda self: self._pseudoresidue.atoms)


In [3]:
with open(f'{pdb}_{receptor_chain}.poc') as f:
    for line in f:
        cleanline = line.strip()
        atom_line = bi.PDBAtomLine.parse_string(cleanline)
        if atom_line.chainID == receptor_chain and int(atom_line.resSeq) < 1000: #over 1000 used for added residues
            pocket_id = cleanline.split()[-2]
            atom_line_ids = receptor_pocket_residues_dict.get(pocket_id, [])
            atom_line_ids.append(f'{atom_line.resSeq}_{atom_line.name}') #assume resSeq and name are consistent b/n poc and pdb
            receptor_pocket_residues_dict[pocket_id] = atom_line_ids
            
chains_atom_lines = {}
pockets_atom_lines = {}
with open(f'{pdb}.pdb') as f:
    for line in f:
        cleanline = line.strip()
        if not cleanline.startswith('ATOM'):
            continue
        atom_line = bi.PDBAtomLine.parse_string(cleanline)
        chain_ID = atom_line.chainID
        if chain_ID == receptor_chain:
            for pocket_id, atom_ids in receptor_pocket_residues_dict.items():
                if f'{atom_line.resSeq}_{atom_line.name}' in atom_ids:
                    atom_lines = pockets_atom_lines.get(pocket_id, [])
                    atom_lines.append(atom_line)
                    pockets_atom_lines[pocket_id] = atom_lines
                    break
            continue
        if chain_ID != alpha_5_chain:
            continue
        if int(atom_line.resSeq) < alpha_helix_lowest_resSeq:
            continue
        atom_lines = chains_atom_lines.get(chain_ID, [])
        atom_lines.append(atom_line)
        chains_atom_lines[chain_ID] = atom_lines

gprot_chains = [bi.PDBProtein(v) for v in chains_atom_lines.values()]
pockets = [Pocket(bi.PDBResidue([bi.PDBAtom(atom_line) for atom_line in atom_lines]),k) for (k,atom_lines) in pockets_atom_lines.items()]# if k in receptor_pocket_residues_dict.keys()]

In [4]:
close_gprot_pockets = [p for p in pockets if any([protein.is_closer_than(cutoff, p) for protein in gprot_chains])] 
# FIXME might inefficiently calculate whether each chain is closer than the cutoff before asking whether any are.
#gprot_pocket_residues.sort()

In [12]:
#serial_set = {int(num) for sublist in [r.get_serials() for r in close_gprot_pockets] for num in sublist}
# serials = list(serial_set)
# serials.sort()
atoms = [f'resid {atom.resSeq} and name {atom.name}' for sublist in [r.atoms for r in close_gprot_pockets] for atom in sublist]
print(" or ".join(atoms))

resid 105 and name O or resid 105 and name CD2 or resid 107 and name CB or resid 107 and name CG or resid 107 and name OD1 or resid 107 and name ND2 or resid 108 and name CA or resid 108 and name C or resid 108 and name O or resid 108 and name CB or resid 109 and name N or resid 109 and name CA or resid 109 and name CB or resid 109 and name OG1 or resid 109 and name CG2 or resid 110 and name CG or resid 110 and name OD1 or resid 110 and name ND2 or resid 111 and name CD2 or resid 111 and name CE2 or resid 112 and name CB or resid 112 and name CD2 or resid 112 and name CE2 or resid 113 and name CG or resid 113 and name CD2 or resid 168 and name C or resid 168 and name O or resid 168 and name CB or resid 169 and name CA or resid 169 and name O or resid 169 and name CG1 or resid 169 and name CG2 or resid 169 and name CD1 or resid 172 and name O or resid 172 and name CB or resid 172 and name CG or resid 172 and name OD1 or resid 172 and name OD2 or resid 173 and name N or resid 173 and nam

In [11]:
atoms

['resid 105 and name O',
 'resid 105 and name CD2',
 'resid 107 and name CB',
 'resid 107 and name CG',
 'resid 107 and name OD1',
 'resid 107 and name ND2',
 'resid 108 and name CA',
 'resid 108 and name C',
 'resid 108 and name O',
 'resid 108 and name CB',
 'resid 109 and name N',
 'resid 109 and name CA',
 'resid 109 and name CB',
 'resid 109 and name OG1',
 'resid 109 and name CG2',
 'resid 110 and name CG',
 'resid 110 and name OD1',
 'resid 110 and name ND2',
 'resid 111 and name CD2',
 'resid 111 and name CE2',
 'resid 112 and name CB',
 'resid 112 and name CD2',
 'resid 112 and name CE2',
 'resid 113 and name CG',
 'resid 113 and name CD2',
 'resid 168 and name C',
 'resid 168 and name O',
 'resid 168 and name CB',
 'resid 169 and name CA',
 'resid 169 and name O',
 'resid 169 and name CG1',
 'resid 169 and name CG2',
 'resid 169 and name CD1',
 'resid 172 and name O',
 'resid 172 and name CB',
 'resid 172 and name CG',
 'resid 172 and name OD1',
 'resid 172 and name OD2',
 'r

In [27]:
chains_atom_lines['14']

KeyError: '14'