# Inserting implicit hydrogens ...
## ...and mapping ASE atom indices with (atom name, residue) tuples 

as commonly used in protein-related contexts, e.g. in .pdb files

Attention: when reading a Gromacs .top file with ParmEd, all force-field related files must be accessible 
OR all imports (and the corresponding entries) must be removed from the .top file; otherwise ParmEd will fail to load it

In [1]:
import ast
import h5py
import ase.io
import parmed as pmd
from parmed import gromacs
from insertHbyList import insertHbyList

In [23]:
# Maps the names of united atoms (heavy atoms whose hydrogens are implicit)
# to the number of hydrogens that should be inserted at each of them.
# Applied to all atoms with that name, regardless of the atom's residue.
implicitHbondingPartners = {'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2}

infile_pdb = 'system100.pdb'
infile_top = 'system100.top'

# The same .pdb file is loaded twice: once as an ASE structure and once as a
# ParmEd structure (the latter supplies box and positions for the topology).
ua_ase_struct = ase.io.read(infile_pdb)
ua_pmd_struct = pmd.load_file(infile_pdb)
ua_pmd_top = gromacs.GromacsTopologyFile(infile_top,parametrize=False)
# throws some warnings on angle types, does not matter for bonding info
# if error thrown, just try to "reduce" .top as far as possible
ua_pmd_top.strip(':SOL,CL') # strip water and electrolyte from system (if not yet done in .top)
ua_pmd_top.box = ua_pmd_struct.box # Needed because .pdb contains box info
ua_pmd_top.positions = ua_pmd_struct.positions

# NOTE(review): the last argument (1.0) is presumably the bond length used
# when placing the new hydrogens -- confirm against insertHbyList.
aa_ase_struct, aa_pmd_struct, names, residues = insertHbyList(
    ua_ase_struct, ua_pmd_top, implicitHbondingPartners, 1.0)

# Fixed: the original referenced the undefined names `ase_struct` and
# `new_ase_struct`, which raises NameError on a fresh kernel.
ua_count = len(ua_ase_struct)  # united atoms structure
aa_count = len(aa_ase_struct)  # all atoms structure



Adding 1 H-atoms to CD3 (#8)...
bondingPartners [ 9 21]
Atom CD3 already has bonding partners CD4, CB1
Adding H-atom 1CD3 at position [ 23.911579364757202, 25.466622991293512, 12.349999999999998 ]
Adding 1 H-atoms to CD4 (#9)...
bondingPartners [10 12]
Atom CD4 already has bonding partners CD5, CA1
Adding H-atom 1CD4 at position [ 25.312471957818314, 25.072450829115194, 10.489569761035874 ]
Adding 2 H-atoms to CA2 (#15)...
bondingPartners [16]
Atom CA2 already has bonding partners CA3
Adding H-atom 1CA2 at position [ 28.74261935828807, 23.557590675857806, 11.104589188842278 ]
bondingPartners [ 16 105]
Atom CA2 already has bonding partners CA3, 1CA2
Adding H-atom 2CA2 at position [ 28.85271690173004, 23.702220854918263, 9.389826475843604 ]
Adding 2 H-atoms to CA3 (#16)...
bondingPartners [17]
Atom CA3 already has bonding partners NA1
Adding H-atom 1CA3 at position [ 30.129780545620445, 25.445820959539404, 10.879183744085134 ]
bondingPartners [ 17 107]
Atom CA3 already has bonding partne



In [24]:
# inspect the united-atom ASE structure read from the .pdb file
ua_ase_struct

Atoms(symbols='C44H38N6O15', pbc=True, cell=[[28.873, 0.0, 0.0], [0.0, 28.873, 0.0], [14.436500000000004, 14.436500000000004, 20.416294093199184]])

In [26]:
# list the atoms of the ParmEd topology (after stripping ':SOL,CL')
ua_pmd_top.atoms

AtomList([
	<Atom CE1 [0]; In terB 0>
	<Atom HE1 [1]; In terB 0>
	<Atom HE2 [2]; In terB 0>
	<Atom CD1 [3]; In terB 0>
	<Atom HD1 [4]; In terB 0>
	<Atom CD2 [5]; In terB 0>
	<Atom HD2 [6]; In terB 0>
	<Atom OD1 [7]; In terB 0>
	<Atom CD3 [8]; In terB 0>
	<Atom CD4 [9]; In terB 0>
	<Atom CD5 [10]; In terB 0>
	<Atom HD3 [11]; In terB 0>
	<Atom CA1 [12]; In terB 0>
	<Atom OA1 [13]; In terB 0>
	<Atom OA2 [14]; In terB 0>
	<Atom CA2 [15]; In terB 0>
	<Atom CA3 [16]; In terB 0>
	<Atom NA1 [17]; In terB 0>
	<Atom HA1 [18]; In terB 0>
	<Atom HA2 [19]; In terB 0>
	<Atom HA3 [20]; In terB 0>
	<Atom CB1 [21]; In terB 0>
	<Atom OB1 [22]; In terB 0>
	<Atom OB2 [23]; In terB 0>
	...
	<Atom HB1 [98]; In terA 2>
	<Atom HB2 [99]; In terA 2>
	<Atom HB3 [100]; In terA 2>
	<Atom CD6 [101]; In terA 2>
	<Atom HD4 [102]; In terA 2>
])

In [28]:
# atom count of the united-atom ASE structure
len(ua_ase_struct)

103

In [27]:
# atom count of the ParmEd structure loaded from the .pdb file
len(ua_pmd_struct.atoms)

103

In [29]:
# atom count of the ASE structure returned by insertHbyList
len(aa_ase_struct)

133

In [32]:
# atom count of the ParmEd structure returned by insertHbyList
len(aa_pmd_struct.atoms)

133

In [33]:
# list of atom names returned by insertHbyList
names

['CE1',
 'HE1',
 'HE2',
 'CD1',
 'HD1',
 'CD2',
 'HD2',
 'OD1',
 'CD3',
 'CD4',
 'CD5',
 'HD3',
 'CA1',
 'OA1',
 'OA2',
 'CA2',
 'CA3',
 'NA1',
 'HA1',
 'HA2',
 'HA3',
 'CB1',
 'OB1',
 'OB2',
 'CB2',
 'CB3',
 'NB1',
 'HB1',
 'HB2',
 'HB3',
 'CD6',
 'HD4',
 'CD1',
 'HD1',
 'CD2',
 'HD2',
 'OD1',
 'CD3',
 'CD4',
 'CD5',
 'HD3',
 'CA1',
 'OA1',
 'OA2',
 'CA2',
 'CA3',
 'NA1',
 'HA1',
 'HA2',
 'HA3',
 'CB1',
 'OB1',
 'OB2',
 'CB2',
 'CB3',
 'NB1',
 'HB1',
 'HB2',
 'HB3',
 'CD6',
 'HD4',
 'CC1',
 'HC1',
 'CC2',
 'CC3',
 'HC2',
 'CC4',
 'HC3',
 'CC5',
 'HC4',
 'CC6',
 'HC5',
 'CC7',
 'HC6',
 'CD1',
 'HD1',
 'CD2',
 'HD2',
 'OD1',
 'CD3',
 'CD4',
 'CD5',
 'HD3',
 'CA1',
 'OA1',
 'OA2',
 'CA2',
 'CA3',
 'NA1',
 'HA1',
 'HA2',
 'HA3',
 'CB1',
 'OB1',
 'OB2',
 'CB2',
 'CB3',
 'NB1',
 'HB1',
 'HB2',
 'HB3',
 'CD6',
 'HD4',
 '1CD3',
 '1CD4',
 '1CA2',
 '2CA2',
 '1CA3',
 '2CA3',
 '1CB2',
 '2CB2',
 '1CB3',
 '2CB3',
 '1CD3',
 '1CD4',
 '1CA2',
 '2CA2',
 '1CA3',
 '2CA3',
 '1CB2',
 '2CB2',
 '1CB3',
 '2CB

In [34]:
# list of residue names returned by insertHbyList
residues

['terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'OXO0',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terA',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 'terB',
 

In [35]:
# pair each atom name with the residue name at the same list position
atom_residue_list = [*zip(names, residues)]

In [36]:
# consecutive indices 0 .. aa_count-1, used as keys/values of the mappings
ase_id = range(0, aa_count)

In [42]:
# forward mapping: index -> (atom name, residue name)
ase2pmd = {index: name_residue
           for index, name_residue in zip(ase_id, atom_residue_list)}

In [43]:
# reverse mapping: (atom name, residue name) -> index
# NOTE: if the same (name, residue) tuple occurs more than once, the entry
# with the highest index silently replaces the earlier ones.
pmd2ase = {name_residue: index
           for name_residue, index in zip(atom_residue_list, ase_id)}

In [44]:
# show the index -> (atom name, residue name) mapping
ase2pmd

{0: ('CE1', 'terB'),
 1: ('HE1', 'terB'),
 2: ('HE2', 'terB'),
 3: ('CD1', 'terB'),
 4: ('HD1', 'terB'),
 5: ('CD2', 'terB'),
 6: ('HD2', 'terB'),
 7: ('OD1', 'terB'),
 8: ('CD3', 'terB'),
 9: ('CD4', 'terB'),
 10: ('CD5', 'terB'),
 11: ('HD3', 'terB'),
 12: ('CA1', 'terB'),
 13: ('OA1', 'terB'),
 14: ('OA2', 'terB'),
 15: ('CA2', 'terB'),
 16: ('CA3', 'terB'),
 17: ('NA1', 'terB'),
 18: ('HA1', 'terB'),
 19: ('HA2', 'terB'),
 20: ('HA3', 'terB'),
 21: ('CB1', 'terB'),
 22: ('OB1', 'terB'),
 23: ('OB2', 'terB'),
 24: ('CB2', 'terB'),
 25: ('CB3', 'terB'),
 26: ('NB1', 'terB'),
 27: ('HB1', 'terB'),
 28: ('HB2', 'terB'),
 29: ('HB3', 'terB'),
 30: ('CD6', 'terB'),
 31: ('HD4', 'terB'),
 32: ('CD1', 'OXO0'),
 33: ('HD1', 'OXO0'),
 34: ('CD2', 'OXO0'),
 35: ('HD2', 'OXO0'),
 36: ('OD1', 'OXO0'),
 37: ('CD3', 'OXO0'),
 38: ('CD4', 'OXO0'),
 39: ('CD5', 'OXO0'),
 40: ('HD3', 'OXO0'),
 41: ('CA1', 'OXO0'),
 42: ('OA1', 'OXO0'),
 43: ('OA2', 'OXO0'),
 44: ('CA2', 'OXO0'),
 45: ('CA3', 'OXO0')

In [46]:
# example lookup: index of the inserted hydrogen '1CA3' in residue 'terA'
pmd2ase[('1CA3', 'terA')]

127