# Run a test calculation with `a-bam-p`/`acd-1-p` to find gaps in our workflow...

In [4]:
%load_ext autoreload
%autoreload 2

import os as os
import glob as glob
import numpy as np

import parmed as pmd
from paprika import align
from paprika import dummy
from paprika import build

from smirnovert.convert import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Root of the benchmark input files and the per-set sub-directory that holds
# the AMBER topology/coordinate pairs.
prefix = './benchmarksets/input_files/'
suffix = '/prmtop-rst7/'
benchmarksets = ['cd-set1']

# Collect the `acd-1-p` topology of every benchmark set (sorted within a set).
systems = []
for system in benchmarksets:
    pattern = prefix + system + suffix + 'acd-1-p.prmtop'
    systems.extend(sorted(glob.glob(pattern)))

## Re-convert!

## Load in the reference coordinates and SMIRNOFF99Frosst parameters

In [6]:
for file in systems:
    # Paths look like <prefix><host_set>/prmtop-rst7/<base_name>.prmtop, so the
    # path components give both the system name and the benchmark set name.
    path_parts = file[:-len('.prmtop')].split('/')
    base_name = path_parts[-1]
    host_set = path_parts[-3]
    host = host_set.split('-')[0]
    guest = base_name.split('-')[1]

    # NOTE: `suffix` already starts with '/', so this path contains a double
    # slash. It is kept unchanged because other cells hard-code the same path.
    destination = prefix + host_set + '/' + suffix + 'smirnoff/' + base_name + '/'

    reference_prmtop = base_name + '.prmtop'
    reference_inpcrd = base_name + '.rst7'

    name = 'smirnoff'

    # Pick the host residue name from the benchmark set (and, for the `gdcc`
    # set, from the system name).
    if 'cb7' in host:
        host_resname = 'CB7'
    elif 'cd' in host:
        host_resname = 'MGO'
    elif 'gdcc' in host and 'temoa' in base_name:
        host_resname = 'OCB'
    elif 'gdcc' in host and 'oa' in base_name:
        host_resname = 'OCT'
    else:
        # Without this branch an unknown host would either raise a NameError
        # below or silently reuse the residue name of the previous system.
        raise ValueError(f'Cannot determine the host residue name for {file}')
    guest_resname = 'MOL'

    convert(destination, name,
            reference_prmtop, reference_inpcrd,
            host_resname, guest_resname)

INFO:root:Creating ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/smirnoff.pdb with CONECT records...
DEBUG:root:PDB file written by cpptraj.
INFO:root:Pruning water-water CONECT records...
DEBUG:root:First water residue = 151
DEBUG:root:Found first water CONECT entry at line = 6304
INFO:root:Splitting topology into components...
INFO:root:Creating a combined topology for the host and guest molecules...
INFO:root:Writing a `mol2` for the host molecule...
DEBUG:root:MOL2 file written by cpptraj.
INFO:root:Writing a `mol2` for the host molecule...
DEBUG:root:MOL2 file written by cpptraj.
INFO:root:Converting ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MGO.mol2 to SYBYL atom types via Antechamber...
DEBUG:root:MOL2 file written by antechamber.
INFO:root:Converting ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MOL.mol2 to SYBYL atom types via Antechamber...
DEBUG:root:MOL2 file written by antechamber.
INFO:root:Extracting water a

AttributeError: 'list' object has no attribute 'GetMaxAtomIdx'

## Testing!

In [9]:
def load_mol2(filename, name=None, add_tripos=True, flavor='FF'):
    """
    Read the first molecule of a `mol2` file into an `OEMol` object.

    Parameters
    ----------
    filename : str
        MOL2 file
    name : str, optional
        Residue name; when given, it is set as the title of the molecule
    add_tripos : bool
        Whether to add Tripos atom names to the molecule
    flavor : str or None
        Any value other than ``None`` makes the file be read with the
        ``OEIFlavor_MOL2_Forcefield`` input flavor (the value itself is not
        inspected); ``None`` leaves the flavor of the stream untouched.

    Returns
    -------
    openeye.oechem.OEMol or None
        The first molecule in the file, or ``None`` when the file cannot be
        opened or yields no molecules.

    """
    logging.info(f'Loading {filename}...')
    ifs = oemolistream()
    if flavor is not None:
        ifs.SetFlavor(OEFormat_MOL2, OEIFlavor_MOL2_Forcefield)

    if not ifs.open(filename):
        logging.error(f'Unable to open {filename} for reading...')
        return None

    try:
        # Only the first molecule is wanted, so return from inside the loop.
        # This handles single-residue and multi-residue hosts alike.
        for mol in ifs.GetOEMols():
            if add_tripos:
                OETriposAtomNames(mol)
            if name:
                mol.SetTitle(name)
            # Return a copy so the result does not depend on the stream.
            return OEMol(mol)
        return None
    finally:
        # Always release the input stream, including on the early return.
        ifs.close()


In [11]:
# Sanity check: read the SYBYL-typed guest (MOL) file with the notebook-local
# `load_mol2` and confirm that every atom name in it is unique.
asdf = load_mol2('./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MOL-sybyl.mol2')
from smirnovert.utils import check_unique_atom_names
check_unique_atom_names(asdf)

INFO:root:Loading ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MOL-sybyl.mol2...
INFO:root:Checking all atoms have unique names...
DEBUG:root:17 atoms in structure, 17 unique atom names.


In [13]:
# Import the packaged `load_mol2` under the alias `lm` so it can be compared
# with the notebook-local `load_mol2`; the local version returns a single
# OEMol for the host file (see the cell output).
from smirnovert.utils import load_mol2 as lm
load_mol2('./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MGO-sybyl.mol2')

INFO:root:Loading ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MGO-sybyl.mol2...


<openeye.oechem.OEMol; proxy of <Swig Object of type 'OEMolWrapper *' at 0x7f0641f5fcf0> >

In [14]:
# The packaged `smirnovert.utils.load_mol2` (alias `lm`) returns a *list* of
# OEMol objects for the same host file (see the cell output).
lm('./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MGO-sybyl.mol2')

INFO:root:Loading ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MGO-sybyl.mol2...


[<openeye.oechem.OEMol; proxy of <Swig Object of type 'OEMolWrapper *' at 0x7f0641f5fe10> >]

## Make each atom type unique and only two characters!

In [294]:
# Build the atom-type mapping for the host and guest; the printed output shows
# each atom being mapped to a two-character type.
# NOTE(review): `smirnoff_prmtop` and `smirnoff_inpcrd` are not assigned in any
# cell visible here — confirm where they are defined before a fresh-kernel run.
mapping = unique_two_char_atom_types(reference_prmtop=destination + smirnoff_prmtop,
                          reference_inpcrd=destination + smirnoff_inpcrd,
                          host_resname=host_resname,
                          guest_resname=guest_resname,
                          prefix=destination + base_name)

148 bonds in structure.
(3) 1.O1 (33) 2.C4
(12) 1.C4 (108) 6.O1
(24) 2.O1 (54) 3.C4
(45) 3.O1 (75) 4.C4
(66) 4.O1 (96) 5.C4
(87) 5.O1 (117) 6.C4
Mapping C1 1 → Z0 in MGO
Mapping H1 2 → Z1 in MGO
Mapping O1 3 → Z2 in MGO
Mapping C2 4 → Z3 in MGO
Mapping H2 5 → Z4 in MGO
Mapping O2 6 → Z5 in MGO
Mapping HO2 7 → Z6 in MGO
Mapping C3 8 → Z7 in MGO
Mapping H3 9 → Z8 in MGO
Mapping O3 10 → Z9 in MGO
Mapping HO3 11 → Za in MGO
Mapping C4 12 → Zb in MGO
Mapping H4 13 → Zc in MGO
Mapping C5 14 → Zd in MGO
Mapping H5 15 → Ze in MGO
Mapping O5 16 → Zf in MGO
Mapping C6 17 → Zg in MGO
Mapping H61 18 → Zh in MGO
Mapping H62 19 → Zi in MGO
Mapping O6 20 → Zj in MGO
Mapping HO6 21 → Zk in MGO
Mapping C1 22 → Zl in MGO
Mapping H1 23 → Zm in MGO
Mapping O1 24 → Zn in MGO
Mapping C2 25 → Zo in MGO
Mapping H2 26 → Zp in MGO
Mapping O2 27 → Zq in MGO
Mapping HO2 28 → Zr in MGO
Mapping C3 29 → Zs in MGO
Mapping H3 30 → Zt in MGO
Mapping O3 31 → Zu in MGO
Mapping HO3 32 → Zv in MGO
Mapping C4 33 → Zw in MGO

Moreover, we need to know the bonding information between residues...

In [280]:
# Record every bond that joins two different residues as a tleap `bond`
# command, and print the mapped atom type of both bond partners.
bonds_between_monomers = []
for bond in hg.bonds:
    atom1, atom2 = bond.atom1, bond.atom2
    if atom1.residue != atom2.residue:
        # One "<residue number>.<atom name>" label per bond partner.
        ends = [(atom, f'{atom.residue.number + 1}.{atom.name}') for atom in (atom1, atom2)]
        bonds_between_monomers.append('bond ' + ' '.join(f'model.{label}' for _, label in ends))
        print(' '.join(f'({mapping[atom.type]}) {label}' for atom, label in ends))



(Z2) 1.O1 (Zw) 2.C4
(Zb) 1.C4 (ze) 6.O1
(Zn) 2.O1 (ZR) 3.C4
(ZI) 3.O1 (Z+) 4.C4
(Z#) 4.O1 (z2) 5.C4
(Z() 5.O1 (zn) 6.C4


In [281]:
# Display the collected tleap `bond` commands in sorted order.
sorted(bonds_between_monomers)

['bond model.1.C4 model.6.O1',
 'bond model.1.O1 model.2.C4',
 'bond model.2.O1 model.3.C4',
 'bond model.3.O1 model.4.C4',
 'bond model.4.O1 model.5.C4',
 'bond model.5.O1 model.6.C4']

## Align

In [282]:
# Move :MOL@C4 to the origin and align :MOL@N1 with the z axis, saving the
# aligned coordinates to `hg-aligned.pdb` (see the log output of this cell).
# NOTE: this rebinds `hg`, so the cell is not idempotent when re-run.
# Find the guest atoms to align by reading first line in /data/nhenriksen/projects/cds/wat6/bgbg-tip3p/pdbs
hg = align.zalign(hg, ':MOL@C4', ':MOL@N1', save=True, filename=destination + 'hg-aligned.pdb')

INFO:root:Moving :MOL@C4 (1 atoms) to the origin...
INFO:root:Aligning :MOL@N1 (1 atoms) with the z axis...
INFO:root:Saved aligned coordinates to ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/hg-aligned.pdb


## Add dummy atoms

In [283]:
# Reload the aligned complex, append the three dummy-atom residues, write the
# dummy-atom parameter/mol2 files and save the combined structure.
hg = pmd.load_file(destination + 'hg-aligned.pdb', structure=True)

# Residue name and explicit coordinates of each dummy atom, in insertion order.
dummy_positions = (
    ('DM1', {'z': -6.000}),
    ('DM2', {'z': -9.000}),
    ('DM3', {'z': -11.200, 'y': 2.200}),
)

for dummy_name, position in dummy_positions:
    hg = dummy.add_dummy(hg, residue_name=dummy_name, **position)

dummy.write_dummy_frcmod(path=destination)
for dummy_name, _position in dummy_positions:
    dummy.write_dummy_mol2(path=destination, residue_name=dummy_name, filename=dummy_name + '.mol2')

hg.write_pdb(destination + 'hg-aligned-dum.pdb', renumber=False)

## Solvate

In [292]:
# Write the tleap input: load the `-unique` host/guest parameter and mol2
# files, the three dummy-atom residues and the aligned PDB, then re-create the
# six bonds between neighbouring host residues.
# NOTE: the handle name `file` rebinds the `file` loop variable of earlier cells.
with open(destination + 'tleap.in', 'w') as file:
    # The template text is written verbatim; keep its blank lines as they are.
    tleap_header = f'''\n
source leaprc.water.tip3p
loadamberparams {base_name}-unique.frcmod
{host_resname} = loadmol2 {base_name}-{host_resname}-multi-unique.mol2
{guest_resname} = loadmol2 {base_name}-{guest_resname}-unique.mol2

DM1 = loadmol2 DM1.mol2
DM2 = loadmol2 DM2.mol2
DM3 = loadmol2 DM3.mol2
loadamberparams dummy.frcmod

model = loadpdb {str('hg-aligned-dum.pdb')}
bond model.1.O1 model.2.C4
bond model.2.O1 model.3.C4
bond model.3.O1 model.4.C4
bond model.4.O1 model.5.C4
bond model.5.O1 model.6.C4
bond model.6.O1 model.1.C4


'''
    file.write(tleap_header)
    # Disabled alternative: emit the `bond` lines from `bonds_between_monomers`
    # instead of hard-coding them in the template above.
#     for line in bonds_between_monomers:
#         file.write(line + '\n')

In [293]:
# Solvate the dummy-atom complex using the tleap input written above.
# NOTE(review): `buffer_target=2000` appears to be a target number of waters —
# the log of this cell ends with exactly 2000 WAT residues — confirm against
# the paprika documentation.
build.solvate(tleap_file='tleap.in', pdb_file='hg-aligned-dum.pdb', pbc_type='rectangular', buffer_target=2000,
             output_prefix='hg-solvated', path=destination)

DEBUG:root:Cycle 00	 1 743 (2000)
DEBUG:root:Cycle 01	 11 1531 (2000)
DEBUG:root:Cycle 02	 21 2326 (2000)
DEBUG:root:Cycle 03	 16 1933 (2000)
DEBUG:root:Cycle 04	 16 1985 (2000)
DEBUG:root:Cycle 05	 16 1985 (2000)
DEBUG:root:Cycle 06	 16 1997 (2000)
DEBUG:root:Cycle 07	 16 2009 (2000)
DEBUG:root:Manually removing waters... ['2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']
INFO:root:Cl-	1
INFO:root:DM1	1
INFO:root:DM2	1
INFO:root:DM3	1
INFO:root:MGO	6
INFO:root:MOL	1
INFO:root:WAT	2000


Let's check...

In [193]:
# Load the solvated topology and coordinates (the `hg-solvated` files) so the
# bonding can be inspected.
check = pmd.load_file(destination + 'hg-solvated.prmtop', destination + 'hg-solvated.rst7')

In [195]:
# Print a tleap-style `bond` line for every bond in the solvated system whose
# two atoms belong to different residues.
for bond in check.bonds:
    atom1, atom2 = bond.atom1, bond.atom2
    if atom1.residue != atom2.residue:
        endpoints = ' '.join(
            f'model.{atom.residue.number + 1}.{atom.name}' for atom in (atom1, atom2)
        )
        print(f'bond {endpoints}')

In [206]:
for bond in check.bonds:


6046

In [215]:
# Inspect the bonds of the `:MGO` selection of the solvated system.
check[':MGO'].bonds

TrackedList([
	<Bond <Atom C6 [16]; In MGO 0>--<Atom O6 [19]; In MGO 0>; type=<BondType; k=320.000, req=1.410>>
	<Bond <Atom C4 [11]; In MGO 0>--<Atom C5 [13]; In MGO 0>; type=<BondType; k=310.000, req=1.526>>
	<Bond <Atom C3 [7]; In MGO 0>--<Atom O3 [9]; In MGO 0>; type=<BondType; k=320.000, req=1.410>>
	<Bond <Atom C2 [3]; In MGO 0>--<Atom O2 [5]; In MGO 0>; type=<BondType; k=320.000, req=1.410>>
	<Bond <Atom C1 [0]; In MGO 0>--<Atom O1 [2]; In MGO 0>; type=<BondType; k=320.000, req=1.370>>
	<Bond <Atom C6 [37]; In MGO 1>--<Atom O6 [40]; In MGO 1>; type=<BondType; k=320.000, req=1.410>>
	<Bond <Atom C4 [32]; In MGO 1>--<Atom C5 [34]; In MGO 1>; type=<BondType; k=310.000, req=1.526>>
	<Bond <Atom C3 [28]; In MGO 1>--<Atom O3 [30]; In MGO 1>; type=<BondType; k=320.000, req=1.410>>
	<Bond <Atom C2 [24]; In MGO 1>--<Atom O2 [26]; In MGO 1>; type=<BondType; k=320.000, req=1.410>>
	<Bond <Atom C1 [21]; In MGO 1>--<Atom O1 [23]; In MGO 1>; type=<BondType; k=320.000, req=1.370>>
	<Bond <Atom

In [240]:
# Load the `acd-1-p` topology and coordinates from `destination` for comparison
# with the solvated system.
orig = pmd.load_file(destination + 'acd-1-p.prmtop', destination + 'acd-1-p.rst7')

In [295]:
# Print a tleap-style `bond` line for every bond in the original topology whose
# two atoms belong to different residues.
for bond in orig.bonds:
    atom1, atom2 = bond.atom1, bond.atom2
    if atom1.residue != atom2.residue:
        first_end = f'model.{atom1.residue.number + 1}.{atom1.name}'
        second_end = f'model.{atom2.residue.number + 1}.{atom2.name}'
        print('bond', first_end, second_end)

bond model.1.C4 model.6.O1
bond model.1.O1 model.2.C4
bond model.2.O1 model.3.C4
bond model.3.O1 model.4.C4
bond model.4.O1 model.5.C4
bond model.5.O1 model.6.C4


In [299]:
from utils import convert

N.B. don't forget to make hydrogens start with `H` (or `h` possibly).

In [346]:
# Re-run the conversion with the `nonunique` file prefix and keep the return
# value for the mapping step.
# NOTE(review): `convert` was re-imported from `utils` in the cell just before
# this one, replacing the `convert` used by the earlier conversion loop —
# confirm which implementation is intended on a fresh-kernel run.
nonunique = convert(destination=destination, prefix='nonunique', reference_prmtop='acd-1-p.prmtop',
        reference_inpcrd='acd-1-p.rst7', host_resname='MGO', guest_resname='MOL')

INFO:root:Creating ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/nonunique.pdb with CONECT records...
DEBUG:root:PDB file written by cpptraj.
INFO:root:Pruning water-water CONECT records...
DEBUG:root:First water residue = 151
DEBUG:root:Found first water CONECT entry at line = 6304
INFO:root:Splitting topology into components...
INFO:root:Creating a combined topology for the host and guest molecules...
INFO:root:Writing a `mol2` for the host molecule...
DEBUG:root:MOL2 file written by cpptraj.
INFO:root:Writing a `mol2` for the host molecule...
DEBUG:root:MOL2 file written by cpptraj.
INFO:root:Converting ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MGO.mol2 to SYBYL atom types via Antechamber...
DEBUG:root:MOL2 file written by antechamber.
INFO:root:Converting ./benchmarksets/input_files/cd-set1//prmtop-rst7/smirnoff/acd-1-p/MOL.mol2 to SYBYL atom types via Antechamber...
DEBUG:root:MOL2 file written by antechamber.
INFO:root:Extracting water 

Check if the host-guest parameter file already exists...
Check if the host-guest coordinate file already exists...
Check if solvated parameter file already exists...
Check if solvated coordinate file already exists...


In [377]:
from utils import create_mapping

In [378]:
# Build separate atom-type mappings for the host (MGO) and guest (MOL) from the
# `nonunique` conversion result.
host_mapping, guest_mapping = create_mapping(nonunique, 'MGO', 'MOL')

In [379]:
from utils import remap_atom_types

In [394]:
# Reload the SMIRNOFF topology and apply the host and guest mappings to it; the
# call prints one "Assigning ..." line per atom (see the cell output).
# NOTE(review): presumably results are written under `destination` — confirm
# against `utils.remap_atom_types`.
tmp = pmd.amber.LoadParm(destination + 'smirnoff.prmtop')
remap_atom_types(tmp, 'MGO', host_mapping, 'MOL', guest_mapping, destination=destination)

Assigning 0 C1 1 → C0
Assigning 0 H1 2 → H0
Assigning 0 O1 3 → O0
Assigning 0 C2 4 → C1
Assigning 0 H2 5 → H1
Assigning 0 O2 6 → O1
Assigning 0 HO2 7 → H2
Assigning 0 C3 8 → C2
Assigning 0 H3 9 → H3
Assigning 0 O3 10 → O2
Assigning 0 HO3 11 → H4
Assigning 0 C4 12 → C3
Assigning 0 H4 13 → H5
Assigning 0 C5 14 → C4
Assigning 0 H5 15 → H6
Assigning 0 O5 16 → O3
Assigning 0 C6 17 → C5
Assigning 0 H61 18 → H7
Assigning 0 H62 19 → H8
Assigning 0 O6 20 → O4
Assigning 0 HO6 21 → H9
Assigning 1 C1 22 → C0
Assigning 1 H1 23 → H0
Assigning 1 O1 24 → O0
Assigning 1 C2 25 → C1
Assigning 1 H2 26 → H1
Assigning 1 O2 27 → O1
Assigning 1 HO2 28 → H2
Assigning 1 C3 29 → C2
Assigning 1 H3 30 → H3
Assigning 1 O3 31 → O2
Assigning 1 HO3 32 → H4
Assigning 1 C4 33 → C3
Assigning 1 H4 34 → H5
Assigning 1 C5 35 → C4
Assigning 1 H5 36 → H6
Assigning 1 O5 37 → O3
Assigning 1 C6 38 → C5
Assigning 1 H61 39 → H7
Assigning 1 H62 40 → H8
Assigning 1 O6 41 → O4
Assigning 1 HO6 42 → H9
Assigning 2 C1 43 → C0
Assigning 

In [388]:
# Display the guest mapping (integer index -> two-character atom type).
guest_mapping

{0: 'N0',
 1: 'Ha',
 2: 'Hb',
 3: 'Hc',
 4: 'C6',
 5: 'Hd',
 6: 'He',
 7: 'C7',
 8: 'Hf',
 9: 'Hg',
 10: 'C8',
 11: 'Hh',
 12: 'Hi',
 13: 'C9',
 14: 'Hj',
 15: 'Hk',
 16: 'Hl'}

In [391]:
# Reload the SMIRNOFF topology from disk and list its AMBER atom types; the
# output shows that they are sequential numeric strings ('1', '2', ...).
tmp = pmd.amber.LoadParm(destination + 'smirnoff.prmtop')
tmp.parm_data['AMBER_ATOM_TYPE']

['1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50',
 '51',
 '52',
 '53',
 '54',
 '55',
 '56',
 '57',
 '58',
 '59',
 '60',
 '61',
 '62',
 '63',
 '64',
 '65',
 '66',
 '67',
 '68',
 '69',
 '70',
 '71',
 '72',
 '73',
 '74',
 '75',
 '76',
 '77',
 '78',
 '79',
 '80',
 '81',
 '82',
 '83',
 '84',
 '85',
 '86',
 '87',
 '88',
 '89',
 '90',
 '91',
 '92',
 '93',
 '94',
 '95',
 '96',
 '97',
 '98',
 '99',
 '100',
 '101',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '120',
 '121',
 '122',
 '123',
 '124',
 '125',
 '126',
 '127',
 '128',
 '129',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138',
 '13