# How to build an AMBER-compatible host-guest complex using the SMIRNOFF force field starting with SYBYL-formatted `mol2` files

In [3]:
import parmed as pmd
import subprocess as sp
import numpy as np

from openeye import oechem
from openforcefield.typing.engines.smirnoff import *
from pdbfixer import PDBFixer

## Step 0: generate SYBYL-formatted `mol2` files

Starting with files containing AM1-BCC charges and GAFF v1.7 Lennard-Jones and bonded parameters:

```
antechamber -i bcd.mol2 -fi mol2 -o bcd-sybyl.mol2 -fo mol2 -at sybyl
antechamber -i ben.mol2 -fi mol2 -o ben-sybyl.mol2 -fo mol2 -at sybyl -dr n
# The second command converts benzoate; `-dr n` disables `acdoctor` so that the carboxylate group can be handled.
```

In [2]:
# Directory holding the input files, plus the SYBYL-typed mol2 files for
# beta-cyclodextrin (`bcd`) and benzoate (`ben`) inside it.
path = './bcd-ben/'
bcd, ben = (path + filename for filename in ('bcd-sybyl.mol2', 'ben-sybyl.mol2'))

## Step 1: load molecules in an `oechem.OEMol`

In [3]:
# Read every molecule in the cyclodextrin mol2 file into a fresh `mols` list.
mols = []
ifs = oechem.oemolistream()
# Fail loudly if the file cannot be opened; otherwise the loop below would
# presumably just yield nothing and leave `mols` empty for the later cells.
if not ifs.open(bcd):
    raise IOError('Unable to open {} for reading'.format(bcd))
for mol in ifs.GetOEGraphMols():
    # Store an independent OEMol copy of each molecule read from the stream.
    mols.append(oechem.OEMol(mol))
ifs.close()

## Step 1.5: since `bcd` is a cyclic molecule, let's write a version without connectivity and then have OpenEye perceive connectivity based on [this issue](https://github.com/openforcefield/openforcefield/issues/66)

In [4]:
# Write the cyclodextrin (mols[0]) to an XYZ file so that connectivity can be
# re-perceived from coordinates in the next cell.
ofs = oechem.oemolostream()
# Do not ignore a failed open: the next cell reads this file back.
if not ofs.open(path + 'bcd-no-conect.xyz'):
    raise IOError('Unable to open {} for writing'.format(path + 'bcd-no-conect.xyz'))
flavor = oechem.OEOFlavor_Generic_Default
ofs.SetFlavor(oechem.OEFormat_XYZ, flavor)
oechem.OEWriteXYZFile(ofs, mols[0])
ofs.close()

In [5]:
# Read the XYZ file back and let OpenEye rebuild connectivity, bond orders and
# Tripos atom names (in that order); the result replaces the cyclodextrin in mols[0].
# NOTE(review): XYZ presumably stores only elements and coordinates, so the
# partial charges from the mol2 file may not survive this round trip -- confirm.
ifs = oechem.oemolistream(path + 'bcd-no-conect.xyz')
for xyz_mol in ifs.GetOEGraphMols():
    for perceive in (oechem.OEDetermineConnectivity,
                     oechem.OEPerceiveBondOrders,
                     oechem.OETriposAtomNames):
        perceive(xyz_mol)
    mols[0] = oechem.OEMol(xyz_mol)

In [6]:
# Append an OEMol copy of every molecule in the benzoate mol2 file to `mols`.
ifs = oechem.oemolistream(ben)
mols.extend(oechem.OEMol(graph_mol) for graph_mol in ifs.GetOEGraphMols())

## Step 2: build reference molecules for water and ions

In [7]:
# Reference molecules for the sodium ion, the chloride ion and water, built
# from SMILES and appended to `mols` after the host and guest.
smiles = ['[Na+]', '[Cl-]', 'O']
for smiles_string in smiles:
    mol = oechem.OEMol()
    oechem.OESmilesToMol(mol, smiles_string)
    # Each atom's partial charge is set to its formal charge.
    for atom in mol.GetAtoms():
        formal_charge = atom.GetFormalCharge()
        atom.SetPartialCharge(formal_charge)
    oechem.OEAddExplicitHydrogens(mol)
    oechem.OETriposAtomNames(mol)
    mols.append(mol)

## Step 3: read in fully solvated system with host, guest, water, and ions

### Step 3.1: create a PDB of the system with CONECT records
This requires using the "original" `mol2` files with GAFF atom names.

In [8]:
# File names (relative to `path`) used to regenerate the solvated PDB with
# CONECT records.
solvated_pdb = 'bcd-ben.pdb'
amber_prmtop = 'solvated.prmtop'
pdb_with_conect = 'bcd-ben-conect.pdb'
cpptraj_input = 'bcd-ben-conect.in'

# cpptraj script text: load the topology, read the solvated PDB, and write it
# back out with the `conect` keyword. The text starts and ends with a newline.
cpptraj = '\n'.join([
    '',
    'parm {}'.format(amber_prmtop),
    'trajin {}'.format(solvated_pdb),
    'trajout {} conect'.format(pdb_with_conect),
    '',
])

In [9]:
# Save the cpptraj script inside `path`, where cpptraj is run from.
cpptraj_script_path = path + cpptraj_input
with open(cpptraj_script_path, 'w') as script_file:
    script_file.write(cpptraj)

In [10]:
# Run cpptraj on the script from inside `path`; check_output raises if the
# command exits with a non-zero status.
cpptraj_command = ['cpptraj', '-i', cpptraj_input]
cpptraj_output = sp.check_output(cpptraj_command, cwd=path)

### Step 3.2: prune the CONECT records that correspond specifically to water molecules

In [11]:
# First line of the CONECT-annotated PDB that contains the text "WAT".
grep_first_water = ['grep', '-m 1', 'WAT', pdb_with_conect]
first_water = sp.check_output(grep_first_water, cwd=path).decode("utf-8")

In [12]:
# Take the second whitespace-separated field of the first water line.
# NOTE(review): in a PDB ATOM record this field is presumably the atom serial
# number rather than a residue number; the next cell matches it against the
# leading number of CONECT entries -- confirm the naming.
second_field = first_water.split()[1]
first_water_residue = int(float(second_field))
print('First water residue = {}'.format(first_water_residue))

First water residue = 175


In [13]:
# Locate (with its line number, `egrep -n`) the CONECT record whose first
# number is the value found for the first water.
# The pattern is anchored at the start of the line, allows any number of
# spaces (including none) before the number, and requires a space or end of
# line after it. The previous pattern needed at least two spaces before the
# number, so it could not match numbers with four or more digits, and it had
# no trailing boundary.
conect_pattern = '^CONECT *{}( |$)'.format(first_water_residue)
line_of_first_conect_to_delete = sp.check_output(
    ['egrep', '-n', '-m', '1', conect_pattern, pdb_with_conect],
    cwd=path).decode("utf-8")

In [14]:
# `egrep -n` output has the form "<line number>:<matched line>"; keep only the
# text before the first colon and convert it to an integer.
line_number_text = line_of_first_conect_to_delete.partition(':')[0]
line_to_delete_from = int(float(line_number_text))
print('Found first water CONECT entry at line = {}'.format(line_to_delete_from))

Found first water CONECT entry at line = 9192


In [15]:
# Keep only the lines that come before the first water CONECT entry.
awk_program = 'NR < {}'.format(line_to_delete_from)
truncated_file = sp.check_output(['awk', awk_program, pdb_with_conect],
                                 cwd=path).decode("utf-8")

In [16]:
# Write the truncated PDB text followed by an END record to a temporary file,
# then move it over the CONECT-annotated PDB.
# The `with` block closes the file even if one of the writes raises.
with open(path + 'tmp.pdb', 'w') as file:
    file.write(truncated_file)
    file.write("END")
p = sp.check_output(['mv', 'tmp.pdb', '{}'.format(pdb_with_conect)], cwd=path)

In [17]:
# Load the pruned, CONECT-annotated PDB; later cells use `pdb.topology` and
# `pdb.positions`.
conect_pdb_path = path + pdb_with_conect
pdb = PDBFixer(conect_pdb_path)

In [18]:
# residues = [r.name for r in pdb.topology.residues()]
# print(residues)

In [19]:
# names = [a.name for a in pdb.topology.atoms()]
# print(names)

## Step 4: create a ParmEd structure

In [20]:
# Parameterize the solvated topology with SMIRNOFF99Frosst plus TIP3P, using
# the reference molecules in `mols`, then create an OpenMM context and give it
# the PDB positions.
# NOTE(review): `openmm` and `unit` are not imported explicitly in the import
# cell; presumably they come from the star import -- confirm.
ff = ForceField('forcefield/smirnoff99Frosst.ffxml', 'forcefield/tip3p.ffxml')
system = ff.createSystem(pdb.topology, mols)
timestep = 1.0 * unit.femtoseconds
integrator = openmm.VerletIntegrator(timestep)
context = openmm.Context(system, integrator)
context.setPositions(pdb.positions)

## Step 5: save the ParmEd structure as a `.prmtop` file

In [21]:
# Combine the PDB topology, the parameterized system and the PDB positions
# into a ParmEd structure.
topology, positions = pdb.topology, pdb.positions
structure = pmd.openmm.topsystem.load_topology(topology, system, positions)

In [22]:
# Save the ParmEd structure; the `.prmtop` extension is what the step heading asks for.
# NOTE(review): the output recorded for this cell is
# "AttributeError: 'NoneType' object has no attribute 'used'", i.e. this save
# did not succeed as written; the cause is not visible in this notebook.
structure.save(path + 'bcd-ben-smirnoff.prmtop')

AttributeError: 'NoneType' object has no attribute 'used'

# Can we build from SMILES?

In [35]:
# SMILES string (with stereo annotations) used to build the cyclodextrin without a mol2 file.
bcd_smiles = 'C([C@@H]1[C@@H]2[C@@H]([C@H]([C@H](O1)O[C@@H]3[C@H](O[C@@H]([C@@H]([C@H]3O)O)O[C@@H]4[C@H](O[C@@H]([C@@H]([C@H]4O)O)O[C@@H]5[C@H](O[C@@H]([C@@H]([C@H]5O)O)O[C@@H]6[C@H](O[C@@H]([C@@H]([C@H]6O)O)O[C@@H]7[C@H](O[C@@H]([C@@H]([C@H]7O)O)O[C@@H]8[C@H](O[C@H](O2)[C@@H]([C@H]8O)O)CO)CO)CO)CO)CO)CO)O)O)O'

## Now, use OpenEye tools to process the SMILES entirely and do not use a `mol2` file at all, then selectively replace the cyclodextrin molecule in the list of `OEMol`s

In [50]:
# Build the cyclodextrin OEMol from SMILES alone, make it the only entry in
# `mols`, write it to a PDB file and load that file with PDBFixer.
mol = oechem.OEMol()
# Stop on a SMILES that fails to parse instead of carrying on with a
# possibly empty molecule.
if not oechem.OESmilesToMol(mol, bcd_smiles):
    raise ValueError('Unable to parse SMILES: {}'.format(bcd_smiles))
# Each atom's partial charge is set to its formal charge.
for atom in mol.GetAtoms():
    atom.SetPartialCharge(atom.GetFormalCharge())
oechem.OEAddExplicitHydrogens(mol)
oechem.OETriposAtomNames(mol)
mols = [mol]

# NOTE(review): no conformer generation is visible before this write, so the
# coordinates in the PDB file are presumably not meaningful -- confirm.
ofs = oechem.oemolostream()
if not ofs.open(path + 'tmp.pdb'):
    raise IOError('Unable to open {} for writing'.format(path + 'tmp.pdb'))
# Unlike `WritePDBFile`, `WriteMolecule` should standardize everything (https://docs.eyesopen.com/toolkits/python/oechemtk/molreadwrite.html)
oechem.OEWriteMolecule(ofs, mols[0])
ofs.close()

pdb = PDBFixer(path + 'tmp.pdb')

In [51]:
# Parameterize the SMILES-derived cyclodextrin with SMIRNOFF99Frosst only and
# create an OpenMM context for it. Positions are not set on the context; that
# call is left disabled below.
ff = ForceField('forcefield/smirnoff99Frosst.ffxml')
system = ff.createSystem(pdb.topology, mols)
timestep = 1.0 * unit.femtoseconds
integrator = openmm.VerletIntegrator(timestep)
context = openmm.Context(system, integrator)
# context.setPositions(pdb.positions)

In [52]:
# Combine the PDB topology, the parameterized system and the PDB positions
# into a ParmEd structure.
topology, positions = pdb.topology, pdb.positions
structure = pmd.openmm.topsystem.load_topology(topology, system, positions)

In [53]:
# Save the SMILES-derived cyclodextrin structure inside `path`.
bcd_prmtop_path = path + 'bcd-from-smiles.prmtop'
structure.save(bcd_prmtop_path)

In [54]:
# Load the cyclodextrin prmtop file back with ParmEd.
bcd_prmtop_path = path + 'bcd-from-smiles.prmtop'
prmtop = pmd.load_file(bcd_prmtop_path)

In [58]:
# Run ParmEd's writeFrcmod action on the loaded prmtop, targeting a frcmod
# file inside `path`.
bcd_frcmod_action = pmd.tools.writeFrcmod(prmtop, path + 'bcd-from-smiles.frcmod')
bcd_frcmod_action.execute()

# Okay, load cyclodextrin and benzoate from SMILES

In [60]:
# SMILES string for benzoate; the `[O-]` atom carries the formal negative charge.
benzoate_smiles = '[O-]C(=O)c1ccccc1'

In [61]:
# Build the benzoate OEMol from SMILES alone, make it the only entry in
# `mols`, write it to a PDB file and load that file with PDBFixer.
mol = oechem.OEMol()
# Stop on a SMILES that fails to parse instead of carrying on with a
# possibly empty molecule.
if not oechem.OESmilesToMol(mol, benzoate_smiles):
    raise ValueError('Unable to parse SMILES: {}'.format(benzoate_smiles))
# Each atom's partial charge is set to its formal charge.
for atom in mol.GetAtoms():
    atom.SetPartialCharge(atom.GetFormalCharge())
oechem.OEAddExplicitHydrogens(mol)
oechem.OETriposAtomNames(mol)
mols = [mol]

# NOTE(review): no conformer generation is visible before this write, so the
# coordinates in the PDB file are presumably not meaningful -- confirm.
ofs = oechem.oemolostream()
if not ofs.open(path + 'benzoate-from-smiles.pdb'):
    raise IOError('Unable to open {} for writing'.format(path + 'benzoate-from-smiles.pdb'))
# Unlike `WritePDBFile`, `WriteMolecule` should standardize everything (https://docs.eyesopen.com/toolkits/python/oechemtk/molreadwrite.html)
oechem.OEWriteMolecule(ofs, mols[0])
ofs.close()

pdb = PDBFixer(path + 'benzoate-from-smiles.pdb')

In [62]:
# Display the topology PDBFixer parsed from the benzoate PDB (its repr lists chains, residues, atoms and bonds).
pdb.topology

<Topology; 1 chains, 1 residues, 14 atoms, 14 bonds>

In [63]:
# Parameterize the SMILES-derived benzoate with SMIRNOFF99Frosst only and
# create an OpenMM context for it. Positions are not set on the context; that
# call is left disabled below.
ff = ForceField('forcefield/smirnoff99Frosst.ffxml')
system = ff.createSystem(pdb.topology, mols)
timestep = 1.0 * unit.femtoseconds
integrator = openmm.VerletIntegrator(timestep)
context = openmm.Context(system, integrator)
# context.setPositions(pdb.positions)

In [64]:
# Combine the PDB topology, the parameterized system and the PDB positions
# into a ParmEd structure.
topology, positions = pdb.topology, pdb.positions
structure = pmd.openmm.topsystem.load_topology(topology, system, positions)

In [65]:
# Save the SMILES-derived benzoate structure inside `path`.
ben_prmtop_path = path + 'ben-from-smiles.prmtop'
structure.save(ben_prmtop_path)

In [66]:
# Need to add charges, though, I expect...

In [67]:
# Load the benzoate prmtop file back with ParmEd, then run the writeFrcmod
# action on it, targeting a frcmod file inside `path`.
ben_prmtop_path = path + 'ben-from-smiles.prmtop'
prmtop = pmd.load_file(ben_prmtop_path)
ben_frcmod_action = pmd.tools.writeFrcmod(prmtop, path + 'ben-from-smiles.frcmod')
ben_frcmod_action.execute()

In [68]:
# NOTE(review): incomplete expression (nothing follows the dot), so this cell
# is a SyntaxError as written. Presumably a leftover from interactive
# exploration; the intended attribute cannot be determined from the notebook.
prmtop.

'CHARGE'

# Load $\beta$-cyclodextrin and benzoate SMILES, convert to `OEMol`, add charges, then build...

In [1]:
# SMILES strings for benzoate (`ben_smiles`) and beta-cyclodextrin (`bcd_smiles`),
# consumed by the next cell to build the `OEMol` objects.
ben_smiles = '[O-]C(=O)c1ccccc1'
bcd_smiles = 'C([C@@H]1[C@@H]2[C@@H]([C@H]([C@H](O1)O[C@@H]3[C@H](O[C@@H]([C@@H]([C@H]3O)O)O[C@@H]4[C@H](O[C@@H]([C@@H]([C@H]4O)O)O[C@@H]5[C@H](O[C@@H]([C@@H]([C@H]5O)O)O[C@@H]6[C@H](O[C@@H]([C@@H]([C@H]6O)O)O[C@@H]7[C@H](O[C@@H]([C@@H]([C@H]7O)O)O[C@@H]8[C@H](O[C@H](O2)[C@@H]([C@H]8O)O)CO)CO)CO)CO)CO)CO)O)O)O'

In [4]:
# Build one OEMol per SMILES string (benzoate first, then cyclodextrin) and
# collect them in a fresh `mols` list.
mols = []
for smiles in (ben_smiles, bcd_smiles):
    mol = oechem.OEMol()
    oechem.OESmilesToMol(mol, smiles)
    # Each atom's partial charge is set to its formal charge.
    for atom in mol.GetAtoms():
        formal_charge = atom.GetFormalCharge()
        atom.SetPartialCharge(formal_charge)
    oechem.OEAddExplicitHydrogens(mol)
    oechem.OETriposAtomNames(mol)
    mols.append(mol)

In [10]:
    # For every molecule read from `ifs` for which `omega(mol)` is truthy:
    # assign AM1BCC-ELF10 charges, report the formal/partial charge totals,
    # and write the conformer with index 0 to `ofs`.
    # NOTE(review): this fragment is indented as if it were the body of an
    # enclosing block, so as a standalone cell it raises IndentationError.
    # NOTE(review): `omega` and `oequacpac` are not defined or imported
    # explicitly anywhere in the visible notebook, and `ifs`/`ofs` are whatever
    # streams earlier cells last bound -- confirm before running.
    # NOTE(review): the output stored below this cell is a column of `0.0`
    # values, which does not look like the message formatted here; presumably
    # it is stale output from a different version of the cell.
    for mol in ifs.GetOEMols():
        if omega(mol):
            oequacpac.OEAssignCharges(mol, oequacpac.OEAM1BCCELF10Charges())
            conf = mol.GetConf(oechem.OEHasConfIdx(0))
            # Running totals: sum of |formal charge|, sum of formal charges,
            # and sum of partial charges over all atoms.
            absFCharge = 0
            sumFCharge = 0
            sumPCharge = 0.0
            for atm in mol.GetAtoms():
                sumFCharge += atm.GetFormalCharge()
                absFCharge += abs(atm.GetFormalCharge())
                sumPCharge += atm.GetPartialCharge()
            print("{}: {} formal charges give total charge {}"
                  "; sum of partial charges {:5.4f}".format(mol.GetTitle(), absFCharge,
                                                            sumFCharge, sumPCharge))
            oechem.OEWriteMolecule(ofs, conf)


0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [None]:
# NOTE(review): incomplete expression (nothing follows the dot), so this cell
# is a SyntaxError as written. Presumably a leftover from interactive
# exploration.
atom.