In [161]:
import os
import subprocess as sp

import numpy as np
from openeye.oechem import *

from openforcefield.typing.engines.smirnoff import *
from pdbfixer import PDBFixer

# Load molecules into a list of `OEMol`s

I'm going to start by loading the host and guest SYBYL-formatted `mol2` files into a list of `OEMol`s. I created these SYBYL-formatted files from files containing AM1-BCC charges and GAFF v1.7 Lennard-Jones and bonded parameters:

```
antechamber -i bcd.mol2 -fi mol2 -o bcd-sybyl.mol2 -fo mol2 -at sybyl
antechamber -i ben.mol2 -fi mol2 -o ben-sybyl.mol2 -fo mol2 -at sybyl -dr n 
# Disable `acdoctor` to handle carboxylate group
```

I think it should be possible to read in GAFF-formatted files directly, using a specific forcefield flavor, but I don't see the relevant flavor listed in their [documentation](https://docs.eyesopen.com/toolkits/python/oechemtk/molreadwrite.html#section-molreadwrite-flavoredinputandoutput). A search for "GAFF" comes up empty (except for one blog post). If I try to load the GAFF files without any special flavor, I get many things with atom type "Du", which I surmise to be a dummy atom type. I believe this can be remedied by running the files through `OETriposAtomNames`, but it is another thing that could go wrong, so for these reasons, I believe it is more straightforward to simply start with standard atom names.

In [219]:
def load_mol2(filename, name=None, add_tripos=True):
    """
    Read the first molecule from a structure file and return it as an `OEMol`.

    Parameters
    ----------
    filename : str
        Path of the file to read (opened with `oemolistream`)
    name : str, optional
        If given, set as the title of the returned molecule
    add_tripos : bool
        If True, run `OETriposAtomNames` on the molecule before returning it

    Returns
    -------
    OEMol
        A copy of the first molecule read from the file

    Raises
    ------
    IOError
        If the file cannot be opened for reading
    ValueError
        If the file opens but no molecule can be read from it
    """
    ifs = oemolistream()
    if not ifs.open(filename):
        # Stop here with the real cause instead of printing and then failing
        # later with an unrelated IndexError on an empty list.
        raise IOError(f'Unable to open {filename} for reading...')
    for mol in ifs.GetOEMols():
        if add_tripos:
            OETriposAtomNames(mol)
        if name:
            mol.SetTitle(name)
        # Only the first molecule is ever returned, so there is no need to
        # process the rest of the file. Return a copy, as before.
        return OEMol(mol)
    raise ValueError(f'No molecules could be read from {filename}.')

Even though we have properly formatted files, with unique atom names in each residue (there are seven residues in this host), if we don't call `OETriposAtomNames`, then we get `Exception: Error: Reference molecule must have unique atom names in order to create a Topology.` when trying to define the `System`.

In [220]:
# Load the host from its SYBYL-typed mol2 file, title it 'MGO', and run
# OETriposAtomNames on it (add_tripos=True).
host = load_mol2('original/bcd-sybyl.mol2', name='MGO', add_tripos=True)

In [221]:
def check_unique_atom_names(molecule):
    """
    Report and assert that every atom in `molecule` has a distinct name.

    The atom count comes from `NumAtoms()` and the names from iterating
    `GetAtoms()`, so the check does not depend on atom indices being
    contiguous.

    Parameters
    ----------
    molecule : OEMol
        Molecule whose atom names are checked

    Raises
    ------
    AssertionError
        If two or more atoms share a name
    """
    atoms = molecule.NumAtoms()
    atom_names = {atom.GetName() for atom in molecule.GetAtoms()}
    print(f'{atoms} atoms in structure, {len(atom_names)} unique atom names.')
    assert atoms == len(atom_names), 'Atom names are not unique.'

In [222]:
# Confirm the host's atom names are all distinct; the check asserts on failure.
check_unique_atom_names(host)

147 atoms in structure, 147 unique atom names.


In [223]:
# Load the guest, title it 'BEN', and keep the atom names from the file as-is
# (add_tripos=False, so OETriposAtomNames is not applied).
guest = load_mol2('original/ben-sybyl.mol2', name='BEN', add_tripos=False)

In [224]:
# Collect the molecules that are later passed to `ff.createSystem`.
molecules = [host, guest]

Now, I'm going to add water and ions to the molecules list.

In [225]:
def add_waters_and_ions():
    """
    Build molecules for the sodium ion, chloride ion and water.

    Each SMILES string is converted with `process_smiles` using its default
    options.

    Returns
    -------
    list
        One molecule per SMILES string, in the order '[Na+]', '[Cl-]', 'O'
    """
    return [process_smiles(pattern) for pattern in ('[Na+]', '[Cl-]', 'O')]

def process_smiles(string, name=None, add_hydrogens=True, add_tripos=True, charge=True):
    """
    Convert a SMILES string into an `OEMol`.

    Parameters
    ----------
    string : str
        SMILES string to parse
    name : str, optional
        If given, set as the title of the molecule
    add_hydrogens : bool
        If True, call `OEAddExplicitHydrogens` on the molecule
    add_tripos : bool
        If True, call `OETriposAtomNames` on the molecule
    charge : bool
        If True, set each atom's partial charge to its formal charge

    Returns
    -------
    OEMol
        The molecule built from `string`

    Raises
    ------
    ValueError
        If `OESmilesToMol` reports that the SMILES string could not be parsed
    """
    mol = OEMol()
    # OESmilesToMol signals a parse failure through its return value; without
    # this check an invalid SMILES would silently yield an unusable molecule.
    if not OESmilesToMol(mol, string):
        raise ValueError(f'Unable to parse SMILES string: {string!r}')
    if add_hydrogens:
        OEAddExplicitHydrogens(mol)
    if add_tripos:
        OETriposAtomNames(mol)
    if charge:
        for atom in mol.GetAtoms():
            atom.SetPartialCharge(atom.GetFormalCharge())
    if name:
        mol.SetTitle(name)
    return mol

In [226]:
# Molecules for Na+, Cl- and water, built from SMILES.
water_and_ions = add_waters_and_ions()

In [227]:
# Rebuild the list rather than appending in place, so that re-running this
# cell does not add the water and ions a second time.
molecules = [host, guest] + water_and_ions

At this point, the molecules have been converted to `OEMol`s and they have charges, but the coordinates are not set. We could use a docking procedure like [this](https://github.com/openforcefield/openforcefield/blob/master/examples/host_guest_simulation/smirnoff_host_guest.ipynb) to set the position of the host and guest, or we could take the coordinates from an existing fully solvated system. I'm going to attempt the latter approach, because in general, we are not going to want to use a docking calculation to provide the coordinates for intermediate attach-pull-release windows, where the host and guest are not closely bound to each other.

# Read in (existing) coordinates to set the positions of the `OEMol`s

In [197]:
def create_pdb_with_conect(solvated_pdb, amber_prmtop, output_pdb, path='./'):
    """
    Create a PDB file containing CONECT records.
    This is not very robust, please manually check the `cpptraj` output.
    `cpptraj` must be in your PATH.

    A `cpptraj` input script is written to `<output_pdb>.in` and everything
    `cpptraj` prints (stdout and stderr) is captured in `<output_pdb>.out`,
    both inside `path`. `cpptraj` runs with `path` as its working directory,
    so the file names below are interpreted relative to `path`.

    Parameters
    ----------
    solvated_pdb : str
        Existing solvated structure from e.g., Mobley's Benchmark Sets repository
    amber_prmtop : str
        AMBER (or other) parameters for the residues in the solvated PDB file
    output_pdb : str
        Output PDB file name
    path : str
        Directory for input and output files

    Raises
    ------
    OSError
        If the script or log file cannot be written, or `cpptraj` cannot be
        started (e.g. it is not in your PATH)
    """
    cpptraj = f'''
    parm {amber_prmtop}
    trajin {solvated_pdb}
    trajout {output_pdb} conect
    '''

    cpptraj_input = output_pdb + '.in'
    cpptraj_output = output_pdb + '.out'
    # os.path.join keeps this working when `path` has no trailing separator.
    input_path = os.path.join(path, cpptraj_input)
    log_path = os.path.join(path, cpptraj_output)

    with open(input_path, 'w') as file:
        file.write(cpptraj)
    with open(log_path, 'w') as file:
        p = sp.Popen(['cpptraj', '-i', cpptraj_input], cwd=path,
                     stdout=file, stderr=file)
        # Both streams go to the log file, so communicate() has nothing to
        # return; it is only used to wait for cpptraj to finish.
        p.communicate()

    if p.returncode == 0:
        print('PDB file written by cpptraj.')
    else:
        print('Error returned by cpptraj.')
        print(f'Exit code: {p.returncode}')
        # Show what cpptraj actually reported instead of the empty pipes.
        with open(log_path) as file:
            print(f'Output: {file.read()}')


def prune_conect(input_pdb, output_pdb, path='./'):
    """
    Delete CONECT records that correspond only to water molecules.
    This is necessary to be standards-compliant.
    This is not very robust.

    The number in the second whitespace-separated field of the first line
    containing `WAT` is looked up, the input is cut just before the first
    CONECT record whose leading serial (columns 7-11) equals that number, and
    a final `END` line is appended. Everything from that CONECT record onwards
    is dropped.

    Parameters
    ----------
    input_pdb : str
        Input PDB file name
    output_pdb : str
        Output PDB file name
    path : str
        Directory for input and output files

    Raises
    ------
    ValueError
        If the input has no line containing `WAT`, or no CONECT record for
        the first water entry
    """
    with open(os.path.join(path, input_pdb)) as file:
        lines = file.readlines()

    # NOTE(review): in a standard ATOM/HETATM record the second field is the
    # atom serial number rather than a residue number; the variable name and
    # message are kept to match the existing notebook output.
    first_water_residue = None
    for line in lines:
        if 'WAT' in line:
            first_water_residue = int(float(line.split()[1]))
            break
    if first_water_residue is None:
        raise ValueError(f'No WAT entries found in {input_pdb}.')
    print(f'First water residue = {first_water_residue}')

    # Compare the fixed-width serial field rather than matching on spacing,
    # so the lookup does not depend on how many digits the serial has.
    line_to_delete_from = None
    for line_number, line in enumerate(lines, start=1):
        if line.startswith('CONECT') and line[6:11].strip() == str(first_water_residue):
            line_to_delete_from = line_number
            break
    if line_to_delete_from is None:
        raise ValueError(
            f'No CONECT record found for entry {first_water_residue} in {input_pdb}.')
    print(f'Found first water CONECT entry at line = {line_to_delete_from}')

    # Write everything in-process so the output is complete, and END is
    # guaranteed to be the last line, by the time this function returns.
    with open(os.path.join(path, output_pdb), 'w') as file:
        file.writelines(lines[:line_to_delete_from - 1])
        file.write('END\n')

In [198]:
# Run cpptraj (in the default directory './') on the AMBER coordinates and
# topology to write a PDB file that includes CONECT records.
create_pdb_with_conect('original/solvated.inpcrd', 'original/solvated.prmtop', 'generated/solvated.pdb')

PDB file written by cpptraj.


In [199]:
# Cut the PDB at the first water CONECT record and terminate it with END.
prune_conect('generated/solvated.pdb', 'generated/solvated_conect.pdb')

First water residue = 175
Found first water CONECT entry at line = 9192


In [200]:
# Load the pruned PDB; `fixer.topology` and `fixer.positions` are used below
# to build the system and to set the starting coordinates.
fixer = PDBFixer('generated/solvated_conect.pdb')

# Setup the OpenMM system

In [228]:
# Load the SMIRNOFF99Frosst and TIP3P force field files together.
ff = ForceField('forcefield/smirnoff99Frosst.ffxml', 'forcefield/tip3p.ffxml') 
# Create the system from the PDB topology and the `molecules` list (host,
# guest, ions, water), using PME with a 1.1 nm nonbonded cutoff and an Ewald
# error tolerance of 1e-4.
# NOTE(review): `PME` and `unit` are not imported by name in this notebook;
# presumably they arrive through one of the star imports -- confirm.
system = ff.createSystem(fixer.topology, molecules, nonbondedMethod = PME, 
                         nonbondedCutoff = 1.1*unit.nanometer, 
                         ewaldErrorTolerance = 1e-4)

At this point, we should be able to run a simulation with SMIRNOFF99Frosst in OpenMM. Let's do a quick check.

In [229]:
# Build a Simulation on the PDB topology with a Verlet integrator (2 fs step)
# and start from the coordinates read from the PDB file.
# NOTE(review): `openmm` and `app` are not imported by name in this notebook;
# presumably they arrive through one of the star imports -- confirm.
integrator = openmm.VerletIntegrator(2.0*unit.femtoseconds)
simulation = app.Simulation(fixer.topology, system, integrator)
simulation.context.setPositions(fixer.positions) 

# Potential energy of the starting structure, divided by kcal/mol so it can be
# formatted as a plain number. '%.2g' keeps only two significant figures.
state = simulation.context.getState(getEnergy = True)
energy = state.getPotentialEnergy() / unit.kilocalories_per_mole
print("Energy before minimization (kcal/mol): %.2g" % energy)

# Minimize with the default settings, then report the energy again. Positions
# are requested too, although only the energy is used in this cell.
simulation.minimizeEnergy()
state = simulation.context.getState(getEnergy=True, getPositions=True)
energy = state.getPotentialEnergy() / unit.kilocalories_per_mole
print("Energy after minimization (kcal/mol): %.2g" % energy)

Energy before minimization (kcal/mol): 2.4e+03
Energy after minimization (kcal/mol): -2.2e+03


# Export to AMBER