In [120]:
import numpy as np
import subprocess as sp

from openeye.oechem import *
from openforcefield.typing.engines.smirnoff import *
from openforcefield.utils import mergeStructure
import parmed as pmd

David Mobley suggested:
> *However, I think I CAN offer one more piece of good news*: If it’s just a problem with the solvent (which I expect it is, at least in terms of the “too much space” issue) then you should be able to bypass it entirely by using what could be called a “mixed forcefield” system: Just parameterize your water as normal, and your host-guest part using SMIRNOFF, and join them up. (edited)

# Load in a solvated system to ParmEd

In [3]:
topology = pmd.load_file('generated/solvated_conect.pdb')

# Split the topology into pieces

In [52]:
components = topology.split()

Poking around with the `components` object, I can see that the host is listed first, then the guest, then the ions (Na and Cl, not sure of the ordering though), and then the water molecules.

In [68]:
topologies = pmd.Structure()
numbers = []
current_names = []

for c in components[0:2]:
    topologies += c[0]
    numbers.append( c[1] )    
    current_names.append( c[0].residues[0].name )

In [119]:
print(topologies, numbers, current_names)

<Structure 161 atoms; 8 residues; 168 bonds; NOT parametrized> [{0}, {1}] ['MGO', 'BEN']


# Parameterize the host and guest with SMIRNOFF99Frosst

In [61]:
def load_mol2(filename, name=None, add_tripos=True):
    ifs = oemolistream()
    molecules = []
    if not ifs.open(filename):
        print(f'Unable to open {filename} for reading...')
    for mol in ifs.GetOEMols():
        if add_tripos:
            OETriposAtomNames(mol)
        if name:
            mol.SetTitle(name)
        # Add all the molecules in this file to a list, but only return the first one.
        molecules.append(OEMol(mol))
    return molecules[0]

In [62]:
host = load_mol2('original/bcd-sybyl.mol2', name='MGO', add_tripos=True)
guest = load_mol2('original/ben-sybyl.mol2', name='BEN', add_tripos=False)

In [63]:
molecules = [host, guest]

In [72]:
ff = ForceField('forcefield/smirnoff99Frosst.ffxml') 
system = ff.createSystem(topologies.topology, molecules, 
                         nonbondedCutoff=1.1*unit.nanometer, 
                         ewaldErrorTolerance=1e-4
                         )

In [94]:
hg_structure = pmd.openmm.topsystem.load_topology(topologies.topology, system, topologies.positions)

# Build the water and ions separately using AMBER tools

In [108]:
merged = mergeStructure(hg_structure, water_structure)

In [110]:
def fix_bonds(structure):
    bond_type = pmd.BondType(1.0, 1.0, list=structure.bond_types)
    structure.bond_types.append(bond_type)
    for bond in structure.bonds:
        if bond.type is None:
            bond.type = bond_type

In [111]:
fix_bonds(merged)

In [112]:
merged.save('tmp.prmtop')

I should note that building the water parameters using `openmmtools` still presents a problem when saving with ParmEd, so I will do this using `cpptraj` and `tleap`.

In [124]:
def extract_water_and_ions(amber_prmtop, amber_inpcrd, host_residue, guest_residue, 
                           output_pdb, path='./'):
    """
    Create a PDB file containing just the water and ions.
    This is not very robust, please manually check the `cpptraj` output.
    `cpptraj` must be in your PATH.
    Parameters
    ----------
    amber_prmtop : str
        Existing solvated structure parameters from e.g., Mobley's Benchmark Sets repository
    amber_inpcrd : str
        Existing solvated structure coordinates
    host_residue : str
        Residue name of the host molecule (to be stripped)
    guest_residue : str
        Residue name of the guest molecule (to be stripped)
    output_pdb : str
        Output PDB file name
    path : str
        Directory for input and output files
    """
    
    cpptraj = \
    f'''
parm {amber_prmtop}
trajin {amber_inpcrd}
strip {host_residue}
strip {guest_residue}
trajout {output_pdb}
    '''
    cpptraj_input = output_pdb + '.in'
    cpptraj_output = output_pdb + '.out'

    with open(path + cpptraj_input, 'w') as file:
        file.write(cpptraj)
    with open(path + cpptraj_output, 'w') as file:
        p = sp.Popen(['cpptraj', '-i', cpptraj_input], cwd=path,
                     stdout=file, stderr=file)
        output, error = p.communicate()
    if p.returncode == 0:
        print('Water and ion PDB file written by cpptraj.')
    elif p.returncode == 1:
        print('Error returned by cpptraj.')
        print(f'Output: {output}')
        print(f'Error: {error}')
    else:
        print(f'Output: {output}')
        print(f'Error: {error}')
        
def create_water_and_ions_parameters(input_pdb, output_prmtop, output_inpcrd, 
                                     water_model='tip3p', ion_model ='ionsjc_tip3p',
                                    path='./'):
    """
    Create AMBER coordinates and parameters for just the water and ions.
    This is not very robust, please manually check the `tleap` output.
    `tleap` must be in your PATH.
    Parameters
    ----------
    input_pdb : str
        PDB structure containing everything except the host and guest
    output_prmtop : str
        AMBER parameters for the water and ions
    output_inpcrd : str
        AMBER coordinates for the water and ions
    water_model : str
        Water model, must match AMBER `leaprc.water` and `frcmod`files
    ion_model : str
        Ion model, must match AMBER `leaprc.water` and `frcmod`files
    path : str
        Directory for input and output files
    """
    
    tleap = \
    f'''
source leaprc.protein.ff14sb
source leaprc.water.{water_model}
source leaprc.gaff
loadamberparams frcmod.{water_model}
loadamberparams frcmod.{ion_model}
mol = loadpdb {input_pdb}
saveamberparm mol {output_prmtop} {output_inpcrd}
quit
    '''
    tleap_input = output_prmtop + '.in'
    tleap_output = output_prmtop + '.out'

    with open(path + tleap_input, 'w') as file:
        file.write(tleap)
    with open(path + tleap_output, 'w') as file:
        p = sp.Popen(['tleap', '-f', tleap_input], cwd=path,
                     stdout=file, stderr=file)
        output, error = p.communicate()
    if p.returncode == 0:
        print('Water and ion parameters and coordinates written by tleap.')
    elif p.returncode == 1:
        print('Error returned by tleap.')
        print(f'Output: {output}')
        print(f'Error: {error}')
    else:
        print(f'Output: {output}')
        print(f'Error: {error}')

In [130]:
extract_water_and_ions('../original/solvated.prmtop', '../original/solvated.inpcrd', ':MGO', ':BEN', 
                           'water_ions.pdb', 'generated/')

Water and ion PDB file written by cpptraj.


In [131]:
create_water_and_ions_parameters('water_ions.pdb', 'water_ions.prmtop', 'water_ions.inpcrd', 
                                     water_model='tip3p', ion_model ='ionsjc_tip3p',
                                    path='generated/')

Water and ion parameters and coordinates written by tleap.


In [132]:
water = pmd.amber.AmberParm('generated/water_ions.prmtop', xyz='generated/water_ions.inpcrd')

In [133]:
merged = mergeStructure(hg_structure, water)

In [139]:
merged.save('generated/solvated_smirnoff.prmtop')

In [140]:
merged.save('generated/solvated_smirnoff.inpcrd')

I think we *might* be in business.

In [179]:
def check_amber_crash(topology, coordinates, input_file, output_file, restart_file, executable, path):
    """
    Quickly checks to see if an AMBER minimization can succeed.    
    Parameters
    ----------
    topology : PDBFixer topology (probably others work, as well)
        Simulation topology
    coordinates : PDBFixer coordinates (others will work)
        Coordinates of the atoms in the topology
    output_file : str
        Output of the minimization
    restart_file : str
        Simulation restart final
    executable : str
        AMBER executable (sander is safe), must be in path
    path : str
        Directory of files
    """

    minimization = \
'''
&cntrl
    imin   = 1,
    maxcyc = 500,
    ncyc   = 250,
    ntb    = 1,
    igb    = 0,
    cut    = 12
    ntxo   = 1
/     
'''
    with open(path + input_file, 'w') as file:
        file.write(minimization)
    print(f"{executable} -O -p {topology} -c {coordinates} -i {input_file}"
          f" -o {output_file} -r tmp.rst")

    sp.call(f"{executable} -O -p {topology} -c {coordinates} -i {input_file}"
            f" -o {output_file} -r tmp.rst", cwd=path, shell=True)
    
    # convert_binary_to_ascii_restart(topology, 'tmp.rst', restart_file, path)
    
def convert_binary_to_ascii_restart(topology, restart_file, ascii_file, path):
    print(f"cpptraj -p {topology} -y {restart_file} -x {ascii_file}")
    sp.call(f"cpptraj -p {topology} -y {restart_file} -x {ascii_file}", cwd=path, shell=True)
    

In [172]:
check_amber_crash('solvated_smirnoff.prmtop', 'solvated_smirnoff.inpcrd', 'solvated_smirnoff.in', 
               'solvated_smirnoff.out', 'solvated_smirnoff.rst', 'sander', 'generated/')

sander -O -p solvated_smirnoff.prmtop -c solvated_smirnoff.inpcrd -i solvated_smirnoff.in -o solvated_smirnoff.out -r solvated_smirnoff.rst


NameError: name 'ascill_file' is not defined

There is a problem getting box information in the `inpcrd` and it does not appear like there is a (90, 90, 90) missing at the bottom. In fact, the rows of coordinates seem to fill up the entire block

In [180]:
check_amber_crash('solvated_smirnoff.prmtop', '../original/solvated.inpcrd', 'solvated_smirnoff.in', 
               'solvated_smirnoff.out', 'solvated_smirnoff.rst7', 'sander', 'generated/')

sander -O -p solvated_smirnoff.prmtop -c ../original/solvated.inpcrd -i solvated_smirnoff.in -o solvated_smirnoff.out -r tmp.rst
cpptraj -p solvated_smirnoff.prmtop -y tmp.rst -x solvated_smirnoff.rst7


In [181]:
import nglview as nv
import mdtraj
from IPython import display

trajectory = mdtraj.load('generated/solvated_smirnoff.rst7', 
                         top='generated/solvated_smirnoff.prmtop')
view = nv.show_mdtraj(trajectory, gui=True)
view

A Jupyter Widget

A Jupyter Widget