In [None]:
# Download the shared Colab setup notebook from the workshop repository,
# then execute it inside this kernel with %run.
!wget https://raw.githubusercontent.com/openforcefield/2023-workshop-vignettes/main/colab_setup.ipynb
%run colab_setup.ipynb

In [None]:
# Append the conda environment's lib/ directory to LD_LIBRARY_PATH.
# NOTE(review): presumably needed so shared libraries installed by the Colab
# setup can be found at runtime - confirm.
%env LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/

# Making a box of mixed solvents


In [1]:
import time

import mdtraj
import nglview
import openmm
import openmm.app
import openmm.unit
from openff.interchange import Interchange
from openff.interchange.components._packmol import pack_box
from openff.interchange.interop.openmm import to_openmm_positions
from openff.toolkit import ForceField, Molecule, Topology
from openff.units import unit
from rich.pretty import pprint



## Construct the topology

In this example we'll construct a topology consisting of one ligand solvated by a mixture of ethanol and DMSO in a cubic box with 2 nanometer sides. We'll build the molecules from SMILES rather than from a prepared structure file, and use a _mapped_ SMILES for the ligand so that its atom ordering is fixed explicitly. (Atom ordering is not strictly a part of SMILES and is therefore liable to change with updates to RDKit.)

In [2]:
# Co-solvents: plain (unmapped) SMILES are enough here.
ethanol = Molecule.from_smiles("CCO")
dmso = Molecule.from_smiles("CS(=O)C")

# Ligand: built from a mapped SMILES, so the atom indices follow the map
# numbers written in the string.
ligand = Molecule.from_mapped_smiles(
    "[H:7][C@:6]1([C:13](=[C:11]([C:9](=[O:10])[O:8]1)[O:12][H:19])[O:14][H:20])[C@:3]([H:4])([C:2]([H:16])([H:17])[O:1][H:15])[O:5][H:18]"
)

There are a few ways to assemble these molecules into an OpenFF [`Topology`](https://docs.openforcefield.org/projects/toolkit/en/stable/api/generated/openff.toolkit.topology.Topology.html#openff.toolkit.topology.Topology) object. Since we already know how many of which molecules we want, we'll use a PACKMOL wrapper shipped with Interchange. The `Topology` object returned by `pack_box` contains 100 copies of ethanol, 40 copies of DMSO, the ligand, the box vectors we asked for (plus some padding), and the positions generated by PACKMOL.

In [3]:
# Pack 100 ethanol, 40 DMSO and 1 ligand molecule into a box; the i-th entry of
# `number_of_copies` is the count for the i-th entry of `molecules`.
topology = pack_box(
    molecules=[ethanol, dmso, ligand],
    number_of_copies=[100, 40, 1],
    box_size=unit.Quantity([2, 2, 2], unit.nanometer),
)
# Display a quick sanity check: molecule count, box vectors, positions shape.
topology.n_molecules, topology.box_vectors, topology.get_positions().shape

(141,
 array([[2.2, 0. , 0. ],
        [0. , 2.2, 0. ],
        [0. , 0. , 2.2]], dtype=float32) <Unit('nanometer')>,
 (1320, 3))

The ["Sage"](https://openforcefield.org/community/news/general/sage2.0.0-release/) force field line (version 2.x.x) covers all of the small molecules in this box, so a single force field is enough to parametrize this topology. It also includes TIP3P parameters for water, so aqueous systems do not need a second force field either. (One could use a different water model provided they accept the risks of using a different one than the force field was optimized with.)

Note that the "Parsley" (version 1.x.x) line did *not* include TIP3P parameters, so loading in an extra force field was required.

In [4]:
# Load the unconstrained variant of OpenFF 2.1.0 from its .offxml file.
sage = ForceField("openff_unconstrained-2.1.0.offxml")

From here, we can create an ``Interchange`` object, which stores the results of applying the force field to the topology. Since the `Topology` object contained positions and box vectors, we don't need to set them again - they're already set on the `Interchange` object!

In [5]:
# Apply the force field to the packed topology; the result is reused by the
# OpenMM and GROMACS sections below.
interchange: Interchange = Interchange.from_smirnoff(
    force_field=sage, topology=topology
)
# Display atom count, box and positions shape to confirm they carried over
# from the topology.
interchange.topology.n_atoms, interchange.box, interchange.positions.shape

(1320,
 array([[2.20000005, 0.        , 0.        ],
        [0.        , 2.20000005, 0.        ],
        [0.        , 0.        , 2.20000005]]) <Unit('nanometer')>,
 (1320, 3))

Now, we can prepare everything that OpenMM needs to run and report a brief equilibration simulation:
* A [`Simulation`](http://docs.openmm.org/latest/api-python/generated/openmm.app.simulation.Simulation.html#openmm.app.simulation.Simulation) object containing
  * An `openmm.System`
  * A topology in OpenMM's object model (`openmm.app.Topology`)
  * Positions and box vectors in OpenMM's unit system (`openmm.unit.Quantity`)
* A barostat, since we want to use NPT dynamics to relax the box size toward equilibrium
* An integrator
* Reporters for the trajectory and simulation data

For convenience, let's wrap some boilerplate code into a function that can be called again later with different inputs.

In [6]:
def create_simulation(
    interchange: Interchange,
    dcd_stride: int = 10,
    trajectory_name: str = "trajectory.dcd",
    temperature: openmm.unit.Quantity = 300 * openmm.unit.kelvin,
    data_name: str = "data.csv",
) -> openmm.app.Simulation:
    """Build an energy-minimized, ready-to-run NPT OpenMM simulation.

    Parameters
    ----------
    interchange
        The parametrized system to export to OpenMM.
    dcd_stride
        Number of steps between frames written to the DCD trajectory.
    trajectory_name
        Path of the DCD trajectory file to write.
    temperature
        Temperature used consistently for the barostat, the Langevin
        integrator and the initial velocities.
    data_name
        Path of the CSV file written by the state data reporter.

    Returns
    -------
    openmm.app.Simulation
        A simulation with positions set, energy minimized, velocities
        initialized, and DCD/state-data reporters attached.
    """
    system = interchange.to_openmm(combine_nonbonded_forces=True)
    topology = interchange.to_openmm_topology()
    positions = to_openmm_positions(interchange, include_virtual_sites=True)

    # The barostat must sample at the same temperature as the thermostat;
    # a mismatch makes the volume moves inconsistent with the ensemble
    # generated by the integrator.
    barostat = openmm.MonteCarloBarostat(
        1.00 * openmm.unit.bar,
        temperature,
        25,  # attempt a volume move every 25 steps
    )
    system.addForce(barostat)

    integrator = openmm.LangevinIntegrator(
        temperature,
        1 / openmm.unit.picosecond,
        1 * openmm.unit.femtoseconds,
    )

    simulation = openmm.app.Simulation(topology, system, integrator)
    simulation.context.setPositions(positions)

    # https://github.com/openmm/openmm/issues/3736#issuecomment-1217250635
    simulation.minimizeEnergy()

    simulation.context.setVelocitiesToTemperature(temperature)
    simulation.context.computeVirtualSites()

    dcd_reporter = openmm.app.DCDReporter(trajectory_name, dcd_stride)
    # Thermodynamic data is written every 10 steps.
    state_data_reporter = openmm.app.StateDataReporter(
        data_name, 10, step=True, potentialEnergy=True, temperature=True, density=True
    )
    simulation.reporters.append(dcd_reporter)
    simulation.reporters.append(state_data_reporter)

    return simulation

In [7]:
# Build the simulation with the default stride and output file name.
simulation = create_simulation(interchange)

Finally, we can run this simulation. This should take approximately 10-20 seconds on a laptop or small workstation.

Again, let's wrap this up into a function to avoid copy-pasting code.

In [8]:
def run_simulation(simulation: openmm.app.Simulation, n_steps: int = 5000):
    """Advance ``simulation`` by ``n_steps`` steps, printing the box size as it goes.

    The diagonal of the periodic box vectors (in nanometers) is printed every
    500 steps, labelled with the number of steps taken so far in this call,
    followed by the elapsed wall-clock time once the run finishes.

    Parameters
    ----------
    simulation
        The simulation to advance; its attached reporters fire as usual.
    n_steps
        Total number of integration steps to take.
    """
    report_interval = 500

    print("Starting simulation")
    # perf_counter measures wall-clock time; process_time would report CPU time,
    # which is not what "elapsed" means when work runs on several threads or a GPU.
    start_time = time.perf_counter()

    print("Step, box lengths (nm)")

    # Step in report-sized chunks rather than one step per Python call.
    for steps_done in range(0, n_steps, report_interval):
        box_vectors = (
            simulation.context.getState()
            .getPeriodicBoxVectors()
            .value_in_unit(openmm.unit.nanometer)
        )
        print(steps_done, [round(box_vectors[dim][dim], 3) for dim in range(3)])

        # The last chunk is shortened when n_steps is not a multiple of the interval.
        simulation.step(min(report_interval, n_steps - steps_done))

    end_time = time.perf_counter()
    print(f"Elapsed time: {(end_time - start_time):.2f} seconds")

In [9]:
# Run with the default number of steps (5000).
run_simulation(simulation)

Starting simulation
Step, box lengths (nm)
0 [2.2, 2.2, 2.2]
500 [2.251, 2.251, 2.251]
1000 [2.282, 2.282, 2.282]
1500 [2.313, 2.313, 2.313]
2000 [2.358, 2.358, 2.358]
2500 [2.386, 2.386, 2.386]
3000 [2.397, 2.397, 2.397]
3500 [2.432, 2.432, 2.432]
4000 [2.433, 2.433, 2.433]
4500 [2.437, 2.437, 2.437]
Elapsed time: 11.71 seconds


## Appendix A: visualizing the trajectory

If [NGLView](http://nglviewer.org/nglview/latest/) is installed, we can use it and MDTraj to load and visualize the DCD trajectory:

In [13]:
# NBVAL_SKIP
# Load the DCD file; the MDTraj topology is derived from the OpenFF topology
# via its OpenMM representation.
trajectory: mdtraj.Trajectory = mdtraj.load(
    "trajectory.dcd", top=mdtraj.Topology.from_openmm(topology.to_openmm())
)
# image_molecules() is applied before display.
# NOTE(review): presumably to keep molecules whole across periodic boundaries - confirm.
view = nglview.show_mdtraj(trajectory.image_molecules())
# NOTE(review): the box packed above contains ethanol, DMSO and the ligand but no
# water, so this "water" selection probably matches nothing - confirm intent.
view.add_representation("line", selection="water")
view

NGLWidget(max_frame=499)

## Appendix B: In GROMACS and AMBER


In [None]:
!rm  gromacs_input*

interchange.to_gromacs("gromacs_input")
!ls gromacs_input*

In [None]:
# Energy minimization: preprocess inputs/emin.mdp with the exported coordinates
# and topology into em.tpr, then run it (outputs use the "em" prefix).
!gmx grompp -f inputs/emin.mdp -c gromacs_input.gro -p gromacs_input.top -o em.tpr
!gmx mdrun -v -deffnm em

In [None]:
# NPT stage: start from the minimized coordinates (em.gro). Up to two grompp
# warnings are tolerated via --maxwarn 2.
# NOTE(review): confirm which warnings are being suppressed here.
! gmx grompp -f inputs/npt.mdp -c em.gro -p gromacs_input.top -o npt.tpr --maxwarn 2
! gmx mdrun -deffnm npt

In [None]:
# Final MD stage: continue from the NPT coordinates (npt.gro) and checkpoint
# (npt.cpt); outputs use the "md" prefix.
!gmx grompp -f inputs/md.mdp -c npt.gro -t npt.cpt -p gromacs_input.top -o md.tpr
!gmx mdrun -deffnm md

In [4]:
# Load the GROMACS trajectory with the same topology used for the OpenMM run.
# (nglview and mdtraj are already imported in the imports cell at the top.)
trajectory: mdtraj.Trajectory = mdtraj.load(
    "md.xtc", top=mdtraj.Topology.from_openmm(topology.to_openmm())
)

view = nglview.show_mdtraj(trajectory.image_molecules())
# NOTE(review): the box packed above contains no water molecules, so this
# "water" selection probably matches nothing - confirm intent.
view.add_representation("line", selection="water")
view

NGLWidget(max_frame=1000)