In [None]:
%load_ext lab_black

# Workflow 0: Installing non-standard dependencies

This example requires installing some packages that are not installed by default.

### Option #1: Installing everything from scratch.

```shell
$ conda env create --file devtools/conda-envs/examples.yaml
$ conda activate openff-examples
```

### Option #2: Install extra packages after a normal OpenFF Toolkit installation

If the OpenFF Toolkit is already installed (`python -c "from openforcefield.topology import Molecule"` runs with no errors), the extra packages can be installed manually.

```shell
$ conda install mdtraj nglview -c conda-forge
```

### Option #2b (optional): Install GROMACS

If GROMACS is installed by other means, or if you don't wish to run the example in OpenMM _and_ GROMACS, this step can be skipped.

```shell
$ conda install gromacs -c conda-forge -c bioconda
```

If you do install GROMACS with `conda`, it is recommended that you re-activate your environment and restart this notebook.

# Workflow 1: Prepare and run a protein-ligand simulation

In [None]:
from simtk.openmm import app, LangevinIntegrator
from simtk import unit
import parmed as pmd
from parmed.openmm import NetCDFReporter
import numpy as np
import mdtraj as mdt
from tempfile import NamedTemporaryFile
import nglview

In [None]:
def find_clashing_water(pmd_struct, lig_resname, distance):
    """
    Find waters that are sterically clashing with a ligand.

    The structure is written to a temporary PDB file and re-read with MDTraj.
    The closest-atom distance between the ligand residue and every water
    residue (first frame only) is then compared against ``distance``.

    Parameters
    ----------
    pmd_struct : parmed.Structure
        The structure to analyze.
    lig_resname : str
        The up-to-three character residue name.
    distance : float
        The distance cutoff (in nanometers) for clash detection.

    Returns
    -------
    water_resnums : List[int]
        The 1-indexed residue numbers of waters that are clashing with the
        ligand. Empty if the structure has no waters or nothing clashes.

    Raises
    ------
    ValueError
        If no atoms with residue name ``lig_resname`` are found.

    """
    with NamedTemporaryFile(suffix=".pdb") as tf:
        # Close (and thereby flush) the handle before MDTraj reads the file.
        with open(tf.name, "w") as pdb_file:
            app.PDBFile.writeFile(pmd_struct.topology, pmd_struct.positions, pdb_file)
        traj = mdt.load(tf.name)
    top = traj.topology

    lig_atom_idxs = top.select(f"resname {lig_resname}")
    if len(lig_atom_idxs) == 0:
        raise ValueError(f"No atoms with residue name '{lig_resname}' were found")
    # Any ligand atom identifies the ligand residue; use the first so that
    # single-atom ligands work too.
    lig_res_idx = top.atom(lig_atom_idxs[0]).residue.index

    wat_atom_idxs = top.select("resname HOH and name O")
    wat_res_idxs = [top.atom(i).residue.index for i in wat_atom_idxs]
    if not wat_res_idxs:
        # Nothing to compare against, so nothing can clash.
        return []

    potential_contacts = [(lig_res_idx, wat_res_idx) for wat_res_idx in wat_res_idxs]
    distances, residue_pairs = mdt.compute_contacts(
        traj, contacts=potential_contacts, scheme="closest", ignore_nonprotein=False
    )

    # Honor the caller-supplied cutoff (first frame only).
    is_clashing = distances[0, :] < distance

    # Note that this is 0-indexed, while the parmed structure is
    # 1-indexed, therefore we add 1 before returning
    clash_res_idx = [int(pair[1]) + 1 for pair in residue_pairs[is_clashing]]
    return clash_res_idx

## Introducing the main cast

https://github.com/MCompChem/fep-benchmark


In [None]:
# Remove any previous checkout so the clone below succeeds on a re-run.
! rm -rf fep-benchmark
# Fetch the benchmark repository that holds the receptor and ligand files.
! git clone https://github.com/MCompChem/fep-benchmark.git

### Split off the first ligand in the SDF

In [None]:
# Copy the first 119 lines of the multi-ligand SDF into its own file.
# NOTE(review): 119 is assumed to be exactly the length of the first ligand
# record -- confirm if the upstream file changes.
! head -n119 fep-benchmark/eg5/ligands.sdf > fep-benchmark/eg5/chembl_1078774.sdf
# Print the extracted record for inspection.
! cat fep-benchmark/eg5/chembl_1078774.sdf

Magic:
* The ligand coordinates are already superimposable on the protein
* Protein+Ligand already prepared (by the Schrodinger suite, I think)

In [None]:
# Input files inside the cloned fep-benchmark checkout (eg5 directory).
receptor_path = "fep-benchmark/eg5/3l9h_prepared.pdb"
ligand_path = "fep-benchmark/eg5/chembl_1078774.sdf"

In [None]:
# Show the ligand on its own; the bare `view` on the last line renders the widget.
view = nglview.show_file(ligand_path)
view

In [None]:
# Show the prepared receptor on its own.
view = nglview.show_file(receptor_path)
view

# The plan:

* Parameterize the ligand (OFF Toolkit)
* Solvate and parameterize the protein (OpenMM)
* Combine the parameterized ligand and the parameterized protein (ParmEd)
* Remove waters that clash with the ligand (ParmEd/MDTraj)
* Simulate the complex (OpenMM)
* Visualize the simulation (nglview)

_Note that there's a new package `OpenMMForceFields` to replace much of this!_
* Home: https://github.com/openmm/openmmforcefields
* `conda install -c conda-forge -c omnia openmmforcefields`
* [Example notebook available](https://github.com/openforcefield/openforcefield/blob/master/examples/swap_amber_parameters/swap_existing_ligand_parameters_with_openmmforcefields.ipynb)

## Parameterize the ligand (OFF Toolkit)

In [None]:
from openforcefield.typing.engines.smirnoff import ForceField
from openforcefield.topology import Molecule, Topology

_(The OpenEye loading warning is expected -- The toolkit is informing us that OETK is unavailable, so it will use RDKit and AmberTools instead)_

In [None]:
# Read the ligand from the SDF file.
ligand = Molecule.from_file(ligand_path)

# Keep the first conformer's coordinates; they are used later when the ParmEd
# structure for the ligand is built.
ligand_positions = ligand.conformers[0]
off_forcefield = ForceField("openff_unconstrained-1.2.0.offxml")

# Build a topology containing just the ligand and parameterize it into an
# OpenMM System.
ligand_topology = ligand.to_topology()
ligand_system = off_forcefield.create_openmm_system(ligand_topology)

_(takes ~100 seconds)_
### This is the only block in the first workflow that uses the Open Force Field Toolkit
* Design goal: **Just work**
    * Validate early and fail as quickly as possible if inputs have problems
    * Calculate charges with no user intervention -- Using RDKit+AmberTools right now, OpenEye is faster

## Solvate and parameterize the protein (OpenMM)

In [None]:
# OpenMM (not OpenFF) force field: amber99sb for the protein, tip3p for water.
omm_forcefield = app.ForceField("amber99sb.xml", "tip3p.xml")
pdb = app.PDBFile(receptor_path)
modeller = app.Modeller(pdb.topology, pdb.positions)
# Solvate the receptor alone (the ligand is added later): TIP3P water with
# 4 angstroms of padding and 0.15 molar ionic strength.
modeller.addSolvent(
    omm_forcefield,
    model="tip3p",
    padding=4.0 * unit.angstrom,
    ionicStrength=0.15 * unit.molar,
)
# rigidWater=False is required at this step; rigid water is re-enabled when
# the final system is created from the combined ParmEd structure.
protein_system = omm_forcefield.createSystem(
    modeller.topology, nonbondedMethod=app.PME, rigidWater=False
)

* Magic: 
    * The protein was already prepared
    * AMBER-compatible residue names
    * `rigidWater=False` is necessary at this step due to design differences between OFF/OMM and ParmEd

* Look out! Confusion between OpenMM and OpenFF `Topology` and `ForceField`

## Combine the parameterized ligand and the parameterized protein (ParmEd)

In [None]:
# Wrap each parameterized OpenMM system (topology, system, positions) in a
# ParmEd Structure.
pmd_receptor_struct = pmd.openmm.load_topology(
    modeller.topology, protein_system, modeller.positions
)
pmd_ligand_struct = pmd.openmm.load_topology(
    ligand_topology.to_openmm(), ligand_system, ligand_positions
)

# Concatenate the structures: solvated receptor first, ligand second.
pmd_complex_struct = pmd_receptor_struct + pmd_ligand_struct

# Assign periodic box vectors from the solvated receptor structure
pmd_complex_struct.box_vectors = modeller.topology.getPeriodicBoxVectors()

## Visualize combined system

In [None]:
view = nglview.show_parmed(pmd_complex_struct)
# Draw everything that is not protein as licorice.
view.add_licorice(selection="(not protein)")
# Add a surface representation for atoms named NA or CL.
view.add_surface(selection=":.NA or :.CL")

view

* Note waters clashing with ligand, since protein was solvated alone

## Remove waters that clash with the ligand (ParmEd/MDTraj)

* Magic:
    * Uses function at top of file to find clashes

In [None]:
# Waters closer than 0.15 nm to the ligand (residue name "CHE") count as clashes.
clashes = find_clashing_water(pmd_complex_struct, "CHE", 0.15)

if len(clashes) == 0:
    print("No ligand-water clashes to resolve")
else:
    # Build a ParmEd residue mask such as ":12,57,301" and strip those residues.
    clash_residues_str = ",".join(str(resnum) for resnum in clashes)
    print(f"Removing ligand-clashing water residues {clash_residues_str}")
    pmd_complex_struct.strip(f":{clash_residues_str}")

# Re-draw the complex with the same representations as before the strip.
view = nglview.show_parmed(pmd_complex_struct)
view.add_licorice(selection="(not protein)")
view.add_surface(selection=":.NA or :.CL")
view

## Convert the combined system from ParmEd back to OpenMM

In [None]:
# Create the final OpenMM System from the combined ParmEd structure: PME
# electrostatics, 9 angstrom cutoff, constrained bonds to hydrogen, rigid water.
system = pmd_complex_struct.createSystem(
    nonbondedMethod=app.PME,
    nonbondedCutoff=9 * unit.angstrom,
    constraints=app.HBonds,
    rigidWater=True,
)
# Langevin dynamics at 300 K, 1/ps friction, 0.002 ps (2 fs) time step.
integrator = LangevinIntegrator(
    300 * unit.kelvin, 1 / unit.picosecond, 0.002 * unit.picoseconds
)

simulation = app.Simulation(pmd_complex_struct.topology, system, integrator)

# The box is about 75 angstroms per side, so add (30, 30, 30) to center the protein
simulation.context.setPositions(
    pmd_complex_struct.positions + np.array([30, 30, 30]) * unit.angstrom
)

# Write a trajectory frame to trajectory.nc every 10 steps.
nc_reporter = NetCDFReporter("trajectory.nc", 10)
simulation.reporters.append(nc_reporter)

## Simulate the complex (OpenMM)
### Minimize the combined system
_(Takes 110 seconds)_

In [None]:
simulation.minimizeEnergy()
# Keep the minimized coordinates; the GROMACS export later in the notebook
# starts from them.
minimized_coords = simulation.context.getState(getPositions=True).getPositions()

### Run a short simulation
_(Takes 85 seconds, largely due to trajectory writing frequency)_

In [None]:
# Draw initial velocities at 300 K, matching the integrator temperature.
simulation.context.setVelocitiesToTemperature(300 * unit.kelvin)
# 1000 steps at the 0.002 ps time step is 2 ps of dynamics.
simulation.step(1000)

## While we wait, a few asides...

### Force Fields
* Reproducibility - User *must* see the name of what they're using
* Conda data packages - "Plugin" support for additional force fields (anybody can add!)
* Evolving together - Toolkit will support all functional forms in [OpenForceFields repo](https://github.com/openforcefield/openforcefields/)

<img src="img/openforcefields.png" alt="drawing" width="800"/>

<hr/>
    
### Charge generation
* Released FFs only use AM1-BCC, though different semiempirical methods and charge corrections are now available
* "Graph based" charges are coming in the near future -- Consistency and speed!
* Library charge support is available
    
    
<img src="img/xkcd_charge.png" alt="drawing" width="400"/>

<hr/>

### Current cheminformatics toolkit differences
* File formats
* Slight differences in partial charge
* Speed
* SMILES canonicalization
* Behavior stability
* Stereochemistry definition (Edge cases)

## Visualize the simulation (nglview)

In [None]:
# Write the topology and coordinates to a PDB file so MDTraj has a topology
# for the NetCDF trajectory. The `with` block closes (and flushes) the file
# before it is read back below.
with open("system.pdb", "w") as pdb_file:
    app.PDBFile.writeFile(
        pmd_complex_struct.topology, pmd_complex_struct.positions, pdb_file
    )
mdt_traj = mdt.load("trajectory.nc", top="system.pdb")
print(mdt_traj)
import nglview

view = nglview.show_mdtraj(mdt_traj)
view

# What about GROMACS?
_Thanks, Dennis Della Corte!_

In [None]:
# Start GROMACS from the coordinates that OpenMM already minimized.
pmd_complex_struct.coordinates = minimized_coords

# Export GROMACS files.
# Two files are written: system.top and system.gro.
pmd_complex_struct.save("system.top", overwrite=True)
pmd_complex_struct.save("system.gro", overwrite=True)

In [None]:
# Each stage runs `gmx grompp` to build a .tpr run input, then `gmx mdrun` to
# execute it. Each stage starts from the previous stage's output structure.
# Stage 1 (minim.mdp) -- presumably energy minimization, judging by the file name.
! gmx grompp -f minim.mdp -c system.gro -p system.top -o em.tpr -maxwarn 1
! gmx mdrun -deffnm em

# Stage 2 (nvt.mdp) -- presumably constant-volume equilibration.
! gmx grompp -f nvt.mdp -c em.gro -r em.gro -p system.top -o nvt.tpr -maxwarn 1
! gmx mdrun -deffnm nvt

# Stage 3 (npt.mdp) -- presumably constant-pressure equilibration, continued
# from the nvt checkpoint.
! gmx grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p system.top -o npt.tpr -maxwarn 1
! gmx mdrun -deffnm npt

# Stage 4 (md.mdp) -- the final run, continued from the npt checkpoint.
! gmx grompp -f md.mdp -c npt.gro -t npt.cpt -p system.top -o md_0_1.tpr -maxwarn 1
! gmx mdrun -deffnm md_0_1

* Magic:
    * MDP files already prepared
    * `-maxwarn 1` because of rounding errors with charges
* ParmEd *is* great, but *isn't* perfect, and we're actively working on bugfixes.
* We have philosophical differences about what constitutes "parameterization" 
    * Hbond constraints?
    * Electrostatics cutoffs?
* ParmEd is unable to process several OpenMM GBSA models

_(Takes 120 seconds)_


In [None]:
# stride=1000000 keeps only every millionth frame.
# NOTE(review): for a short run this presumably loads just the first frame -- confirm that is intended.
mdt_traj = mdt.load("md_0_1.xtc", top="system.gro", stride=1000000)
print(mdt_traj)

In [None]:
import nglview

# Display the GROMACS trajectory loaded in the previous cell.
view = nglview.show_mdtraj(mdt_traj)
view

### Workflow 1 Conclusions
* Toolkit parameterization requires *8 lines*, three of which are cheap hacks 
* Conda-installable, open source tools performed everything from basic system prep to simulation and visualization
* Using OpenMM, we never had to leave Python
* Using ParmEd, there was little additional work to running with GROMACS


<img src="img/dog_food.jpg" alt="drawing" width="350"/>


## Workflow 2: Changing force field parameters and energy-minimizing the resulting molecule


### Note the recent change to the SMIRNOFF 0.3 specification

```
<Angles version="0.3" potential="harmonic">
		<Angle smirks="[*:1]~[#6X4:2]-[*:3]" angle="109.5*degree" k="100.0*mole**-1*radian**-2*kilocalorie"/>
		<Angle smirks="[#1:1]-[#6X4:2]-[#1:3]" angle="109.5*degree" k="70.0*mole**-1*radian**-2*kilocalorie"/>
</Angles>
```
<hr/>

### Getting started

Let's reload the ligand, in case the live demo had a hiccup above.

Magic:
* To avoid spending time running AM1-BCC again, I'm providing explicitly-defined charges

In [None]:
from openforcefield.topology import Molecule, Topology
from openforcefield.typing.engines.smirnoff import ForceField

# Reload the ligand so this workflow does not depend on the state left behind
# by Workflow 1.
ligand_path = "fep-benchmark/eg5/chembl_1078774.sdf"
ligand = Molecule.from_file(ligand_path)
# Explicit partial charges (52 values, in units of elementary charge) so that
# AM1-BCC does not have to be re-run.
# NOTE(review): presumably one value per atom, in the SDF atom order -- confirm.
ligand.partial_charges = (
    np.array(
        [
            -0.085767,
            -0.0027,
            -0.085767,
            -0.085767,
            -0.1043,
            -0.092,
            -0.174,
            0.1506,
            -0.1383,
            -0.073,
            0.2004,
            -0.4076,
            0.1254,
            -0.1114,
            -0.0684,
            -0.1077,
            0.2508,
            -0.1043,
            -0.092,
            -0.138,
            0.1021,
            -0.4871,
            0.0369,
            -0.1449,
            -0.124,
            -0.7206,
            0.036144,
            0.036144,
            0.036144,
            0.036144,
            0.036144,
            0.036144,
            0.036144,
            0.036144,
            0.036144,
            0.131,
            0.135,
            0.138,
            0.0437,
            0.0442,
            0.0442,
            0.0497,
            0.0497,
            0.0497,
            0.0497,
            0.0567,
            0.0837,
            0.147,
            0.159,
            0.432,
            0.15,
            0.3978,
        ]
    )
    * unit.elementary_charge
)
ligand_positions = ligand.conformers[0]
ligand_topology = ligand.to_topology()

First, we use `ForceField.label_molecules` to identify which torsion parameters were assigned to the hydroxyl.

In [None]:
openff_forcefield = ForceField("openff-1.2.0.offxml")
ff_applied_parameters = openff_forcefield.label_molecules(ligand_topology)[0]
for atoms, parameter in ff_applied_parameters["ProperTorsions"].items():
    # Element symbols of the four torsion atoms, in order.
    symbols = [ligand.atoms[atom_idx].element.symbol for atom_idx in atoms]
    # Print torsions that begin with H-O or end with O-H.
    starts_with_hydroxyl = symbols[0] == "H" and symbols[1] == "O"
    ends_with_hydroxyl = symbols[2] == "O" and symbols[3] == "H"
    if starts_with_hydroxyl or ends_with_hydroxyl:
        print(atoms, parameter)

In [None]:
# Look up one torsion parameter by its SMIRKS pattern.
hydroxyl_torsion = openff_forcefield.get_parameter_handler("ProperTorsions").parameters[
    "[*:1]~[#6X3:2]-[#8X2:3]-[#1:4]"
]
# Overwrite the first torsion term in place on the loaded force field object.
# NOTE(review): the negative k1 with periodicity 2 and phase 180 degrees
# presumably favors an out-of-plane hydroxyl -- confirm.
hydroxyl_torsion.periodicity1 = 2
hydroxyl_torsion.phase1 = 180 * unit.degree
hydroxyl_torsion.k1 = -10 * unit.kilocalorie / unit.mole

## Define a function to take an OpenFF molecule and ForceField and visualize the minimized structure

In [None]:
def minimize_and_visualize(molecule, forcefield):
    """
    Energy-minimize a molecule with a given force field and return a viewer.

    Parameters
    ----------
    molecule : openforcefield.topology.Molecule
        The molecule to parameterize. Its first conformer supplies the
        starting coordinates, and its own partial charges are used (it is
        passed via ``charge_from_molecules``).
    forcefield : openforcefield.typing.engines.smirnoff.ForceField
        The force field used to create the OpenMM system.

    Returns
    -------
    view : nglview.NGLWidget
        A widget showing the minimized structure.

    """
    mol_topology = molecule.to_topology()
    # Use the force field passed in by the caller, not a notebook global.
    mol_system = forcefield.create_openmm_system(
        mol_topology, charge_from_molecules=[molecule]
    )

    integrator = LangevinIntegrator(
        300 * unit.kelvin, 1 / unit.picosecond, 0.002 * unit.picoseconds
    )
    simulation = app.Simulation(mol_topology.to_openmm(), mol_system, integrator)
    simulation.context.setPositions(molecule.conformers[0])
    simulation.minimizeEnergy()
    minimized_positions = simulation.context.getState(getPositions=True).getPositions()

    with NamedTemporaryFile(suffix=".pdb") as tf:
        # Close (and thereby flush) the handle before nglview reads the file.
        with open(tf.name, "w") as pdb_file:
            app.PDBFile.writeModel(simulation.topology, minimized_positions, pdb_file)
        view = nglview.show_file(tf.name)
    return view

In [None]:
# Minimize the ligand with the edited hydroxyl torsion and display the result.
view = minimize_and_visualize(ligand, openff_forcefield)
view

## But we didn't need the OFF toolkit to change the parameters for a _single term_
## So, how about changing FF parameters for all H-X-H angles?

In [None]:
ff_applied_parameters = openff_forcefield.label_molecules(ligand_topology)[0]
for atoms, parameter in ff_applied_parameters["Angles"].items():
    # Only the two outer atoms of the angle decide whether it is printed.
    outer_symbols = [ligand.atoms[atoms[pos]].element.symbol for pos in (0, 2)]
    if outer_symbols == ["H", "H"]:
        print(atoms, parameter)

In [None]:
# Look up the H-C-H angle parameter by its SMIRKS pattern.
hxh_angle = openff_forcefield.get_parameter_handler("Angles").parameters[
    "[#1:1]-[#6X4:2]-[#1:3]"
]
# Set the equilibrium angle to 50 degrees on the loaded force field object.
hxh_angle.angle = 50 * unit.degree

# Re-minimize with the edited force field and display the result.
view = minimize_and_visualize(ligand, openff_forcefield)
view

![title](img/aperture.jpg)


## Now, let's mess with some torsion parameters
### Load a molecule with more interesting torsions from PDB, supplying complete topological information using SMILES

In [None]:
# Show the PDB file as-is, before any parameterization.
view = nglview.show_file("CID_15513.pdb")
view

In [None]:
# Build the molecule from SMILES; this rebinds the notebook-level `ligand`.
ligand = Molecule.from_smiles('COC(=O)C1=CC=C(C=C1)C(=O)O')

In [None]:
# Read the PDB with OpenMM, then build an OpenFF Topology from it, supplying
# the SMILES-derived molecule as the only unique molecule.
omm_pdbfile = app.PDBFile('CID_15513.pdb')
ligand_topology = Topology.from_openmm(omm_pdbfile.topology, unique_molecules=[ligand])

In [None]:
# Reload the force field so the parameter edits made earlier are discarded.
openff_forcefield = ForceField("openff-1.2.0.offxml")
ligand_system = openff_forcefield.create_openmm_system(ligand_topology)

integrator = LangevinIntegrator(
    300 * unit.kelvin, 1 / unit.picosecond, 0.002 * unit.picoseconds
)
simulation = app.Simulation(ligand_topology.to_openmm(), ligand_system, integrator)
simulation.context.setPositions(omm_pdbfile.positions)
simulation.minimizeEnergy()

# Query the minimized coordinates once and reuse them below.
ligand_minimized_positions = simulation.context.getState(
    getPositions=True
).getPositions()
lig_struct = pmd.openmm.load_topology(
    simulation.topology, ligand_system, ligand_minimized_positions
)
with NamedTemporaryFile(suffix=".pdb") as tf:
    # Close (and thereby flush) the handle before nglview reads the file.
    with open(tf.name, "w") as pdb_file:
        app.PDBFile.writeModel(
            simulation.topology, ligand_minimized_positions, pdb_file
        )
    view = nglview.show_file(tf.name)
view


### Let's make the substituent groups perpendicular to the ring.

In [None]:
# SMIRKS patterns of every torsion parameter that starts or ends on an oxygen.
torsion_smirkses = set()
ff_term_labels = openff_forcefield.label_molecules(ligand_topology)[0]
for atoms, parameter in ff_term_labels["ProperTorsions"].items():
    # Only the two terminal atoms of the torsion matter for this filter.
    terminal_symbols = [ligand.atoms[atoms[pos]].element.symbol for pos in (0, 3)]
    if "O" in terminal_symbols:
        print(atoms, parameter)
        torsion_smirkses.add(parameter.smirks)

_You can go back to the [original FF file](https://github.com/openforcefield/openforcefields/blob/master/openforcefields/offxml/openff-1.2.0.offxml) to see where these are defined._

This returns three _unique_ parameters, so I use a Python `set` to record all of their SMIRKSes.

Now let's change the underlying FF to prefer those torsions being perpendicular.

In [None]:
# Fetch the torsion handler once; every lookup below goes through it.
torsion_handler = openff_forcefield.get_parameter_handler("ProperTorsions")
for smarts in torsion_smirkses:
    oxygen_torsion = torsion_handler.parameters[smarts]
    # Overwrite the first torsion term of each collected parameter in place.
    oxygen_torsion.periodicity1 = 2
    oxygen_torsion.phase1 = 180 * unit.degree
    oxygen_torsion.k1 = -10 * unit.kilocalorie / unit.mole

In [None]:
# Re-create the system so that it uses the edited torsion parameters.
ligand_system = openff_forcefield.create_openmm_system(ligand_topology)

integrator = LangevinIntegrator(
    300 * unit.kelvin, 1 / unit.picosecond, 0.002 * unit.picoseconds
)
simulation = app.Simulation(ligand_topology.to_openmm(), ligand_system, integrator)
simulation.context.setPositions(omm_pdbfile.positions)
simulation.minimizeEnergy()

# Query the minimized coordinates once and reuse them below.
ligand_minimized_positions = simulation.context.getState(
    getPositions=True
).getPositions()
lig_struct = pmd.openmm.load_topology(
    simulation.topology, ligand_system, ligand_minimized_positions
)
with NamedTemporaryFile(suffix=".pdb") as tf:
    # Close (and thereby flush) the handle before nglview reads the file.
    with open(tf.name, "w") as pdb_file:
        app.PDBFile.writeModel(
            simulation.topology, ligand_minimized_positions, pdb_file
        )
    view = nglview.show_file(tf.name)
view

## Workflow 2 Conclusions:

* The 0.3 update of the SMIRNOFF specification has brought the object model more closely in line with the XML representation
* The ForceField object model exposes a way to inspect the parameters assigned to molecules
* The SMARTS-based parameters themselves can be modified prior to system creation
* The resulting systems are *immediately* ready for calculation
* This API enables fully automated cycles of parameter optimization
* Generally, this creates opportunities to bridge cheminformatics and FF science

Yet to come - An OpenFF `System` class
* Could use a layer of indirection to make parameter optimization more efficient
* Will require resolving questions in the SMIRNOFF spec
    * How will the hierarchy of charge models be resolved?
    * How will `GBSA` and `Electrostatics` forces know to inherit the same charges?
    * Where will VirtualSites, which have both charge and vdW parameters, be defined?
