In [None]:
# Colab bootstrap: install condacolab from a GitHub branch archive, then use it to
# install Mambaforge so that the `mamba` command used in the next cell is available.
!pip install -U https://github.com/conda-incubator/condacolab/archive/cuda-version-12.tar.gz
import condacolab
condacolab.install_mambaforge()

In [None]:
# Fetch the workshop's helper module (utils.py) and its conda environment spec
!wget -qN https://raw.githubusercontent.com/openforcefield/openff-docs/main/source/workshops/2024/protein_prep/utils.py
!wget -qN https://raw.githubusercontent.com/openforcefield/openff-docs/main/source/workshops/2024/protein_prep/colab-env.yml
# Install the packages from that spec into the base environment
!mamba env update -q --name=base --file=colab-env.yml

In [None]:
from google.colab import output
# Colab only: switch on support for custom Jupyter widgets
# (presumably required for the molecular viewer widgets displayed below)
output.enable_custom_widget_manager()

# Open Source Protein Preparation in Jupyter

The point of this workshop is to demonstrate what is possible entirely with Open Source tools, and to provide a framework for semi-interactive preparation of chemical systems in Jupyter.

What can we do?

- Repair proteins with missing loops, residues, atoms
    - Automatically if missing parts are described in the PDB header
    - Manually otherwise
    - Restored loop structures unlikely to be high quality
- Revert non-standard amino acids to the standard residues they were translated as
    - Automatically if
        - in the PDBFixer [substitution table]
        - or in MODRES records in the PDB header
    - Manually otherwise
- Titrate protonation states realistically (with PROPKA via PDB2PQR)
- Dock a small molecule (with Autodock Vina via Dockstring)
- Solvate
- Parametrize
- Simulate
 
What can't we do?

- Non-standard amino acids and PTMs (see "Things to make and do"!)
- Metalloproteins
- Biopolymers other than proteins
- Lipids
 
What's a bit tricky?

- Disulfide bonds
    - Disulfide bonds in the PDB CONECT records are used
    - Disulfide bonds not in the PDB CONECT records may be detected from proximity

[substitution table]: https://github.com/openmm/pdbfixer/blob/6bf10e138f6475f6f1850dae78d7bf1b420a1612/pdbfixer/pdbfixer.py#L67


In [None]:
from utils import nglview_show_openmm, show_fixer

## Loading a PDB

Choose and download a PDB! These were chosen by [searching] for models with features that are particularly difficult to prepare automatically - missing loops, post-translationally modified residues, and so on. If you already have a PDB, either put its ID in here, or skip the next two cells and load it into PDBFixer directly.

[searching]: https://www.rcsb.org/search?request=%7B%22query%22%3A%7B%22type%22%3A%22group%22%2C%22logical_operator%22%3A%22and%22%2C%22nodes%22%3A%5B%7B%22type%22%3A%22group%22%2C%22logical_operator%22%3A%22and%22%2C%22nodes%22%3A%5B%7B%22type%22%3A%22group%22%2C%22nodes%22%3A%5B%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_assembly_info.unmodeled_polymer_monomer_count%22%2C%22operator%22%3A%22greater_or_equal%22%2C%22negation%22%3Afalse%2C%22value%22%3A20%7D%7D%2C%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_assembly_info.polymer_entity_instance_count_protein%22%2C%22operator%22%3A%22equals%22%2C%22negation%22%3Afalse%2C%22value%22%3A1%7D%7D%2C%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_assembly_info.polymer_monomer_count%22%2C%22operator%22%3A%22less%22%2C%22negation%22%3Afalse%2C%22value%22%3A400%7D%7D%2C%7B%22type%22%3A%22group%22%2C%22logical_operator%22%3A%22and%22%2C%22nodes%22%3A%5B%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_polymer_instance_feature_summary.coverage%22%2C%22operator%22%3A%22greater_or_equal%22%2C%22negation%22%3Afalse%2C%22value%22%3A0.05%7D%7D%2C%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_polymer_instance_feature_summary.type%22%2C%22operator%22%3A%22exact_match%22%2C%22value%22%3A%22UNOBSERVED_ATOM_XYZ%22%2C%22negation%22%3Afalse%7D%7D%5D%2C%22label%22%3A%22nested-attribute%22%7D%2C%7B%22type%22%3A%22group%22%2C%22logical_operator%22%3A%22and%22%2C%22nodes%22%3A%5B%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_polymer_entity_feature_summary.count%22%2C%22operator%22%3A%22greater_or_equal%22%2C%22negation%22%3Afalse%2C%22value%22%3A2%7D%7D%2C%7B%22type%22%3A
%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_polymer_entity_feature_summary.type%22%2C%22operator%22%3A%22exact_match%22%2C%22value%22%3A%22modified_monomer%22%2C%22negation%22%3Afalse%7D%7D%5D%2C%22label%22%3A%22nested-attribute%22%7D%2C%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_entry_info.deposited_polymer_entity_instance_count%22%2C%22operator%22%3A%22equals%22%2C%22negation%22%3Afalse%2C%22value%22%3A1%7D%7D%2C%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22text%22%2C%22parameters%22%3A%7B%22attribute%22%3A%22rcsb_entity_source_organism.ncbi_scientific_name%22%2C%22operator%22%3A%22contains_phrase%22%2C%22negation%22%3Afalse%2C%22value%22%3A%22homo%20sapiens%22%7D%7D%5D%2C%22logical_operator%22%3A%22and%22%7D%5D%2C%22label%22%3A%22text%22%7D%5D%7D%2C%22return_type%22%3A%22entry%22%2C%22request_options%22%3A%7B%22paginate%22%3A%7B%22start%22%3A0%2C%22rows%22%3A25%7D%2C%22results_content_type%22%3A%5B%22experimental%22%5D%2C%22sort%22%3A%5B%7B%22sort_by%22%3A%22score%22%2C%22direction%22%3A%22desc%22%7D%5D%2C%22scoring_strategy%22%3A%22combined%22%7D%2C%22request_info%22%3A%7B%22query_id%22%3A%223fa061d70a5f7b845dbc81ffb7ed7866%22%7D%7D

In [None]:
# PDB ID of the structure to prepare; it also prefixes every file written by later cells.
# Swap in one of the commented alternatives to work through a different example.
pdbid = "5ap1"

# pdbid = "2r2j"

# pdbid = "2bvx"
# pdbid = "4dzr"
# pdbid = "4isc"
# pdbid = "1xjo"
# pdbid = "1kl9"
# pdbid = "1gz2"
# pdbid = "5O11"

In [None]:
import urllib.request

# Download the PDB-format file for `pdbid` from RCSB into the working directory.
# The call is the last expression of the cell, so its return value is displayed.
urllib.request.urlretrieve(
    url=f"https://files.rcsb.org/download/{pdbid}.pdb",
    filename=f"{pdbid}.pdb",
)

Load and visualize the entire PDB file. If you have your own PDB file, follow the instructions in the comment to load it. Red dots are crystallographic water oxygens. Spheres are non-polymers, cartoons are protein.

In [None]:
from pdbfixer import PDBFixer

# Load the downloaded file; every repair step below operates on this `fixer` object
fixer = PDBFixer(filename=f"{pdbid}.pdb")

# # If you have your own PDB file, uncomment this and enter the filename here
# fixer = PDBFixer(filename="my_pdb_file.pdb")

show_fixer(fixer)

## Repairing the protein model

### Terminal caps and missing residues

Identify missing loops and label where they connect to modelled residues. Loops connect spheres of the same colour. A single sphere of a given color indicates residues removed from a terminus.

In [None]:
from utils import check_missing_residues

# Populates fixer.missingResidues
fixer.findMissingResidues()

# Keep the returned selection: a later cell uses it to highlight the rebuilt loops
widget, missing_loops_selection = check_missing_residues(fixer)
widget

`findMissingResidues()` stores the residues it finds in the `missingResidues` dictionary: 

```python
fixer.missingResidues = {
    (chain_index: int, index_of_residue_after_loop: int): [
        residue_name_to_insert: str,
        residue_name_to_insert: str,
        residue_name_to_insert: str,
        ...
    ]
}
```

So the residues that are actually added can be modified by tweaking this dictionary. This can be used to replace missing terminal loops with a neutral capping residue - `ACE` for an acetyl group or `NME` for an N-methyl group.

If this cell is not run, terminal loops will be restored automatically based on the metadata in the PDB header. Note that this will most likely increase the size of the box needed, as the restored loops will be unfolded and probably extended. However, if you have some process for optimizing terminal loops and the unmodelled residues are important for protein structure or function, you may want to include them.

In [None]:
# Run this cell only if you want capped termini

# Cap both termini of every chain for which PDBFixer reported missing residues.
# Keys of fixer.missingResidues are (chain_index, insertion_index) tuples.
chains_to_cap = {chain_index for chain_index, _ in fixer.missingResidues}
topology_chains = [*fixer.topology.chains()]
for chain_index in chains_to_cap:
    # Deliberately not called `chain`: a later cell does `from itertools import chain`,
    # and rebinding that global here would break it if this cell were re-run afterwards.
    capped_chain = topology_chains[chain_index]
    n_residues = len([*capped_chain.residues()])

    # Insertion index 0 places ACE before the first modelled residue and
    # insertion index n_residues places NME after the last one. Any terminal
    # loop that was recorded at those positions is replaced by the cap.
    fixer.missingResidues[chain_index, 0] = ["ACE"]
    fixer.missingResidues[chain_index, n_residues] = ["NME"]

fixer.missingResidues

If no missing residues were detected, but you know there is one, you can tell PDBFixer about it in a similar way:

In [None]:
# # If there is a GGGS loop missing between residue indices 23 and 24 in chain 0
# # (the key is the index at which the loop is inserted, i.e. the residue that follows it):
# fixer.missingResidues.update({
#     (0, 24): ['GLY', 'GLY', 'GLY', 'SER'],
# })

Now that we've made our modifications, we can check what's going to happen again:

In [None]:
# Re-run the check so the view and the loop selection reflect the edited dictionary
widget, missing_loops_selection = check_missing_residues(fixer)
widget

The missing loops will be added to the model alongside the other missing heavy atoms in a later step.

### Standardizing non-standard residues

Identify non-standard residues.

In [None]:
from pprint import pprint

# Populates fixer.nonstandardResidues with (residue, replacement name) pairs
fixer.findNonstandardResidues()

`fixer.findNonstandardResidues()` stores the residues it finds in `fixer.nonstandardResidues`, a list of tuples defining which residues should be replaced with what:

```python
[(from_residue: openmm.app.Residue, to_residue_name: str), ...]
```

It can be modified now to mutate standard residues, tell PDBFixer about nonstandard residues it hasn't found, or change the default behavior.

In [None]:
# # To mutate residue at index 10 to glycine
# fixer.nonstandardResidues.append(([*fixer.topology.residues()][10], 'GLY'))

Visualize the changes we're about to make. Non-standard residues are displayed in licorice, and their modelled and expected residue names are displayed.

In [None]:
pprint(fixer.nonstandardResidues)

# One viewer selection string per residue that is slated for replacement.
# NOTE(review): assumes the viewer numbers residues as index+1 - confirm in utils.show_fixer
nsaa_selections = [
    f"{res.index+1}:{res.chain.id}" for res, _new_name in fixer.nonstandardResidues
]

# Cartoon-only view, with the non-standard residues overlaid in licorice
widget = show_fixer(fixer, "cartoononly")
widget.add_representation("licorice", selection=" or ".join(nsaa_selections), radius=0.25)
widget

Remove atoms in non-standard residues that are missing in the target residue. For instance, remove selenium from selenomethionine so that sulfur can replace it. Modelled small molecules are difficult to protonate unambiguously and are often non-biologically relevant artifacts of crystallisation, and so are removed as well. Remaining atoms of non-standard residues are displayed in licorice.

In [None]:
# Apply the replacements listed in fixer.nonstandardResidues
fixer.replaceNonstandardResidues()

# Same selections as in the previous cell, so the before/after views are comparable
widget = show_fixer(fixer, "cartoononly")
widget.add_representation(
    "licorice",
    selection=" or ".join(nsaa_selections),
    radius=0.25,
)
widget

### Removing non-biopolymer components

Doing this earlier would remove non-standard residues before they could be repaired. All atoms that aren't a part of a standard [DNA], [RNA] or [protein] residue are removed.

[protein]: https://github.com/openmm/pdbfixer/blob/6bf10e138f6475f6f1850dae78d7bf1b420a1612/pdbfixer/pdbfixer.py#L83
[RNA]: https://github.com/openmm/pdbfixer/blob/6bf10e138f6475f6f1850dae78d7bf1b420a1612/pdbfixer/pdbfixer.py#L84
[DNA]: https://github.com/openmm/pdbfixer/blob/6bf10e138f6475f6f1850dae78d7bf1b420a1612/pdbfixer/pdbfixer.py#L85

In [None]:
# Remove heterogens but keep the crystallographic waters (keepWater=True)
fixer.removeHeterogens(keepWater=True)

show_fixer(fixer)

### Check disulfide bonds

There isn't a simple way to adjust disulfide bonds with PDBFixer, but we can at least inspect which bonds have been autodetected.

It may be possible to add a bond to the topology and then use PDBFixer's `_createForceField()` private method to prepare a partial force field for energy minimization even without hydrogens, but as this uses a private method it is left as an exercise to the reader.

In [None]:
# Collect every bond in the topology whose two atoms are both sulfur
disulfides = [
    bond
    for bond in fixer.topology.bonds()
    if all(atom.element.symbol == 'S' for atom in (bond.atom1, bond.atom2))
]
print(disulfides)

# Cartoon view with the cysteine selection drawn in thick licorice for inspection
w = show_fixer(fixer, 'cartoononly')
w.add_licorice(radius=0.5, selection='CYS CYX')
w

### Restoring missing heavy atoms

Find atoms missing from standard residues. All residues that include missing atoms (including atoms removed from non-standard residues) are displayed in licorice. Earlier residues are colored red in the cartoon/ribbon, later residues are colored blue.

In [None]:
from itertools import chain

# Populates fixer.missingAtoms and fixer.missingTerminals, both keyed by residue
fixer.findMissingAtoms()
pprint(fixer.missingAtoms)
pprint(fixer.missingTerminals)

# One viewer selection per residue that lacks atoms, whether side-chain or terminal
missing_atom_selections = [
    f"{res.index+1}:{res.chain.id}"
    for res in chain.from_iterable((fixer.missingAtoms, fixer.missingTerminals))
]

widget = show_fixer(fixer, "cartoononly")
widget.add_representation(
    "licorice", selection=" or ".join(missing_atom_selections), radius=0.25
)
widget

Add missing atoms from non-standard or incomplete residues. Residues for which this step has added atoms are in licorice.

In [None]:
# Build the missing heavy atoms; per the text earlier in this notebook, the missing
# loops (and any caps placed in fixer.missingResidues) are added in this same step
fixer.addMissingAtoms()

widget = show_fixer(fixer, "cartoononly")
# Residues that were flagged as incomplete before the rebuild
widget.add_representation(
    "licorice",
    selection=" or ".join(missing_atom_selections),
    radius=0.25,
)
# Rebuilt loops, using the selection returned by check_missing_residues
widget.add_representation(
    "backbone", selection=missing_loops_selection, radius=1, color="green"
)
# Terminal caps, if any were requested
widget.add_representation(
    "ball+stick", selection="ACE NME", radius=0.25
)
widget

### OPTIONAL: All of the above, automatically

In [None]:
# # If you don't want to check what's happening at every step,
# # all of the above can be done at once like this:
# from pdbfixer import PDBFixer

# fixer = PDBFixer(filename=f"{pdbid}.pdb")
# fixer.findMissingResidues()

# # This section adds caps; leave it commented to rebuild terminal loops
# chains_to_cap = {chain for chain, resi in fixer.missingResidues}
# for chainidx in chains_to_cap:
#     chain = [*fixer.topology.chains()][chainidx]
#     last_resi = len([*chain.residues()])
#     fixer.missingResidues[chainidx, 0] = ['ACE']
#     fixer.missingResidues[chainidx, last_resi] = ['NME']

# fixer.findNonstandardResidues()
# fixer.replaceNonstandardResidues()
# fixer.removeHeterogens(keepWater=True)
# fixer.findMissingAtoms()
# fixer.addMissingAtoms()

# # This last step is not included above, but may be desirable
# # fixer.addMissingHydrogens(pH=7.4)

# show_fixer(fixer)

### Protonating

PDBFixer can protonate a protein, but it uses standard pKₐ values for all residues. As a result, the protonation state of all copies of each residue will be the same, regardless of their chemical environment. The program `pdb2pqr` can automatically protonate a protein after titrating residues with `propka`, so we'll use it for protonation, however it struggles to protonate crystallographic water. As a result, we'll use PDBFixer to provide a starting point for `pdb2pqr` to optimize. This will also give us an opportunity to run an energy minimization to tidy up any messes from the above steps!


In [None]:
import openmm.app

# Target pH; reused below for pdb2pqr and for ligand protonation during docking
pH = 7.4
# PDBFixer's hydrogens are only a starting point that pdb2pqr refines in the next cell
fixer.addMissingHydrogens(pH=pH)

# # Optional energy minimization
# simulation = openmm.app.Simulation(
#     fixer.topology, 
#     openmm.app.ForceField('amber14-all.xml', 'amber14/tip3p.xml').createSystem(fixer.topology, nonbondedMethod=openmm.app.CutoffNonPeriodic), 
#     openmm.LangevinMiddleIntegrator(300*openmm.unit.kelvin, 1/openmm.unit.picosecond, 2*openmm.unit.femtosecond)
# )
# simulation.context.setPositions(fixer.positions)
# simulation.minimizeEnergy()
# fixer.positions = simulation.context.getState(getPositions=True).getPositions()

In [None]:
from openmm.app import PDBFile

PDBFile.writeFile(fixer.topology, fixer.positions, f'{pdbid}_repaired.pdb')

!pdb2pqr --with-ph {pH} --pdb-output '{pdbid}_protonated.pdb' '{pdbid}_repaired.pdb' '{pdbid}_protonated.pqr'

In [None]:
import nglview

# Inspect pdb2pqr's PDB output directly from disk
w = nglview.show_structure_file(f'{pdbid}_protonated.pdb')
# Replace the default representations with all-atom licorice so hydrogens are visible
w.clear_representations()
w.add_licorice()
w

## Docking

In [None]:
from utils import DockTarget

# Build the docking target from the pdb2pqr-protonated structure
target = DockTarget.from_pdb(f'{pdbid}_protonated.pdb')
target.visualize()

Next, we position the search space for docking. The purple box displays the space that will be searched. The x, y, and z axes are depicted meeting at the origin; each arrow is 10 angstroms in length, and the target is specified in angstroms.

When docking is performed, crystallographic waters will be removed from the selected area so that they don't get in the way. Note that the resulting system will not include these waters. However, the `DockTarget` instance will remember them in case you want to perform a second docking at a different site. 

In [None]:
# Position and extent of the docking search box (per the text above, in angstroms).
# These values were presumably chosen for the default pdbid; adjust them for other structures.
target.center = [0, -20, -50]
target.size = [20, 25, 20]

target.visualize()

In [None]:
from openff.toolkit import Molecule

# Ligand to dock, specified as a SMILES string
ligand = Molecule.from_smiles(
    r"C1CCCCC1Nc(n2)c(C#N)ccc2Nc3cc(OCC#N)c(c4cn(C)nc4)cc3",
)
# # Alternatively:
# ligand = Molecule.from_file("ligand.sdf")
ligand

In [None]:
# This will re-protonate your ligand at the specified pH
# `result` is reused later: result.to_pdbfixer() for solvation and
# result.ligand for parametrization
result = target.dock(ligand, pH=pH)
print(result.scores)
result.visualize()

## Solvating

In [None]:
import numpy as np
import openmm.unit

# If you did docking
# (pose=0 selects the first docked pose)
solvator = result.to_pdbfixer(pose=0)
# Or if you skipped docking
# solvator = PDBFixer(f"{pdbid}_protonated.pdb")

# This works well if your ligand is neutral, but PDBFixer does not always
# correctly identify charged ligands, so you may need to do additional work 
# to neutralize your simulation box. Charged proteins are correctly identified
# and will be automatically neutralized.
# For example, you could delete ions to achieve the desired charge 
# (at the cost of some under-density).
solvator.addSolvent(
    padding=2.0 * openmm.unit.nanometer,
    positiveIon="Na+",
    negativeIon="Cl-",
    ionicStrength=0.1 * openmm.unit.molar,
    boxShape="dodecahedron",
)

# Place the corner of the box at the origin
# (subtract the per-axis minimum coordinate, keeping the original length unit)
solvator.positions -= (
    np.min(solvator.positions.value_in_unit(solvator.positions.unit), axis=0)
    * solvator.positions.unit
)

# Write the prepared system to disk
PDBFile.writeFile(solvator.topology, solvator.positions, f"{pdbid}_prepared.pdb")

show_fixer(solvator, 'withunitcell')

## Parametrizing

We're using OpenFF tools to load the prepared PDB and prepare the simulation. OpenFF tools follow a philosophy of requiring exact chemical identities for the entire system, and they use that to parametrize the chemistry directly, without the intermediate step of assigning atom types.

Usually, the easiest way to load a system into the Toolkit is with the `Topology.from_pdb()` method. Since a topology requires complete chemical information about all its components, and since PDB files do not include this information, `from_pdb` requires that any molecules it doesn't know how to read be identified ahead of time with the `unique_molecules` argument.

All the molecules in the PDB file can be provided in `unique_molecules` if desired, but the Toolkit knows how to read:

- Proteins composed of the 20 canonical amino acids, plus NME and ACE caps
- Water
- Common monatomic, monovalent ions: Li+, Na+, K+, Rb+, Cs+, F-, Cl-, Br-, and I-

The Toolkit has upcoming or experimental support for

- User-specified non-standard amino acids
- Nucleic acids and other non-protein biopolymers
- PDBx and mmCIF files

The Toolkit does not and is not planned to support:

- Radicals

In [None]:
from openff.toolkit import Molecule, Topology

# The docked ligand must be passed explicitly: the PDB file alone does not carry
# enough chemical information for the Toolkit to identify it
top = Topology.from_pdb(f"{pdbid}_prepared.pdb", unique_molecules=[result.ligand])

w = top.visualize()
w.add_line()
w

In [None]:
from openff.toolkit import ForceField

# Combine two OFFXML files into one force field: Sage 2.1.1 and a ff14SB port
sage_ff14sb = ForceField("openff-2.1.1.offxml", "ff14sb_off_impropers_0.0.3.offxml")

# Apply the combined force field to the topology
interchange = sage_ff14sb.create_interchange(top)

The SMIRNOFF OFFXML format is a general format for force fields, but at present OpenFF seems to be the only group publishing force fields in it. Force fields use [semantic versioning]; the mainline OpenFF force field v2.x.y is called "Sage" and supports druglike small molecules. While it can parametrize proteins, Sage is not trained on peptides and so, like many general force fields, its parameters are not appropriate for proteins. 

We will use a port of Amber ff14sb to the OFFXML format in this workshop, but this combination of force fields has not been rigorously validated. The upcoming v3.x.y "Rosemary" force field is planned to support proteins out of the box, and we recommend waiting for that release.

Sage currently supports and is recommended for:

- Small molecules and ions composed of the elements C, H, O, N, P, and S
- Water (TIP3P)
- Common monatomic, monovalent ions: Li+, Na+, K+, Rb+, Cs+, F-, Cl-, Br-, and I-
- Monatomic xenon

Sage does not yet support (though it may or may not be able to parametrize):

- Bonds involving elements other than CHONPS
- Most monatomic ions (these will fail to parametrize)
- Proteins and other biopolymers
- Lipid membranes

[semantic versioning]: https://semver.org/

OpenFF Topologies are parametrized by converting them into an `Interchange`, which represents the combination of a chemical system and a force field. An `Interchange` can export parameters ready for simulation in a number of engines.

## Simulating in OpenMM

In [None]:
import openmm

# Construct and configure a Langevin integrator at 293.15 K (20 °C, 68 °F) with an appropriate friction constant and time-step
# Later cells read the temperature and step size back from this integrator
integrator = openmm.LangevinMiddleIntegrator(
    293.15 * openmm.unit.kelvin,
    1 / openmm.unit.picosecond,
    2 * openmm.unit.femtosecond,
)

# Under the hood, this creates *OpenMM* `System` and `Topology` objects, then combines them together
simulation = interchange.to_openmm_simulation(integrator=integrator)

### Energy Minimization

Energy minimization to ensure forces are small enough to integrate:

In [None]:
def describe_state_of(simulation: openmm.app.Simulation, name: str = "State") -> None:
    """Print the potential energy and largest per-atom force magnitude of a simulation.

    Parameters
    ----------
    simulation
        Simulation whose current context state is queried.
    name
        Label placed at the start of the printed line.
    """
    state = simulation.context.getState(getEnergy=True, getForces=True)

    # Convert explicitly to the units named in the message rather than reading the
    # private `_value` attribute, which silently assumes what unit the Quantity holds.
    energy = state.getPotentialEnergy().value_in_unit(openmm.unit.kilojoule_per_mole)
    forces = state.getForces(asNumpy=True).value_in_unit(
        openmm.unit.kilojoule_per_mole / openmm.unit.nanometer
    )
    # Euclidean norm of each atom's force vector, then the largest of those
    max_force = np.linalg.norm(forces, axis=1).max()

    print(
        f"{name} has energy {round(energy, 2)} kJ/mol "
        f"with maximum force {round(max_force, 2)} kJ/(mol nm)"
    )


# Report energy and maximum force before and after minimization for comparison
describe_state_of(simulation, "Original state")

simulation.minimizeEnergy()

describe_state_of(simulation, "Minimized state")

### Equilibration

Create a force providing position restraints, restraining protein heavy atoms to their crystallographic coordinates. This prevents minimization and equilibration from disrupting the crystal structure too much, as there will be unrealistic forces at the start of the simulation. A similar technique could be applied to any subset of the atoms. 

In [None]:
from openmm import CustomExternalForce

# An external force has no equal-opposite pair force (and does not conserve momentum)
# The pair force is applied "externally"
restraint = CustomExternalForce("k*periodicdistance(x, y, z, x0, y0, z0)^2")
# Remember the force's index in the system so it can be removed again after equilibration
restraint.index = simulation.system.addForce(restraint)
# The energy is k * distance^2, so k must be an energy per length squared
restraint.addGlobalParameter(
    "k", 100.0 * openmm.unit.kilojoules_per_mole / openmm.unit.nanometer**2
)
# Per-atom reference position the atom is restrained to
restraint.addPerParticleParameter("x0")
restraint.addPerParticleParameter("y0")
restraint.addPerParticleParameter("z0")

# The first molecule in the topology is the protein
for atom in interchange.topology.molecule(0).atoms:
    # Restrain heavy atoms only
    if atom.symbol != "H":
        atom_index = interchange.topology.atom_index(atom)
        # The reference position is the atom's position stored in the Interchange
        restraint.addParticle(
            atom_index,
            interchange.positions[atom_index].to_openmm(),
        )

# The context must be reinitialized after forces have been changed
simulation.context.reinitialize(preserveState=True)

NVT equilibration:

In [None]:
# Assign initial velocities at the integrator's temperature
simulation.context.setVelocitiesToTemperature(simulation.integrator.getTemperature())

In [None]:
# 1 ps of NVT dynamics. The quotient of two durations is a float, but the number
# of steps must be an integer, so cast it (as the reporter intervals below do).
simulation.step(int(1 * openmm.unit.picosecond / simulation.integrator.getStepSize()))

NPT equilibration:

In [None]:
# Switch to NPT: add a Monte Carlo barostat at 1 bar and the integrator's temperature
simulation.system.addForce(
    openmm.MonteCarloBarostat(
        1.00 * openmm.unit.bar,
        simulation.integrator.getTemperature(),
        25,  # Attempt MC move every 25 steps
    )
)
# The context must be reinitialized after forces have been changed
simulation.context.reinitialize(preserveState=True)

In [None]:
# 1 ps of NPT dynamics. The quotient of two durations is a float, but the number
# of steps must be an integer, so cast it (as the reporter intervals below do).
simulation.step(int(1 * openmm.unit.picosecond / simulation.integrator.getStepSize()))

Remove position restraints:

In [None]:
# restraint.index was recorded when the restraint force was added to the system
simulation.system.removeForce(restraint.index)
# The context must be reinitialized after forces have been changed
simulation.context.reinitialize(preserveState=True)

### "Production"

This part should probably be run on a supercomputer!

In [None]:
# Add reporters to record a trajectory, checkpoints for continuation, and state information in CSV format
# You will probably want to change the report intervals for your own runs
# Intervals are given as a simulated time divided by the integrator's step size;
# int() truncates the quotient to a whole number of steps
simulation.reporters = [
    openmm.app.XTCReporter(  # Compressed trajectory format
        file="trajectory.xtc",
        reportInterval=int(
            0.01 * openmm.unit.picosecond / simulation.integrator.getStepSize()
        ),
    ),
    openmm.app.CheckpointReporter(  # Checkpoint for exact continuation in case of crash
        file="checkpoint.chk",
        reportInterval=int(
            1 * openmm.unit.picosecond / simulation.integrator.getStepSize()
        ),
    ),
    openmm.app.StateDataReporter(  # Human readable state information over time
        file="states.csv",
        reportInterval=int(
            0.01 * openmm.unit.picosecond / simulation.integrator.getStepSize()
        ),
        step=True,
        time=True,
        potentialEnergy=True,
        kineticEnergy=True,
        temperature=True,
        volume=True,
        density=True,
        speed=True,
        elapsedTime=True,
    ),
]

"production" run

In [None]:
# Run for one minute of wall-clock time rather than for a fixed number of steps
simulation.runForClockTime(1 * openmm.unit.minute)

## Visualization

In [None]:
# Load the XTC trajectory written by the reporter, together with the simulation's topology
w = nglview_show_openmm(
    simulation.topology,
    "trajectory.xtc",
)
# Draw everything except water as lines, and show the periodic box
w.add_line(sele="not water")
w.add_unitcell()
w

## Next steps

OpenFF Examples page: https://docs.openforcefield.org/en/latest/examples.html

"Things to make and do with OpenFF tools" workshop: https://docs.openforcefield.org/en/latest/workshops/2024/vignettes.html