In [1]:
import openmm
import openmm.app
import openmm.unit
from sys import stdout

import pandas as pd
import numpy as np
import plotly.express as px

## Section 1: Confirming the correct protein (GRO hash alternative)

Running a simulation and saving a checkpoint as a miner would do

In [69]:
pdb = openmm.app.PDBFile('ala_ala_ala.pdb')
forcefield = openmm.app.ForceField('amber99sb.xml', 'tip3p.xml')
system = forcefield.createSystem(pdb.topology, nonbondedMethod=openmm.app.NoCutoff, nonbondedCutoff=1.0*openmm.unit.nanometers, constraints=openmm.app.HBonds)

# Set the random seed for the integrator
integrator = openmm.LangevinIntegrator(700*openmm.unit.kelvin, 1.0/openmm.unit.picosecond, 0.002*openmm.unit.picoseconds)
integrator.setRandomNumberSeed(42)

# Use CUDA platform with deterministic properties
platform = openmm.Platform.getPlatformByName('CUDA')
properties = {'DeterministicForces': 'true', 'Precision': 'double'}
simulation = openmm.app.Simulation(pdb.topology, system, integrator, platform, properties)

# Set initial positions
simulation.context.setPositions(pdb.positions)
simulation.context.setVelocitiesToTemperature(700*openmm.unit.kelvin)

# Minimize energy
simulation.minimizeEnergy()

# Run the simulation
simulation.reporters.append(openmm.app.StateDataReporter('output.log', 10, step=True, potentialEnergy=True, temperature=True))


# Writes a checkpoint every 1000 steps
simulation.reporters.append(openmm.app.CheckpointReporter("chkpt_new_test.chk", reportInterval=1000))
# Report infomation to the screen as the simulation runs
simulation.reporters.append(openmm.app.StateDataReporter(stdout, 100, step=True,
        potentialEnergy=True, temperature=True, volume=True, kineticEnergy=True, totalEnergy=True))
simulation.step(5000)


#"Step","Potential Energy (kJ/mole)","Kinetic Energy (kJ/mole)","Total Energy (kJ/mole)","Temperature (K)","Box Volume (nm^3)"
100,-38.360044151782404,118.19944520689371,79.83940105511131,359.90191493105726,8.0
200,-14.35001459424518,148.53221234277646,134.18219774853128,452.26123995372114,8.0
300,-17.643788541044728,148.5696682354412,130.92587969439649,452.3752882681773,8.0
400,17.812211009473003,149.3204597705531,167.1326707800261,454.66135070043174,8.0
500,-0.8342087653820727,169.31260592729757,168.4783971619155,515.5348317290403,8.0
600,-122.77901666656282,287.06204281839445,164.28302615183162,874.0665299530167,8.0
700,-150.1486095735171,301.72160180175587,151.57299222823877,918.7029776192605,8.0
800,-92.38115951570569,259.1850604904545,166.80390097474879,789.1847497994523,8.0
900,-104.59276316044668,293.15783983988064,188.56507667943396,892.6274378931834,8.0
1000,-78.20206565193769,230.53701795630982,152.33495230437214,701.9551917501925,8.0
1100,-104.5062111810904,206.205911941503

In [47]:
T = 941.9969260132757

# Calculate the number of degrees of freedom
n_atoms = system.getNumParticles()
n_constraints = system.getNumConstraints()
print(f"n_constraints: {n_constraints}")
degrees_of_freedom = 3 * n_atoms - n_constraints

k_B = openmm.unit.BOLTZMANN_CONSTANT_kB * openmm.unit.AVOGADRO_CONSTANT_NA  # in kJ/mol*K

KE = k_B * degrees_of_freedom * T / 2
KE

n_constraints: 17


Quantity(value=321120.1273378584, unit=joule/(kelvin*mole))

 Validators will then load the checkpoint

In [72]:
pdb = openmm.app.PDBFile('ala_ala_ala.pdb')

## this is going to get overwritten by load_checkpoint anyways, just initializing a simulation object

forcefield = openmm.app.ForceField('amber99sb.xml', 'tip3p.xml')
system = forcefield.createSystem(pdb.topology, nonbondedMethod=openmm.app.NoCutoff, nonbondedCutoff=1.0*openmm.unit.nanometers, constraints=openmm.app.HBonds)

# forcefield = openmm.app.ForceField('amber14-all.xml', 'tip3p.xml')
# system = forcefield.createSystem(pdb.topology, nonbondedMethod=openmm.app.NoCutoff, nonbondedCutoff=3.0*openmm.unit.nanometers, constraints=openmm.app.HBonds)


# Set the random seed for the integrator
integrator = openmm.LangevinIntegrator(4734737*openmm.unit.kelvin, 1.0/openmm.unit.picosecond, 0.002*openmm.unit.picoseconds)
integrator.setRandomNumberSeed(42)

# Use CUDA platform with deterministic properties
platform = openmm.Platform.getPlatformByName('CUDA')
properties = {'DeterministicForces': 'true', 'Precision': 'double'}
simulation = openmm.app.Simulation(pdb.topology, system, integrator, platform, properties)

simulation.loadCheckpoint("chkpt_new_test.cpt")



In [73]:
#so this doesn't seem to overright the original variable. 
simulation.integrator.getTemperature()

Quantity(value=4734737.0, unit=kelvin)

In [74]:
state = simulation.context.getState(getPositions=True, getVelocities=True, getEnergy=True)
state

<openmm.openmm.State; proxy of <Swig Object of type 'OpenMM::State *' at 0x7fdc634cdef0> >

In [75]:
system.getNumParticles()

33

In [76]:
kinetic_energy = state.getKineticEnergy()

# Calculate the number of degrees of freedom
n_atoms = system.getNumParticles()
n_constraints = system.getNumConstraints()
degrees_of_freedom = 3 * n_atoms - n_constraints

# Boltzmann constant
k_B = openmm.unit.BOLTZMANN_CONSTANT_kB * openmm.unit.AVOGADRO_CONSTANT_NA  # in kJ/mol*K

# Calculate temperature
temperature = (2 * kinetic_energy) / (degrees_of_freedom * k_B)
temperature = temperature.in_units_of(openmm.unit.kelvin)

# Print the calculated temperature
print(f"Calculated kinetic energy: {kinetic_energy}")
print(f"Calculated Temperature: {temperature}")

Calculated kinetic energy: 98.54999553930345 kJ/mol
Calculated Temperature: 289.0936598283457 K


In [25]:
#so this doesn't seem to overright the original variable. 
simulation.integrator.getTemperature()

Quantity(value=30.0, unit=kelvin)

In [45]:
# Load the PDB file
pdb = openmm.app.PDBFile('ala_ala_ala.pdb')

# Create a forcefield and system (parameters will be overwritten by checkpoint)
forcefield = openmm.app.ForceField('amber14-all.xml', 'tip3p.xml')
system = forcefield.createSystem(pdb.topology, nonbondedMethod=openmm.app.NoCutoff, nonbondedCutoff=3.0*openmm.unit.nanometers, constraints=openmm.app.HBonds)

# Set the random seed for the integrator
integrator = openmm.LangevinIntegrator(30*openmm.unit.kelvin, 1.0/openmm.unit.picosecond, 0.002*openmm.unit.picoseconds)
integrator.setRandomNumberSeed(42)

# Use CUDA platform with deterministic properties
platform = openmm.Platform.getPlatformByName('CUDA')
properties = {'DeterministicForces': 'true', 'Precision': 'double'}
simulation = openmm.app.Simulation(pdb.topology, system, integrator, platform, properties)

# # Load the checkpoint file
# checkpoint_file = 'checkpoint.chk'
# with open(checkpoint_file, 'rb') as f:
#     simulation.context.loadCheckpoint(f.read())

simulation.loadCheckpoint("chkpt_new_test.cpt")

# Extract integrator parameters from the context
loaded_integrator = simulation.integrator
temperature = loaded_integrator.getTemperature()
friction = loaded_integrator.getFriction()
timestep = loaded_integrator.getStepSize()

# Print the extracted parameters
print(f"Temperature: {temperature}")
print(f"Friction Coefficient: {friction}")
print(f"Timestep: {timestep}")

# Extract state information (positions, velocities, energies)
state = simulation.context.getState(getPositions=True, getVelocities=True, getEnergy=True)
kinetic_energy = state.getKineticEnergy()

# Calculate the number of degrees of freedom
n_atoms = system.getNumParticles()
n_constraints = system.getNumConstraints()
print(f"n_constraints: {n_constraints}")
degrees_of_freedom = 3 * n_atoms - n_constraints

# Boltzmann constant
k_B = openmm.unit.BOLTZMANN_CONSTANT_kB * openmm.unit.AVOGADRO_CONSTANT_NA  # in kJ/mol*K

# Calculate temperature from kinetic energy
calculated_temperature = (2 * kinetic_energy) / (degrees_of_freedom * k_B)
calculated_temperature = calculated_temperature.in_units_of(openmm.unit.kelvin)

# Print the calculated temperature
print(f"Calculated Temperature from Kinetic Energy: {calculated_temperature}")


Temperature: 30.0 K
Friction Coefficient: 1.0 /ps
Timestep: 0.002 ps
n_constraints: 17
Calculated Temperature from Kinetic Energy: 289.1286973050088 K


## Section 2: controlling water, box, forcefield as well as temperature, pressure and other parameters in the .mdp file

Forcefields can be created using the forcefield object, this includes water as well. All built in forcefields are found [here](https://ommprotocol.readthedocs.io/en/latest/forcefields.html)

[PBC Documentation](http://docs.openmm.org/latest/userguide/theory/05_other_features.html)

In [None]:
forcefield = openmm.app.ForceField('amber99sb.xml', 'tip3p.xml')


In [None]:


# Load your system and topology
# Replace 'your_pdb_file.pdb' and 'your_forcefield.xml' with your actual files
pdb = openmm.app.PDBFile('input.pdb')
forcefield = openmm.app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')

# Create the system
system = forcefield.createSystem(pdb.topology, 
                                 nonbondedMethod=openmm.app.PME,
                                 nonbondedCutoff=1.2*openmm.unit.nanometers, 
                                 constraints=openmm.app.HBonds, 
                                 ewaldErrorTolerance=0.0005)

# Integrator and thermostat settings
integrator = openmm.LangevinIntegrator(300*openmm.unit.kelvin,    # ref_t
                                   1/openmm.unit.picosecond,  # tau_t
                                   0.002*openmm.unit.picoseconds)  # dt

# Barostat settings (pressure coupling)
system.addForce(openmm.MonteCarloBarostat(1*openmm.unit.bar, 300*openmm.unit.kelvin, 25))  # ref_p, ref_t, frequency

# Set periodic boundary conditions
pdb.topology.setPeriodicBoxVectors(system.getDefaultPeriodicBoxVectors())

# Create the simulation object
simulation = openmm.app.Simulation(pdb.topology, system, integrator)

# Set the initial positions
simulation.context.setPositions(pdb.positions)

# Minimize energy
simulation.minimizeEnergy()

# Set velocities to match the given temperature
simulation.context.setVelocitiesToTemperature(300*openmm.unit.kelvin)

# Set up reporters to output data
simulation.reporters.append(openmm.app.DCDReporter('trajectory.dcd', 2500))  # nstxout-compressed
simulation.reporters.append(openmm.app.StateDataReporter(stdout, 50,  # nstlog
                                                  step=True, 
                                                  potentialEnergy=True, 
                                                  kineticEnergy=True, 
                                                  totalEnergy=True, 
                                                  temperature=True, 
                                                  volume=True))


# Run the simulation
simulation.step(50000)  # nsteps

simulation.saveCheckpoint("nvt.cpt")

print("Simulation complete.")

## Section 3: OpenMM allows for re-running to recalculate energy as well

In [16]:


# Load your system and topology
# Replace 'your_pdb_file.pdb' and 'your_forcefield.xml' with your actual files
pdb = openmm.app.PDBFile('input.pdb')
forcefield = openmm.app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')

# Create the system
system = forcefield.createSystem(pdb.topology, 
                                 nonbondedMethod=openmm.app.PME,
                                 nonbondedCutoff=1.2*openmm.unit.nanometers, 
                                 constraints=openmm.app.HBonds, 
                                 ewaldErrorTolerance=0.0005)

# Integrator and thermostat settings
integrator = openmm.LangevinIntegrator(300*openmm.unit.kelvin,    # ref_t
                                   1/openmm.unit.picosecond,  # tau_t
                                   0.002*openmm.unit.picoseconds)  # dt

# Barostat settings (pressure coupling)
system.addForce(openmm.MonteCarloBarostat(1*openmm.unit.bar, 300*openmm.unit.kelvin, 25))  # ref_p, ref_t, frequency

# Create the simulation object
simulation = openmm.app.Simulation(pdb.topology, system, integrator)

# Set the initial positions
simulation.context.setPositions(pdb.positions)

# Minimize energy
simulation.minimizeEnergy()

# Set velocities to match the given temperature
simulation.context.setVelocitiesToTemperature(300*openmm.unit.kelvin)

# Set up reporters to output data
simulation.reporters.append(openmm.app.DCDReporter('trajectory.dcd', 2500))  # nstxout-compressed
simulation.reporters.append(openmm.app.PDBReporter("output.pdb", 2500, enforcePeriodicBox=False))
simulation.reporters.append(openmm.app.StateDataReporter(stdout, 50,  # nstlog
                                                  step=True, 
                                                  potentialEnergy=True, 
                                                  kineticEnergy=True, 
                                                  totalEnergy=True, 
                                                  temperature=True, 
                                                  volume=True))

# Run the simulation
simulation.step(10000)  # nsteps



print("Simulation complete.")

Simulation complete.


In [20]:
pdb = openmm.app.PDBFile("output.pdb")
simulation.context.setPositions(pdb.positions)
state = simulation.context.getState(getEnergy=True)
print(state.getPotentialEnergy())
print(state.getKineticEnergy())


-141163.43034704903 kJ/mol
17735.555667047884 kJ/mol


AttributeError: 'State' object has no attribute 'getTotalEnergy'

## Section 4: Visualizations TODO

In [2]:
import nglview as nv
import MDAnalysis as mda



In [3]:
traj = mda.Universe("input.pdb","trajectory.dcd",all_coordinates=True)

  return self.ts.dt


In [8]:
view = nv.show_mdanalysis(traj)
view.add_cartoon()
view.add_surface(opacity=0.3)
view.center('resid 50')

view.


Image(value=b'', width='99%')

In [None]:
traj.atoms.positions

## Section 5: RMSD calculations

In [23]:
import MDAnalysis as mda
from MDAnalysis.analysis import rms


traj = mda.Universe("input.pdb","trajectory.dcd",all_coordinates=True)
ref = mda.Universe("input.pdb")

We can calculate RMSD to a reference structure like so:

In [24]:
traj.trajectory[-1] ## Setting the traj Universe to the last timestep

< Timestep 4 with unit cell dimensions [49.204155 46.01949  38.901535 90.       90.       90.      ] >

In [25]:
rms.rmsd(traj.select_atoms('backbone').positions,  # coordinates to align
         ref.select_atoms('backbone').positions,  # reference coordinates
         center=True,  # subtract the center of geometry
         superposition=True)  # superimpose coordinates

0.6662518708464211

We can also leverage our trajcetories and use the initial trajectory as reference and calculate the RMSD for every step

In [26]:
R = rms.RMSD(traj,  # universe to align
             traj,  # reference universe or atomgroup
             select='backbone',  # group to superimpose and calculate RMSD
             ref_frame=0)  # frame index of the reference
R.run()

  self.times[i] = ts.time


<MDAnalysis.analysis.rms.RMSD at 0x7f32f48b5d30>

Every row is a timestep. The first two columns of each row are the frame index of the time step, and the time (which is guessed in trajectory formats without timesteps). The third column is RMSD of select.

In [27]:
R.rmsd



array([[0.00000000e+00, 0.00000000e+00, 1.61779200e-07],
       [1.00000000e+00, 1.00000000e+00, 5.79340171e-01],
       [2.00000000e+00, 6.00000001e+00, 6.92759471e-01],
       [3.00000000e+00, 1.10000000e+01, 7.09995346e-01],
       [4.00000000e+00, 1.60000000e+01, 6.66251871e-01]])

## Bonus Section

Special custom modelling capabilities: http://docs.openmm.org/latest/userguide/application/03_model_building_editing.html

Alows for the

In [4]:
import os

class ExitFileReporter(object):
    def __init__(self, filename, reportInterval, checkpointFile):
        self.filename = filename
        self.reportInterval = reportInterval
        self.checkpointFile = checkpointFile

    def describeNextReport(self, simulation):
        steps_left = simulation.currentStep % self.reportInterval
        return (steps_left, False, False, False, False)

    def report(self, simulation, state):
        if os.path.exists(self.filename):
            with open(self.checkpointFile, 'wb') as f:
                f.write(simulation.context.createCheckpoint())
            raise Exception("Simulation stopped")

    def finalize(self):
        pass


In [5]:


# Load your system and topology
# Replace 'your_pdb_file.pdb' and 'your_forcefield.xml' with your actual files
pdb = openmm.app.PDBFile('input.pdb')
forcefield = openmm.app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')

# Create the system
system = forcefield.createSystem(pdb.topology, 
                                 nonbondedMethod=openmm.app.PME,
                                 nonbondedCutoff=1.2*openmm.unit.nanometers, 
                                 constraints=openmm.app.HBonds, 
                                 ewaldErrorTolerance=0.0005)

# Integrator and thermostat settings
integrator = openmm.LangevinIntegrator(300*openmm.unit.kelvin,    # ref_t
                                   1/openmm.unit.picosecond,  # tau_t
                                   0.002*openmm.unit.picoseconds)  # dt

# Barostat settings (pressure coupling)
system.addForce(openmm.MonteCarloBarostat(1*openmm.unit.bar, 300*openmm.unit.kelvin, 25))  # ref_p, ref_t, frequency

# Set periodic boundary conditions
pdb.topology.setPeriodicBoxVectors(system.getDefaultPeriodicBoxVectors())

# Create the simulation object
simulation = openmm.app.Simulation(pdb.topology, system, integrator)

# Set the initial positions
simulation.context.setPositions(pdb.positions)

# Minimize energy
simulation.minimizeEnergy()

# Set velocities to match the given temperature
simulation.context.setVelocitiesToTemperature(300*openmm.unit.kelvin)

# Set up reporters to output data
simulation.reporters.append(openmm.app.DCDReporter('trajectory.dcd', 2500))  # nstxout-compressed
simulation.reporters.append(openmm.app.StateDataReporter(stdout, 50,  # nstlog
                                                  step=True, 
                                                  potentialEnergy=True, 
                                                  kineticEnergy=True, 
                                                  totalEnergy=True, 
                                                  temperature=True, 
                                                  volume=True))

# Set up your OpenMM simulation here
simulation.reporters.append(ExitFileReporter('EXIT', 1000, 'checkpoint.chk'))


In [7]:
try:
    simulation.step(1000000)
except Exception as e:
    print(e)
    # Do any necessary cleanup or post-processing here


#"Step","Potential Energy (kJ/mole)","Kinetic Energy (kJ/mole)","Total Energy (kJ/mole)","Temperature (K)","Box Volume (nm^3)"
50,-153436.12803506054,11396.10341467557,-142040.02462038497,152.10700210781079,88.66340517450686
100,-152801.17848151422,12882.364860076457,-139918.81362143776,171.94455224071174,88.1238593460973
150,-152249.60278567165,13956.742839333048,-138292.8599463386,186.2845777396836,87.68104833288933
200,-151459.22609328473,14743.70281428448,-136715.52327900025,196.78835418089693,87.71156272526898
250,-150507.0525417797,15239.41676282906,-135267.63577895064,203.4047878751579,87.88815944154462
300,-149951.1570339672,16037.778679936659,-133913.37835403054,214.06074924980626,87.88815944154462
350,-149240.7365187119,16656.360497905174,-132584.37602080673,222.31713500429348,87.90838306376074
400,-148704.77795776544,17227.974120896484,-131476.80383686896,229.94662303133484,87.55021617054182
450,-148176.34729370294,17737.26670897688,-130439.08058472606,236.74429465204665,87.

## Modeller

You can add water and a box via the Modeller class

In [None]:
import openmm as mm
import openmm.app as app
import openmm.unit as unit
from sys import stdout

# Load your system and topology
# Replace 'your_pdb_file.pdb' and 'your_forcefield.xml' with your actual files
pdb = app.PDBFile('input.pdb')
forcefield = app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')


modeller = app.Modeller(pdb.topology, pdb.positions)
modeller.deleteWater()
residues=modeller.addHydrogens(forcefield)

modeller.addSolvent(forcefield, padding=1.0*unit.nanometer)


# Create the system
system = forcefield.createSystem(modeller.topology, 
                                 nonbondedMethod=app.PME,
                                 nonbondedCutoff=1.2*unit.nanometers, 
                                 constraints=app.HBonds, 
                                 ewaldErrorTolerance=0.0005)

“Extra particles” are particles that do not represent ordinary atoms. This includes the virtual interaction sites used in many water models, Drude particles, etc. If you are using a force field that involves extra particles, you must add them to the Topology. To do this, call:

`modeller.addExtraParticles(forcefield)`

This looks at the force field to determine what extra particles are needed, then modifies each residue to include them. This function can remove extra particles as well as adding them.



I believe that we want to have as many steps of setting up the simulation on the validator side as possible for now. A possible pipeline for this would be:

In [None]:

print('Loading...')
pdb = app.PDBFile('input.pdb')
forcefield = app.ForceField('amber99sb.xml', 'tip3p.xml')
modeller = app.Modeller(pdb.topology, pdb.positions)
print('Adding hydrogens...')
modeller.addHydrogens(forcefield)
print('Adding solvent...')
modeller.addSolvent(forcefield, model='tip3p', padding=1*unit.nanometer)
print('Minimizing...')
system = forcefield.createSystem(modeller.topology, nonbondedMethod=app.PME)
integrator = VerletIntegrator(0.001*unit.picoseconds)
simulation = Simulation(modeller.topology, system, integrator)
simulation.context.setPositions(modeller.positions)
simulation.minimizeEnergy()
print('Saving...')
positions = simulation.context.getState(getPositions=True).getPositions()
PDBFile.writeFile(simulation.topology, positions, open('output.pdb', 'w'))
print('Done')