# Imports and settings

In [268]:
from openmm.app import *
from openmm import *
from openmm.unit import *

# Setup a simulation system + Restraints

In [269]:
# Load an already solvated PDB file and set up the system + state
pdb = PDBFile("../villin.pdb")
omm_forcefield = ForceField("amber/ff14SB.xml", "amber14/tip3p.xml")
system = omm_forcefield.createSystem(pdb.topology,
                                     nonbondedMethod=PME,
                                     nonbondedCutoff=10.0 * angstrom,
                                     constraints=HBonds,
                                     rigidWater=True,
                                     hydrogenMass=4.0 * amu)

## Use some atoms to set up Harmonic restraints

In [270]:
# Define three atom indices - these will be used to measure useful distances 
d1_atom1_ind = 83
d1_atom2_ind = 151
d2_atom2_ind = 254

Ultimately we want to track the value of these distances (lets call them D1 and D2), \
as a function of the free energy boost applied. \
To track D1 and D2, we could use a whole `Reporter` object to save the whole trajectory and measure it afterwards.\

A lightweight way of doing this without saving the whole trajectory is to use a `customCVForce` object set to 0 (i.e. no bias). This will compute the value of a CV at each step of the simulation.\
We then pass D1 and D2 as bondForces to the `customCVForce` object (using `r` to track their distance)

In [271]:
# define a distance measurer
dist_measurer = CustomCVForce("0")
# Create our two distances as separate Bond Forces and add them
# Note: If we added both D1 and D2 to the same BondForce, then doing 
#     dist_measurer.getCollectiveVariableValues will return only the first distance (D1)
# Hence we define them as separate BondForces
D1 = CustomBondForce("r")
D1.addBond(d1_atom2_ind, d1_atom1_ind)
D2 = CustomBondForce("r")
D2.addBond(d2_atom2_ind, d1_atom1_ind)

# Add each BondForce as CVs into the dist_measurer
dist_measurer.addCollectiveVariable("D1", D1)
dist_measurer.addCollectiveVariable("D2", D2)
system.addForce(dist_measurer)

5

# Time to setup Gaussian Accelerated MD 

For Langevin-GAMD, we're adding a boost potential to the simulation. 
This boosted potential botential takes the form of a Gaussian \
$V'(r) = \frac{k}{2}(E-V_{0})^2$ 
where $V_0(r)$ is the initial potential energy\
and $k$ and E are hyper parameters defining the potential.\
For more details, check out the [GAMD paper](https://pubs.acs.org/doi/10.1021/acs.jctc.5b00436) 

OpenMM gives us the tools to do this oursleves. \
Let's first define a `customIntegrator` to run this. \
Note that in our `customIntegrator` we are adding $V'(r)$ as a Force `fprime`.\
To derive this yourself, remember that a Force is the derivative of Energy with respect to position:\
$f_{prime} = - \frac{\delta V(r)}{\delta r}$  and that 
$V(r) = V_0 + V'(r)$

In [272]:
kB = BOLTZMANN_CONSTANT_kB * AVOGADRO_CONSTANT_NA

'''
Useful links in the writing of this integrator: 
Writing custom integrators: https://github.com/choderalab/openmm-tutorials/blob/master/02%20-%20Integrators%20and%20sampling.ipynb
AMD integrator: https://github.com/openmm/openmm/blob/master/wrappers/python/openmm/amd.py
GAMD from Miao lab: https://github.com/MiaoLab20/gamd-openmm
'''

import numpy as np
class GAMDLangevinIntegrator(CustomIntegrator):
    """GAMDLangevinIntegrator implements the aMD integration algorithm.
    The system is integrated based on a modified potential.  Whenever the energy V(r) is less than a
    cutoff value E, the following effective potential is used:
    V*(r) = V(r) + ((k/2)*(E-V(r))**2)
    """

    def __init__(self, dt, E, k, temperature=300*kelvin, collision_rate=1.0/picosecond):
        """Create an GAMDLangevinIntegrator.
        Parameters
        ----------
        dt : time
            The integration time step to use
        k : energy
            The k parameter to use
        E : energy
            The energy cutoff to use
        temperature : temperature
            temperature of the system
        collision rate : collision rate
            Collision for the thermostat to update on
        """
        gamma = collision_rate*picoseconds # add this because SWIG 
        CustomIntegrator.__init__(self, dt)
        # GaMD boost parameters
        self.addGlobalVariable("k", k)
        self.addGlobalVariable("E", E)
        self.addPerDofVariable("oldx", 0)
        self.addGlobalVariable("deltaV", 0)
        self.addGlobalVariable("V0", 0)
        self.addPerDofVariable("sigma", 0)
        self.addUpdateContextState();
        
        # Comment out old AMD method - verlet I believe
        # self.addComputePerDof("v", "v+dt*fprime/m; fprime=f*((1-modify) + modify*(alpha/(alpha+E-energy))^2); modify=step(E-energy)")
        # self.addComputePerDof("oldx", "x")
        # self.addComputePerDof("x", "x+dt*v")
        # self.addConstrainPositions()
        # self.addComputePerDof("v", "(x-oldx)/dt")

        # new variables for Langevin kernel
        self.addGlobalVariable('kT', kB * temperature)
        self.addComputePerDof("sigma", "sqrt(kT/m)")
        self.addGlobalVariable("a", np.exp(-1 * gamma)) #vscale?
        self.addGlobalVariable("b", np.sqrt(1 - np.exp(-2 * gamma))) # noise-scale?

        #before position restraints
        #self.addPerDofVariable("oldx", 0)

        # Compute steps for Langevin - attempt at VRORV splitting?
        # Original Langevin line in AMDIntegrator - replace this with GAMD
        # self.addComputePerDof("v", "v + (dt / 2) * fprime / m; fprime=f*((1-modify) + modify*(alpha/(alpha+E-energy))^2); modify=step(E-energy)")

        # The below dof_string lines are comments on how to modify fprime (thinking out loud basically)
        # dof_string = "modify=step(E-energy);" # this is defining the scaling
        # dof_string+= "v + (dt / 2) * fprime/m;" # standard v updating for VRORV (I think)
        # dof_string+="fprime=f*((1-modify) + modify*(1-(k*E-energy)));" # the right hand addition is the secret amd sauce

        # Put the above together into a single new -line for computing v
        self.addComputePerDof("v", "v + (dt/2) * fprime/m; fprime=f*((1-modify) + modify*(1-k*(E-energy))); modify=step(E-energy)")

        # now langevin like normal
        self.addComputePerDof("x", "x + (dt / 2)*v")
        self.addComputePerDof("oldx", "x")
        self.addConstrainPositions()
        self.addComputePerDof("v", "v + (x - oldx)/(dt / 2)")
        self.addComputePerDof("v", "(a * v) + (b * sigma * gaussian)")
        self.addComputePerDof("x", "x + (dt / 2)*v")
        self.addComputePerDof("oldx", "x")
        self.addConstrainPositions()
        self.addComputePerDof("v", "v + (x - oldx) / (dt / 2)")
        # Update v with the same as the first line above
        # comment out old line from AMDIntegrator attempt
        #self.addComputePerDof("v", "v + (dt / 2) * fprime / m; fprime=f*((1-modify) + modify*(alpha/(alpha+E-energy))^2); modify=step(E-energy)")
        self.addComputePerDof("v", "v + (dt/2) * fprime/m; fprime=f*((1-modify) + modify*(1-k*(E-energy))); modify=step(E-energy)")
        # compute the V and deltaV values
        self.addComputeGlobal("V0", "energy")
        #self.addComputeGlobal("deltaV","modify*(E-energy)^2/(alpha+E-energy); modify=step(E-energy)")
        self.addComputeGlobal("deltaV","modify*((k / 2 * (E-energy)^2)); modify=step(E-energy)")
        self.addConstrainVelocities()

    def getk(self):
        """Get the value of k for the integrator."""
        return self.getGlobalVariable(0)*kilojoules_per_mole

    def setk(self, k):
        """Set the value of k for the integrator."""
        self.setGlobalVariable(0, k)

    def getE(self):
        """Get the energy threshold E for the integrator."""
        return self.getGlobalVariable(1)*kilojoules_per_mole

    def setE(self, E):
        """Set the energy threshold E for the integrator."""
        self.setGlobalVariable(1, E)

    def getEffectiveEnergy(self, energy):
        """Given the actual potential energy of the system, return the value of the effective potential."""
        k = self.getk()
        E = self.getE()
        if not is_quantity(energy):
            energy = energy*kilojoules_per_mole # Assume kJ/mole
        if (energy > E):
            return energy*kilojoules_per_mole # Assume kJ/mole
        boost = (0.5 * k * (E-energy)*(E-energy)) / kilojoules_per_mole / kilojoules_per_mole
        return energy + boost #energy+(E-energy)*(E-energy)/(alpha+E-energy)

Before running a Gaussian AMD simulation, you first need to select a strength for the boost potential. This is usually done by running a short simulation, recording the potential energy, and computing some simple statistics of it. The following are the minimum, maximum, mean, and standard deviation of the energies observed during an earlier simulation.

In [273]:
#potential energy statistics:
Vmin = -118900.7367725638
Vmax = -100000.8288112064
Vavg = -115420.040535957
Vstd = 904.4602237453533

We will do the same for the maximum standard deviation of the boost potential. \
We will use the following formula: \
`(MOLAR_GAS_CONSTANT_R * sim_temp ).value_in_unit(kilojoule_per_mole) * 10`

In [274]:
sigma_0 = 2.4943387854459722

We will then define two constants $k_0$ and $k$ which are built upon all these above \
computed values. $k_0$ and $Vmax$ are what goes into the integrator. 

Note that the formula gives you an upper bound for $k_0$. If you wish to use a lower value to obtain 
finer-grained sampling between states, choose a lower value of $k_0$

TODO: Add proper latex equations\
`k_0 = min(1, sigma_0/Vstd * ((Vmax-Vmin)/(Vmax-Vavg)))`\
`k = k_0 * (1 / (Vmax - Vmin) )`

In [275]:
# define k_0 and k
k_0 = 0.0009512432921778509516
k = 2.8354183998193767e-05

And finally we can define the integrator:

In [276]:
# defintion of gamd_integrator
gamd_integrator = GAMDLangevinIntegrator(dt=0.002 * picosecond,
                                            k=k_0,
                                            E=Vmax,
                                            temperature=300*kelvin,
                                            collision_rate=1.0/picosecond)

# We have the integrator, now lets run the simulation

In [277]:
gamd_simulation =  Simulation(pdb.topology, system, gamd_integrator)

With GAMD, you want to track the boost potential applied at each frame.\
From this set of boosts you can reconstruct the effective FES explored.

You'll want to project the FES on some features you care about \
(We did so with the `dist_measurer` above). \
We can directly extract both the total energy (just for comparison sake), and the \
boost potential (which is what we really care about) from the integrator directly.

In [278]:
gamd_simulation.context.setPositions(pdb.positions)
for i in range(10):
    gamd_simulation.step(10)
    n_steps = str(gamd_simulation.context.getStepCount())
    deltaV_val = str(gamd_integrator.getGlobalVariable(2))
    v0 = str(gamd_integrator.getGlobalVariable(3))
    d1, d2 = dist_measurer.getCollectiveVariableValues(gamd_simulation.context)
    print("%s %s %s %s %s" % (n_steps, v0, deltaV_val, str(d1), str(d2)))

10 -101068.73186972504 542.4069833688806 0.4650728702545166 0.4211561977863312
20 -100625.98186972504 185.88071407656207 0.4602440893650055 0.4246651828289032
30 -100644.32561972504 196.94928399679773 0.45890623331069946 0.42319074273109436
40 -100625.29436972504 185.47210143282425 0.45469164848327637 0.42254123091697693
50 -100547.45061972504 142.11356072815153 0.45749568939208984 0.4185861051082611
60 -100500.85686972504 118.91875713550188 0.4628557562828064 0.4187903106212616
70 -100487.51311972504 112.65651177247855 0.46062958240509033 0.42376694083213806
80 -100486.01311972504 111.9631491453524 0.4494323134422302 0.41940203309059143
90 -100552.88811972504 144.9549618133298 0.43990200757980347 0.41844552755355835
100 -100564.45061972504 151.09051097196647 0.43350622057914734 0.406695693731308
