# Imports and settings

In [232]:
from openmm.app import *
from openmm import *
from openmm.unit import *

# Setup a simulation system + state + CVForces

In [233]:
# Load an already solvated PDB file and set up the system + state
pdb = PDBFile("../villin.pdb")
omm_forcefield = ForceField("amber/ff14SB.xml", "amber14/tip3p.xml")
system = omm_forcefield.createSystem(pdb.topology,
                                     nonbondedMethod=PME,
                                     nonbondedCutoff=10.0 * angstrom,
                                     constraints=HBonds,
                                     rigidWater=True,
                                     hydrogenMass=4.0 * amu)

## Quickly identify a couple of atoms that we care about and set `CVForce`

In [234]:
# Define three atom indices - these will be used to measure useful distances 
d1_atom1_ind = 83
d1_atom2_ind = 151
d2_atom2_ind = 254

Ultimately we want to track the value of these distances (lets call them D1 and D2), \
as a function of the free energy boost applied. \
To track D1 and D2, we could use a whole `Reporter` object to save the whole trajectory and measure it afterwards.\
A lightweight way of doing this without the whole trajectory is to use a `customCVForce` object set to 0 (i.e. no bias). This will compute the value of a CV at each step of the simulation.\
We then pass D1 and D2 as bondForces to the `customCVForce` object (using `r` to track their distance)

In [235]:
# define a distance measurer
dist_measurer = CustomCVForce("0")
# Create our two distances as separate Bond Forces and add them
# Note: If we added both D1 and D2 to the same BondForce, then doing 
#     dist_measurer.getCollectiveVariableValues will return only the first distance (D1)
# Hence we define them as separate BondForces
D1 = CustomBondForce("r")
D1.addBond(d1_atom2_ind, d1_atom1_ind)
D2 = CustomBondForce("r")
D2.addBond(d2_atom2_ind, d1_atom1_ind)

# Add each BondForce as CVs into the dist_measurer
dist_measurer.addCollectiveVariable("D1", D1)
dist_measurer.addCollectiveVariable("D2", D2)
system.addForce(dist_measurer)

5

# Time to setup Gaussian Accelerated MD 

For Langevin-GAMD, we're effectively adding a boost potential to the simulation. 
This boosted potential botential takes the form of a Gaussian \
TODO: Add GAMD boost equation - the one below is for AMD\
$V'(r) = V_{0}(r) + \frac{(E-V(r))^2}{(\alpha+E-V_0(r))}$ 
where $V_0(r)$ is the initial potential energy\
and $\alpha$ and E are hyper parameters defining the potential.\

OpenMM gives us the tools to do this oursleves. \
Let's first define a `customIntegrator` to run this:

In [236]:
kB = BOLTZMANN_CONSTANT_kB * AVOGADRO_CONSTANT_NA

'''
Useful links in the writing of this integrator: 
Writing custom integrators: https://github.com/choderalab/openmm-tutorials/blob/master/02%20-%20Integrators%20and%20sampling.ipynb
AMD integrator: https://github.com/openmm/openmm/blob/master/wrappers/python/openmm/amd.py
GAMD from Miao lab: https://github.com/MiaoLab20/gamd-openmm
'''

import numpy as np
class GAMDLangevinIntegrator(CustomIntegrator):
    """GAMDLangevinIntegrator implements the aMD integration algorithm.
    The system is integrated based on a modified potential.  Whenever the energy V(r) is less than a
    cutoff value E, the following effective potential is used:
    V*(r) = V(r) + ((k/2)*(E-V(r))**2)
    """

    def __init__(self, dt, E, k, temperature=300*kelvin, collision_rate=1.0/picosecond):
        """Create an GAMDLangevinIntegrator.
        Parameters
        ----------
        dt : time
            The integration time step to use
        k : energy
            The k parameter to use
        E : energy
            The energy cutoff to use
        temperature : temperature
            temperature of the system
        collision rate : collision rate
            Collision for the thermostat to update on
        """
        gamma = collision_rate*picoseconds # add this because SWIG 
        CustomIntegrator.__init__(self, dt)
        # GaMD boost parameters
        self.addGlobalVariable("k", k)
        self.addGlobalVariable("E", E)
        self.addPerDofVariable("oldx", 0)
        self.addGlobalVariable("deltaV", 0)
        self.addGlobalVariable("V0", 0)
        self.addPerDofVariable("sigma", 0)
        self.addUpdateContextState();
        
        # Comment out old AMD method - verlet I believe
        # self.addComputePerDof("v", "v+dt*fprime/m; fprime=f*((1-modify) + modify*(alpha/(alpha+E-energy))^2); modify=step(E-energy)")
        # self.addComputePerDof("oldx", "x")
        # self.addComputePerDof("x", "x+dt*v")
        # self.addConstrainPositions()
        # self.addComputePerDof("v", "(x-oldx)/dt")

        # new variables for Langevin kernel
        self.addGlobalVariable('kT', kB * temperature)
        self.addComputePerDof("sigma", "sqrt(kT/m)")
        self.addGlobalVariable("a", np.exp(-1 * gamma)) #vscale?
        self.addGlobalVariable("b", np.sqrt(1 - np.exp(-2 * gamma))) # noise-scale?

        #before position restraints
        #self.addPerDofVariable("oldx", 0)

        # Compute steps for Langevin - attempt at VRORV splitting?
        # Original Langevin line in AMDIntegrator - replace this with GAMD
        # self.addComputePerDof("v", "v + (dt / 2) * fprime / m; fprime=f*((1-modify) + modify*(alpha/(alpha+E-energy))^2); modify=step(E-energy)")

        # The below dof_string lines are comments on how to modify fprime (thinking out loud basically)
        # dof_string = "modify=step(E-energy);" # this is defining the scaling
        # dof_string+= "v + (dt / 2) * fprime/m;" # standard v updating for VRORV (I think)
        # dof_string+="fprime=f*((1-modify) + modify*(1-(k*E-energy)));" # the right hand addition is the secret amd sauce

        # Put the above together into a single new -line for computing v
        self.addComputePerDof("v", "v + (dt/2) * fprime/m; fprime=f*((1-modify) + modify*(1-k*(E-energy))); modify=step(E-energy)")

        # now langevin like normal
        self.addComputePerDof("x", "x + (dt / 2)*v")
        self.addComputePerDof("oldx", "x")
        self.addConstrainPositions()
        self.addComputePerDof("v", "v + (x - oldx)/(dt / 2)")
        self.addComputePerDof("v", "(a * v) + (b * sigma * gaussian)")
        self.addComputePerDof("x", "x + (dt / 2)*v")
        self.addComputePerDof("oldx", "x")
        self.addConstrainPositions()
        self.addComputePerDof("v", "v + (x - oldx) / (dt / 2)")
        # Update v with the same as the first line above
        # comment out old line from AMDIntegrator attempt
        #self.addComputePerDof("v", "v + (dt / 2) * fprime / m; fprime=f*((1-modify) + modify*(alpha/(alpha+E-energy))^2); modify=step(E-energy)")
        self.addComputePerDof("v", "v + (dt/2) * fprime/m; fprime=f*((1-modify) + modify*(1-k*(E-energy))); modify=step(E-energy)")
        # compute the V and deltaV values
        self.addComputeGlobal("V0", "energy")
        #self.addComputeGlobal("deltaV","modify*(E-energy)^2/(alpha+E-energy); modify=step(E-energy)")
        self.addComputeGlobal("deltaV","modify*((k / 2 * (E-energy)^2)); modify=step(E-energy)")
        self.addConstrainVelocities()

    def getk(self):
        """Get the value of k for the integrator."""
        return self.getGlobalVariable(0)*kilojoules_per_mole

    def setk(self, k):
        """Set the value of k for the integrator."""
        self.setGlobalVariable(0, k)

    def getE(self):
        """Get the energy threshold E for the integrator."""
        return self.getGlobalVariable(1)*kilojoules_per_mole

    def setE(self, E):
        """Set the energy threshold E for the integrator."""
        self.setGlobalVariable(1, E)

    def getEffectiveEnergy(self, energy):
        """Given the actual potential energy of the system, return the value of the effective potential."""
        k = self.getk()
        E = self.getE()
        if not is_quantity(energy):
            energy = energy*kilojoules_per_mole # Assume kJ/mole
        if (energy > E):
            return energy*kilojoules_per_mole # Assume kJ/mole
        boost = (0.5 * k * (E-energy)*(E-energy)) / kilojoules_per_mole / kilojoules_per_mole
        return energy + boost #energy+(E-energy)*(E-energy)/(alpha+E-energy)

Before running a Gaussian AMD simulation, you first need to select a strength for the boost potential. This is usually done by running a short simulation, recording the potential energy, and computing some simple statistics of it. The following are the minimum, maximum, mean, and standard deviation of the energies observed during an earlier simulation.

In [237]:
#potential energy statistics:
Vmin = -118900.7367725638
Vmax = -100000.8288112064
Vavg = -115420.040535957
Vstd = 904.4602237453533

We will do the same for the maximum standard deviation of the boost potential. \
We will use the following formula: \
`(MOLAR_GAS_CONSTANT_R * sim_temp ).value_in_unit(kilojoule_per_mole) * 10`

As above, we drew upon a sample trajectory, took the last frame, and computed the value:

In [238]:
sigma_0 = 2.4943387854459722

We will then define two constants $k_0$ and $k$ which are built upon all these above \
computed values. $k_0$ and $Vmax$ are what goes into the integrator and boost. 

Note that the formula gives you an upper value of $k_0$. If you wish to use a lower value to obtain 
finer-grained sampling between states, choose a lower value of $k_0$

TODO: Add proper latex equations\
`k_0 = min(1, sigma_0/Vstd * ((Vmax-Vmin)/(Vmax-Vavg)))`\
`k = k_0 * (1 / (Vmax - Vmin) )`

In [239]:
# define k_0 and k
k_0 = 0.0009512432921778509516
k = 2.8354183998193767e-05

In [240]:
# defintion of gamd_integrator
gamd_integrator = GAMDLangevinIntegrator(dt=0.002 * picosecond,
                                            k=k_0,
                                            E=Vmax,
                                            temperature=300*kelvin,
                                            collision_rate=1.0/picosecond)

# We have the integrator, now lets run the simulation

In [241]:
gamd_simulation =  Simulation(pdb.topology, system, gamd_integrator)

With GAMD, you want to track the boost potential applied at each frame.\
From this set of boosts you can reconstruct the effective FES explored.

You'll want to project the FES on some features you care about \
(We did so with the `dist_measurer` above)

In [242]:
gamd_simulation.context.setPositions(pdb.positions)

In [243]:
for i in range(10):
    gamd_simulation.step(10)
    n_steps = str(gamd_simulation.context.getStepCount())
    deltaV_val = str(gamd_integrator.getGlobalVariable(2))
    v0 = str(gamd_integrator.getGlobalVariable(3))
    d1, d2 = dist_measurer.getCollectiveVariableValues(gamd_simulation.context)
    print("%s %s %s %s %s" % (n_steps, v0, deltaV_val, str(d1), str(d2)))

10 -100771.98186972504 282.841272495641 0.4661301076412201 0.43017590045928955
20 -100615.32561972504 179.59774314456334 0.45594143867492676 0.4320942461490631
30 -100592.20061972504 166.33469501063638 0.4590217173099518 0.43697890639305115
40 -100538.45061972504 137.4723531230814 0.4406464397907257 0.431123286485672
50 -100587.13811972504 163.49903368890494 0.43526703119277954 0.4224798083305359
60 -100540.57561972504 138.56124527262602 0.4393979609012604 0.4221265912055969
70 -100561.76311972504 149.65306603729772 0.4421063959598541 0.4138357639312744
80 -100478.51311972504 108.52844047083259 0.43589234352111816 0.4157506823539734
90 -100536.91936972504 136.69037308156953 0.44068843126296997 0.4205670952796936
100 -100469.57561972504 104.50528628403931 0.44310981035232544 0.43028485774993896
