In [None]:
import os
from sys import stdout
from openmm import *
from openmm.app import *
from openmm.unit import *

import nglview as ngl
import pytraj as pt
import numpy as np
import matplotlib.pyplot as plt

In [None]:
%%bash
# Make the adaptive_sampling checkout visible to Python processes started from this shell.
# NOTE(review): the export only lives inside this %%bash subprocess; it does not alter
# sys.path of the running notebook kernel. The absolute path is machine-specific and
# has to be adapted to the local checkout.
export PYTHONPATH='/home/andreas/adaptive_sampling'
echo $PYTHONPATH

# Enhanced sampling with OpenMM and Adaptive Sampling

In this short tutorial we will show how to use the adaptive sampling package together with OpenMM.

We will run test simulations on the minimal "protein" alanine-dipeptide. Data files for this notebook were taken from the OpenMM test systems.

*    alanine-dipeptide.pdb (All-atom PDB file.)
*    alanine-dipeptide.crd (AMBER input coordinates)
*    alanine-dipeptide.prmtop (AMBER parameters and topology)

A thorough discussion of the alanine dipeptide system can be found [here](https://doi.org/10.1073/pnas.100127697).

For this tutorial we will use the following software packages:
*    OpenMM for molecular simulation: https://openmm.org/
*    PyTraj for trajectory analysis: https://amber-md.github.io/pytraj/latest/index.html
*    Nglview for molecular visualisation: https://github.com/nglviewer/nglview

We begin with a minimal example of an unbiased OpenMM simulation of alanine dipeptide in vacuum.

In [None]:
def run(
    nsteps: int = 1000,
    T: float = 300.0,
    dcd_freq: int = 10,
    out_freq: int = 10,
    data_dir: str = "../data",
    dcd_file: str = "alanine-dipeptide-test.dcd",
) -> None:
    """Run an unbiased Langevin MD simulation of alanine dipeptide in vacuum.

    Args:
        nsteps: number of MD steps to integrate.
        T: simulation temperature in Kelvin.
        dcd_freq: interval (in steps) between frames written to the DCD trajectory.
        out_freq: interval (in steps) between state-data lines printed to stdout.
        data_dir: directory containing `alanine-dipeptide.prmtop` and `alanine-dipeptide.crd`.
        dcd_file: path of the DCD trajectory that is written.
    """
    # load system topology and coordinates from AMBER format
    prmtop = AmberPrmtopFile(f"{data_dir}/alanine-dipeptide.prmtop")
    crd = AmberInpcrdFile(f"{data_dir}/alanine-dipeptide.crd")

    # create the system and integrator (no cutoff: the molecule is simulated in vacuum)
    system = prmtop.createSystem(
        nonbondedMethod=NoCutoff,
    )
    platform = Platform.getPlatformByName('CPU')
    # friction 1/ps, time step 2 fs
    integrator = LangevinIntegrator(T * kelvin, 1.0 / picoseconds, 2.0 * femtosecond)

    # set up a simulation and run MD for nsteps
    simulation = Simulation(prmtop.topology, system, integrator, platform)
    simulation.context.setPositions(crd.positions)
    # pass the temperature with explicit units, consistent with the integrator above
    simulation.context.setVelocitiesToTemperature(T * kelvin)
    simulation.reporters.append(DCDReporter(dcd_file, dcd_freq))
    simulation.reporters.append(StateDataReporter(
        stdout,
        out_freq,
        step=True,
        time=True,
        potentialEnergy=True,
        kineticEnergy=True,
        totalEnergy=True,
        temperature=True,
        separator='\t')
    )
    simulation.step(nsteps)

In [None]:
# Short unbiased test run of 100 steps; the trajectory is written to 'alanine-dipeptide-test.dcd'.
run(nsteps=100)

We can visualize the trajectory using pytraj and nglview. 

Alanine dipeptide is a popular test system for enhanced sampling algorithms because it is a minimal example of the sampling challenges posed by many biological systems. The slow motions of the molecule are largely governed by the $\psi$ (backbone N-C-C-N) and $\phi$ (backbone C-N-C-C) dihedrals. Below, the atoms involved in these collective variables (CVs) are shown in ball-and-stick representation, while the other atoms are transparent.

In [None]:
# Load the DCD written by the unbiased test run, using the PDB file as topology.
traj = pt.iterload("alanine-dipeptide-test.dcd", top="../data/alanine-dipeptide.pdb")

view = ngl.show_pytraj(traj)
view.clear_representations()
# NOTE(review): labels follow `cv_atoms_psi` / `cv_atoms_phi` defined for the biased run;
# this assumes the '@' selection uses the same atom numbering as those lists — confirm.
view.add_ball_and_stick('@6,8,14,16', opacity=1.0) # CV atoms of psi torsion (backbone N-C-C-N)
view.add_ball_and_stick('@4,6,8,14', opacity=1.0)  # CV atoms of phi torsion (backbone C-N-C-C)
#view.add_ball_and_stick('@1,4,6,8', opacity=1.0)   # CV atoms of theta torsion

# all atoms as half-transparent licorice so the CV atoms stand out
view.add_licorice(opacity=0.5)
view

# Enhanced sampling of $\phi$ with WTM-eABF

In the following example we will apply the WTM-eABF sampling algorithm to enhance sampling along the $\phi$ angle. Note that this is a suboptimal CV and there is still an orthogonal free energy barrier along the $\psi$ torsion.

In [None]:
from adaptive_sampling.sampling_tools import *
from adaptive_sampling.interface.interface_openmm import AdaptiveSamplingOpenMM

# ------------------------------------------------------------------------------------
# define collective variables
# Each CV entry has the form [type, atom indices, minimum, maximum, bin width].
cv_atoms_psi    = [6, 8, 14, 16]  # backbone N-C-C-N torsion
cv_atoms_phi    = [4, 6, 8, 14]   # backbone C-N-C-C torsion
minimum         = -180.0          # minimum of the CV
maximum         = 180.0           # maximum of the CV
bin_width       = 5.0             # bin width along the CV

# Only the phi torsion is biased; `cv_atoms_psi` is still used below when listing the CV atoms
# for the MD interface.
collective_var_phi = [["torsion", cv_atoms_phi, minimum, maximum, bin_width]]
#collective_var_psi = [["torsion", cv_atoms_psi, minimum, maximum, bin_width]]

# Periodic boundaries of the CV, one [lower, upper] pair per CV.
# NOTE(review): given as +-pi while minimum/maximum above are +-180 — presumably the package
# expects radians here and degrees for the grid; confirm against the adaptive_sampling docs.
periodicity = [[-np.pi, np.pi]]

# ------------------------------------------------------------------------------------
# Setup OpenMM
# Load AMBER topology/parameters and starting coordinates, then build a vacuum system
# (no non-bonded cutoff) with constrained bonds to hydrogen.
prmtop = AmberPrmtopFile("../data/alanine-dipeptide.prmtop")
crd = AmberInpcrdFile("../data/alanine-dipeptide.crd")
system = prmtop.createSystem(
    nonbondedMethod=NoCutoff,
    constraints=HBonds,
)

# remove center of mass motion
#cmm_force = CMMotionRemover()
#cmm_force.setFrequency(0)
#system.addForce(cmm_force)

# Initialize the `AdaptiveSamplingOpenMM` interface to couple the OpenMM simulation to a bias potential.
# The OpenMM `simulation` object is set up internally, but can still be modified via `the_md.simulation` or `the_md.integrator`.
the_md = AdaptiveSamplingOpenMM(
    crd.positions,
    prmtop.topology,
    system,
    dt=2.0,                                       # timestep in fs
    equil_temp=300.0,                             # temperature of simulation
    langevin_damping=1.0,                         # langevin damping in 1/ps
    cv_atoms=np.unique(cv_atoms_phi+cv_atoms_psi) # specifying CV atoms significantly speeds up simulation of large systems
)                                                 # as the bias force will only be set for those
# Constraint tolerance for the integrator (the system was created with constraints=HBonds).
the_md.integrator.setConstraintTolerance(0.00001)

# Attach OpenMM reporters to the simulation for output:
# a DCD trajectory (every 1000 steps) and a tab-separated state log on stdout (every 10000 steps).
trajectory_reporter = DCDReporter('alanine-dipeptide.dcd', 1000)
the_md.simulation.reporters.append(trajectory_reporter)

state_reporter = StateDataReporter(
    stdout,
    10000,
    step=True,
    time=True,
    potentialEnergy=True,
    kineticEnergy=True,
    totalEnergy=True,
    temperature=True,
    speed=False,
    separator='\t',
)
the_md.simulation.reporters.append(state_reporter)

# --------------------------------------------------------------------------------------
# Setup the sampling algorithm
# NOTE(review): units of these parameters are not stated in this notebook; the coupling width and
# hill width are presumably in the CV's grid units (degrees) — confirm against the package docs.
eabf_ext_sigma    = 5.0     # thermal width of coupling between CV and extended variable
eabf_ext_mass     = 100.0   # mass of extended variable
abf_nfull         = 500     # number of samples per bin when abf force is fully applied
mtd_hill_height   = 0.1     # MtD hill height
mtd_hill_std      = 15.0    # MtD hill width
mtd_well_tempered = 1000.0  # Well-tempered temperature
mtd_frequency     = 100     # frequency of hill creation

# WTM-eABF bias acting on the phi torsion. The first four positional arguments are the
# extended-system parameters followed by the metadynamics hill parameters, then the MD
# interface and the CV definition.
the_bias = WTMeABF(
    eabf_ext_sigma, 
    eabf_ext_mass, 
    mtd_hill_height,
    mtd_hill_std,
    the_md, 
    collective_var_phi,     # collective variable
    output_freq=1000,       # frequency of writing outputs
    f_conf=0.0,             # confinement force of CV at boundaries
    nfull=abf_nfull,        
    equil_temp=300.0,       # equilibrium temperature of simulation
    well_tempered_temp=mtd_well_tempered,
    hill_drop_freq=mtd_frequency,
    force_from_grid=True,   # accumulate metadynamics force and bias on grid
    periodicity=periodicity,
    kinetics=True,          # calculate important metrics to get accurate kinetics
    verbose=False,          # print verbose output
)
the_md.set_sampling_algorithm(the_bias) # to take effect the sampling algorithm has to be set in the MD interface

In [None]:
# Warning: this may take a while!
# Set to False to skip the long production run (e.g. to re-run only the analysis cells
# on output files that already exist).
RUN_SIMULATION = True

if RUN_SIMULATION:
    # Start from clean output files: delete leftovers of a previous run, if there are any.
    for stale_output in ("CV_traj.dat", "wtmeabf.out"):
        if os.path.exists(stale_output):
            os.remove(stale_output)
    the_md.run(nsteps=500000) # 500000 * 2 fs = 1 ns

# Analysis of Results

### Visualising the trajectory

In [None]:
# Load the DCD written by the biased run, using the PDB file as topology.
traj = pt.iterload("alanine-dipeptide.dcd", top="../data/alanine-dipeptide.pdb")

view = ngl.show_pytraj(traj)
view.clear_representations()
# NOTE(review): labels follow `cv_atoms_psi` / `cv_atoms_phi` from the sampling setup;
# this assumes the '@' selection uses the same atom numbering as those lists — confirm.
view.add_ball_and_stick('@6,8,14,16', opacity=1.0) # CV atoms of psi torsion (backbone N-C-C-N)
view.add_ball_and_stick('@4,6,8,14', opacity=1.0)  # CV atoms of phi torsion (backbone C-N-C-C)
#view.add_ball_and_stick('@1,4,6,8', opacity=1.0)   # CV atoms of theta torsion

# all atoms as half-transparent licorice so the CV atoms stand out
view.add_licorice(opacity=0.5)
view

In [None]:
# Read the CV trajectory file (presumably written by the sampling run above): skip the header
# line and keep the first five columns. Columns as used in the cells below:
# 0 = time (divided by 1000 and plotted as ps), 1 = collective variable, 2 = extended variable.
cv_traj = np.loadtxt('CV_traj.dat', skiprows=1, usecols=[0,1,2,3,4])

In [None]:
fig, axs = plt.subplots(1, 1, sharey=False, figsize=(8,6))

# CV value against simulation time (first column scaled by 1/1000)
time_axis = cv_traj[:,0] / 1000
axs.scatter(time_axis, cv_traj[:,1], s=1)
#axs.set_yticks([-180,0,180])
axs.set_xlabel('time / ps', fontsize=30)
axs.set_ylabel('CV / Degree', fontsize=30)

# identical tick styling on both axes
for which_axis in ('y', 'x'):
    axs.tick_params(axis=which_axis, length=6, width=3, labelsize=25, pad=10, direction='in')

# thicker frame around the plot
for side in ('bottom', 'top', 'left', 'right'):
    axs.spines[side].set_linewidth(3)

fig.tight_layout()

### Use the MBAR estimator to compute ensemble properties

Now we will use the MBAR estimator to calculate the unbiased weights of simulation frames. From those we compute a periodic PMF along $\phi$.

In [None]:
from adaptive_sampling.processing_tools import mbar
# NOTE(review): repeats the value of `eabf_ext_sigma` from the sampling setup — presumably the
# two have to agree for the reweighting to be valid; confirm.
ext_sigma = 5.0    # thermal width of coupling between CV and extended variable 

# grid for free energy profile can be different than during sampling
# (re-assigns the notebook-level `minimum`, `maximum`, `bin_width` with the same values as before)
minimum   = -180.0    
maximum   = 180.0    
bin_width = 5.0  
# np.arange excludes the end point, so the grid runs from -180 to 175 in steps of 5
grid = np.arange(minimum, maximum, bin_width)

cv = cv_traj[:,1]  # trajectory of collective variable
la = cv_traj[:,2]  # trajectory of extended system

# run MBAR and compute free energy profile and probability density from statistical weights
# Step 1: split the frames into windows on `grid` using the extended-variable trajectory.
# NOTE(review): `indices` is used below to reorder `cv` so it matches the weights — presumably
# the frame order after sorting into windows; confirm.
traj_list, indices, meta_f = mbar.get_windows(grid, cv, la, ext_sigma, equil_temp=300.0)

# Step 2: Boltzmann factors of all frames in all windows (CV treated as periodic on [-180, 180]).
exp_U, frames_per_traj = mbar.build_boltzmann(
    traj_list, 
    meta_f, 
    equil_temp=300.0,
    periodicity=[-180.0,180.0],
)

# Step 3: iterate the MBAR equations (at most 10000 iterations, progress output every 100).
weights = mbar.run_mbar(
    exp_U,
    frames_per_traj,
    max_iter=10000,
    conv=1.0e-4,
    conv_errvec=1.0,
    outfreq=100,
    device='cpu',
)

# Step 4: PMF and probability density on `grid` from the frame weights.
pmf_mbar, rho_mbar = mbar.pmf_from_weights(grid, cv[indices], weights, equil_temp=300.0)

In [None]:
fig, axs = plt.subplots(1, 1, sharey=False, figsize=(8,6))

# PMF from MBAR, shifted so that its minimum is zero
#plt.plot(np.degrees(the_bias.grid[0]), the_bias.pmf[0], linewidth=5)
axs.plot(grid, pmf_mbar-pmf_mbar.min(), linewidth=5)

# The biased CV is the phi torsion (`collective_var_phi`), so the profile is labelled accordingly.
axs.set_xlabel(r'$\phi$', fontsize=30)
axs.set_ylabel(r'A($\phi$)', fontsize=30)
axs.set_xticks([-180,0,180])
axs.tick_params(axis='y',length=6,width=3,labelsize=25, pad=10, direction='in')
axs.tick_params(axis='x',length=6,width=3,labelsize=25, pad=10, direction='in')
axs.spines['bottom'].set_linewidth(3)
axs.spines['top'].set_linewidth(3)
axs.spines['left'].set_linewidth(3)
axs.spines['right'].set_linewidth(3)
fig.tight_layout()

### Sampling of the ($\phi$, $\psi$) plane (Ramachandran plot)

The above PMF is largely characterised by two distinct minima, which are referred to as $C7_\mathrm{eq}$ and $C_\mathrm{ax}$. Below, these are marked in the $(\phi,\psi)$ plane together with samples of the trajectory.

In [None]:
# Backbone phi and psi dihedrals of the trajectory currently held in `traj`.
# NOTE(review): the unpacking assumes exactly two datasets are returned, in the order phi, psi — confirm.
phi, psi = pt.multidihedral(traj, 'phi psi')

In [None]:
from matplotlib.patches import Rectangle

fig, axs = plt.subplots(1, 1, sharey=False, figsize=(8,6))

# sampled (phi, psi) pairs
axs.scatter(phi, psi, alpha=0.5, s=20)

# markup minima: red boxes given as (lower-left corner, width, height) ...
fontdict = {'family': 'serif', 'color':  'red', 'weight': 'normal', 'size': 30,}
minima_boxes = (
    ((-160, -5), 120, 180),
    ((20, -175), 70, 210),
)
for corner, box_width, box_height in minima_boxes:
    axs.add_patch(Rectangle(corner, box_width, box_height, edgecolor='red', facecolor='none', lw=4))

# ... and their labels given as (x, y, text)
minima_labels = (
    (-85, 145, r'$C_{eq}$'),
    (45, 5, r'$C_{ax}$'),
)
for x_text, y_text, label in minima_labels:
    axs.text(x_text, y_text, label, fontdict=fontdict)

# formatting
label_size = fontdict['size']
axs.set_xlabel(r'$\phi$', fontsize=label_size)
axs.set_ylabel(r'$\psi$', fontsize=label_size)
axs.set_xlim([-180,180])
axs.set_ylim([-180,180])
axs.set_xticks([-180,0,180])
for which_axis in ('y', 'x'):
    axs.tick_params(axis=which_axis, length=6, width=3, labelsize=25, pad=10, direction='in')
for side in ('bottom', 'top', 'left', 'right'):
    axs.spines[side].set_linewidth(3)
fig.tight_layout()