In [None]:
import os
from sys import stdout
from openmm import *
from openmm.app import *
from openmm.unit import *

import nglview as ngl
import pytraj as pt
import numpy as np
import matplotlib.pyplot as plt

# Compute Ramachandran plot using OPES-eABF

In this brief tutorial, we calculate the Ramachandran [$\phi$, $\psi$] plot of alanine dipeptide in vacuum using the OPES-eABF method.


Alanine dipeptide is a popular test system for enhanced sampling algorithms because it is a minimal example of the sampling challenges posed by many biological systems. The slow motions of the molecule are largely governed by the $\psi$ (backbone N-C-C-N) and $\phi$ (backbone C-N-C-C) dihedrals. Below, the atoms involved in these collective variables (CVs) are shown in ball-and-stick representation, while the other atoms are transparent.

In [None]:
# Load the starting structure; the Amber prmtop file is passed as topology.
traj = pt.load("../data/alanine-dipeptide.pdb", top='../data/alanine-dipeptide.prmtop')

view = ngl.show_pytraj(traj)
view.clear_representations()
# Highlight the atoms of both torsions. The index lists are the same as
# `cv_atoms_psi` and `cv_atoms_phi` in the simulation setup below.
view.add_ball_and_stick('@6,8,14,16', opacity=1.0) # CV atoms of psi torsion (N-C-C-N)
view.add_ball_and_stick('@4,6,8,14', opacity=1.0)  # CV atoms of phi torsion (C-N-C-C)

view.add_licorice(opacity=0.5)  # semi-transparent licorice representation
view

# Importance sampling of $\phi/\psi$ with OPES-eABF

Below, the OPES-eABF sampling algorithm is applied to enhance sampling in the $\phi/\psi$ plane. 

In [None]:
from adaptive_sampling.sampling_tools import *
from adaptive_sampling.interface.interface_openmm import AdaptiveSamplingOpenMM

# ------------------------------------------------------------------------------------
# define collective variables
cv_atoms_psi    = [6, 8, 14, 16]  # backbone N-C-C-N torsion
cv_atoms_phi    = [4, 6, 8, 14]   # backbone C-N-C-C torsion
minimum         = -180.0          # minimum of the CV
maximum         = 180.0           # maximum of the CV
bin_width       = 5.0             # bin width along the CV
periodicity     = [               # define periodicity of CVs
    [-np.pi, np.pi],
    [-np.pi, np.pi],
]

# The order matters: the analysis cells treat the first CV as phi and the second
# as psi (columns read from `CV_traj.dat`, x/y axes of the 2D PMF), so phi is
# listed first. NOTE: assumes the output columns follow the order of this list.
collective_var = [
    ["torsion", cv_atoms_phi, minimum, maximum, bin_width],
    ["torsion", cv_atoms_psi, minimum, maximum, bin_width],
]

# ------------------------------------------------------------------------------------
# Setup OpenMM
# Amber topology and starting coordinates of alanine dipeptide
prmtop = AmberPrmtopFile("../data/alanine-dipeptide.prmtop")
crd = AmberInpcrdFile("../data/alanine-dipeptide.crd")

# System options: no nonbonded cutoff, `HBonds` constraints
system_options = dict(nonbondedMethod=NoCutoff, constraints=HBonds)
system = prmtop.createSystem(**system_options)

# Optional (disabled): remove center of mass motion
#cmm_force = CMMotionRemover()
#cmm_force.setFrequency(10)
#system.addForce(cmm_force)

# Couple the OpenMM simulation to a bias potential through the `AdaptiveSamplingOpenMM` interface.
# The OpenMM `simulation` object is set up internally, but can still be modified
# through `the_md.simulation` or `the_md.integrator`.

# Specifying the CV atoms significantly speeds up the simulation of large systems,
# as the bias force will only be calculated for those atoms.
bias_atoms = np.unique(cv_atoms_phi + cv_atoms_psi)

the_md = AdaptiveSamplingOpenMM(
    crd.positions,
    prmtop.topology,
    system,
    dt=2.0,                # timestep in fs
    equil_temp=300.0,      # temperature of simulation
    langevin_damping=1.0,  # langevin damping in 1/ps
    cv_atoms=bias_atoms,   # atoms that receive the bias force
)
the_md.integrator.setConstraintTolerance(1.0e-5)

# Attach OpenMM reporters for output:
#  - DCD trajectory, written every 100 steps
#  - tab-separated state data on stdout, written every 1000 steps
trajectory_reporter = DCDReporter('alanine-dipeptide.dcd', 100)
state_reporter = StateDataReporter(
    stdout,
    1000,
    step=True,
    time=True,
    potentialEnergy=True,
    kineticEnergy=True,
    totalEnergy=True,
    temperature=True,
    speed=False,
    separator='\t',
)
for reporter in (trajectory_reporter, state_reporter):
    the_md.simulation.reporters.append(reporter)

In [None]:
# --------------------------------------------------------------------------------------
# Setup the sampling algorithm

# eABF parameters
eabf_ext_sigma    = 5.0           # thermal width of coupling between CV and extended variable in Degree
eabf_ext_mass     = 100.0         # mass of extended variable
abf_nfull         = 100           # number of samples per bin when abf force is fully applied
eabf_settings = dict(
    ext_sigma=eabf_ext_sigma,
    ext_mass=eabf_ext_mass,
    nfull=abf_nfull,
)

# OPES parameters
opes_kernel_std   = None          # kernel standard deviation
opes_frequency    = 500           # frequency of kernel creation in MD steps
opes_barrier      = 50.0          # Barrier parameter in kJ/mol
opes_adaptive     = True          # Adaptive kernels
opes_gamma        = None          # Bias factor for Well-Tempered distribution, if None, calculated from barrier factor
opes_settings = dict(
    kernel_std=opes_kernel_std,
    update_freq=opes_frequency,
    bias_factor=opes_gamma,
    adaptive_std=opes_adaptive,
    energy_barr=opes_barrier,
)

# general parameters
general_settings = dict(
    output_freq=1000,             # frequency of writing outputs
    f_conf=0.0,                   # confinement force of CV at boundaries
    equil_temp=300.0,             # equilibrium temperature of simulation
    periodicity=periodicity,      # periodicity of CVs
    verbose=True,                 # print verbose output
)

the_bias = OPESeABF(
    the_md,
    collective_var,               # collective variable
    **eabf_settings,
    **opes_settings,
    **general_settings,
)

# To take effect, the sampling algorithm has to be set in the MD interface
the_md.set_sampling_algorithm(the_bias)

In [None]:
# Warning: this may take a while!

# Delete output files left over from a previous run (if present).
for old_output in ("CV_traj.dat", "eabf.out"):
    if os.path.isfile(old_output):
        os.remove(old_output)

the_md.run(nsteps=50000) # 50000 * 2 fs = 100 ps

# Analysis of Results

### Visualize the trajectory with NGLView

In [None]:
path = '.'  # directory that contains the simulation output files
traj = pt.iterload(f"{path}/alanine-dipeptide.dcd", top="../data/alanine-dipeptide.pdb")

view = ngl.show_pytraj(traj)
view.clear_representations()
# Highlight the atoms of both torsions. The index lists are the same as
# `cv_atoms_psi` and `cv_atoms_phi` in the simulation setup.
view.add_ball_and_stick('@6,8,14,16', opacity=1.0) # CV atoms of psi torsion (N-C-C-N)
view.add_ball_and_stick('@4,6,8,14', opacity=1.0)  # CV atoms of phi torsion (C-N-C-C)

view.add_licorice(opacity=0.5)  # semi-transparent licorice representation
view

# Trajectory of CVs

In [None]:
# Read the CV trajectory written during the simulation (the first row is skipped).
cv_traj = np.loadtxt(f'{path}/CV_traj.dat', skiprows=1)

# Columns 1-4: Phi, Psi, extended-system Phi, extended-system Psi trajectories
cv_phi, cv_psi, la_phi, la_psi = (cv_traj[:, column] for column in range(1, 5))

In [None]:
fig, axs = plt.subplots(1, 1, sharey=False, figsize=(8,6))

# First column of the CV trajectory divided by 1000, plotted as time in ps
time_ps = cv_traj[:,0]/1000

# Label both series so they can be told apart in the figure.
# The extended-system trajectories (`la_phi`, `la_psi`) can be overlaid the same way.
axs.scatter(time_ps, cv_phi, s=1, label=r'$\phi$')
axs.scatter(time_ps, cv_psi, s=1, label=r'$\psi$')

axs.set_yticks([-180,0,180])
axs.set_xlabel('time / ps', fontsize=30)
axs.set_ylabel('CV / Degree', fontsize=30)
for axis_name in ('y', 'x'):
    axs.tick_params(axis=axis_name, length=6, width=3, labelsize=25, pad=10, direction='in')
for side in ('bottom', 'top', 'left', 'right'):
    axs.spines[side].set_linewidth(3)
# markerscale enlarges the legend markers, which would otherwise be drawn with s=1
axs.legend(fontsize=25, markerscale=10)
fig.tight_layout()

# Use the MBAR estimator to compute ensemble properties

Now we will use the MBAR estimator to calculate the unbiased weights of simulation frames. From those, we compute periodic PMFs in 1D and 2D.

WARNING: For long simulations, this can become expensive, and it is recommended to perform the computation on an HPC cluster.

In [None]:
from adaptive_sampling import units
from adaptive_sampling.processing_tools import mbar
import os

# Coupling width of the extended system for both CVs (same value as `eabf_ext_sigma` in the setup)
ext_sigma = np.asarray([5.0,5.0])

# create grid for PMF
minimum   = -180.0 
maximum   = 180.0
bin_width = 5.0
grid_1d = np.arange(minimum, maximum, bin_width)
xx, yy = np.meshgrid(grid_1d, grid_1d)
grid = np.vstack([xx.flatten(),yy.flatten()])  

# trajectories of CVs and extended system
cv = np.vstack([cv_phi,cv_psi])
la = np.vstack([la_phi,la_psi])  # extended-system trajectories, not the physical CVs

# Results are cached on disk, because MBAR can be expensive for long trajectories.
results_file = f'{path}/results.npz'

if not os.path.isfile(results_file):

    # run MBAR to obtain unbiased weights of frames
    traj_list, indices, meta_f = mbar.get_windows(
        grid.T,
        cv.T,
        la.T,
        ext_sigma,
        dx=np.asarray([bin_width,bin_width]),
        equil_temp=300.0,
        progress_bar=True,
    )
    exp_U, frames_per_traj = mbar.build_boltzmann(
        traj_list,
        meta_f,
        equil_temp=300.0,
        periodicity=[-180.,180.],
        progress_bar=True,
    )
    weights = mbar.run_mbar(
        exp_U,
        frames_per_traj,
        max_iter=10000,
        conv=1.0e-4,
        conv_errvec=1.0,
        outfreq=10,
        device='cpu',
    )

    # 2D (phi,psi) PMF (Ramachandran plot), shifted so that its minimum is zero.
    # It has to exist before it can be stored in the cache file.
    pmf, rho = mbar.pmf_from_weights(
        grid.T,
        cv.T[indices],
        weights,
        dx=np.asarray([bin_width,bin_width]),
        equil_temp=300.0,
    )
    pmf -= pmf.min()

    # The keys written here must match the keys read in the `else` branch.
    np.savez(results_file, W=weights, idx=indices, grid=grid, pmf=pmf)
else:
    data = np.load(results_file)
    weights = data['W']
    indices = data['idx']
    pmf = data['pmf']
    # Cache files without a stored grid fall back to the grid built above.
    if 'grid' in data.files:
        grid = data['grid']

# Compute PMFs from frame weights

In [None]:
def _pmf_1d(cv_values):
    """Return `(pmf, rho)` along one CV on `grid_1d`, with the PMF minimum shifted to zero."""
    pmf_1d, rho_1d = mbar.pmf_from_weights(grid_1d, cv_values[indices], weights, equil_temp=300.0)
    pmf_1d -= pmf_1d.min()
    return pmf_1d, rho_1d

# 1D PMFs along phi and psi
pmf_psi, rho_psi = _pmf_1d(cv_psi)
pmf_phi, rho_phi = _pmf_1d(cv_phi)

# The 2D PMF computation below is switched off; set the flag to True to run it.
COMPUTE_PMF_2D = False
if COMPUTE_PMF_2D:
    # 2D (phi,psi) PMF (Ramachandran plot)
    bin_widths_2d = np.asarray([bin_width,bin_width])
    pmf_2d, rho = mbar.pmf_from_weights(
        grid.T,
        cv.T[indices],
        weights,
        dx=bin_widths_2d,
        equil_temp=300.0,
    )
    pmf_2d -= pmf_2d.min()

In [None]:
fig, axs = plt.subplots(1, 1, sharey=False, figsize=(8,6))

# One curve per torsion, scaled by the kJ -> kcal conversion factor
for curve, symbol in ((pmf_psi, r"$\psi$"), (pmf_phi, r"$\phi$")):
    axs.plot(grid_1d, curve*units.kJ_to_kcal, linewidth=5, label=symbol)

axs.set_xlabel(r'CV', fontsize=30)
axs.set_ylabel(r'PMF / kcal mol$^{-1}$', fontsize=30)
axs.set_xticks([-180,0,180])
for axis_name in ('y', 'x'):
    axs.tick_params(axis=axis_name, length=6, width=3, labelsize=25, pad=10, direction='in')
for side in ('bottom', 'top', 'left', 'right'):
    axs.spines[side].set_linewidth(3)
axs.legend(fontsize=30)
fig.tight_layout()

In [None]:
fig, axs = plt.subplots(1, 1, sharey=False, figsize=(8,6))

# Bring the flat PMF into the shape of the (phi, psi) mesh once, for both contour calls
pmf_on_mesh = pmf.reshape(xx.shape)

# Filled contours in steps of 1, labelled black contour lines in steps of 2
axs.contourf(xx, yy, pmf_on_mesh, levels=np.linspace(0,20,21), zorder=-1)
CS = axs.contour(xx, yy, pmf_on_mesh, levels=np.linspace(0,20,11), colors='black', zorder=100)
axs.clabel(CS, CS.levels, inline=True, fontsize=15, fmt="%2d")  # `inline` is a boolean flag

axs.set_xlabel(r'$\phi$', fontsize=25)
axs.set_ylabel(r'$\psi$', fontsize=25)
# The grid ends at 175 (last bin below 180), so the axes are cut there
axs.set_xlim([-180,175])
axs.set_ylim([-180,175])
axs.set_xticks([-150,0,150])
axs.set_yticks([-150,0,150])
for axis_name in ('y', 'x'):
    axs.tick_params(axis=axis_name, length=6, width=3, labelsize=20, pad=10, direction='in')
for side in ('bottom', 'top', 'left', 'right'):
    axs.spines[side].set_linewidth(3)
fig.tight_layout()
fig.savefig('PMF_2d_Ramachandran.png')