# MEMM project: NaCl umbrella sampling

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pyemma
import mdshare

# Set the default matplotlib font size to 14 for every figure drawn below.
mpl.rcParams['font.size'] = 14

Now it is your turn. Below you will find a dataset of a Na-Cl dimer in TIP3P water; the dataset includes Na-Cl distance time series from 60 biased (umbrella sampling) simulations which incrementally pull the ions apart, and from 20 unbiased simulations started at a Na-Cl distance of approximately 3.5 Angstrom. The umbrella sampling parameters and the kT value are also given.

All distances in the dataset are in Angstrom, energies in kcal/mol, and the trajectory timestep is 1 ps.

In [None]:
# Fetch the NaCl dataset into ./data via mdshare and unpack the arrays we need.
nacl_npz = mdshare.fetch('pyemma-tutorial-us-nacl.npz', working_directory='data')
with np.load(nacl_npz) as archive:
    # 60 umbrella-sampling trajectories plus the bias parameters of each window.
    us_trajs = [archive['us_traj_%03d' % idx] for idx in range(60)]
    us_centers = archive['us_centers'].tolist()
    us_force_constants = archive['us_force_constants'].tolist()
    # 20 unbiased MD trajectories.
    md_trajs = [archive['md_traj_%03d' % idx] for idx in range(20)]
    # Thermal energy, stored as a 0-d array in the file; convert to a plain float.
    kT = float(archive['kT'])

### Step 1

You can experiment with visualizations of the raw data or jump straight into the discretization.

In [None]:
# Force constant of every umbrella window plotted against its center position.
ax = plt.gca()
ax.plot(us_centers, us_force_constants, '--o')
ax.set_xlabel('reaction coordinate')
ax.set_ylabel('force constant')

In [None]:
from scipy.stats import gaussian_kde

fig, axes = plt.subplots(2, figsize=(14, 8))

# Upper panel: umbrella-sampling trajectories; lower panel: unbiased trajectories.
# One kernel density estimate is drawn per trajectory.
panels = (('biased', us_trajs), ('unbiased', md_trajs))
for ax, (panel_label, trajs) in zip(axes, panels):
    for traj in trajs:
        # Evaluate the density on 100 points spanning this trajectory's own range.
        grid = np.linspace(traj.min(), traj.max(), 100)
        density = gaussian_kde(traj[:, 0])(grid)
        ax.plot(grid, density)
        ax.fill_between(grid, 0, density, alpha=0.1)
    ax.set_ylabel(panel_label)
fig.tight_layout()

In [None]:
# Pool the first column of all unbiased trajectories into a single sample and
# estimate its density; kde_obs is reused below as the unbiased reference.
md_samples = np.concatenate([md_traj[:, 0] for md_traj in md_trajs])
kde_obs = gaussian_kde(md_samples)
obs_grid = np.linspace(md_samples.min(), md_samples.max(), 1000)
plt.plot(obs_grid, kde_obs(obs_grid))

In [None]:
# Overlay the first 20 frames of every unbiased trajectory.
ax = plt.gca()
for md_traj in md_trajs:
    ax.plot(md_traj[:20, 0])

In [None]:
# Regular 1D discretization: 101 equally spaced edges over the full range seen
# in any trajectory define 100 bins, each represented by its midpoint.
all_trajs = us_trajs + md_trajs
xmin = np.min([traj.min() for traj in all_trajs])
xmax = np.max([traj.max() for traj in all_trajs])
edges = np.linspace(xmin, xmax, 101)
centers = 0.5 * (edges[:-1] + edges[1:])

# assign_to_centers is given the centers as a column vector (one row per center).
centers_column = centers.reshape(-1, 1)
us_dtrajs = pyemma.coordinates.assign_to_centers(us_trajs, centers=centers_column)
md_dtrajs = pyemma.coordinates.assign_to_centers(md_trajs, centers=centers_column)

### Step 2
Try to apply WHAM to get a quick estimate of the stationary properties of the system. Try only the biased data or use both biased and unbiased.

In [None]:
# WHAM estimate from the umbrella-sampling data only (no unbiased trajectories
# are passed). Convergence information is recorded every 50 iterations.
wham_options = dict(
    maxiter=100000,
    maxerr=1e-15,
    save_convergence_info=50,
    estimator='wham',
    lag=1,
    dt_traj='1 step',
    init=None,
    init_maxiter=10000,
    init_maxerr=1e-08,
)
wham = pyemma.thermo.estimate_umbrella_sampling(
    us_trajs, us_dtrajs, us_centers, us_force_constants,
    md_trajs=None, md_dtrajs=None, kT=kT,
    **wham_options)

pyemma.plots.plot_convergence_info(wham);

In [None]:
# Compare the WHAM stationary distribution with the density of the unbiased
# data, evaluated at the bin centers of WHAM's active set and normalized to sum to 1.
wham_centers = centers[wham.active_set]
y_obs = kde_obs(wham_centers)
y_obs = y_obs / y_obs.sum()

ax = plt.gca()
ax.plot(wham_centers, y_obs, '--s', label='unbiased observation')
ax.plot(wham_centers, wham.pi, '--o', label='wham (biased)')
ax.set_xlabel('reaction coordinate')
ax.set_ylabel('stationary distribution')
ax.legend();

### Step 3
Now apply DTRAM and estimate the kinetic properties of the system. Remember: kinetics require unbiased data!

In [None]:
# DTRAM estimates from biased and unbiased data together, one per lag time.
# Convergence information is recorded every 10 iterations.
dtram_lags = [1, 2, 3, 5, 7, 10, 15, 20]
dtram_options = dict(
    maxiter=20000,
    maxerr=1e-15,
    save_convergence_info=10,
    estimator='dtram',
    lag=dtram_lags,
    dt_traj='1 step',
    init=None,
    init_maxiter=10000,
    init_maxerr=1e-08,
)
dtram_list = pyemma.thermo.estimate_umbrella_sampling(
    us_trajs, us_dtrajs, us_centers, us_force_constants,
    md_trajs=md_trajs, md_dtrajs=md_dtrajs, kT=kT,
    **dtram_options)

pyemma.plots.plot_convergence_info(dtram_list)
pyemma.plots.plot_memm_implied_timescales(dtram_list, nits=10);

In [None]:
# Pick one DTRAM model (fourth entry of dtram_list) and report its lag time.
dtram = dtram_list[3]
print(f'lag time = {dtram.lag} steps')

# Reference: density of the unbiased data at the model's active bin centers,
# normalized to sum to 1 so it is comparable with dtram.pi.
dtram_centers = centers[dtram.active_set]
y_obs = kde_obs(dtram_centers)
y_obs = y_obs / y_obs.sum()

ax = plt.gca()
ax.plot(dtram_centers, y_obs, '--s', label='unbiased observation')
ax.plot(dtram_centers, dtram.pi, '--o', label='dtram (biased + unbiased)')
ax.set_xlabel('reaction coordinate')
ax.set_ylabel('stationary distribution')
ax.legend();

In [None]:
# Coarse-grain the MSM attached to the selected DTRAM model into n metastable
# states with PCCA, plot the stationary weights of each state, and tabulate
# the mean first passage times (MFPTs) between all pairs of states.
n = 4

msm = dtram.msm
msm.pcca(n)

for i, metastable_set in enumerate(msm.metastable_sets):
    # Total stationary probability of the microstates in this metastable state.
    print(f'weight of metastable state {i}: {msm.pi[metastable_set].sum():.3f}')
    plt.plot(
        centers[metastable_set],
        msm.pi[metastable_set],
        linewidth=3,
        label=f'metastable state {i}')
plt.legend()

from itertools import product
# mfpt[i, j] holds the MFPT from metastable state i to metastable state j.
mfpt = np.zeros((n, n))
for i, j in product(range(n), range(n)):
    start = msm.metastable_sets[i]
    stop = msm.metastable_sets[j]
    mfpt[i, j] = msm.mfpt(start, stop)

print('\nMFPTs in steps')
# Use the builtin int: the np.int alias was removed in NumPy 1.24 and
# raises AttributeError there.
print(mfpt.round(decimals=0).astype(int))

### Step 4
We have unbiased data, so let's build a regular MSM and compare with the MEMM results.

In [None]:
# Implied timescales of MSMs built on the unbiased discrete trajectories
# (5 timescales, Bayesian error estimates).
md_its = pyemma.msm.its(md_dtrajs, 50, nits=5, errors='bayes')
pyemma.plots.plot_implied_timescales(md_its);

In [None]:
# Bayesian MSM on the unbiased data only, followed by a Chapman-Kolmogorov
# test with 4 sets. Note that this rebinds `msm`, which previously referred
# to the MSM of the DTRAM model.
msm = pyemma.msm.bayesian_markov_model(md_dtrajs, 5)

ck_result = msm.cktest(4)
pyemma.plots.plot_cktest(ck_result);

In [None]:
# Same metastable-state analysis as for the MEMM, now on the MSM estimated
# from unbiased data: stationary weights per metastable state and the matrix
# of mean first passage times (MFPTs) between them.
# NOTE(review): msm.metastable_sets requires a PCCA decomposition to exist;
# presumably the earlier msm.cktest(4) call computed it -- confirm, or call
# msm.pcca(...) explicitly before this cell.
n = len(msm.metastable_sets)

for i, metastable_set in enumerate(msm.metastable_sets):
    # Total stationary probability of the microstates in this metastable state.
    print(f'weight of metastable state {i}: {msm.pi[metastable_set].sum():.3f}')
    plt.plot(
        centers[metastable_set],
        msm.pi[metastable_set],
        linewidth=3,
        label=f'metastable state {i}')
plt.legend()

from itertools import product
# mfpt[i, j] holds the MFPT from metastable state i to metastable state j.
mfpt = np.zeros((n, n))
for i, j in product(range(n), range(n)):
    start = msm.metastable_sets[i]
    stop = msm.metastable_sets[j]
    mfpt[i, j] = msm.mfpt(start, stop)

print('\nMFPTs in steps')
# Use the builtin int: the np.int alias was removed in NumPy 1.24 and
# raises AttributeError there.
print(mfpt.round(decimals=0).astype(int))