In [None]:
from __future__ import print_function
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pyemma
import pyemma.thermo.util
import mdshare

# import some functions which should not clutter the notebook
import shortcuts_thermo as shortcuts

# figure size parameters; the plotting cells below pass them to matplotlib as
# figsize=(2 * pw, ph) for two-panel figures
pw = 6  # width of one panel
ph = 0.75 * pw  # panel height, three quarters of the panel width

## Umbrella sampling simulations

The bias is computed via a harmonic potential based on the deviation of a frame from a reference structure. In the usual one-dimensional case, this reads

$$b^{(i)}(\mathbf{x}) = \frac{k^{(i)}}{2} \left\Vert \mathbf{x} - \mathbf{x}^{(i)} \right\Vert^2.$$

In the more general case, though, one can use a non-symmetric force matrix:

$$b^{(i)}(\mathbf{x}) = \frac{1}{2} \left\langle \mathbf{x} - \mathbf{x}^{(i)} \middle\vert \mathbf{k}^{(i)} \middle\vert \mathbf{x} - \mathbf{x}^{(i)} \right\rangle.$$

## API functions for umbrella sampling

For these simulation types, the `pyemma.thermo` module provides the API functions

```python
def estimate_umbrella_sampling(
    us_trajs, us_dtrajs, us_centers, us_force_constants,
    md_trajs=None, md_dtrajs=None, kT=None,
    maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
    estimator='wham', lag=1, dt_traj='1 step', init=None):
    ...

```

# Example Model 1: one-dimensional asymmetric double well potential

We start by looking at the stationary distribution and free energy profile which are available analytically.

In [None]:
# analytic reference data for the asymmetric double well; the arguments are presumably
# (x_min, x_max, number of grid points) -- see shortcuts.adw_reference to confirm
adw_x, adw_f, adw_pi = shortcuts.adw_reference(-1, 5, 100)

fig, ax = plt.subplots(1, 2, figsize=(2 * pw, ph))
ax_pi, ax_f = ax

# left panel: stationary distribution on a logarithmic y axis
ax_pi.plot(adw_x, adw_pi, linewidth=3, color='black')
ax_pi.set_ylabel(r"$\pi(x)$", fontsize=20)
ax_pi.semilogy()

# right panel: free energy profile
ax_f.plot(adw_x, adw_f, linewidth=3, color='black')
ax_f.set_ylabel(r"$f(x)$ / kT", fontsize=20)

# shared x axis styling
for panel in (ax_pi, ax_f):
    panel.set_xlabel(r"$x$ / a.u.", fontsize=20)
    panel.tick_params(labelsize=15)

fig.tight_layout()

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(2 * pw, ph))
# plot the thermodynamic ground/unbiased state (kT=1.0)
ax[0].plot(adw_x, adw_pi, linewidth=3, color='black', label='unbiased')
ax[1].plot(adw_x, adw_f, linewidth=3, color='black', label='unbiased')
# plot three selected umbrellas; each entry is (umbrella number, bias center, line color)
selected_umbrellas = [
    (6, 1.07894737, 'blue'),
    (10, 2.13157895, 'green'),
    (14, 3.18421053, 'red'),
]
for umbrella_number, x_bias, color in selected_umbrellas:
    # same grid as the unbiased reference, with a harmonic bias (k_bias) centered at x_bias;
    # the first return value is bound to _x and ignored (not `_`, which IPython uses for the last output)
    _x, adw_f2, adw_pi2 = shortcuts.adw_reference(
        adw_x[0], adw_x[-1], adw_x.shape[0], k_bias=30.0, x_bias=x_bias)
    label = 'umbrella %d' % umbrella_number
    ax[0].plot(adw_x, adw_pi2, linewidth=3, color=color, label=label)
    ax[1].plot(adw_x, adw_f2, linewidth=3, color=color, label=label)
# finish the figure
ax[0].set_ylabel(r"$\pi^{(j)}(x)$", fontsize=20)
ax[0].semilogy()
ax[0].set_ylim([1.0E-10, 1.0])
ax[0].legend(loc=3, fontsize=12, fancybox=True, framealpha=0.5)
ax[1].set_ylabel(r"$f^{(j)}(x) - f^{(j)}$ / kT", fontsize=20)
ax[1].set_ylim([0.0, 30.0])
ax[1].legend(loc=2, fontsize=12, fancybox=True, framealpha=0.5)
for _ax in ax:
    _ax.set_xlabel(r"$x$ / a.u.", fontsize=20)
    _ax.tick_params(labelsize=15)
fig.tight_layout()


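First step: load the data from 100 precomputed umbrella sampling trajectories, together with their umbrella centers and force constants, and from five unbiased trajectories. All arrays are stored in the archive ``pyemma-tutorial-us-data.npz``...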

In [None]:
# number of trajectories read from the archive
n_umbrella_trajs, n_unbiased_trajs = 100, 5

# obtain the archive via mdshare (working directory 'data') and read all arrays
# while the file handle is open
archive_path = mdshare.load('pyemma-tutorial-us-data.npz', working_directory='data')
with np.load(archive_path) as fh:
    # biased (umbrella sampling) trajectories and the parameters of their bias potentials
    adw_us_trajs = [fh['us_traj_%03d.npy' % i] for i in range(n_umbrella_trajs)]
    adw_us_umbrella_centers = fh['umbrella_centers'].tolist()
    adw_us_force_constants = fh['force_constants'].tolist()
    # unbiased trajectories
    adw_md_trajs = [fh['md_traj_%03d.npy' % i] for i in range(n_unbiased_trajs)]

In [None]:
# print the number of entries in the trajectory, center and force-constant lists (in that order)
for loaded_list in (adw_us_trajs, adw_us_umbrella_centers, adw_us_force_constants):
    print(len(loaded_list))

Second step: run a clustering algorithm on the configuration trajectories to define the bins
(and to compute the bin counts later on).

In [None]:
# regular-space clustering of the biased trajectories; the resulting cluster centers
# define the bins used by the estimators below
adw_us_cluster = pyemma.coordinates.cluster_regspace(
    adw_us_trajs,
    dmin=0.2,
    max_centers=500)

Third step: run ``WHAM``  estimations using the ``estimate_umbrella_sampling`` API function and plot the convergence info...

In [None]:
# WHAM estimate from the biased trajectories only; save_convergence_info is set so that
# the convergence behaviour can be plotted afterwards
adw_us_estimator = pyemma.thermo.estimate_umbrella_sampling(
    adw_us_trajs,
    adw_us_cluster.dtrajs,
    adw_us_umbrella_centers,
    adw_us_force_constants,
    estimator='wham',
    maxiter=100000,
    maxerr=1.0E-15,
    save_convergence_info=50)

In [None]:
# visualize the convergence information recorded during the WHAM run
# (requested above via save_convergence_info=50)
pyemma.plots.plot_convergence_info(adw_us_estimator)

Fourth step: plot the free energies ``f`` and ``f_therm``...

In [None]:
# reference free energies matched to the current binning
adw_us_x, adw_us_f = shortcuts.adw_match_reference_to_binning(adw_us_trajs, adw_us_cluster.clustercenters)

fig, ax = plt.subplots(1, 2, figsize=(2 * pw, ph))
ax_conf, ax_therm = ax

# left panel: free energies of the configuration states in the estimator's active set vs. the reference
active_centers = adw_us_cluster.clustercenters[adw_us_estimator.active_set, 0]
ax_conf.plot(active_centers, adw_us_estimator.f, 's', markersize=10, label=adw_us_estimator.name)
ax_conf.plot(adw_us_x, adw_us_f, '-*', linewidth=2, markersize=9, color='black', label='Reference')
ax_conf.set_xlabel(r"configuration state", fontsize=20)
ax_conf.set_ylabel(r"f / kT", fontsize=20)

# right panel: free energies of the thermodynamic states, plotted at their umbrella centers
umbrella_positions = adw_us_estimator.umbrella_centers[:, 0]
ax_therm.plot(umbrella_positions, adw_us_estimator.f_therm, 's', markersize=10, label=adw_us_estimator.name)
ax_therm.set_xlabel(r"umbrella_center", fontsize=20)
ax_therm.set_ylabel(r"f_therm / kT", fontsize=20)

# common styling for both panels
for panel in (ax_conf, ax_therm):
    panel.tick_params(labelsize=15)
    panel.set_ylim([0, 12])
    panel.legend(loc=4, fontsize=10, fancybox=True, framealpha=0.5)

fig.tight_layout()

# Mixed simulations data: US simulations + unbiased simulations

In [None]:
# redo clustering with both, biased and unbiased data
# redo clustering with both, biased and unbiased data
# NOTE: this rebinds adw_us_cluster; cells further up that read it will see this new
# clustering if they are re-run after this cell
adw_us_cluster = pyemma.coordinates.cluster_regspace(
    adw_us_trajs + adw_md_trajs,
    dmin=0.2,
    max_centers=500)

# split dtrajs into biased and unbiased; the biased trajectories were passed first,
# and the split relies on the dtrajs being in that same order
n_biased = len(adw_us_trajs)
all_dtrajs = adw_us_cluster.dtrajs
adw_us_dtrajs = all_dtrajs[:n_biased]
adw_md_dtrajs = all_dtrajs[n_biased:]

In [None]:
# plot order parameter trajectories of the unbiased simulations
# one line per unbiased trajectory: order parameter x against the frame index
for unbiased_traj in adw_md_trajs:
    plt.plot(unbiased_traj)
plt.ylabel('x')
plt.xlabel('step')

In [None]:
# run the estimator again for a sequence of lag times
# lag times (in steps) at which the estimator is run
lags = [1, 2, 5, 7, 10, 15, 20, 30, 40, 50, 70, 100]

# dTRAM on the mixed data set: biased trajectories plus the unbiased ones (md_*)
memms = pyemma.thermo.estimate_umbrella_sampling(
    adw_us_trajs,
    adw_us_dtrajs,
    adw_us_umbrella_centers,
    adw_us_force_constants,
    md_trajs=adw_md_trajs,
    md_dtrajs=adw_md_dtrajs,
    estimator='dtram',
    lag=lags,
    maxiter=100000,
    maxerr=1.0E-15,
    save_convergence_info=50)

In [None]:
# TRAM
# Optional alternative to the dTRAM cell above, left disabled: uncomment to rebind
# `lags` and `memms` with a TRAM estimate at three lag times instead.
#lags = [1, 10, 50]
#memms = pyemma.thermo.estimate_umbrella_sampling(
#    adw_us_trajs, adw_us_dtrajs, adw_us_umbrella_centers, adw_us_force_constants,
#    md_trajs=adw_md_trajs, md_dtrajs=adw_md_dtrajs,
#    lag=lags,
#    maxiter=100000, maxerr=1.0E-6, init_maxerr=1.0, save_convergence_info=50, estimator='tram', direct_space=True)

In [None]:
# show the name reported by each estimated model
[model.name for model in memms]

In [None]:
# plot implied time scales depending on lag time
# (memms presumably holds one estimated model per entry of `lags`)
pyemma.plots.plot_memm_implied_timescales(memms)

In [None]:
# at 10 steps the implied time scales look converged, pick that model for analysis
# The model is selected by its `lag` attribute instead of a hard-coded list position,
# so the choice stays correct if the list of lag times is edited; if no model has
# this lag, next() raises StopIteration instead of silently picking another model.
# NOTE: the variable name `dtram_estiamtor` (sic) is kept because the cells below read it.
analysis_lag = 10
dtram_estiamtor = next(m for m in memms if m.lag == analysis_lag)
print(dtram_estiamtor.lag)

In [None]:
# for TRAM
# (only relevant if the commented-out TRAM cell above was run; in its lag list
# [1, 10, 50] the lag time 10 sits at index 1)
#print(memms[1].lag)
#dtram_estiamtor = memms[1]

In [None]:
# plot the free energy estimate of the selected model against the reference
adw_us_x, adw_us_f = shortcuts.adw_match_reference_to_binning(adw_us_trajs, adw_us_cluster.clustercenters)

plt.figure(figsize=(2 * pw, ph))
# estimated free energies of the configuration states in the model's active set
plt.plot(
    adw_us_cluster.clustercenters[dtram_estiamtor.active_set, 0], dtram_estiamtor.f, 's', markersize=10, label=dtram_estiamtor.name)
# reference free energies matched to the current binning
plt.plot(adw_us_x, adw_us_f, '-*', linewidth=2, markersize=9, color='black', label='Reference')
plt.xlabel(r"configuration state", fontsize=20)
plt.ylabel(r"f / kT", fontsize=20)
plt.tick_params(labelsize=15)
# both curves carry a label, so draw the legend (it was missing before);
# the trailing semicolon suppresses the repr of the returned Legend object
plt.legend(loc=4, fontsize=10, fancybox=True, framealpha=0.5);

In [None]:
# The MSM of the unbiased ensemble can be accessed via dtram_estiamtor.msm
unbiased_msm = dtram_estiamtor.msm

# We can do all the usual MSM analyses now, e. g. coarse-graining with PCCA and computing MFPTs.
pcca = unbiased_msm.pcca(2)
first_set = pcca.metastable_sets[0]
second_set = pcca.metastable_sets[1]

# mean first passage times between the two metastable sets, in both directions
mfpt_forward = unbiased_msm.mfpt(first_set, second_set)
print("MFPT[blue->green] = %7.1f steps" % mfpt_forward)
mfpt_backward = unbiased_msm.mfpt(second_set, first_set)
print("MFPT[green->blue] = %7.1f steps" % mfpt_backward)

# reference free energy profile as a black line
plt.plot(adw_us_x, adw_us_f, '-*', linewidth=2, markersize=9, color='black')

# -log(stationary probability) of the MSM states, colored by PCCA assignment
# NOTE(review): stationary_distribution is indexed with active_set here -- confirm that
# both use the same state numbering when the active set is not the full state set
active_states = unbiased_msm.active_set
msm_free_energy = -np.log(unbiased_msm.stationary_distribution[active_states])
plt.scatter(
    adw_us_cluster.clustercenters[active_states, 0],
    msm_free_energy,
    s=120, c=pcca.metastable_assignment, cmap=mpl.cm.brg)

plt.xlabel(r"configuration state", fontsize=20)
plt.ylabel(r"f / kT", fontsize=20)
plt.tick_params(labelsize=15)
plt.xlim([-1, 5])
plt.ylim([0, 12])

# PyEMMA's general thermo API

## binned estimators
The `pyemma.thermo` module provides the following API functions to perform ``dTRAM`` and ``WHAM`` estimations:

```python
def dtram(
    ttrajs, dtrajs, bias, lag,
    maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
    dt_traj='1 step', init=None):
    ...
    
def wham(
    ttrajs, dtrajs, bias,
    maxiter=100000, maxerr=1.0E-15, save_convergence_info=0,
    dt_traj='1 step'):
    ...
```

- ``ttrajs`` is a list of ``numpy.ndarray`` objects with ``shape=(T_i,)``, where ``T_i`` denotes the number of frames in trajectory ``i``. The entries indicate in which thermodynamic state each frame was created.
- ``dtrajs`` is a list of ``numpy.ndarray`` objects with ``shape=(T_i,)``, where ``T_i`` denotes the number of frames in trajectory ``i``. The entries indicate to which discrete configuration states each frame belongs.
- ``bias`` is a ``numpy.ndarray`` with ``shape=(K, N)``, where ``K`` is the number of thermodynamic states and ``N`` is the number of discrete configuration states. The elements are the dimensionless bias energies for all combinations of discrete configuration and thermodynamic states.
- ``lag`` is the lag time in steps at which transitions are counted.


## bin-less estimators

```python
def tram(
    ttrajs, dtrajs, bias, lag,
    maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
    dt_traj='1 step', init=None, direct_space=False):
    ...
    
def mbar(
    ttrajs, dtrajs, bias,
    maxiter=100000, maxerr=1.0E-15, save_convergence_info=0,
    dt_traj='1 step', direct_space=False):
    ...
```

The ``bias`` parameter of bin-less estimators has a different format from that of binned estimators:


- ``bias`` is a ``numpy.ndarray(T, num_therm_states)`` or a list of ``numpy.ndarray(T_i, num_therm_states)`` – a single reduced bias energy trajectory or a list of reduced bias energy trajectories. For every simulation frame seen in trajectory `i` at time step `t`, ``bias[i][t, k]`` is the reduced bias energy of that frame evaluated in the `k`’th thermodynamic state (i.e. at the `k`’th umbrella/Hamiltonian/temperature).

The parameter ``direct_space`` allows the calculation to be optimized for speed.

- ``direct_space`` is an optional boolean parameter that is ``False`` by default. It selects whether the self-consistent iteration is performed with Boltzmann factors (direct space) or with free energies (log-space). Calculations in direct space are faster. When analyzing data from multi-temperature simulations, direct space is not recommended.

To make the preparation of ``ttrajs`` and ``bias`` easier, we provide two further API functions to handle the preparation for certain types of simulations, i.e., multi-temperature and umbrella sampling with harmonic bias potentials.