# 05 - hidden Markov state models (HMMs)

In [None]:
%%javascript
// Ask the Jupyter front end to load two notebook extensions by module path:
// 'rubberband/main' and 'exercise2/main'.
Jupyter.utils.load_extensions('rubberband/main')
Jupyter.utils.load_extensions('exercise2/main')

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import mdshare
import pyemma

In [None]:
# Fetch the 'hmm-doublewell-2d-100k.npz' archive through mdshare (working
# directory: 'data') and read the two arrays used throughout this notebook:
# the trajectory itself and an accompanying discrete trajectory.
file = mdshare.fetch('hmm-doublewell-2d-100k.npz', working_directory='data')
with np.load(file) as archive:
    data, good_dtraj = archive['trajectory'], archive['discrete_trajectory']

In [None]:
# Two hand-picked cluster centers (one row per center) that define the
# "poor" discretization.
poor_clustercenters = np.array([
    [-0.1, -0.6],
    [0.1, 1.4],
])
# Discretize `data` with the poor centers. Only the first entry of the
# returned result is kept (presumably one discrete trajectory per input
# trajectory -- confirm against the PyEMMA documentation).
poor_assignments = pyemma.coordinates.assign_to_centers(data, centers=poor_clustercenters)
poor_dtraj = poor_assignments[0]

In [None]:
# Side-by-side scatter plots of the samples, colored by discrete state, with
# red crosses marking the centers of each discretization. The points drawn on
# the "good" panel are hard-coded here (presumably the centers behind
# good_dtraj -- confirm against the dataset description).
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharex=True, sharey=True)
panels = (
    (axes[0], poor_dtraj, poor_clustercenters, 'poor discretization'),
    (axes[1], good_dtraj, np.asarray([[0, -1], [0, 1]]), 'good discretization'),
)
for ax, dtraj, centers, title in panels:
    ax.scatter(*data.T, s=1, c=dtraj)
    ax.scatter(*centers.T, s=75, c='red', marker='x')
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    ax.set_title(title)
fig.tight_layout()

In [None]:
# Lag times 1, 2, ..., 10 scanned by the implied-timescale estimates.
lags = list(range(1, 11))

# Implied timescales of MSMs built on each discretization, estimated with
# errors='bayes' and drawn on a linear y-axis; both panels share their axes
# so the two discretizations can be compared directly.
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharex=True, sharey=True)
msm_its_panels = (
    (axes[0], poor_dtraj, 'MSM with poor discretization'),
    (axes[1], good_dtraj, 'MSM with good discretization'),
)
for ax, dtraj, title in msm_its_panels:
    its = pyemma.msm.its(dtraj, lags=lags, errors='bayes')
    pyemma.plots.plot_implied_timescales(its, ylog=False, ax=ax)
    ax.set_title(title)
fig.tight_layout()

In [None]:
# One MSM per discretization: lag 5 for the poor one, lag 1 for the good one.
estimate_msm = pyemma.msm.estimate_markov_model
poor_msm = estimate_msm(poor_dtraj, lag=5)
good_msm = estimate_msm(good_dtraj, lag=1)

In [None]:
# Print the timescales of both MSMs, poor discretization first.
for msm in (poor_msm, good_msm):
    print(msm.timescales())

In [None]:
# Run cktest on the poor-discretization MSM and plot the result; the trailing
# semicolon keeps the notebook from echoing the return value.
poor_msm_ck = poor_msm.cktest(2)
pyemma.plots.plot_cktest(poor_msm_ck);

In [None]:
# Run cktest on the good-discretization MSM and plot the result; the trailing
# semicolon keeps the notebook from echoing the return value.
good_msm_ck = good_msm.cktest(2)
pyemma.plots.plot_cktest(good_msm_ck);

In [None]:
# Implied timescales from timescales_hmsm for each discretization, estimated
# with errors='bayes' and drawn on a linear y-axis. The positional 2 is
# presumably the number of hidden states -- confirm against the PyEMMA docs.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
ax_poor, ax_good = axes

poor_hmm_its = pyemma.msm.timescales_hmsm(poor_dtraj, 2, lags=lags, errors='bayes')
pyemma.plots.plot_implied_timescales(poor_hmm_its, ylog=False, ax=ax_poor)

good_hmm_its = pyemma.msm.timescales_hmsm(good_dtraj, 2, lags=lags, errors='bayes')
pyemma.plots.plot_implied_timescales(good_hmm_its, ylog=False, ax=ax_good)

ax_poor.set_title('HMM with poor discretization')
ax_good.set_title('HMM with good discretization')
fig.tight_layout()

In [None]:
# One HMM per discretization, both at lag 1 and with the same positional
# argument 2 (presumably the number of hidden states -- confirm in the docs).
estimate_hmm = pyemma.msm.estimate_hidden_markov_model
poor_hmm = estimate_hmm(poor_dtraj, 2, lag=1)
good_hmm = estimate_hmm(good_dtraj, 2, lag=1)

In [None]:
# Print the timescales of both HMMs, poor discretization first.
for hmm in (poor_hmm, good_hmm):
    print(hmm.timescales())

In [None]:
# Run cktest on the poor-discretization HMM and plot the result; the trailing
# semicolon keeps the notebook from echoing the return value.
poor_hmm_ck = poor_hmm.cktest(2)
pyemma.plots.plot_cktest(poor_hmm_ck);

In [None]:
# Run cktest on the good-discretization HMM and plot the result; the trailing
# semicolon keeps the notebook from echoing the return value.
good_hmm_ck = good_hmm.cktest(2)
pyemma.plots.plot_cktest(good_hmm_ck);

In [None]:
# Three hand-picked cluster centers (one row per center) that define the
# "bad" discretization.
bad_clustercenters = np.array([
    [-2.5, -1.4],
    [0.3, 1.2],
    [2.7, -0.6],
])
# Discretize `data` with the bad centers. Only the first entry of the
# returned result is kept (presumably one discrete trajectory per input
# trajectory -- confirm against the PyEMMA documentation).
bad_assignments = pyemma.coordinates.assign_to_centers(data, centers=bad_clustercenters)
bad_dtraj = bad_assignments[0]

# Three-panel summary of the bad discretization:
#   left   - samples colored by discrete state, centers as red crosses
#   middle - implied timescales from pyemma.msm.its
#   right  - implied timescales from pyemma.msm.timescales_hmsm
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
scatter_ax, msm_ax, hmm_ax = axes

scatter_ax.scatter(*data.T, s=1, c=bad_dtraj)
scatter_ax.scatter(*bad_clustercenters.T, s=75, c='red', marker='x')

bad_msm_its = pyemma.msm.its(bad_dtraj, lags=lags, errors='bayes')
pyemma.plots.plot_implied_timescales(bad_msm_its, ylog=False, ax=msm_ax)

bad_hmm_its = pyemma.msm.timescales_hmsm(bad_dtraj, 2, lags=lags, errors='bayes')
pyemma.plots.plot_implied_timescales(bad_hmm_its, ylog=False, ax=hmm_ax)

scatter_ax.set_xlabel('$x$')
scatter_ax.set_ylabel('$y$')
scatter_ax.set_title('bad discretization')
msm_ax.set_title('MSM with bad discretization')
hmm_ax.set_title('HMM with bad discretization')

# Give both timescale panels the same fixed y-range; this is applied after
# plotting so the limits set here are the ones that end up on the figure.
for ax in (msm_ax, hmm_ax):
    ax.set_ylim(-0.5, 10.5)
fig.tight_layout()

In [None]:
# Estimate an HMM on the bad discretization at lag 1, print its timescales,
# then run cktest and plot the result (the trailing semicolon keeps the
# notebook from echoing the return value).
bad_hmm = pyemma.msm.estimate_hidden_markov_model(bad_dtraj, 2, lag=1)
print(bad_hmm.timescales())
bad_hmm_ck = bad_hmm.cktest(2)
pyemma.plots.plot_cktest(bad_hmm_ck);