# 04 - MSM coarse graining and analysis

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import mdshare
import pyemma

In [None]:
# Fetch the 2D double-well trajectory archive and read the 'trajectory' array from it.
file = mdshare.fetch('hmm-doublewell-2d-100k.npz', working_directory='data')
with np.load(file) as fh:
    data = fh['trajectory']

# Discretize with k-means, then scan implied timescales over a handful of lag times.
cluster = pyemma.coordinates.cluster_kmeans(data, k=50, max_iter=50)
its = pyemma.msm.its(
    cluster.dtrajs,
    lags=[1, 2, 3, 5, 7, 10],
    nits=3,
    errors='bayes')

# Three panels: feature histograms, samples with cluster centers, implied timescales.
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
ax_hist, ax_scatter, ax_its = axes
pyemma.plots.plot_feature_histograms(data, feature_labels=['$x$', '$y$'], ax=ax_hist)
ax_scatter.scatter(data[:, 0], data[:, 1], s=1, alpha=0.3)
ax_scatter.scatter(cluster.clustercenters[:, 0], cluster.clustercenters[:, 1], s=15)
ax_scatter.set_xlabel('$x$')
ax_scatter.set_ylabel('$y$')
pyemma.plots.plot_implied_timescales(its, ylog=False, ax=ax_its)
fig.tight_layout()

In [None]:
# Estimate a Markov state model from the discrete trajectories at lag 1.
msm = pyemma.msm.estimate_markov_model(
    cluster.dtrajs,
    lag=1)

In [None]:
# Right eigenvectors of the MSM; column i is drawn on panel i, colored per cluster center.
eigvec = msm.eigenvectors_right()
centers_x, centers_y = cluster.clustercenters.T

fig, axes = plt.subplots(1, 3, figsize=(12, 3))
for idx, panel in enumerate(axes.flat):
    points = panel.scatter(centers_x, centers_y, c=eigvec[:, idx], s=70, cmap=mpl.cm.bwr)
    cbar = plt.colorbar(points, ax=panel)
    cbar.set_label('%d. right eigenvector' % (idx + 1))
    panel.set_xlabel('$x$')
    panel.set_ylabel('$y$')
fig.tight_layout()

In [None]:
# Run the CK test of the MSM for two metastable sets and plot the result.
nstates = 2
ck = msm.cktest(nstates)
pyemma.plots.plot_cktest(ck);

In [None]:
# Coarse-grain the MSM onto `nstates` states and show the coarse transition matrix.
coarse_msm = msm.coarse_grain(nstates)
coarse_transition_matrix = coarse_msm.transition_matrix
print(coarse_transition_matrix)

In [None]:
# Translate each frame's cluster index into the metastable state that cluster belongs to.
mtraj = coarse_msm.metastable_assignments[cluster.dtrajs[0]]

# Left: samples colored by metastable state. Right: the coarse-grained model as a network.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
ax_states, ax_network = axes

im = ax_states.scatter(data[:, 0], data[:, 1], s=1, c=mtraj)
ax_states.set_xlabel('$x$')
ax_states.set_ylabel('$y$')

cb = fig.colorbar(im, ax=ax_states)
cb.set_ticks([0, 1])
cb.set_label('metastable state')

node_positions = np.asarray([[0, 0], [3, 2]])
pyemma.plots.plot_markov_model(
    coarse_msm,
    pos=node_positions,
    arrow_curvature=2.0,
    figpadding=0.2,
    size=12,
    ax=ax_network)
ax_network.set_aspect('equal')
fig.tight_layout()

In [None]:
# Mean first passage times (MFPTs) between every ordered pair of distinct metastable sets.
# The diagonal is skipped on purpose: the passage time from a set to itself is zero, so
# computing it is wasted work, and any numerical residue returned for it would be turned
# into a spurious self-rate by the inversion below.
metastable_sets = coarse_msm.metastable_sets
mfpt = np.zeros((nstates, nstates))
for i in range(nstates):
    for j in range(nstates):
        if i != j:
            mfpt[i, j] = msm.mfpt(metastable_sets[i], metastable_sets[j])

# Rates are the inverse MFPTs; entries that are zero (the diagonal) stay zero so that
# no division by zero takes place.
rate = np.zeros_like(mfpt)
nz = mfpt.nonzero()
rate[nz] = 1.0 / mfpt[nz]

# Draw the network from the rates and label each arrow with the corresponding MFPT.
pyemma.plots.plot_network(
    rate,
    pos=np.asarray([[0, 0], [2, 1]]),
    arrow_label_format='%.0f steps',
    arrow_labels=mfpt,
    figpadding=0.3,
    size=12);

In [None]:
# Fetch the alanine dipeptide topology and trajectory files.
pdb = mdshare.fetch('alanine-dipeptide-nowater.pdb', working_directory='data')
files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.dcd', working_directory='data')

# Featurize with backbone torsions and load all trajectories.
feat = pyemma.coordinates.featurizer(pdb)
feat.add_backbone_torsions()
data = pyemma.coordinates.load(files, features=feat)
# Concatenate once and reuse for both the histogram and the scatter plot.
data_concatenated = np.concatenate(data)

# Discretize with k-means, then scan implied timescales over several lag times.
cluster = pyemma.coordinates.cluster_kmeans(data, k=100, max_iter=50, stride=10)
its = pyemma.msm.its(cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')

# Raw strings keep the LaTeX backslashes from being parsed as (invalid) escape sequences.
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
pyemma.plots.plot_feature_histograms(
    data_concatenated, feature_labels=[r'$\Phi$', r'$\Psi$'], ax=axes[0])
axes[1].scatter(*data_concatenated.T, s=1, alpha=0.3)
axes[1].scatter(*cluster.clustercenters.T, s=15)
axes[1].set_xlabel(r'$\Phi$')
axes[1].set_ylabel(r'$\Psi$')
pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')
fig.tight_layout()

In [None]:
# Estimate the MSM on the torsion-space discretization.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='0.001 ns')

# Right eigenvectors of the MSM; column i is drawn on panel i, colored per cluster center.
eigvec = msm.eigenvectors_right()

fig, axes = plt.subplots(2, 3, figsize=(12, 6))
for i, ax in enumerate(axes.flat):
    im = ax.scatter(*cluster.clustercenters.T, c=eigvec[:, i], s=70, cmap=mpl.cm.bwr)
    cb = plt.colorbar(im, ax=ax)
    # The cluster centers live in backbone-torsion space, so label the axes accordingly.
    ax.set_xlabel(r'$\Phi$')
    ax.set_ylabel(r'$\Psi$')
    cb.set_label('%d. right eigenvector' % (i + 1))
fig.tight_layout()

In [None]:
# Bayesian MSM on the same discrete trajectories, used for the CK test with four states.
bayesian_msm = pyemma.msm.bayesian_markov_model(
    cluster.dtrajs,
    lag=10,
    dt_traj='0.001 ns')

nstates = 4
ck = bayesian_msm.cktest(nstates)
pyemma.plots.plot_cktest(ck);

In [None]:
# Coarse-grain the MSM onto `nstates` states and show the coarse transition matrix.
coarse_msm = msm.coarse_grain(nstates)
coarse_transition_matrix = coarse_msm.transition_matrix
print(coarse_transition_matrix)

In [None]:
# Translate each frame's cluster index into the metastable state that cluster belongs to.
mtraj = coarse_msm.metastable_assignments[np.concatenate(cluster.dtrajs)]

# Left: samples colored by metastable state. Right: the coarse-grained model as a network.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
im = axes[0].scatter(*np.concatenate(data).T, s=1, c=mtraj)
# Raw strings keep the LaTeX backslashes from being parsed as (invalid) escape sequences.
axes[0].set_xlabel(r'$\Phi$')
axes[0].set_ylabel(r'$\Psi$')
cb = fig.colorbar(im, ax=axes[0])
cb.set_label('metastable state')
cb.set_ticks(list(range(nstates)))
pyemma.plots.plot_markov_model(
    coarse_msm,
    pos=np.asarray([[0, 0], [4, 0], [2, 4], [6, 4]]),
    figpadding=0.1,
    size=12,
    ax=axes[1])
axes[1].set_aspect('equal')
fig.tight_layout()

In [None]:
# Mean first passage times (MFPTs) between every ordered pair of distinct metastable sets.
# The diagonal is skipped on purpose: the passage time from a set to itself is zero, so
# computing it is wasted work, and any numerical residue returned for it would be turned
# into a spurious self-rate by the inversion below.
metastable_sets = coarse_msm.metastable_sets
mfpt = np.zeros((nstates, nstates))
for i in range(nstates):
    for j in range(nstates):
        if i != j:
            mfpt[i, j] = msm.mfpt(metastable_sets[i], metastable_sets[j])

# Rates are the inverse MFPTs; entries that are zero (the diagonal) stay zero so that
# no division by zero takes place.
rate = np.zeros_like(mfpt)
nz = mfpt.nonzero()
rate[nz] = 1.0 / mfpt[nz]

# Draw the network from the rates and label each arrow with the corresponding MFPT.
pyemma.plots.plot_network(
    rate,
    pos=np.asarray([[0, 0], [4, 0], [2, 4], [6, 4]]),
    arrow_label_format='%.1f ns',
    arrow_labels=mfpt,
    arrow_scale=3.0,
    size=12);

In [None]:
# Re-featurize alanine dipeptide with distances between the atoms picked by select_Heavy().
feat = pyemma.coordinates.featurizer(pdb)
heavy_atoms = feat.select_Heavy()
feat.add_distances(heavy_atoms)
data = pyemma.coordinates.load(files, features=feat)

# Project onto two TICA components, cluster the projection, and scan implied timescales.
tica = pyemma.coordinates.tica(data, lag=3, dim=2)
tica_out = np.concatenate(tica.get_output())
cluster = pyemma.coordinates.cluster_kmeans(tica, k=100, max_iter=50, stride=10)
its = pyemma.msm.its(
    cluster.dtrajs,
    lags=[1, 2, 5, 10, 20, 50],
    nits=4,
    errors='bayes')

# Three panels: IC histograms, projected samples with cluster centers, implied timescales.
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
ax_hist, ax_scatter, ax_its = axes
pyemma.plots.plot_feature_histograms(tica_out, feature_labels=['IC 1', 'IC 2'], ax=ax_hist)
ax_scatter.scatter(tica_out[:, 0], tica_out[:, 1], s=1, alpha=0.3)
ax_scatter.scatter(cluster.clustercenters[:, 0], cluster.clustercenters[:, 1], s=15)
ax_scatter.set_xlabel('IC 1')
ax_scatter.set_ylabel('IC 2')
pyemma.plots.plot_implied_timescales(its, ax=ax_its, units='ps')
fig.tight_layout()

In [None]:
# Estimate the MSM on the TICA-space discretization.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='0.001 ns')

# Right eigenvectors of the MSM; column i is drawn on panel i, colored per cluster center.
eigvec = msm.eigenvectors_right()

fig, axes = plt.subplots(2, 3, figsize=(12, 6))
for i, ax in enumerate(axes.flat):
    im = ax.scatter(*cluster.clustercenters.T, c=eigvec[:, i], s=70, cmap=mpl.cm.bwr)
    cb = plt.colorbar(im, ax=ax)
    # The cluster centers live in the two-dimensional TICA space, so use the IC labels
    # that the other plots of this projection use.
    ax.set_xlabel('IC 1')
    ax.set_ylabel('IC 2')
    cb.set_label('%d. right eigenvector' % (i + 1))
fig.tight_layout()

In [None]:
# Bayesian MSM on the same discrete trajectories, used for the CK test with four states.
bayesian_msm = pyemma.msm.bayesian_markov_model(
    cluster.dtrajs,
    lag=10,
    dt_traj='0.001 ns')

nstates = 4
ck = bayesian_msm.cktest(nstates)
pyemma.plots.plot_cktest(ck);

In [None]:
# Coarse-grain the MSM onto `nstates` states and show the coarse transition matrix.
coarse_msm = msm.coarse_grain(nstates)
coarse_transition_matrix = coarse_msm.transition_matrix
print(coarse_transition_matrix)

# Translate each frame's cluster index into the metastable state that cluster belongs to.
all_dtrajs = np.concatenate(cluster.dtrajs)
mtraj = coarse_msm.metastable_assignments[all_dtrajs]

# Left: projected samples colored by metastable state. Right: the coarse model as a network.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
ax_states, ax_network = axes

im = ax_states.scatter(tica_out[:, 0], tica_out[:, 1], s=1, c=mtraj)
ax_states.set_xlabel('IC 1')
ax_states.set_ylabel('IC 2')

cb = fig.colorbar(im, ax=ax_states)
cb.set_label('metastable state')
cb.set_ticks(list(range(nstates)))

node_positions = np.asarray([[0, 0], [4, 0], [2, 4], [6, 4]])
pyemma.plots.plot_markov_model(
    coarse_msm,
    pos=node_positions,
    figpadding=0.1,
    size=12,
    ax=ax_network)
ax_network.set_aspect('equal')
fig.tight_layout()

In [None]:
# Mean first passage times (MFPTs) between every ordered pair of distinct metastable sets.
# The diagonal is skipped on purpose: the passage time from a set to itself is zero, so
# computing it is wasted work, and any numerical residue returned for it would be turned
# into a spurious self-rate by the inversion below.
metastable_sets = coarse_msm.metastable_sets
mfpt = np.zeros((nstates, nstates))
for i in range(nstates):
    for j in range(nstates):
        if i != j:
            mfpt[i, j] = msm.mfpt(metastable_sets[i], metastable_sets[j])

# Rates are the inverse MFPTs; entries that are zero (the diagonal) stay zero so that
# no division by zero takes place.
rate = np.zeros_like(mfpt)
nz = mfpt.nonzero()
rate[nz] = 1.0 / mfpt[nz]

# Draw the network from the rates and label each arrow with the corresponding MFPT.
pyemma.plots.plot_network(
    rate,
    pos=np.asarray([[0, 0], [4, 0], [2, 4], [6, 4]]),
    arrow_label_format='%.1f ns',
    arrow_labels=mfpt,
    arrow_scale=3.0,
    size=12);

In [None]:
# Fetch the pentapeptide topology and trajectory files.
pdb = mdshare.fetch('pentapeptide-impl-solv.pdb', working_directory='data')
files = mdshare.fetch('pentapeptide-*-500ns-impl-solv.xtc', working_directory='data')

# Featurize with backbone and chi1 side-chain torsions (cossin=True) and load the data.
feat = pyemma.coordinates.featurizer(pdb)
feat.add_backbone_torsions(cossin=True)
feat.add_sidechain_torsions(which='chi1', cossin=True)
data = pyemma.coordinates.load(files, features=feat)

# TICA projection, k-means discretization of the projection, and implied timescales.
tica = pyemma.coordinates.tica(data, lag=20, var_cutoff=0.9)
tica_out = np.concatenate(tica.get_output())
cluster = pyemma.coordinates.cluster_kmeans(tica, k=250, max_iter=50, stride=10)
its = pyemma.msm.its(cluster.dtrajs, lags=30, nits=10, errors='bayes')

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
# Reuse the already concatenated TICA output instead of recomputing it.
pyemma.plots.plot_feature_histograms(tica_out, ax=axes[0])
# dt=10.0 only agrees with the dt_traj='0.01 ns' (= 10 ps) passed to the MSM estimation
# for this system when the unit is picoseconds, so the unit label is 'ps' rather than 'ns'.
# NOTE(review): confirm the actual frame spacing of the dataset.
pyemma.plots.plot_implied_timescales(its, ax=axes[1], dt=10.0, units='ps')
fig.tight_layout()

In [None]:
# Estimate the MSM on the pentapeptide discretization.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=12, dt_traj='0.01 ns')

# Right eigenvectors of the MSM; column i is drawn on panel i, colored per cluster center.
eigvec = msm.eigenvectors_right()

fig, axes = plt.subplots(2, 3, figsize=(12, 6))
for i, ax in enumerate(axes.flat):
    # Only the first two coordinates of the cluster centers (in TICA space) are shown.
    im = ax.scatter(*cluster.clustercenters[:, :2].T, c=eigvec[:, i], s=70, cmap=mpl.cm.bwr)
    cb = plt.colorbar(im, ax=ax)
    # Label the axes as independent components, matching the other TICA-space plots.
    ax.set_xlabel('IC 1')
    ax.set_ylabel('IC 2')
    cb.set_label('%d. right eigenvector' % (i + 1))
fig.tight_layout()

In [None]:
# Use the same lag time and dt_traj as the maximum-likelihood `msm` estimated for this
# system (lag=12, dt_traj='0.01 ns'), so that the CK test validates the model that is
# coarse-grained afterwards rather than one estimated at a different lag time.
bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=12, dt_traj='0.01 ns')

nstates = 4
pyemma.plots.plot_cktest(bayesian_msm.cktest(nstates));

In [None]:
# Coarse-grain the MSM onto `nstates` states and show the coarse transition matrix.
coarse_msm = msm.coarse_grain(nstates)
coarse_transition_matrix = coarse_msm.transition_matrix
print(coarse_transition_matrix)

# Translate each frame's cluster index into the metastable state that cluster belongs to.
all_dtrajs = np.concatenate(cluster.dtrajs)
mtraj = coarse_msm.metastable_assignments[all_dtrajs]

# Left: first two TICA coordinates colored by metastable state. Right: the coarse model.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
ax_states, ax_network = axes

im = ax_states.scatter(tica_out[:, 0], tica_out[:, 1], s=1, c=mtraj)
ax_states.set_xlabel('IC 1')
ax_states.set_ylabel('IC 2')

cb = fig.colorbar(im, ax=ax_states)
cb.set_label('metastable state')
cb.set_ticks(list(range(nstates)))

node_positions = np.asarray([[0, 0], [4, 0], [2, 4], [6, 4]])
pyemma.plots.plot_markov_model(
    coarse_msm,
    pos=node_positions,
    figpadding=0.1,
    size=12,
    ax=ax_network)
ax_network.set_aspect('equal')
fig.tight_layout()

In [None]:
# Mean first passage times (MFPTs) between every ordered pair of distinct metastable sets.
# The diagonal is skipped on purpose: the passage time from a set to itself is zero, so
# computing it is wasted work, and any numerical residue returned for it would be turned
# into a spurious self-rate by the inversion below.
metastable_sets = coarse_msm.metastable_sets
mfpt = np.zeros((nstates, nstates))
for i in range(nstates):
    for j in range(nstates):
        if i != j:
            mfpt[i, j] = msm.mfpt(metastable_sets[i], metastable_sets[j])

# Rates are the inverse MFPTs; entries that are zero (the diagonal) stay zero so that
# no division by zero takes place.
rate = np.zeros_like(mfpt)
nz = mfpt.nonzero()
rate[nz] = 1.0 / mfpt[nz]

# Draw the network from the rates and label each arrow with the corresponding MFPT.
pyemma.plots.plot_network(
    rate,
    pos=np.asarray([[0, 0], [4, 0], [2, 4], [6, 4]]),
    arrow_label_format='%.1f ns',
    arrow_labels=mfpt,
    arrow_scale=3.0,
    size=12);