In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import mdshare
import pyemma

## toy model

In [None]:
# Obtain the toy data set through mdshare and read both stored arrays:
# the continuous trajectory and the accompanying discrete trajectory.
file = mdshare.load('hmm-doublewell-2d-100k.npz')
with np.load(file) as fh:
    data, dtraj = fh['trajectory'], fh['discrete_trajectory']

# Free energy plot over the columns of the continuous trajectory.
pyemma.plots.plot_free_energy(*data.T);

### full feature space

In [None]:
# k-means discretization of the toy trajectory with 100 cluster centers.
cluster = pyemma.coordinates.cluster_kmeans(data, k=100)

# Raw samples (small, translucent markers) with the cluster centers on top.
plt.scatter(*data.T, s=0.1, alpha=0.3)
plt.scatter(*cluster.clustercenters.T, s=10);

In [None]:
# Implied timescales of the k-means discretization: two timescales (nits=2)
# at the listed lag times, computed with errors='bayes' and drawn on a
# linear y axis (ylog=False).
pyemma.plots.plot_implied_timescales(
    pyemma.msm.its(
        cluster.dtrajs,
        lags=[1, 2, 3, 5, 7, 10],
        nits=2,
        errors='bayes'),
    ylog=False);

In [None]:
# Number of metastable states; also read by the coarse-graining cell below.
nstates = 2

# MSM on the k-means discretization at a lag time of one step, followed by
# its Chapman-Kolmogorov test for `nstates` metastable states.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=1)
pyemma.plots.plot_cktest(msm.cktest(nstates));

In [None]:
# Lump the microstate MSM into `nstates` metastable sets.
coarse_msm = msm.coarse_grain(nstates)

# mfpt[i, j] is msm.mfpt() evaluated from metastable set i to metastable
# set j, i.e. on the fine-grained model rather than the coarse one.
mfpt = np.zeros((nstates, nstates))
for i, j in np.ndindex(*mfpt.shape):
    mfpt[i, j] = msm.mfpt(
        coarse_msm.metastable_sets[i], coarse_msm.metastable_sets[j])

# Network of the coarse-grained transition matrix; arrows carry the MFPT
# values formatted with one decimal.
pyemma.plots.plot_network(
    coarse_msm.transition_matrix,
    arrow_labels=mfpt,
    arrow_label_format='%.1f',
    arrow_curvature=1.6,
    max_height=3,
    size=16);

In [None]:
# Color every frame by the metastable assignment looked up through the
# frame's k-means state index.
plt.scatter(
    *data.T,
    c=coarse_msm.metastable_assignments[cluster.dtrajs[0]],
    s=0.5,
    alpha=0.3);

### dimension reduced space

In [None]:
# Reduce the toy data to a single coordinate with PCA and with TICA (lag 1).
pca = pyemma.coordinates.pca(data, dim=1)
pca_out = pca.get_output()[0]
tica = pyemma.coordinates.tica(data, dim=1, lag=1)
tica_out = tica.get_output()[0]


def _oriented_direction(vec, length=3.0):
    """Scale ``vec`` by ``length`` after fixing its overall sign.

    An eigenvector is only defined up to a global sign. Taking ``abs()`` of
    each component separately (as this cell did before) mirrors the vector
    whenever its components have opposite signs, so the plotted line would no
    longer lie along the projection axis. Instead the whole vector is flipped
    such that its largest-magnitude component is positive; for vectors whose
    components all share one sign this yields exactly the component-wise
    ``abs()`` result.

    Parameters
    ----------
    vec : array-like
        Direction vector.
    length : float, optional
        Scale factor applied to the oriented vector.

    Returns
    -------
    numpy.ndarray
        The oriented, scaled vector.
    """
    vec = np.asarray(vec, dtype=float)
    if vec[np.argmax(np.abs(vec))] < 0:
        vec = -vec
    return length * vec


# First eigenvector of each transformation, restricted to the two plotted
# input coordinates.
pca_dir = _oriented_direction(pca.eigenvectors[:2, 0])
tica_dir = _oriented_direction(tica.eigenvectors[:2, 0])

# Left panel: samples with both projection directions drawn from the origin.
# Right panel: normalized histograms of the two 1D projections.
fig, (scatter, dist) = plt.subplots(1, 2, figsize=(10, 4))
scatter.scatter(*data.T, s=0.1, alpha=0.3, color='grey')
scatter.plot([0, pca_dir[0]], [0, pca_dir[1]], label='PCA')
scatter.plot([0, tica_dir[0]], [0, tica_dir[1]], label='TICA')
scatter.legend()
dist.hist(pca_out, bins=50, alpha=0.3, density=True, label='PCA')
dist.hist(tica_out, bins=50, alpha=0.3, density=True, label='TICA')
dist.legend()
fig.tight_layout()

In [None]:
# Discretize each 1D projection on a regular grid of 50 centers spanning its
# observed range.
centers_pca = np.linspace(pca_out.min(), pca_out.max(), 50)
centers_tica = np.linspace(tica_out.min(), tica_out.max(), 50)

dtraj_pca = pyemma.coordinates.assign_to_centers(pca_out, centers=centers_pca.reshape(-1, 1))
dtraj_tica = pyemma.coordinates.assign_to_centers(tica_out, centers=centers_tica.reshape(-1, 1))

# Implied timescales of both discretizations, side by side. The loop variable
# is deliberately not called `dtraj`: that name holds the reference discrete
# trajectory read from the data file and must not be overwritten by this cell.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
for title, dtraj_proj, ax in zip(['PCA', 'TICA'], [dtraj_pca, dtraj_tica], axes.flat):
    pyemma.plots.plot_implied_timescales(
        pyemma.msm.its(
            dtraj_proj,
            lags=[1, 2, 3, 5, 7, 10],
            nits=2,
            errors='bayes'),
        ylog=False,
        ax=ax)
    # Title each panel so the figure can be read without the code.
    ax.set_title(title)
fig.tight_layout()

In [None]:
# Number of metastable states; also read by the coarse-graining cell below.
nstates = 2

# MSM on the TICA-based discretization at a lag time of one step, followed
# by its Chapman-Kolmogorov test.
msm = pyemma.msm.estimate_markov_model(dtraj_tica, lag=1)
pyemma.plots.plot_cktest(msm.cktest(nstates));

In [None]:
# Lump the microstate MSM into `nstates` metastable sets.
coarse_msm = msm.coarse_grain(nstates)

# mfpt[i, j] is msm.mfpt() evaluated from metastable set i to metastable
# set j on the fine-grained model.
mfpt = np.zeros((nstates, nstates))
for i, j in np.ndindex(*mfpt.shape):
    mfpt[i, j] = msm.mfpt(
        coarse_msm.metastable_sets[i], coarse_msm.metastable_sets[j])

# Network of the coarse-grained transition matrix with MFPT arrow labels.
pyemma.plots.plot_network(
    coarse_msm.transition_matrix,
    arrow_labels=mfpt,
    arrow_label_format='%.1f',
    arrow_curvature=1.6,
    max_height=3,
    size=16);

## alanine dipeptide

In [None]:
# Obtain the topology file and the three trajectory files (indices 0-2)
# through mdshare, then print what mdshare.load returned for them.
pdb = mdshare.load('alanine-dipeptide-nowater.pdb')
files = [mdshare.load('alanine-dipeptide-%d-250ns-nowater.dcd' % i) for i in range(3)]
print(pdb, files)

### backbone dihedrals

In [None]:
# Featurizer built on the topology, using the backbone torsions as features.
feat = pyemma.coordinates.featurizer(pdb)
feat.add_backbone_torsions()

# Featurized data for all trajectory files; the plot below joins the
# per-file results with np.concatenate.
data = pyemma.coordinates.load(files, features=feat)

# Free energy plot over the columns of the joined feature data.
pyemma.plots.plot_free_energy(*np.concatenate(data).T);

In [None]:
# k-means with 100 centers; stride=10 is passed to the clustering call.
cluster = pyemma.coordinates.cluster_kmeans(data, k=100, stride=10)

# Every 10th frame of the joined data, with the cluster centers on top.
plt.scatter(*np.concatenate(data)[::10].T, s=0.1, alpha=0.3)
plt.scatter(*cluster.clustercenters.T, s=10);

In [None]:
# Implied timescales of the current discretization: four timescales (nits=4)
# at the listed lag times, computed with errors='bayes' and drawn on a
# logarithmic y axis (ylog=True).
pyemma.plots.plot_implied_timescales(
    pyemma.msm.its(
        cluster.dtrajs,
        lags=[1, 2, 5, 10, 20, 50, 100],
        nits=4,
        errors='bayes'),
    ylog=True);

In [None]:
# Number of metastable states; also read by the coarse-graining cell below.
nstates = 4

# MSM on the backbone-torsion discretization at a lag time of 10 steps,
# followed by its Chapman-Kolmogorov test.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10)
pyemma.plots.plot_cktest(msm.cktest(nstates));

In [None]:
# Lump the microstate MSM into `nstates` metastable sets.
coarse_msm = msm.coarse_grain(nstates)

# mfpt[i, j] is msm.mfpt() evaluated from metastable set i to metastable
# set j on the fine-grained model.
mfpt = np.zeros((nstates, nstates))
for i, j in np.ndindex(*mfpt.shape):
    mfpt[i, j] = msm.mfpt(
        coarse_msm.metastable_sets[i], coarse_msm.metastable_sets[j])

# Network of the coarse-grained transition matrix with MFPT arrow labels.
pyemma.plots.plot_network(
    coarse_msm.transition_matrix,
    arrow_labels=mfpt,
    arrow_label_format='%.1f',
    size=16);

In [None]:
# All frames of the joined feature data, colored by the metastable
# assignment looked up through each frame's cluster index.
plt.scatter(
    *np.concatenate(data).T,
    c=coarse_msm.metastable_assignments[np.concatenate(cluster.dtrajs)],
    s=0.5,
    alpha=0.3);

### heavy atom positions (aligned)

In [None]:
# Fresh featurizer whose features are the atoms returned by select_Heavy().
feat = pyemma.coordinates.featurizer(pdb)
feat.add_selection(feat.select_Heavy())

# Replaces the previous `data` with the new featurization of the same files.
data = pyemma.coordinates.load(files, features=feat)

In [None]:
# Two-dimensional PCA. Note: `pca` here is the result of get_output(),
# not the estimator object.
pca = pyemma.coordinates.pca(data, dim=2).get_output()

# Free energy plot over the columns of the joined PCA output.
pyemma.plots.plot_free_energy(*np.concatenate(pca).T);

In [None]:
# k-means with 100 centers on the PCA output; stride=10 is passed to the
# clustering call.
cluster = pyemma.coordinates.cluster_kmeans(pca, k=100, stride=10)

# Every 10th frame of the joined PCA output, with the cluster centers on top.
plt.scatter(*np.concatenate(pca)[::10].T, s=0.1, alpha=0.3)
plt.scatter(*cluster.clustercenters.T, s=10);

In [None]:
# Implied timescales of the current discretization: four timescales (nits=4)
# at the listed lag times, computed with errors='bayes' and drawn on a
# logarithmic y axis (ylog=True).
pyemma.plots.plot_implied_timescales(
    pyemma.msm.its(
        cluster.dtrajs,
        lags=[1, 2, 5, 10, 20, 50, 100],
        nits=4,
        errors='bayes'),
    ylog=True);

In [None]:
# Two-dimensional TICA at lag 1. Note: `tica` here is the result of
# get_output(), not the estimator object.
tica = pyemma.coordinates.tica(data, dim=2, lag=1).get_output()

# Free energy plot over the columns of the joined TICA output.
pyemma.plots.plot_free_energy(*np.concatenate(tica).T);

In [None]:
# k-means with 100 centers on the TICA output; stride=10 is passed to the
# clustering call. This replaces the PCA-based `cluster` from above.
cluster = pyemma.coordinates.cluster_kmeans(tica, k=100, stride=10)

# Every 10th frame of the joined TICA output, with the cluster centers on top.
plt.scatter(*np.concatenate(tica)[::10].T, s=0.1, alpha=0.3)
plt.scatter(*cluster.clustercenters.T, s=10);

In [None]:
# Implied timescales of the current discretization: four timescales (nits=4)
# at the listed lag times, computed with errors='bayes' and drawn on a
# logarithmic y axis (ylog=True).
pyemma.plots.plot_implied_timescales(
    pyemma.msm.its(
        cluster.dtrajs,
        lags=[1, 2, 5, 10, 20, 50, 100],
        nits=4,
        errors='bayes'),
    ylog=True);

In [None]:
# Number of metastable states; also read by the coarse-graining cell below.
nstates = 4

# MSM on the current (TICA-based) discretization at a lag time of 10 steps,
# followed by its Chapman-Kolmogorov test.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10)
pyemma.plots.plot_cktest(msm.cktest(nstates));

In [None]:
# Lump the microstate MSM into `nstates` metastable sets.
coarse_msm = msm.coarse_grain(nstates)

# mfpt[i, j] is msm.mfpt() evaluated from metastable set i to metastable
# set j on the fine-grained model.
mfpt = np.zeros((nstates, nstates))
for i, j in np.ndindex(*mfpt.shape):
    mfpt[i, j] = msm.mfpt(
        coarse_msm.metastable_sets[i], coarse_msm.metastable_sets[j])

# Network of the coarse-grained transition matrix with MFPT arrow labels.
pyemma.plots.plot_network(
    coarse_msm.transition_matrix,
    arrow_labels=mfpt,
    arrow_label_format='%.1f',
    size=16);

In [None]:
# All frames of the joined TICA output, colored by the metastable
# assignment looked up through each frame's cluster index.
plt.scatter(
    *np.concatenate(tica).T,
    c=coarse_msm.metastable_assignments[np.concatenate(cluster.dtrajs)],
    s=0.5,
    alpha=0.3);

### heavy atom distances

In [None]:
# Fresh featurizer whose features are distances defined on the atoms
# returned by select_Heavy().
feat = pyemma.coordinates.featurizer(pdb)
feat.add_distances(feat.select_Heavy())

# Replaces the previous `data` with the new featurization of the same files.
data = pyemma.coordinates.load(files, features=feat)

In [None]:
# Two-dimensional PCA. Note: `pca` here is the result of get_output(),
# not the estimator object.
pca = pyemma.coordinates.pca(data, dim=2).get_output()

# Free energy plot over the columns of the joined PCA output.
pyemma.plots.plot_free_energy(*np.concatenate(pca).T);

In [None]:
# k-means with 100 centers on the PCA output; stride=10 is passed to the
# clustering call.
cluster = pyemma.coordinates.cluster_kmeans(pca, k=100, stride=10)

# Every 10th frame of the joined PCA output, with the cluster centers on top.
plt.scatter(*np.concatenate(pca)[::10].T, s=0.1, alpha=0.3)
plt.scatter(*cluster.clustercenters.T, s=10);

In [None]:
# Implied timescales of the current discretization: four timescales (nits=4)
# at the listed lag times, computed with errors='bayes' and drawn on a
# logarithmic y axis (ylog=True).
pyemma.plots.plot_implied_timescales(
    pyemma.msm.its(
        cluster.dtrajs,
        lags=[1, 2, 5, 10, 20, 50, 100],
        nits=4,
        errors='bayes'),
    ylog=True);

In [None]:
# Two-dimensional TICA at lag 1. Note: `tica` here is the result of
# get_output(), not the estimator object.
tica = pyemma.coordinates.tica(data, dim=2, lag=1).get_output()

# Free energy plot over the columns of the joined TICA output.
pyemma.plots.plot_free_energy(*np.concatenate(tica).T);

In [None]:
# k-means with 100 centers on the TICA output; stride=10 is passed to the
# clustering call. This replaces the PCA-based `cluster` from above.
cluster = pyemma.coordinates.cluster_kmeans(tica, k=100, stride=10)

# Every 10th frame of the joined TICA output, with the cluster centers on top.
plt.scatter(*np.concatenate(tica)[::10].T, s=0.1, alpha=0.3)
plt.scatter(*cluster.clustercenters.T, s=10);

In [None]:
# Implied timescales of the current discretization: four timescales (nits=4)
# at the listed lag times, computed with errors='bayes' and drawn on a
# logarithmic y axis (ylog=True).
pyemma.plots.plot_implied_timescales(
    pyemma.msm.its(
        cluster.dtrajs,
        lags=[1, 2, 5, 10, 20, 50, 100],
        nits=4,
        errors='bayes'),
    ylog=True);

In [None]:
# Number of metastable states; also read by the coarse-graining cell below.
nstates = 4

# MSM on the current (TICA-based) discretization at a lag time of 10 steps,
# followed by its Chapman-Kolmogorov test.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10)
pyemma.plots.plot_cktest(msm.cktest(nstates));

In [None]:
# Lump the microstate MSM into `nstates` metastable sets.
coarse_msm = msm.coarse_grain(nstates)

# mfpt[i, j] is msm.mfpt() evaluated from metastable set i to metastable
# set j on the fine-grained model.
mfpt = np.zeros((nstates, nstates))
for i, j in np.ndindex(*mfpt.shape):
    mfpt[i, j] = msm.mfpt(
        coarse_msm.metastable_sets[i], coarse_msm.metastable_sets[j])

# Network of the coarse-grained transition matrix with MFPT arrow labels.
pyemma.plots.plot_network(
    coarse_msm.transition_matrix,
    arrow_labels=mfpt,
    arrow_label_format='%.1f',
    size=16);

In [None]:
# All frames of the joined TICA output, colored by the metastable
# assignment looked up through each frame's cluster index.
plt.scatter(
    *np.concatenate(tica).T,
    c=coarse_msm.metastable_assignments[np.concatenate(cluster.dtrajs)],
    s=0.5,
    alpha=0.3);

## pentapeptide

In [None]:
# Obtain the topology file and the 25 trajectory files (zero-padded indices
# 00-24) through mdshare, then print what mdshare.load returned for them.
pdb = mdshare.load('pentapeptide-impl-solv.pdb')
files = [mdshare.load('pentapeptide-%02d-500ns-impl-solv.xtc' % i) for i in range(25)]
print(pdb, files)

### backbone dihedrals

In [None]:
# Features: backbone torsions plus the chi1 side-chain torsions, both
# requested with cossin=True.
feat = pyemma.coordinates.featurizer(pdb)
feat.add_backbone_torsions(cossin=True)
feat.add_sidechain_torsions(which='chi1', cossin=True)

# NOTE(review): unlike the alanine dipeptide section, this uses source()
# rather than load(); presumably the data is then streamed from disk instead
# of being held in memory. Confirm against the pyemma documentation.
data = pyemma.coordinates.source(files, features=feat)

In [None]:
# PCA on the torsion features; the output is requested with stride=10.
pca = pyemma.coordinates.pca(data)
pca_out = pca.get_output(stride=10)

# One histogram per principal component, labeled "PC 01", "PC 02", ...
pyemma.plots.plot_feature_histograms(
    np.concatenate(pca_out),
    feature_labels=['PC %02d' % idx for idx in range(1, pca.dimension() + 1)]);

In [None]:
# TICA at lag 20 with var_cutoff=0.9; the output is requested with stride=10.
tica = pyemma.coordinates.tica(data, lag=20, var_cutoff=0.9)
tica_out = tica.get_output(stride=10)

# One histogram per independent component, labeled "IC 01", "IC 02", ...
pyemma.plots.plot_feature_histograms(
    np.concatenate(tica_out),
    feature_labels=['IC %02d' % idx for idx in range(1, tica.dimension() + 1)]);

In [None]:
# k-means with 250 centers, fed with the TICA object itself (not `tica_out`);
# stride=5 is passed to the clustering call.
cluster = pyemma.coordinates.cluster_kmeans(tica, k=250, stride=5)

# Implied timescales: four timescales (nits=4) at the listed lag times,
# computed with errors='bayes' and drawn on a logarithmic y axis.
pyemma.plots.plot_implied_timescales(
    pyemma.msm.its(
        cluster.dtrajs,
        lags=[1, 2, 3, 5, 7, 10, 13, 16, 20, 25],
        nits=4,
        errors='bayes'),
    ylog=True);

In [None]:
# Number of metastable states for the Chapman-Kolmogorov test.
nstates = 4

# MSM at a lag time of 12 steps and its Chapman-Kolmogorov test. The next
# cell re-estimates `msm` at lag=10, so this model is not used downstream.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=12)
pyemma.plots.plot_cktest(msm.cktest(nstates));

In [None]:
# Number of metastable states; also read by the coarse-graining cell below.
nstates = 4

# MSM at a lag time of 10 steps and its Chapman-Kolmogorov test. This
# assignment overwrites any `msm` estimated earlier.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10)
pyemma.plots.plot_cktest(msm.cktest(nstates));

In [None]:
# Lump the microstate MSM into `nstates` metastable sets.
coarse_msm = msm.coarse_grain(nstates)

# mfpt[i, j] is msm.mfpt() evaluated from metastable set i to metastable
# set j on the fine-grained model.
mfpt = np.zeros((nstates, nstates))
for i, j in np.ndindex(*mfpt.shape):
    mfpt[i, j] = msm.mfpt(
        coarse_msm.metastable_sets[i], coarse_msm.metastable_sets[j])

# Network of the coarse-grained transition matrix with MFPT arrow labels.
pyemma.plots.plot_network(
    coarse_msm.transition_matrix,
    arrow_labels=mfpt,
    arrow_label_format='%.1f',
    size=16);

In [None]:
# First two columns of the joined TICA output (requested without a stride),
# colored by the metastable assignment looked up through each frame's
# cluster index.
plt.scatter(
    *np.concatenate(tica.get_output())[:, :2].T,
    c=coarse_msm.metastable_assignments[np.concatenate(cluster.dtrajs)],
    s=0.5,
    alpha=0.3);