In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import mdshare
import pyemma

## Loading MD data and repeating the clustering step

In [None]:
# Fetch the alanine dipeptide topology and the matching trajectory files,
# using 'data' as the working directory.
pdb = mdshare.fetch('alanine-dipeptide-nowater.pdb', working_directory='data')
files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.xtc', working_directory='data')

# Describe every frame by its backbone torsion angles.
# NOTE(review): periodic=False is kept from the original call; presumably it
# disables periodic-box handling for this solvent-free system -- confirm
# against the PyEMMA featurizer documentation.
feat = pyemma.coordinates.featurizer(pdb)
feat.add_backbone_torsions(periodic=False)
data = pyemma.coordinates.load(files, features=feat)

# Discretize the torsion space with k-means (200 centers). k-means starts from
# a random initialization, so the seed is pinned: without it every
# "Restart & Run All" produces a different discretization and therefore
# slightly different downstream timescales.
cluster = pyemma.coordinates.cluster_kmeans(
    data, k=200, max_iter=50, stride=10, fixed_seed=1)

In [None]:
# Free energy surface over the two torsion features, with the k-means cluster
# centers overlaid as black dots.
fig, ax = plt.subplots()
pyemma.plots.plot_free_energy(*np.concatenate(data).T, ax=ax)
ax.scatter(*cluster.clustercenters.T, s=15, c='k')
# Raw strings are required here: '\P' is not a valid Python escape sequence,
# so the non-raw literals emit an invalid-escape warning on current Python
# versions and are slated to become a syntax error.
ax.set_xlabel(r'$\Phi$ / rad')
ax.set_ylabel(r'$\Psi$ / rad');

## Implied timescales and lag time selection

In [None]:
# Implied timescales from the discrete trajectories at a series of lag times
# (given in trajectory steps); nits=4 limits the result to four timescales.
its = pyemma.msm.its(
    cluster.dtrajs,
    lags=[1, 2, 5, 10, 20, 50],
    nits=4,
)

$$T(n \tau) = (T(\tau))^n$$

$$\lambda(n \tau) = (\lambda(\tau))^n$$

$$ \mathrm{ITS}(n \tau) = - \frac{n \tau}{\ln \lambda(n \tau)} = - \frac{n \tau}{\ln \left[ (\lambda(\tau))^n \right]} = - \frac{n \tau}{n \ln \lambda(\tau)} = - \frac{\tau}{\ln \lambda(\tau)} = \mathrm{ITS}(\tau) $$

In [None]:
# Plot the implied timescales against the lag time, with axes labelled in ps.
# The trailing semicolon suppresses the return-value repr, matching the other
# plotting cells in this notebook.
pyemma.plots.plot_implied_timescales(its, units='ps');

## Error bars for the timescales

In [None]:
# Same implied timescale scan as before, now with errors='bayes' so that the
# result also carries uncertainty estimates for the timescales.
its = pyemma.msm.its(
    cluster.dtrajs,
    lags=[1, 2, 5, 10, 20, 50],
    nits=4,
    errors='bayes',
)

In [None]:
# Plot the implied timescales (now estimated with errors='bayes') in ps.
# The trailing semicolon suppresses the return-value repr, matching the other
# plotting cells in this notebook.
pyemma.plots.plot_implied_timescales(its, units='ps');

## Effect of the discretization on the implied timescales

In [None]:
def cluster_and_its(n_centers):
    """Cluster ``data`` with k-means and scan the implied timescales.

    Parameters
    ----------
    n_centers : int
        Number of k-means cluster centers.

    Returns
    -------
    tuple
        ``(clustering, timescales)``: the object returned by
        ``pyemma.coordinates.cluster_kmeans`` and the object returned by
        ``pyemma.msm.its`` for that discretization.
    """
    # fixed_seed pins the random k-means initialization so that re-running the
    # notebook reproduces the same discretizations.
    clustering = pyemma.coordinates.cluster_kmeans(
        data, k=n_centers, max_iter=50, stride=10, fixed_seed=1)
    timescales = pyemma.msm.its(
        clustering.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')
    return clustering, timescales


# Repeat clustering + ITS estimation for coarser discretizations. The
# individual names are kept because later cells refer to them directly.
cluster_20, its_20 = cluster_and_its(20)
cluster_50, its_50 = cluster_and_its(50)
cluster_100, its_100 = cluster_and_its(100)

In [None]:
# One column per discretization: cluster centers on top of the sample density
# (top row) and the corresponding implied timescales (bottom row).
fig, axes = plt.subplots(2, 3, figsize=(12, 6))

# Concatenate the trajectories once; the original recomputed this per panel.
torsions = np.concatenate(data)

panels = [(cluster_20, its_20), (cluster_50, its_50), (cluster_100, its_100)]
for col, (clustering, timescales) in enumerate(panels):
    pyemma.plots.plot_density(*torsions.T, ax=axes[0, col], cbar=False, alpha=0.1)
    axes[0, col].scatter(*clustering.clustercenters.T, s=15, c='C1')
    # Label each column so the panels can be told apart without reading the code.
    axes[0, col].set_title('k = {} centers'.format(len(clustering.clustercenters)))
    pyemma.plots.plot_implied_timescales(timescales, ax=axes[1, col], units='ps')
fig.tight_layout()

## Estimating the maximum likelihood Markov model

In [None]:
# Maximum likelihood MSM on the 200-center discretization at a lag of 10
# trajectory steps; dt_traj attaches the physical time per step ('1 ps').
msm = pyemma.msm.estimate_markov_model(
    cluster.dtrajs,
    lag=10,
    dt_traj='1 ps',
)

In [None]:
# Display the first three (k=3) implied timescales of the maximum likelihood
# MSM as the cell output.
msm.timescales(k=3)

## Estimating the Bayesian Markov model

In [None]:
# Bayesian MSM with the same discretization, lag time and time unit as the
# maximum likelihood model, so the two estimates are directly comparable.
bayesian_msm = pyemma.msm.bayesian_markov_model(
    cluster.dtrajs,
    lag=10,
    dt_traj='1 ps',
)

In [None]:
# Evaluate 'timescales' (with k=3) over the Bayesian MSM via sample_conf and
# display the result as the cell output.
# NOTE(review): presumably this yields lower/upper confidence bounds for the
# three timescales -- confirm against the PyEMMA sample_conf documentation.
bayesian_msm.sample_conf('timescales', k=3)

## The Chapman-Kolmogorov test

In [None]:
# Chapman-Kolmogorov test of the maximum likelihood MSM.
# NOTE(review): the argument 4 is presumably the number of metastable sets
# used for the test -- confirm against the PyEMMA cktest documentation.
ck_result = msm.cktest(4)
pyemma.plots.plot_cktest(ck_result, units='ps');

## Persisting and restoring estimators

In [None]:
# Store the 50-center clustering in 'nb3.pyemma' under the name 'kmeans_k50'.
# overwrite=True mirrors the msm.save call in the next cell: without it, a
# second run of the notebook fails because the model name already exists in
# the file.
cluster_50.save('nb3.pyemma', model_name='kmeans_k50', overwrite=True)

In [None]:
# Store the maximum likelihood MSM in the same file under the name 'msm',
# replacing any model previously saved under that name.
msm.save(
    'nb3.pyemma',
    model_name='msm',
    overwrite=True,
)

In [None]:
# Load the object stored under 'kmeans_k50' back from 'nb3.pyemma'.
cluster_restored = pyemma.load('nb3.pyemma', model_name='kmeans_k50')

In [None]:
# Load the object stored under 'msm' back from 'nb3.pyemma'.
msm_restored = pyemma.load('nb3.pyemma', model_name='msm')

In [None]:
# Same query as on the original `msm` (timescales with k=3), now on the
# restored model, so the two cell outputs can be compared.
msm_restored.timescales(k=3)

In [None]:
# Show the names of all models currently stored in 'nb3.pyemma'.
stored_models = pyemma.list_models('nb3.pyemma')
stored_models.keys()