# 06 - expectations and observables

In [None]:
%%javascript
// Ask the Jupyter front end to load the 'rubberband' and 'exercise2'
// notebook extensions (each call loads one extension's main module).
Jupyter.utils.load_extensions('rubberband/main')
Jupyter.utils.load_extensions('exercise2/main')

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import mdshare
import pyemma

## Case 1: preprocessed, two-dimensional data (toy model)

In [None]:
# Fetch the 2D double-well toy trajectory into ./data and read it from the archive.
file = mdshare.fetch(
    'hmm-doublewell-2d-100k.npz',
    working_directory='data',
)
with np.load(file) as fh:
    data = fh['trajectory']

# Discretize with k-means, then scan implied timescales over a few lag times
# (Bayesian error estimates requested via errors='bayes').
cluster = pyemma.coordinates.cluster_kmeans(data, k=50, max_iter=50)
its = pyemma.msm.its(
    cluster.dtrajs,
    lags=[1, 2, 3, 5, 7, 10],
    nits=3,
    errors='bayes',
)

# Three panels: feature histograms | samples with cluster centers | implied timescales.
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
hist_ax, scatter_ax, its_ax = axes

pyemma.plots.plot_feature_histograms(
    data,
    feature_labels=['$x$', '$y$'],
    ax=hist_ax,
)

# Raw samples first, cluster centers drawn on top.
for points, marker_size, extra in (
        (data, 1, {'alpha': 0.3}),
        (cluster.clustercenters, 15, {})):
    scatter_ax.scatter(*points.T, s=marker_size, **extra)
scatter_ax.set_xlabel('$x$')
scatter_ax.set_ylabel('$y$')

pyemma.plots.plot_implied_timescales(its, ylog=False, ax=its_ax)
fig.tight_layout()

In [None]:
# Maximum-likelihood MSM on the k-means discretization at lag time 1 (in steps).
msm = pyemma.msm.estimate_markov_model(
    cluster.dtrajs,
    lag=1,
)

# Report how much of the data the estimated model actually uses.
print('fraction of states used = ', msm.active_state_fraction)
print('fraction of counts used = ', msm.active_count_fraction)

# Chapman-Kolmogorov test with two metastable sets, then coarse-grain
# the model to the same number of states.
nstates = 2
ck_result = msm.cktest(nstates)
pyemma.plots.plot_cktest(ck_result)

coarse_msm = msm.coarse_grain(nstates)

## Case 2: low-dimensional molecular dynamics data (alanine dipeptide)

In [None]:
# Alanine dipeptide: fetch the topology and all trajectory files matching the
# glob pattern into ./data.
pdb = mdshare.fetch('alanine-dipeptide-nowater.pdb', working_directory='data')
files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.dcd', working_directory='data')

# Featurize with the backbone torsion angles and load every trajectory.
feat = pyemma.coordinates.featurizer(pdb)
feat.add_backbone_torsions()
data = pyemma.coordinates.load(files, features=feat)

# Discretize with k-means (every 10th frame is used for clustering) and scan
# implied timescales with Bayesian error estimates.
cluster = pyemma.coordinates.cluster_kmeans(data, k=100, max_iter=50, stride=10)
its = pyemma.msm.its(cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')

# Concatenate the per-trajectory arrays once and reuse the result for both plots.
data_concatenated = np.concatenate(data)

fig, axes = plt.subplots(1, 3, figsize=(12, 3))
# Raw strings keep the LaTeX backslashes literal; '\P' in a normal string is an
# invalid escape sequence and triggers a warning on recent Python versions.
pyemma.plots.plot_feature_histograms(
    data_concatenated, feature_labels=[r'$\Phi$', r'$\Psi$'], ax=axes[0])
axes[1].scatter(*data_concatenated.T, s=1, alpha=0.3)
axes[1].scatter(*cluster.clustercenters.T, s=15)
axes[1].set_xlabel(r'$\Phi$')
axes[1].set_ylabel(r'$\Psi$')
pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')
fig.tight_layout()

In [None]:
# Maximum-likelihood MSM at lag 10 steps; dt_traj attaches the physical time
# per trajectory step to the model.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='0.001 ns')

print('fraction of states used = ', msm.active_state_fraction)
print('fraction of counts used = ', msm.active_count_fraction)

# Bayesian MSM on the same discrete trajectories. It is given the same lag and
# the same dt_traj as `msm` so both models report times in the same unit.
bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=10, dt_traj='0.001 ns')

# Chapman-Kolmogorov test (with the Bayesian sample for error bars) using four
# metastable sets, then coarse-grain the ML model to the same number of states.
nstates = 4
pyemma.plots.plot_cktest(bayesian_msm.cktest(nstates))

coarse_msm = msm.coarse_grain(nstates)

## Case 3: another molecular dynamics data set (pentapeptide)

In [None]:
# Pentapeptide: fetch the topology and all trajectory files matching the glob
# pattern into ./data.
pdb = mdshare.fetch('pentapeptide-impl-solv.pdb', working_directory='data')
files = mdshare.fetch('pentapeptide-*-500ns-impl-solv.xtc', working_directory='data')

# Featurize with backbone and chi1 side-chain torsions (cos/sin encoded).
feat = pyemma.coordinates.featurizer(pdb)
feat.add_backbone_torsions(cossin=True)
feat.add_sidechain_torsions(which='chi1', cossin=True)
# Pass the flat list of files: a nested list ([files]) is interpreted by PyEMMA
# as fragments of ONE trajectory, which would join the independent simulations
# end to end and create artificial time-lagged pairs at the joins.
data = pyemma.coordinates.load(files, features=feat)

# TICA projection, k-means discretization (every 10th frame used for
# clustering) and implied timescales with Bayesian error estimates.
tica = pyemma.coordinates.tica(data, lag=20, var_cutoff=0.9)
# Pool the TICA output of all trajectories for the histogram plot.
tica_out = np.concatenate(tica.get_output())
cluster = pyemma.coordinates.cluster_kmeans(tica, k=250, max_iter=50, stride=10)
its = pyemma.msm.its(cluster.dtrajs, lags=30, nits=10, errors='bayes')

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
pyemma.plots.plot_feature_histograms(tica_out, ax=axes[0])
# NOTE(review): the pentapeptide data set is documented as saved every 0.1 ns,
# so one step corresponds to dt=0.1 in units of ns - confirm against the data.
pyemma.plots.plot_implied_timescales(its, ax=axes[1], dt=0.1, units='ns')
fig.tight_layout()

In [None]:
# Maximum-likelihood MSM at lag 12 steps.
# NOTE(review): the pentapeptide data set is documented as saved every 0.1 ns,
# hence dt_traj='0.1 ns' - confirm against the data.
msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=12, dt_traj='0.1 ns')

print('fraction of states used = ', msm.active_state_fraction)
print('fraction of counts used = ', msm.active_count_fraction)

# Bayesian MSM on the same discrete trajectories. It is given the same lag and
# the same dt_traj as `msm` so both models report times in the same unit.
bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=12, dt_traj='0.1 ns')

# Chapman-Kolmogorov test (with the Bayesian sample for error bars) using four
# metastable sets, then coarse-grain the ML model to the same number of states.
nstates = 4
pyemma.plots.plot_cktest(bayesian_msm.cktest(nstates))

coarse_msm = msm.coarse_grain(nstates)