Skip to content
This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

Commit

Permalink
[doc] use fix_docs decorator in more estimators, fixed formatting, re…
Browse files Browse the repository at this point in the history
…moved references to EstimatedMSM (#864)

* [doc] use fix_docs decorator in more estimators, fixed formatting, removed references to EstimatedMSM
  • Loading branch information
marscher committed Jul 8, 2016
1 parent a3b91dc commit 0b6f5e9
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 477 deletions.
9 changes: 5 additions & 4 deletions pyemma/coordinates/clustering/interface.py
Expand Up @@ -26,18 +26,19 @@

import os

import numpy as np
from six.moves import range, zip

from pyemma._base.model import Model
from pyemma._ext.sklearn.base import ClusterMixin
from pyemma.coordinates.clustering import regspatial
from pyemma.coordinates.transform.transformer import StreamingTransformer
from pyemma.util.annotators import fix_docs
from pyemma.util.discrete_trajectories import index_states, sample_indexes_by_state
from pyemma.util.files import mkdir_p


from six.moves import range, zip
import numpy as np


@fix_docs
class AbstractClustering(StreamingTransformer, Model, ClusterMixin):

"""
Expand Down
3 changes: 2 additions & 1 deletion pyemma/coordinates/data/_base/datasource.py
Expand Up @@ -233,7 +233,8 @@ def n_frames_total(self, stride=1):
Returns
-------
int : n_frames_total
n_frames_total : int
total number of frames.
"""
if isinstance(stride, np.ndarray):
return stride.shape[0]
Expand Down
4 changes: 0 additions & 4 deletions pyemma/msm/api.py
Expand Up @@ -986,10 +986,6 @@ def estimate_hidden_markov_model(dtrajs, nstates, lag, reversible=True, stationa
>>> print(mm.mfpt(0, 1)) # doctest: +ELLIPSIS
6.3...
See also
--------
EstimatedHMSM : A discrete HMM object that has been estimated from data
.. autoclass:: pyemma.msm.estimators.maximum_likelihood_hmsm.MaximumLikelihoodHMSM
:members:
Expand Down
57 changes: 28 additions & 29 deletions pyemma/msm/estimators/bayesian_hmsm.py
Expand Up @@ -17,19 +17,22 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import, print_function
from six.moves import range

import numpy as _np
from pyemma.util.types import ensure_dtraj_list
from six.moves import range

from pyemma._base.progress import ProgressReporter
from pyemma.msm.estimators.maximum_likelihood_hmsm import MaximumLikelihoodHMSM as _MaximumLikelihoodHMSM
from pyemma.msm.models.hmsm import HMSM as _HMSM
from pyemma.msm.models.hmsm_sampled import SampledHMSM as _SampledHMSM
from pyemma._base.progress import ProgressReporter
from pyemma.util.annotators import fix_docs
from pyemma.util.types import ensure_dtraj_list
from pyemma.util.units import TimeUnit

__author__ = 'noe'


@fix_docs
class BayesianHMSM(_MaximumLikelihoodHMSM, _SampledHMSM, ProgressReporter):
r"""Estimator for a Bayesian Hidden Markov state model"""

Expand Down Expand Up @@ -64,17 +67,19 @@ def __init__(self, nstates=2, lag=1, stride='effective',
the stationary distribution of the transition matrix).
Currently implements different versions of the Dirichlet prior that
is conjugate to the Dirichlet distribution of p0. p0 is sampled from:
.. math:
p0 \sim \prod_i (p0)_i^{a_i + n_i - 1}
where :math:`n_i` are the number of times a hidden trajectory was in
state :math:`i` at time step 0 and :math:`a_i` is the prior count.
Following options are available:
| 'mixed' (default), :math:`a_i = p_{0,init}`, where :math:`p_{0,init}`
is the initial distribution of initial_model.
| ndarray(n) or float,
the given array will be used as A.
| 'uniform', :math:`a_i = 1`
| None, :math:`a_i = 0`. This option ensures coincidence between
* 'mixed' (default), :math:`a_i = p_{0,init}`, where :math:`p_{0,init}`
is the initial distribution of initial_model.
* ndarray(n) or float,
the given array will be used as A.
* 'uniform', :math:`a_i = 1`
* None, :math:`a_i = 0`. This option ensures coincidence between
sample mean an MLE. Will sooner or later lead to sampling problems,
because as soon as zero trajectories are drawn from a given state,
the sampler cannot recover and that state will never serve as a starting
Expand All @@ -86,22 +91,24 @@ def __init__(self, nstates=2, lag=1, stride='effective',
Currently implements Dirichlet priors if reversible=False and reversible
transition matrix priors as described in [3]_ if reversible=True. For the
nonreversible case the posterior of transition matrix :math:`P` is:
.. math:
P \sim \prod_{i,j} p_{ij}^{b_{ij} + c_{ij} - 1}
where :math:`c_{ij}` are the number of transitions found for hidden
trajectories and :math:`b_{ij}` are prior counts.
| 'mixed' (default), :math:`b_{ij} = p_{ij,init}`, where :math:`p_{ij,init}`
is the transition matrix of initial_model. That means one prior
count will be used per row.
| ndarray(n, n) or broadcastable,
the given array will be used as B.
| 'uniform', :math:`b_{ij} = 1`
| None, :math:`b_ij = 0`. This option ensures coincidence between
sample mean an MLE. Will sooner or later lead to sampling problems,
because as soon as a transition :math:`ij` will not occur in a
sample, the sampler cannot recover and that transition will never
be sampled again. This option is not recommended unless you have
a small HMM and a lot of data.
* 'mixed' (default), :math:`b_{ij} = p_{ij,init}`, where :math:`p_{ij,init}`
is the transition matrix of initial_model. That means one prior
count will be used per row.
* ndarray(n, n) or broadcastable,
the given array will be used as B.
* 'uniform', :math:`b_{ij} = 1`
* None, :math:`b_ij = 0`. This option ensures coincidence between
sample mean an MLE. Will sooner or later lead to sampling problems,
because as soon as a transition :math:`ij` will not occur in a
sample, the sampler cannot recover and that transition will never
be sampled again. This option is not recommended unless you have
a small HMM and a lot of data.
init_hmsm : :class:`HMSM <pyemma.msm.models.HMSM>`, default=None
Single-point estimate of HMSM object around which errors will be evaluated.
If None is give an initial estimate will be automatically generated using the
Expand Down Expand Up @@ -140,14 +147,6 @@ def __init__(self, nstates=2, lag=1, stride='effective',
self.show_progress = show_progress

def _estimate(self, dtrajs):
"""
Return
------
hmsm : :class:`EstimatedHMSM <pyemma.msm.estimators.hmsm_estimated.EstimatedHMSM>`
Estimated Hidden Markov state model
"""
# ensure right format
dtrajs = ensure_dtraj_list(dtrajs)

Expand Down
21 changes: 15 additions & 6 deletions pyemma/msm/estimators/bayesian_msm.py
Expand Up @@ -17,17 +17,20 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import
from six.moves import range

__author__ = 'noe'
from six.moves import range

from pyemma.msm.models.msm import MSM as _MSM
from pyemma._base.progress import ProgressReporter
from pyemma.msm.estimators.maximum_likelihood_msm import MaximumLikelihoodMSM as _MLMSM
from pyemma.msm.models.msm import MSM as _MSM
from pyemma.msm.models.msm_sampled import SampledMSM as _SampledMSM
from pyemma.util.annotators import fix_docs
from pyemma.util.types import ensure_dtraj_list
from pyemma._base.progress import ProgressReporter

__author__ = 'noe'


@fix_docs
class BayesianMSM(_MLMSM, _SampledMSM, ProgressReporter):
r"""Bayesian Markov state model estimator"""

Expand Down Expand Up @@ -139,7 +142,7 @@ def __init__(self, lag=1, nsamples=100, nsteps=None, reversible=True,
self.conf = conf
self.show_progress = show_progress

def _estimate(self, dtrajs):
def estimate(self, dtrajs, **kw):
"""
Parameters
Expand All @@ -148,12 +151,18 @@ def _estimate(self, dtrajs):
discrete trajectories, stored as integer ndarrays (arbitrary size)
or a single ndarray for only one trajectory.
kw : dict
Other Parameters. See documentation of class.
Return
------
hmsm : :class:`EstimatedHMSM <pyemma.msm.estimators.hmsm_estimated.EstimatedHMSM>`
msm : :class:`BayesianMSM <pyemma.msm.BayesianMSM>`
Estimated Hidden Markov state model
"""
return super(BayesianMSM, self).estimate(dtrajs, **kw)

def _estimate(self, dtrajs):
# ensure right format
dtrajs = ensure_dtraj_list(dtrajs)
# conduct MLE estimation (superclass) first
Expand Down
5 changes: 3 additions & 2 deletions pyemma/msm/estimators/implied_timescales.py
Expand Up @@ -419,7 +419,8 @@ def fraction_of_frames(self):
**Be aware**: this fraction refers to the **full count matrix**, and not that of the largest connected
set. Hence, the output is not necessarily the **active** fraction. For that, use the
:py:func:`EstimatedMSM.active_count_fraction` function of the :py:class:`EstimatedMSM` class object.
:py:meth:`activte_count_fraction <pyemma.msm.MaximumLikelihoodMSM.active_count_fraction>` function of
the :py:class:`pyemma.msm.MaximumLikelihoodMSM` class object or for HMM respectively.
"""
# TODO : implement fraction_of_active_frames
# Are we computing this for the first time?
Expand All @@ -432,4 +433,4 @@ def fraction_of_frames(self):
long_enough = np.argwhere(self._trajlengths-lag >= 1).squeeze()
self._fraction[ii] = self._trajlengths[long_enough].sum()/self._nframes

return self._fraction
return self._fraction
41 changes: 13 additions & 28 deletions pyemma/msm/estimators/maximum_likelihood_hmsm.py
Expand Up @@ -18,7 +18,7 @@

from __future__ import absolute_import
# from six.moves import range
from pyemma.util.annotators import alias, aliased
from pyemma.util.annotators import alias, aliased, fix_docs

import numpy as _np
import msmtools.estimation as msmest
Expand All @@ -29,16 +29,11 @@
from pyemma.util.units import TimeUnit


# TODO: This parameter was in the docstring but is not used:
# store_data : bool
# True: estimate() returns an :class:`pyemma.msm.EstimatedMSM` object
# with discrete trajectories and counts stored. False: estimate() returns
# a plain :class:`pyemma.msm.MSM` object that only contains the
# transition matrix and quantities derived from it.
# TODO: currently, it's not possible to start with disconnected matrices.


@aliased
@fix_docs
class MaximumLikelihoodHMSM(_Estimator, _HMSM):
r"""Maximum likelihood estimator for a Hidden MSM given a MSM"""

Expand All @@ -64,9 +59,10 @@ def __init__(self, nstates=2, lag=1, stride=1, msm_init='largest-strong', revers
stride in order to have statistically uncorrelated trajectories.
Setting stride = 'effective' uses the largest neglected timescale as
an estimate for the correlation time and sets the stride accordingly
msm_init : str or :class:`MSM <pyemma.msm.estimators.msm_estimated.MSM>`
msm_init : str or :class:`MSM <pyemma.msm.MSM>`
MSM object to initialize the estimation, or one of following keywords:
* 'largest-strong' | None (default) : Estimate MSM on the largest
* 'largest-strong' or None (default) : Estimate MSM on the largest
strongly connected set and use spectral clustering to generate an
initial HMM
* 'all' : Estimate MSM(s) on the full state space to initialize the
Expand All @@ -86,12 +82,12 @@ def __init__(self, nstates=2, lag=1, stride=1, msm_init='largest-strong', revers
matrix, stationary distribution, etc) are only defined on this subset
and are correspondingly smaller than nstates.
Following modes are available:
* None or 'all' : The active set is the full set of states.
Estimation is done on all weakly connected subsets separately. The
resulting transition matrix may be disconnected.
* 'largest' : The active set is the largest reversibly connected set.
* 'populous' : The active set is the reversibly connected set with
most counts.
* 'populous' : The active set is the reversibly connected set with most counts.
mincount_connectivity : float or '1/n'
minimum number of counts to consider a connection between two states.
Counts lower than that will count zero in the connectivity check and
Expand Down Expand Up @@ -146,20 +142,8 @@ def __init__(self, nstates=2, lag=1, stride=1, msm_init='largest-strong', revers
self.accuracy = accuracy
self.maxit = maxit


#TODO: store_data is mentioned but not implemented or used!
def _estimate(self, dtrajs):
"""
Parameters
----------
Return
------
hmsm : :class:`EstimatedHMSM <pyemma.msm.estimators.hmsm_estimated.EstimatedHMSM>`
Estimated Hidden Markov state model
"""
import bhmm
# ensure right format
dtrajs = _types.ensure_dtraj_list(dtrajs)
Expand Down Expand Up @@ -494,10 +478,6 @@ def submodel_disconnect(self, mincount_connectivity='1/n'):
"""
return self.submodel(mincount_connectivity=mincount_connectivity)

################################################################################
# TODO: there is redundancy between this code and EstimatedMSM
################################################################################

def trajectory_weights(self):
r"""Uses the HMSM to assign a probability weight to each trajectory frame.
Expand All @@ -506,24 +486,28 @@ def trajectory_weights(self):
Returns a list of weight arrays, one for each trajectory, and with a number of elements equal to
trajectory frames. Given :math:`N` trajectories of lengths :math:`T_1` to :math:`T_N`, this function
returns corresponding weights:
.. math::
(w_{1,1}, ..., w_{1,T_1}), (w_{N,1}, ..., w_{N,T_N})
that are normalized to one:
.. math::
\sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} = 1
Suppose you are interested in computing the expectation value of a function :math:`a(x)`, where :math:`x`
are your input configurations. Use this function to compute the weights of all input configurations and
obtain the estimated expectation by:
.. math::
\langle a \rangle = \sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} a(x_{i,t})
Or if you are interested in computing the time-lagged correlation between functions :math:`a(x)` and
:math:`b(x)` you could do:
.. math::
\langle a(t) b(t+\tau) \rangle_t = \sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} a(x_{i,t}) a(x_{i,t+\tau})
Expand All @@ -532,6 +516,7 @@ def trajectory_weights(self):
-------
The normalized trajectory weights. Given :math:`N` trajectories of lengths :math:`T_1` to :math:`T_N`,
returns the corresponding weights:
.. math::
(w_{1,1}, ..., w_{1,T_1}), (w_{N,1}, ..., w_{N,T_N})
Expand Down Expand Up @@ -567,7 +552,7 @@ def observable_state_indexes(self):
"""
try: # if we have this attribute, return it
return self._observable_state_indexes
except: # didn't exist? then create it.
except AttributeError: # didn't exist? then create it.
import pyemma.util.discrete_trajectories as dt

self._observable_state_indexes = dt.index_states(self.discrete_trajectories_obs)
Expand Down
11 changes: 8 additions & 3 deletions pyemma/msm/estimators/maximum_likelihood_msm.py
Expand Up @@ -22,14 +22,16 @@
import numpy as _np
from msmtools import estimation as msmest

from pyemma.util.annotators import alias, aliased
from pyemma.util.annotators import alias, aliased, fix_docs
from pyemma.util.types import ensure_dtraj_list
from pyemma._base.estimator import Estimator as _Estimator
from pyemma.msm.estimators._dtraj_stats import DiscreteTrajectoryStats as _DiscreteTrajectoryStats
from pyemma.msm.models.msm import MSM as _MSM
from pyemma.util.units import TimeUnit as _TimeUnit
from pyemma.util import types as _types


@fix_docs
@aliased
class MaximumLikelihoodMSM(_Estimator, _MSM):
r"""Maximum likelihood estimator for MSMs given discrete trajectory statistics"""
Expand Down Expand Up @@ -190,7 +192,7 @@ def _prepare_input_revpi(self, C, pi):
lcc = msmest.largest_connected_set(C_pos, directed=False)
return pos[lcc]

def _estimate(self, dtrajs):
def estimate(self, dtrajs, **parms):
"""
Parameters
----------
Expand All @@ -202,9 +204,12 @@ def _estimate(self, dtrajs):
Returns
-------
MSM : :class:`pyemma.msm.EstimatedMSM` or :class:`pyemma.msm.MSM`
MSM : :class:`pyemma.msm.MaximumlikelihoodMSM`
"""
return super(MaximumLikelihoodMSM, self).estimate(dtrajs, **parms)

def _estimate(self, dtrajs):
# ensure right format
dtrajs = ensure_dtraj_list(dtrajs)
# harvest discrete statistics
Expand Down

0 comments on commit 0b6f5e9

Please sign in to comment.