[doc] use fix_docs decorator in more estimators, fixed formatting, re…

…moved references to EstimatedMSM (#864) * [doc] use fix_docs decorator in more estimators, fixed formatting, removed references to EstimatedMSM
markovmodel · Jul 8, 2016 · 0b6f5e9 · 0b6f5e9
1 parent a3b91dc
commit 0b6f5e9
Show file tree

Hide file tree

Showing 11 changed files with 84 additions and 477 deletions.
diff --git a/pyemma/coordinates/clustering/interface.py b/pyemma/coordinates/clustering/interface.py
@@ -26,18 +26,19 @@
 
 import os
 
+import numpy as np
+from six.moves import range, zip
+
 from pyemma._base.model import Model
 from pyemma._ext.sklearn.base import ClusterMixin
 from pyemma.coordinates.clustering import regspatial
 from pyemma.coordinates.transform.transformer import StreamingTransformer
+from pyemma.util.annotators import fix_docs
 from pyemma.util.discrete_trajectories import index_states, sample_indexes_by_state
 from pyemma.util.files import mkdir_p
 
 
-from six.moves import range, zip
-import numpy as np
-
-
+@fix_docs
 class AbstractClustering(StreamingTransformer, Model, ClusterMixin):
 
     """

diff --git a/pyemma/coordinates/data/_base/datasource.py b/pyemma/coordinates/data/_base/datasource.py
@@ -233,7 +233,8 @@ def n_frames_total(self, stride=1):
 
         Returns
         -------
-        int : n_frames_total
+        n_frames_total : int
+            total number of frames.
         """
         if isinstance(stride, np.ndarray):
             return stride.shape[0]

diff --git a/pyemma/msm/api.py b/pyemma/msm/api.py
@@ -986,10 +986,6 @@ def estimate_hidden_markov_model(dtrajs, nstates, lag, reversible=True, stationa
     >>> print(mm.mfpt(0, 1))  # doctest: +ELLIPSIS
     6.3...
 
-    See also
-    --------
-    EstimatedHMSM : A discrete HMM object that has been estimated from data
-
 
     .. autoclass:: pyemma.msm.estimators.maximum_likelihood_hmsm.MaximumLikelihoodHMSM
         :members:

diff --git a/pyemma/msm/estimators/bayesian_hmsm.py b/pyemma/msm/estimators/bayesian_hmsm.py
@@ -17,19 +17,22 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from __future__ import absolute_import, print_function
-from six.moves import range
 
 import numpy as _np
-from pyemma.util.types import ensure_dtraj_list
+from six.moves import range
+
+from pyemma._base.progress import ProgressReporter
 from pyemma.msm.estimators.maximum_likelihood_hmsm import MaximumLikelihoodHMSM as _MaximumLikelihoodHMSM
 from pyemma.msm.models.hmsm import HMSM as _HMSM
 from pyemma.msm.models.hmsm_sampled import SampledHMSM as _SampledHMSM
-from pyemma._base.progress import ProgressReporter
+from pyemma.util.annotators import fix_docs
+from pyemma.util.types import ensure_dtraj_list
 from pyemma.util.units import TimeUnit
 
 __author__ = 'noe'
 
 
+@fix_docs
 class BayesianHMSM(_MaximumLikelihoodHMSM, _SampledHMSM, ProgressReporter):
     r"""Estimator for a Bayesian Hidden Markov state model"""
 
@@ -64,17 +67,19 @@ def __init__(self, nstates=2, lag=1, stride='effective',
             the stationary distribution of the transition matrix).
             Currently implements different versions of the Dirichlet prior that
             is conjugate to the Dirichlet distribution of p0. p0 is sampled from:
+
             .. math:
                 p0 \sim \prod_i (p0)_i^{a_i + n_i - 1}
             where :math:`n_i` are the number of times a hidden trajectory was in
             state :math:`i` at time step 0 and :math:`a_i` is the prior count.
             Following options are available:
-            |  'mixed' (default),  :math:`a_i = p_{0,init}`, where :math:`p_{0,init}`
-                is the initial distribution of initial_model.
-            |  ndarray(n) or float,
-                the given array will be used as A.
-            |  'uniform',  :math:`a_i = 1`
-            |  None,  :math:`a_i = 0`. This option ensures coincidence between
+
+            * 'mixed' (default),  :math:`a_i = p_{0,init}`, where :math:`p_{0,init}`
+              is the initial distribution of initial_model.
+            *  ndarray(n) or float,
+               the given array will be used as A.
+            *  'uniform',  :math:`a_i = 1`
+            *   None,  :math:`a_i = 0`. This option ensures coincidence between
                 sample mean an MLE. Will sooner or later lead to sampling problems,
                 because as soon as zero trajectories are drawn from a given state,
                 the sampler cannot recover and that state will never serve as a starting
@@ -86,22 +91,24 @@ def __init__(self, nstates=2, lag=1, stride='effective',
             Currently implements Dirichlet priors if reversible=False and reversible
             transition matrix priors as described in [3]_ if reversible=True. For the
             nonreversible case the posterior of transition matrix :math:`P` is:
+
             .. math:
                 P \sim \prod_{i,j} p_{ij}^{b_{ij} + c_{ij} - 1}
             where :math:`c_{ij}` are the number of transitions found for hidden
             trajectories and :math:`b_{ij}` are prior counts.
-            |  'mixed' (default),  :math:`b_{ij} = p_{ij,init}`, where :math:`p_{ij,init}`
-                is the transition matrix of initial_model. That means one prior
-                count will be used per row.
-            |  ndarray(n, n) or broadcastable,
-                the given array will be used as B.
-            |  'uniform',  :math:`b_{ij} = 1`
-            |  None,  :math:`b_ij = 0`. This option ensures coincidence between
-                sample mean an MLE. Will sooner or later lead to sampling problems,
-                because as soon as a transition :math:`ij` will not occur in a
-                sample, the sampler cannot recover and that transition will never
-                be sampled again. This option is not recommended unless you have
-                a small HMM and a lot of data.
+
+            * 'mixed' (default),  :math:`b_{ij} = p_{ij,init}`, where :math:`p_{ij,init}`
+              is the transition matrix of initial_model. That means one prior
+              count will be used per row.
+            * ndarray(n, n) or broadcastable,
+              the given array will be used as B.
+            * 'uniform',  :math:`b_{ij} = 1`
+            * None,  :math:`b_ij = 0`. This option ensures coincidence between
+              sample mean an MLE. Will sooner or later lead to sampling problems,
+              because as soon as a transition :math:`ij` will not occur in a
+              sample, the sampler cannot recover and that transition will never
+              be sampled again. This option is not recommended unless you have
+              a small HMM and a lot of data.
         init_hmsm : :class:`HMSM <pyemma.msm.models.HMSM>`, default=None
             Single-point estimate of HMSM object around which errors will be evaluated.
             If None is give an initial estimate will be automatically generated using the
@@ -140,14 +147,6 @@ def __init__(self, nstates=2, lag=1, stride='effective',
         self.show_progress = show_progress
 
     def _estimate(self, dtrajs):
-        """
-
-        Return
-        ------
-        hmsm : :class:`EstimatedHMSM <pyemma.msm.estimators.hmsm_estimated.EstimatedHMSM>`
-            Estimated Hidden Markov state model
-
-        """
         # ensure right format
         dtrajs = ensure_dtraj_list(dtrajs)
 

diff --git a/pyemma/msm/estimators/bayesian_msm.py b/pyemma/msm/estimators/bayesian_msm.py
@@ -17,17 +17,20 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from __future__ import absolute_import
-from six.moves import range
 
-__author__ = 'noe'
+from six.moves import range
 
-from pyemma.msm.models.msm import MSM as _MSM
+from pyemma._base.progress import ProgressReporter
 from pyemma.msm.estimators.maximum_likelihood_msm import MaximumLikelihoodMSM as _MLMSM
+from pyemma.msm.models.msm import MSM as _MSM
 from pyemma.msm.models.msm_sampled import SampledMSM as _SampledMSM
+from pyemma.util.annotators import fix_docs
 from pyemma.util.types import ensure_dtraj_list
-from pyemma._base.progress import ProgressReporter
+
+__author__ = 'noe'
 
 
+@fix_docs
 class BayesianMSM(_MLMSM, _SampledMSM, ProgressReporter):
     r"""Bayesian Markov state model estimator"""
 
@@ -139,7 +142,7 @@ def __init__(self, lag=1, nsamples=100, nsteps=None, reversible=True,
         self.conf = conf
         self.show_progress = show_progress
 
-    def _estimate(self, dtrajs):
+    def estimate(self, dtrajs, **kw):
         """
 
         Parameters
@@ -148,12 +151,18 @@ def _estimate(self, dtrajs):
             discrete trajectories, stored as integer ndarrays (arbitrary size)
             or a single ndarray for only one trajectory.
 
+        kw : dict
+            Other Parameters. See documentation of class.
+
         Return
         ------
-        hmsm : :class:`EstimatedHMSM <pyemma.msm.estimators.hmsm_estimated.EstimatedHMSM>`
+        msm : :class:`BayesianMSM <pyemma.msm.BayesianMSM>`
             Estimated Hidden Markov state model
 
         """
+        return super(BayesianMSM, self).estimate(dtrajs, **kw)
+
+    def _estimate(self, dtrajs):
         # ensure right format
         dtrajs = ensure_dtraj_list(dtrajs)
         # conduct MLE estimation (superclass) first

diff --git a/pyemma/msm/estimators/implied_timescales.py b/pyemma/msm/estimators/implied_timescales.py
@@ -419,7 +419,8 @@ def fraction_of_frames(self):
 
         **Be aware**: this fraction refers to the **full count matrix**, and not that of the largest connected
         set. Hence, the output is not necessarily the **active** fraction. For that, use the
-        :py:func:`EstimatedMSM.active_count_fraction` function of the :py:class:`EstimatedMSM` class object.
+        :py:meth:`activte_count_fraction <pyemma.msm.MaximumLikelihoodMSM.active_count_fraction>` function of
+        the :py:class:`pyemma.msm.MaximumLikelihoodMSM` class object or for HMM respectively.
         """
         # TODO : implement fraction_of_active_frames
         # Are we computing this for the first time?
@@ -432,4 +433,4 @@ def fraction_of_frames(self):
                 long_enough = np.argwhere(self._trajlengths-lag >= 1).squeeze()
                 self._fraction[ii] = self._trajlengths[long_enough].sum()/self._nframes
 
-        return self._fraction
+        return self._fraction
diff --git a/pyemma/msm/estimators/maximum_likelihood_hmsm.py b/pyemma/msm/estimators/maximum_likelihood_hmsm.py
@@ -18,7 +18,7 @@
 
 from __future__ import absolute_import
 # from six.moves import range
-from pyemma.util.annotators import alias, aliased
+from pyemma.util.annotators import alias, aliased, fix_docs
 
 import numpy as _np
 import msmtools.estimation as msmest
@@ -29,16 +29,11 @@
 from pyemma.util.units import TimeUnit
 
 
-# TODO: This parameter was in the docstring but is not used:
-#     store_data : bool
-#         True: estimate() returns an :class:`pyemma.msm.EstimatedMSM` object
-#         with discrete trajectories and counts stored. False: estimate() returns
-#         a plain :class:`pyemma.msm.MSM` object that only contains the
-#         transition matrix and quantities derived from it.
 # TODO: currently, it's not possible to start with disconnected matrices.
 
 
 @aliased
+@fix_docs
 class MaximumLikelihoodHMSM(_Estimator, _HMSM):
     r"""Maximum likelihood estimator for a Hidden MSM given a MSM"""
 
@@ -64,9 +59,10 @@ def __init__(self, nstates=2, lag=1, stride=1, msm_init='largest-strong', revers
             stride in order to have statistically uncorrelated trajectories.
             Setting stride = 'effective' uses the largest neglected timescale as
             an estimate for the correlation time and sets the stride accordingly
-        msm_init : str or :class:`MSM <pyemma.msm.estimators.msm_estimated.MSM>`
+        msm_init : str or :class:`MSM <pyemma.msm.MSM>`
             MSM object to initialize the estimation, or one of following keywords:
-            * 'largest-strong' | None (default) : Estimate MSM on the largest
+
+            * 'largest-strong' or None (default) : Estimate MSM on the largest
                 strongly connected set and use spectral clustering to generate an
                 initial HMM
             * 'all' : Estimate MSM(s) on the full state space to initialize the
@@ -86,12 +82,12 @@ def __init__(self, nstates=2, lag=1, stride=1, msm_init='largest-strong', revers
             matrix, stationary distribution, etc) are only defined on this subset
             and are correspondingly smaller than nstates.
             Following modes are available:
+
             * None or 'all' : The active set is the full set of states.
               Estimation is done on all weakly connected subsets separately. The
               resulting transition matrix may be disconnected.
             * 'largest' : The active set is the largest reversibly connected set.
-            * 'populous' : The active set is the reversibly connected set with
-               most counts.
+            * 'populous' : The active set is the reversibly connected set with most counts.
         mincount_connectivity : float or '1/n'
             minimum number of counts to consider a connection between two states.
             Counts lower than that will count zero in the connectivity check and
@@ -146,20 +142,8 @@ def __init__(self, nstates=2, lag=1, stride=1, msm_init='largest-strong', revers
         self.accuracy = accuracy
         self.maxit = maxit
 
-
     #TODO: store_data is mentioned but not implemented or used!
     def _estimate(self, dtrajs):
-        """
-
-        Parameters
-        ----------
-
-        Return
-        ------
-        hmsm : :class:`EstimatedHMSM <pyemma.msm.estimators.hmsm_estimated.EstimatedHMSM>`
-            Estimated Hidden Markov state model
-
-        """
         import bhmm
         # ensure right format
         dtrajs = _types.ensure_dtraj_list(dtrajs)
@@ -494,10 +478,6 @@ def submodel_disconnect(self, mincount_connectivity='1/n'):
         """
         return self.submodel(mincount_connectivity=mincount_connectivity)
 
-    ################################################################################
-    # TODO: there is redundancy between this code and EstimatedMSM
-    ################################################################################
-
     def trajectory_weights(self):
         r"""Uses the HMSM to assign a probability weight to each trajectory frame.
 
@@ -506,24 +486,28 @@ def trajectory_weights(self):
         Returns a list of weight arrays, one for each trajectory, and with a number of elements equal to
         trajectory frames. Given :math:`N` trajectories of lengths :math:`T_1` to :math:`T_N`, this function
         returns corresponding weights:
+
         .. math::
 
             (w_{1,1}, ..., w_{1,T_1}), (w_{N,1}, ..., w_{N,T_N})
 
         that are normalized to one:
+
         .. math::
 
             \sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} = 1
 
         Suppose you are interested in computing the expectation value of a function :math:`a(x)`, where :math:`x`
         are your input configurations. Use this function to compute the weights of all input configurations and
         obtain the estimated expectation by:
+
         .. math::
 
             \langle a \rangle = \sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} a(x_{i,t})
 
         Or if you are interested in computing the time-lagged correlation between functions :math:`a(x)` and
         :math:`b(x)` you could do:
+
         .. math::
 
             \langle a(t) b(t+\tau) \rangle_t = \sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} a(x_{i,t}) a(x_{i,t+\tau})
@@ -532,6 +516,7 @@ def trajectory_weights(self):
         -------
         The normalized trajectory weights. Given :math:`N` trajectories of lengths :math:`T_1` to :math:`T_N`,
         returns the corresponding weights:
+
         .. math::
 
             (w_{1,1}, ..., w_{1,T_1}), (w_{N,1}, ..., w_{N,T_N})
@@ -567,7 +552,7 @@ def observable_state_indexes(self):
         """
         try:  # if we have this attribute, return it
             return self._observable_state_indexes
-        except:  # didn't exist? then create it.
+        except AttributeError:  # didn't exist? then create it.
             import pyemma.util.discrete_trajectories as dt
 
             self._observable_state_indexes = dt.index_states(self.discrete_trajectories_obs)

diff --git a/pyemma/msm/estimators/maximum_likelihood_msm.py b/pyemma/msm/estimators/maximum_likelihood_msm.py
@@ -22,14 +22,16 @@
 import numpy as _np
 from msmtools import estimation as msmest
 
-from pyemma.util.annotators import alias, aliased
+from pyemma.util.annotators import alias, aliased, fix_docs
 from pyemma.util.types import ensure_dtraj_list
 from pyemma._base.estimator import Estimator as _Estimator
 from pyemma.msm.estimators._dtraj_stats import DiscreteTrajectoryStats as _DiscreteTrajectoryStats
 from pyemma.msm.models.msm import MSM as _MSM
 from pyemma.util.units import TimeUnit as _TimeUnit
 from pyemma.util import types as _types
 
+
+@fix_docs
 @aliased
 class MaximumLikelihoodMSM(_Estimator, _MSM):
     r"""Maximum likelihood estimator for MSMs given discrete trajectory statistics"""
@@ -190,7 +192,7 @@ def _prepare_input_revpi(self, C, pi):
         lcc = msmest.largest_connected_set(C_pos, directed=False)
         return pos[lcc]
 
-    def _estimate(self, dtrajs):
+    def estimate(self, dtrajs, **parms):
         """
         Parameters
         ----------
@@ -202,9 +204,12 @@ def _estimate(self, dtrajs):
 
         Returns
         -------
-        MSM : :class:`pyemma.msm.EstimatedMSM` or :class:`pyemma.msm.MSM`
+        MSM : :class:`pyemma.msm.MaximumlikelihoodMSM`
 
         """
+        return super(MaximumLikelihoodMSM, self).estimate(dtrajs, **parms)
+
+    def _estimate(self, dtrajs):
         # ensure right format
         dtrajs = ensure_dtraj_list(dtrajs)
         # harvest discrete statistics