nodes: Add ._new_covariance_matrix() for easy extension
It's awkward to write an extension that tweaks a class's
.__init__() method, so we pull the CovarianceMatrix() call out into
its own method.  Tiziano wants extension initialization to affect
untrained (but already initialized) instances, so we postpone calling
the new method until we hit the training phase (with a hasattr()
check on the first training call).
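
The pattern this enables, roughly (a sketch only: the extension name
and the bias tweak here are hypothetical, and it assumes MDP's
ExtensionNode mechanism):

    import numpy as np
    import mdp

    class BiasedCovariance(mdp.ExtensionNode, mdp.nodes.PCANode):
        """Hypothetical extension: normalize covariance by N, not N - 1."""
        extension_name = 'biased_covariance'

        def _new_covariance_matrix(self):
            return mdp.utils.CovarianceMatrix(dtype=self.dtype, bias=True)

    node = mdp.nodes.PCANode()              # created before activation
    mdp.activate_extension('biased_covariance')
    node.train(np.random.random((100, 5)))  # first call builds the matrix

Since _train() only builds the covariance matrix on its first call,
node picks up the overridden factory even though it was constructed
before the extension was activated.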

Existing calls to CovarianceMatrix are not particularly consistent
with respect to their use of `bias`, which is in any case deprecated
in favor of ddof since NumPy 1.5.  I think this should be cleaned up
in MDP as well, but have left the initialization calls as they stand
during this commit's refactoring.
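
For reference, the NumPy-level equivalence (a quick check with plain
numpy.cov, not MDP's CovarianceMatrix):

    import numpy as np

    x = np.random.random((100, 3))
    # Since NumPy 1.5, ddof subsumes bias:
    #   bias=True  normalizes by N      (same as ddof=0)
    #   bias=False normalizes by N - 1  (same as ddof=1, the default)
    assert np.allclose(np.cov(x, rowvar=False, bias=True),
                       np.cov(x, rowvar=False, ddof=0))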
wking committed Mar 27, 2013
1 parent 561bb5e commit 4ee8e89
Showing 6 changed files with 33 additions and 12 deletions.
mdp/nodes/classifier_nodes.py (5 changes: 4 additions & 1 deletion)

@@ -379,9 +379,12 @@ def _check_train_args(self, x, labels):
                    "datapoints (%d != %d)" % (len(labels), x.shape[0]))
             raise mdp.TrainingException(msg)
 
+    def _new_covariance_matrix(self):
+        return utils.CovarianceMatrix(dtype=self.dtype)
+
     def _update_covs(self, x, lbl):
         if lbl not in self._cov_objs:
-            self._cov_objs[lbl] = utils.CovarianceMatrix(dtype=self.dtype)
+            self._cov_objs[lbl] = self._new_covariance_matrix()
         self._cov_objs[lbl].update(x)
 
     def _train(self, x, labels):
mdp/nodes/em_nodes.py (6 changes: 5 additions & 1 deletion)

@@ -63,9 +63,13 @@ def __init__(self, tol=1e-4, max_cycles=100, verbose=False,
         self.tol = tol
         self.max_cycles = max_cycles
         self.verbose = verbose
-        self._cov_mtx = CovarianceMatrix(dtype, bias=True)
 
+    def _new_covariance_matrix(self):
+        return CovarianceMatrix(dtype=self.dtype, bias=True)
+
     def _train(self, x):
+        if not hasattr(self, '_cov_mtx'):
+            self._cov_mtx = self._new_covariance_matrix()
         # update the covariance matrix
         self._cov_mtx.update(x)
 
mdp/nodes/fda_nodes.py (6 changes: 5 additions & 1 deletion)

@@ -48,7 +48,6 @@ def __init__(self, input_dim=None, output_dim=None, dtype=None):
         # is deleted after training
         self._S_W = None
         # covariance matrix of the full data distribution
-        self._allcov = mdp.utils.CovarianceMatrix(dtype=self.dtype)
         self.means = {} # maps class labels to the class means
         self.tlens = {} # maps class labels to number of training points
         self.v = None # transposed of the projection matrix
@@ -91,6 +90,9 @@ def _update_means(self, x, label):
         self.means[label] += x.sum(axis=0)
         self.tlens[label] += x.shape[0]
 
+    def _new_covariance_matrix(self):
+        return mdp.utils.CovarianceMatrix(dtype=self.dtype)
+
     # Training step 2: compute the overall and within-class covariance
     # matrices and solve the FDA problem
 
@@ -100,6 +102,8 @@ def _train_fda(self, x, labels):
             self._S_W = numx.zeros((self.input_dim, self.input_dim),
                                    dtype=self.dtype)
         # update the covariance matrix of all classes
+        if not hasattr(self, '_allcov'):
+            self._allcov = self._new_covariance_matrix()
         self._allcov.update(x)
         # if labels is a number, all x's belong to the same class
         if isinstance(labels, (list, tuple, numx.ndarray)):
mdp/nodes/pca_nodes.py (7 changes: 5 additions & 2 deletions)

@@ -69,8 +69,6 @@ def __init__(self, input_dim=None, output_dim=None, dtype=None,
         self.var_rel = var_rel
         self.var_part = var_part
         self.reduce = reduce
-        # empirical covariance matrix, updated during the training phase
-        self._cov_mtx = CovarianceMatrix(dtype)
         # attributes that defined in stop_training
         self.d = None  # eigenvalues
         self.v = None  # eigenvectors, first index for coordinates
@@ -107,7 +105,12 @@ def get_explained_variance(self):
         """
         return self.explained_variance
 
+    def _new_covariance_matrix(self):
+        return CovarianceMatrix(dtype=self.dtype)
+
     def _train(self, x):
+        if not hasattr(self, '_cov_mtx'):
+            self._cov_mtx = self._new_covariance_matrix()
         # update the covariance matrix
         self._cov_mtx.update(x)
 
mdp/nodes/sfa_nodes.py (15 changes: 9 additions & 6 deletions)

@@ -77,12 +77,6 @@ def __init__(self, input_dim=None, output_dim=None, dtype=None,
         super(SFANode, self).__init__(input_dim, output_dim, dtype)
         self._include_last_sample = include_last_sample
 
-        # init two covariance matrices
-        # one for the input data
-        self._cov_mtx = CovarianceMatrix(dtype)
-        # one for the derivatives
-        self._dcov_mtx = CovarianceMatrix(dtype)
-
         # set routine for eigenproblem
         self._symeig = symeig
 
@@ -116,6 +110,9 @@ def _check_train_args(self, x, *args, **kwargs):
             raise TrainingException('Need at least 2 time samples to '
                                     'compute time derivative (%d given)'%s)
 
+    def _new_covariance_matrix(self):
+        return CovarianceMatrix(dtype=self.dtype)
+
     def _train(self, x, include_last_sample=None):
         """
         For the ``include_last_sample`` switch have a look at the
@@ -127,6 +124,12 @@
         last_sample_index = None if include_last_sample else -1
 
         # update the covariance matrices
+        if not hasattr(self, '_cov_mtx'):
+            # init two covariance matrices
+            # one for the input data
+            self._cov_mtx = self._new_covariance_matrix()
+            # one for the derivatives
+            self._dcov_mtx = self._new_covariance_matrix()
         self._cov_mtx.update(x[:last_sample_index, :])
         self._dcov_mtx.update(self.time_derivative(x))
 
mdp/nodes/xsfa_nodes.py (6 changes: 5 additions & 1 deletion)

@@ -300,14 +300,18 @@ def _execute(self, x):
 class NormalizeNode(mdp.PreserveDimNode):
     """Make input signal meanfree and unit variance"""
     def __init__(self, input_dim=None, output_dim=None, dtype=None):
-        self._cov_mtx = mdp.utils.CovarianceMatrix(dtype)
         super(NormalizeNode, self).__init__(input_dim, output_dim, dtype)
 
     @staticmethod
     def is_trainable():
         return True
 
+    def _new_covariance_matrix(self):
+        return CovarianceMatrix(dtype=self.dtype)
+
     def _train(self, x):
+        if not hasattr(self, '_cov_mtx'):
+            self._cov_mtx = self._new_covariance_matrix()
         self._cov_mtx.update(x)
 
     def _stop_training(self):
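
The net effect for every node touched here is that the covariance
machinery is now created lazily, on the first training call.  A quick
check of that behavior (a sketch, assuming this commit is applied and
using PCANode as a representative example):

    import numpy as np
    import mdp

    node = mdp.nodes.PCANode()
    assert not hasattr(node, '_cov_mtx')    # no longer built in __init__()
    node.train(np.random.random((100, 5)))
    assert hasattr(node, '_cov_mtx')        # built on first _train() call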
