Fix bug in re-fetching indices (call get_all_indices() and reuse the stored sample sizes), add tests

trevorstephens · Sep 5, 2016 · ac13cee
1 parent d3bdac2
commit ac13cee
Show file tree

Hide file tree

Showing 4 changed files with 46 additions and 33 deletions.
diff --git a/gplearn/_program.py b/gplearn/_program.py
@@ -362,8 +362,8 @@ def execute(self, X):
         np.seterr(**old_settings)
         return None
 
-    def _get_all_indices(self, n_samples=None, max_samples=None,
-                         random_state=None):
+    def get_all_indices(self, n_samples=None, max_samples=None,
+                        random_state=None):
         """Get the indices on which to evaluate the fitness of a program.
 
         Parameters
@@ -400,17 +400,17 @@ def _get_all_indices(self, n_samples=None, max_samples=None,
         indices_state.set_state(self._indices_state)
 
         not_indices = sample_without_replacement(
-            n_samples,
-            n_samples - max_samples,
+            self._n_samples,
+            self._n_samples - self._max_samples,
             random_state=indices_state)
-        sample_counts = np.bincount(not_indices, minlength=n_samples)
+        sample_counts = np.bincount(not_indices, minlength=self._n_samples)
         indices = np.where(sample_counts == 0)[0]
 
         return indices, not_indices
 
     def _indices(self):
         """Get the indices used to measure the program's fitness."""
-        return self._get_all_indices[0]
+        return self.get_all_indices()[0]
 
     def raw_fitness(self, X, y, sample_weight):
         """Evaluate the raw fitness of the program according to X, y.

diff --git a/gplearn/functions.py b/gplearn/functions.py
@@ -79,9 +79,7 @@ def make_function(function, name, arity):
         raise ValueError('name must be a string, got %s' % type(name))
 
     # Check output shape
-    args = []
-    for i in range(arity):
-        args.append(np.ones(10))
+    args = [np.ones(10) for _ in range(arity)]
     try:
         function(*args)
     except ValueError:
@@ -95,15 +93,11 @@ def make_function(function, name, arity):
                          'input vectors.' % name)
 
     # Check closure for zero & negative input arguments
-    args = []
-    for i in range(arity):
-        args.append(np.zeros(10))
+    args = [np.zeros(10) for _ in range(arity)]
     if not np.all(np.isfinite(function(*args))):
         raise ValueError('supplied function %s does not have closure against '
                          'zeros in argument vectors.' % name)
-    args = []
-    for i in range(arity):
-        args.append(-1 * np.ones(10))
+    args = [-1 * np.ones(10) for _ in range(arity)]
     if not np.all(np.isfinite(function(*args))):
         raise ValueError('supplied function %s does not have closure against '
                          'negatives in argument vectors.' % name)

diff --git a/gplearn/genetic.py b/gplearn/genetic.py
@@ -132,9 +132,9 @@ def _tournament():
             curr_sample_weight = sample_weight.copy()
         oob_sample_weight = curr_sample_weight.copy()
 
-        indices, not_indices = program._get_all_indices(n_samples,
-                                                        max_samples,
-                                                        random_state)
+        indices, not_indices = program.get_all_indices(n_samples,
+                                                       max_samples,
+                                                       random_state)
 
         curr_sample_weight[not_indices] = 0
         oob_sample_weight[indices] = 0
@@ -208,10 +208,7 @@ def _verbose_reporter(self,
                           gen=None,
                           population=None,
                           fitness=None,
-                          length=None,
-                          X=None,
-                          y=None,
-                          sample_weight=None):
+                          length=None):
         """A report of the progress of the evolution process.
 
         Parameters
@@ -230,16 +227,6 @@ def _verbose_reporter(self,
 
         length : list
             The current population's lengths.
-
-        X : {array-like}, shape = [n_samples, n_features]
-            Training vectors, where n_samples is the number of samples and
-            n_features is the number of features.
-
-        y : array-like, shape = [n_samples]
-            Target values.
-
-        sample_weight : array-like, shape = [n_samples], optional
-            Weights applied to individual samples.
         """
         if start_time is None:
             print('%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
@@ -441,7 +428,7 @@ def fit(self, X, y, sample_weight=None):
 
             if self.verbose:
                 self._verbose_reporter(start_time, gen, population, fitness,
-                                       length, X, y, sample_weight)
+                                       length)
 
             # Check for early stopping
             if self.metric in ('pearson', 'spearman'):

diff --git a/gplearn/tests/test_genetic.py b/gplearn/tests/test_genetic.py
@@ -29,6 +29,7 @@
 from gplearn.skutils.testing import assert_false, assert_true
 from gplearn.skutils.testing import assert_greater
 from gplearn.skutils.testing import assert_equal, assert_almost_equal
+from gplearn.skutils.testing import assert_array_equal
 from gplearn.skutils.testing import assert_array_almost_equal
 from gplearn.skutils.testing import assert_raises
 from gplearn.skutils.validation import check_random_state
@@ -936,6 +937,37 @@ def test_validate_functions():
         assert_raises(ValueError, est.fit, boston.data, boston.target)
 
 
+def test_indices():
+    """Check that indices are stable when generated on the fly."""
+
+    params = {'function_set': [add2, sub2, mul2, div2],
+              'arities': {2: [add2, sub2, mul2, div2]},
+              'init_depth': (2, 6),
+              'init_method': 'half and half',
+              'n_features': 10,
+              'const_range': (-1.0, 1.0),
+              'metric': 'mean absolute error',
+              'p_point_replace': 0.05,
+              'parsimony_coefficient': 0.1}
+    random_state = check_random_state(415)
+    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
+    gp = _Program(random_state=random_state, program=test_gp, **params)
+
+    assert_raises(ValueError, gp.get_all_indices)
+    assert_raises(ValueError, gp._indices)
+
+    def get_indices_property():
+        return gp.indices_
+
+    assert_raises(ValueError, get_indices_property)
+
+    indices, _ = gp.get_all_indices(10, 7, random_state)
+
+    assert_array_equal(indices, gp.get_all_indices()[0])
+    assert_array_equal(indices, gp._indices())
+    assert_array_equal(indices, gp.indices_)
+
+
 if __name__ == "__main__":
     import nose
     nose.runmodule()