Skip to content

Commit

Permalink
fix bug in re-fetching indices, add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
trevorstephens committed Sep 5, 2016
1 parent d3bdac2 commit ac13cee
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 33 deletions.
12 changes: 6 additions & 6 deletions gplearn/_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,8 @@ def execute(self, X):
np.seterr(**old_settings)
return None

def _get_all_indices(self, n_samples=None, max_samples=None,
random_state=None):
def get_all_indices(self, n_samples=None, max_samples=None,
random_state=None):
"""Get the indices on which to evaluate the fitness of a program.
Parameters
Expand Down Expand Up @@ -400,17 +400,17 @@ def _get_all_indices(self, n_samples=None, max_samples=None,
indices_state.set_state(self._indices_state)

not_indices = sample_without_replacement(
n_samples,
n_samples - max_samples,
self._n_samples,
self._n_samples - self._max_samples,
random_state=indices_state)
sample_counts = np.bincount(not_indices, minlength=n_samples)
sample_counts = np.bincount(not_indices, minlength=self._n_samples)
indices = np.where(sample_counts == 0)[0]

return indices, not_indices

def _indices(self):
"""Get the indices used to measure the program's fitness."""
return self._get_all_indices[0]
return self.get_all_indices()[0]

def raw_fitness(self, X, y, sample_weight):
"""Evaluate the raw fitness of the program according to X, y.
Expand Down
12 changes: 3 additions & 9 deletions gplearn/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ def make_function(function, name, arity):
raise ValueError('name must be a string, got %s' % type(name))

# Check output shape
args = []
for i in range(arity):
args.append(np.ones(10))
args = [np.ones(10) for _ in range(arity)]
try:
function(*args)
except ValueError:
Expand All @@ -95,15 +93,11 @@ def make_function(function, name, arity):
'input vectors.' % name)

# Check closure for zero & negative input arguments
args = []
for i in range(arity):
args.append(np.zeros(10))
args = [np.zeros(10) for _ in range(arity)]
if not np.all(np.isfinite(function(*args))):
raise ValueError('supplied function %s does not have closure against '
'zeros in argument vectors.' % name)
args = []
for i in range(arity):
args.append(-1 * np.ones(10))
args = [-1 * np.ones(10) for _ in range(arity)]
if not np.all(np.isfinite(function(*args))):
raise ValueError('supplied function %s does not have closure against '
'negatives in argument vectors.' % name)
Expand Down
23 changes: 5 additions & 18 deletions gplearn/genetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ def _tournament():
curr_sample_weight = sample_weight.copy()
oob_sample_weight = curr_sample_weight.copy()

indices, not_indices = program._get_all_indices(n_samples,
max_samples,
random_state)
indices, not_indices = program.get_all_indices(n_samples,
max_samples,
random_state)

curr_sample_weight[not_indices] = 0
oob_sample_weight[indices] = 0
Expand Down Expand Up @@ -208,10 +208,7 @@ def _verbose_reporter(self,
gen=None,
population=None,
fitness=None,
length=None,
X=None,
y=None,
sample_weight=None):
length=None):
"""A report of the progress of the evolution process.
Parameters
Expand All @@ -230,16 +227,6 @@ def _verbose_reporter(self,
length : list
The current population's lengths.
X : {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
sample_weight : array-like, shape = [n_samples], optional
Weights applied to individual samples.
"""
if start_time is None:
print('%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
Expand Down Expand Up @@ -441,7 +428,7 @@ def fit(self, X, y, sample_weight=None):

if self.verbose:
self._verbose_reporter(start_time, gen, population, fitness,
length, X, y, sample_weight)
length)

# Check for early stopping
if self.metric in ('pearson', 'spearman'):
Expand Down
32 changes: 32 additions & 0 deletions gplearn/tests/test_genetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from gplearn.skutils.testing import assert_false, assert_true
from gplearn.skutils.testing import assert_greater
from gplearn.skutils.testing import assert_equal, assert_almost_equal
from gplearn.skutils.testing import assert_array_equal
from gplearn.skutils.testing import assert_array_almost_equal
from gplearn.skutils.testing import assert_raises
from gplearn.skutils.validation import check_random_state
Expand Down Expand Up @@ -936,6 +937,37 @@ def test_validate_functions():
assert_raises(ValueError, est.fit, boston.data, boston.target)


def test_indices():
    """Verify that a program's sample indices can be re-fetched reproducibly.

    Before any sampling arguments have been supplied, each way of reading
    the indices is expected to raise ``ValueError``. After one explicit
    call to ``get_all_indices``, the argument-free accessors must all
    return the same in-sample indices as that call did.
    """
    rng = check_random_state(415)
    program = _Program(
        random_state=rng,
        program=[mul2, div2, 8, 1, sub2, 9, .5],
        function_set=[add2, sub2, mul2, div2],
        arities={2: [add2, sub2, mul2, div2]},
        init_depth=(2, 6),
        init_method='half and half',
        n_features=10,
        const_range=(-1.0, 1.0),
        metric='mean absolute error',
        p_point_replace=0.05,
        parsimony_coefficient=0.1,
    )

    # Nothing has been sampled yet, so every accessor should refuse to answer.
    # The property is wrapped in a lambda so it is evaluated inside
    # assert_raises rather than while building the tuple.
    premature_accessors = (
        program.get_all_indices,
        program._indices,
        lambda: program.indices_,
    )
    for accessor in premature_accessors:
        assert_raises(ValueError, accessor)

    # First explicit draw: 10 samples, 7 of them kept in-sample.
    expected, _out_of_sample = program.get_all_indices(10, 7, rng)

    # Each re-fetch is performed and checked in turn, in the same order as
    # the accessors are listed.
    refetchers = (
        lambda: program.get_all_indices()[0],
        program._indices,
        lambda: program.indices_,
    )
    for refetch in refetchers:
        assert_array_equal(expected, refetch())


# Script entry point: when this test module is executed directly (rather than
# imported), hand control to nose via nose.runmodule().
if __name__ == "__main__":
import nose
nose.runmodule()

0 comments on commit ac13cee

Please sign in to comment.