Skip to content

Commit

Permalink
Merge pull request #66 from rodrigo-arenas/0.7.X
Browse files Browse the repository at this point in the history
[PR] Param_grid with length one
  • Loading branch information
rodrigo-arenas committed Jul 8, 2021
2 parents eb46c17 + e9dfb5c commit 4889485
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 7 deletions.
15 changes: 15 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@ Release Notes

Some notes on new features in various releases


What's new in 0.7.0dev0
-----------------------

This is the current in-development version; these features are not yet
available via PyPI.

^^^^^^^^^^
Bug Fixes:
^^^^^^^^^^

* When a param_grid of length 1 is provided, a user warning is issued instead of raising an error.
  Internally, the crossover operation is swapped to DEAP's :func:`~tools.cxSimulatedBinaryBounded`.


What's new in 0.6.0
-------------------

Expand Down
2 changes: 1 addition & 1 deletion sklearn_genetic/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.6.0"
__version__ = "0.7.0dev0"
3 changes: 1 addition & 2 deletions sklearn_genetic/callbacks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def on_end(self, logbook=None, estimator=None):
"""
pass # pragma: no cover

def __call__(self, record=None, logbook=None, estimator=None):
    """Make the callback instance callable.

    Forwards ``record``, ``logbook`` and ``estimator`` positionally to
    :meth:`on_step` and returns whatever that method returns.
    """
    step_result = self.on_step(record, logbook, estimator)
    return step_result

2 changes: 1 addition & 1 deletion sklearn_genetic/callbacks/early_stoppers.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def on_step(self, record=None, logbook=None, estimator=None):
current_stat = logbook.select(self.metric)[-1]

# Compare the current metric with the last |generations| metrics
stats = logbook.select(self.metric)[(-self.generations - 1): -1]
stats = logbook.select(self.metric)[(-self.generations - 1) : -1]

if all(stat >= current_stat for stat in stats):
print(f"INFO: {self.__class__.__name__} callback met its criteria")
Expand Down
22 changes: 20 additions & 2 deletions sklearn_genetic/genetic_search.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import random
import time
import warnings

import numpy as np
from deap import base, creator, tools
Expand Down Expand Up @@ -54,7 +55,9 @@ class GASearchCV(BaseSearchCV):
of hyperparameter based on the estimator selected and as values
one of :class:`~sklearn_genetic.space.Integer` ,
:class:`~sklearn_genetic.space.Categorical`
:class:`~sklearn_genetic.space.Continuous` classes
:class:`~sklearn_genetic.space.Continuous` classes.
Providing at least two parameters is advised so that the optimization routine
can be effective.
population_size : int, default=10
Size of the initial population to sample randomly generated individuals.
Expand Down Expand Up @@ -264,6 +267,12 @@ def __init__(
# Saves the param_grid and computes some extra properties in the same object
self.space = Space(param_grid)

if len(self.space) == 1:
warnings.warn(
"Warning, only one parameter was provided to the param_grid, the optimization routine "
"might not have effect, it's advised to use at least 2 parameters"
)

super(GASearchCV, self).__init__(
estimator=estimator,
scoring=scoring,
Expand Down Expand Up @@ -305,7 +314,16 @@ def _register(self):
"population", tools.initRepeat, list, self.toolbox.individual
)

self.toolbox.register("mate", tools.cxTwoPoint)
if len(self.space) == 1:
sampler = list(self.space.param_grid.values())[0]
lower, upper = sampler.lower, sampler.upper

self.toolbox.register(
"mate", tools.cxSimulatedBinaryBounded, low=lower, up=upper, eta=10
)
else:
self.toolbox.register("mate", tools.cxTwoPoint)

self.toolbox.register("mutate", self.mutate)
if self.elitism:
self.toolbox.register(
Expand Down
59 changes: 58 additions & 1 deletion sklearn_genetic/tests/test_genetic_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
from sklearn.datasets import load_digits, load_boston
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.utils.validation import check_is_fitted
from sklearn.tree import DecisionTreeRegressor
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
import numpy as np

from .. import GASearchCV
from ..space import Integer, Categorical, Continuous
Expand Down Expand Up @@ -81,7 +83,7 @@ def test_expected_ga_results():
assert "param_l1_ratio" in cv_result_keys
assert "param_alpha" in cv_result_keys
assert "param_average" in cv_result_keys
assert "split1_test_score" in cv_result_keys
assert "split0_test_score" in cv_result_keys
assert "split1_test_score" in cv_result_keys
assert "split2_test_score" in cv_result_keys
assert "split0_train_score" in cv_result_keys
Expand Down Expand Up @@ -432,3 +434,58 @@ def test_no_param_grid():
)

assert str(excinfo.value) == "param_grid can not be empty"


def test_param_grid_one_param():
    """Check that a one-entry ``param_grid`` warns on construction and still fits.

    Builds a ``GASearchCV`` around ``SVR`` with a single ``Integer``
    hyperparameter, verifies the exact ``UserWarning`` text, fits the search
    and then checks ``best_params_``, the per-generation entries exposed
    through ``len()``/indexing, ``hof`` and the keys of ``cv_results_``.
    """
    n_generations = 5
    expected_warning = (
        "Warning, only one parameter was provided to the param_grid, the optimization routine might not have effect, "
        "it's advised to use at least 2 parameters"
    )

    X = np.random.normal(75, 10, (1000, 2))
    # NOTE(review): `y` is generated but never used below; `fit` receives
    # `y_labels` instead -- confirm which target was intended.
    y = np.random.normal(200, 20, 1000)
    y_labels = np.random.randint(0, 2, size=1000)

    # The warning is expected while the search object is being constructed.
    with pytest.warns(UserWarning) as record:
        evolved_estimator = GASearchCV(
            estimator=SVR(),
            cv=3,
            population_size=4,
            generations=n_generations,
            param_grid={"degree": Integer(2, 6)},
            criteria="max",
            scoring="neg_mean_absolute_error",
            error_score="raise",
            n_jobs=-1,
            verbose=True,
        )

    assert record[0].message.args[0] == expected_warning

    evolved_estimator.fit(X, y_labels)

    assert check_is_fitted(evolved_estimator) is None
    assert "degree" in evolved_estimator.best_params_
    assert len(evolved_estimator) == n_generations + 1  # +1 random initial population
    assert bool(evolved_estimator.get_params())
    assert len(evolved_estimator.hof) == evolved_estimator.keep_top_k

    # Every statistic must be present in the first recorded entry.
    for stat_name in ("gen", "fitness_max", "fitness", "fitness_std", "fitness_min"):
        assert stat_name in evolved_estimator[0]

    cv_result_keys = set(evolved_estimator.cv_results_.keys())
    expected_cv_keys = (
        "param_degree",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "mean_test_score",
        "std_test_score",
        "rank_test_score",
        "std_fit_time",
        "params",
    )
    for cv_key in expected_cv_keys:
        assert cv_key in cv_result_keys

0 comments on commit 4889485

Please sign in to comment.