Merge pull request #66 from rodrigo-arenas/0.7.X

[PR] Param_grid with length one
rodrigo-arenas · Jul 8, 2021 · 4889485 · 4889485
2 parents eb46c17 + e9dfb5c
commit 4889485
Show file tree

Hide file tree

Showing 6 changed files with 96 additions and 7 deletions.
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
@@ -3,6 +3,21 @@ Release Notes
 
 Some notes on new features in various releases
 
+
+What's new in 0.7.0dev0
+-----------------------
+
+This is the current in-development version; these features are not yet
+available via PyPI.
+
+^^^^^^^^^^
+Bug Fixes:
+^^^^^^^^^^
+
+* When a param_grid of length 1 is provided, a user warning is raised instead of an error.
+  Internally, the crossover operation is swapped for DEAP's :func:`~tools.cxSimulatedBinaryBounded`.
+
+
 What's new in 0.6.0
 -------------------
 

diff --git a/sklearn_genetic/_version.py b/sklearn_genetic/_version.py
@@ -1 +1 @@
-__version__ = "0.6.0"
+__version__ = "0.7.0dev0"
diff --git a/sklearn_genetic/callbacks/base.py b/sklearn_genetic/callbacks/base.py
@@ -54,7 +54,6 @@ def on_end(self, logbook=None, estimator=None):
 
         """
         pass  # pragma: no cover
-    
+
     def __call__(self, record=None, logbook=None, estimator=None):
         return self.on_step(record, logbook, estimator)
-
diff --git a/sklearn_genetic/callbacks/early_stoppers.py b/sklearn_genetic/callbacks/early_stoppers.py
@@ -76,7 +76,7 @@ def on_step(self, record=None, logbook=None, estimator=None):
                 current_stat = logbook.select(self.metric)[-1]
 
             # Compare the current metric with the last |generations| metrics
-            stats = logbook.select(self.metric)[(-self.generations - 1): -1]
+            stats = logbook.select(self.metric)[(-self.generations - 1) : -1]
 
             if all(stat >= current_stat for stat in stats):
                 print(f"INFO: {self.__class__.__name__} callback met its criteria")

diff --git a/sklearn_genetic/genetic_search.py b/sklearn_genetic/genetic_search.py
@@ -1,5 +1,6 @@
 import random
 import time
+import warnings
 
 import numpy as np
 from deap import base, creator, tools
@@ -54,7 +55,9 @@ class GASearchCV(BaseSearchCV):
         of hyperparameter based on the estimator selected and as values
         one of :class:`~sklearn_genetic.space.Integer` ,
         :class:`~sklearn_genetic.space.Categorical`
-        :class:`~sklearn_genetic.space.Continuous` classes
+        :class:`~sklearn_genetic.space.Continuous` classes.
+        At least two parameters are advised to be provided in order to successfully make
+        an optimization routine.
 
     population_size : int, default=10
         Size of the initial population to sample randomly generated individuals.
@@ -264,6 +267,12 @@ def __init__(
         # Saves the param_grid and computes some extra properties in the same object
         self.space = Space(param_grid)
 
+        if len(self.space) == 1:
+            warnings.warn(
+                "Warning, only one parameter was provided to the param_grid, the optimization routine "
+                "might not have effect, it's advised to use at least 2 parameters"
+            )
+
         super(GASearchCV, self).__init__(
             estimator=estimator,
             scoring=scoring,
@@ -305,7 +314,16 @@ def _register(self):
             "population", tools.initRepeat, list, self.toolbox.individual
         )
 
-        self.toolbox.register("mate", tools.cxTwoPoint)
+        if len(self.space) == 1:
+            sampler = list(self.space.param_grid.values())[0]
+            lower, upper = sampler.lower, sampler.upper
+
+            self.toolbox.register(
+                "mate", tools.cxSimulatedBinaryBounded, low=lower, up=upper, eta=10
+            )
+        else:
+            self.toolbox.register("mate", tools.cxTwoPoint)
+
         self.toolbox.register("mutate", self.mutate)
         if self.elitism:
             self.toolbox.register(

diff --git a/sklearn_genetic/tests/test_genetic_search.py b/sklearn_genetic/tests/test_genetic_search.py
@@ -2,11 +2,13 @@
 from sklearn.datasets import load_digits, load_boston
 from sklearn.linear_model import SGDClassifier
 from sklearn.tree import DecisionTreeClassifier
+from sklearn.svm import SVR
 from sklearn.model_selection import train_test_split
 from sklearn.utils.validation import check_is_fitted
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.cluster import KMeans
 from sklearn.metrics import accuracy_score
+import numpy as np
 
 from .. import GASearchCV
 from ..space import Integer, Categorical, Continuous
@@ -81,7 +83,7 @@ def test_expected_ga_results():
     assert "param_l1_ratio" in cv_result_keys
     assert "param_alpha" in cv_result_keys
     assert "param_average" in cv_result_keys
-    assert "split1_test_score" in cv_result_keys
+    assert "split0_test_score" in cv_result_keys
     assert "split1_test_score" in cv_result_keys
     assert "split2_test_score" in cv_result_keys
     assert "split0_train_score" in cv_result_keys
@@ -432,3 +434,58 @@ def test_no_param_grid():
         )
 
     assert str(excinfo.value) == "param_grid can not be empty"
+
+
+def test_param_grid_one_param():
+    """Check that a one-parameter param_grid emits a UserWarning and the search still fits."""
+    X = np.random.normal(75, 10, (1000, 2))
+    # NOTE(review): y is created but never used; fit() below receives y_labels -- confirm intent.
+    y = np.random.normal(200, 20, 1000)
+    y_labels = np.random.randint(0, 2, size=1000)
+
+    # A single hyperparameter: the case this test targets.
+    param_grid = {"degree": Integer(2, 6)}
+
+    # The warning is expected while the search object is constructed, before fit() is called.
+    with pytest.warns(UserWarning) as record:
+        evolved_estimator = GASearchCV(
+            estimator=SVR(),
+            cv=3,
+            population_size=4,
+            generations=5,
+            param_grid=param_grid,
+            criteria="max",
+            scoring="neg_mean_absolute_error",
+            error_score="raise",
+            n_jobs=-1,
+            verbose=True,
+        )
+
+    # The first recorded warning must carry the exact one-parameter message.
+    assert (
+        record[0].message.args[0]
+        == "Warning, only one parameter was provided to the param_grid, the optimization routine might not have effect, "
+           "it's advised to use at least 2 parameters"
+    )
+
+    evolved_estimator.fit(X, y_labels)
+
+    # After fitting: one entry per generation plus the initial population, and the
+    # first entry is expected to expose the generation index and fitness statistics.
+    assert check_is_fitted(evolved_estimator) is None
+    assert "degree" in evolved_estimator.best_params_
+    assert len(evolved_estimator) == 5 + 1  # +1 random initial population
+    assert bool(evolved_estimator.get_params())
+    assert len(evolved_estimator.hof) == evolved_estimator.keep_top_k
+    assert "gen" in evolved_estimator[0]
+    assert "fitness_max" in evolved_estimator[0]
+    assert "fitness" in evolved_estimator[0]
+    assert "fitness_std" in evolved_estimator[0]
+    assert "fitness_min" in evolved_estimator[0]
+
+    cv_results_ = evolved_estimator.cv_results_
+    cv_result_keys = set(cv_results_.keys())
+
+    # cv=3 above, so per-split scores for splits 0, 1 and 2 are expected in cv_results_.
+    assert "param_degree" in cv_result_keys
+    assert "split0_test_score" in cv_result_keys
+    assert "split1_test_score" in cv_result_keys
+    assert "split2_test_score" in cv_result_keys
+    assert "mean_test_score" in cv_result_keys
+    assert "std_test_score" in cv_result_keys
+    assert "rank_test_score" in cv_result_keys
+    assert "std_fit_time" in cv_result_keys
+    assert "params" in cv_result_keys
+