Skip to content

Commit

Permalink
Merge pull request #32 from rodrigo-arenas/0.6.Xdev
Browse files Browse the repository at this point in the history
[PR] Code comments and BaseCallback
  • Loading branch information
rodrigo-arenas committed Jun 23, 2021
2 parents 2cccdc8 + 08596fe commit 36953e8
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 38 deletions.
5 changes: 5 additions & 0 deletions docs/tutorials/custom_callback.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ that will have all these parameters, so we can rewrite it like this:
return self.on_step(record, logbook, estimator)
**Note:** The implementation of the ``__call__`` method is optional; by default
its behavior is inherited from the :class:`~sklearn_genetic.callbacks.base.BaseCallback`.
It's in this example for deeper understanding of how the callbacks are coded and
to avoid unexpected overwrites.

So that is it, now you can initialize the DummyThreshold
and pass it as a callback in the ``fit`` method of a
:class:`~sklearn_genetic.GASearchCV` instance:
Expand Down
3 changes: 1 addition & 2 deletions sklearn_genetic/callbacks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,5 @@ def on_step(self, record=None, logbook=None, estimator=None):

pass # pragma: no cover

@abstractmethod
def __call__(self, record=None, logbook=None, estimator=None):
pass # pragma: no cover
return self.on_step(record, logbook, estimator)
9 changes: 0 additions & 9 deletions sklearn_genetic/callbacks/early_stoppers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,6 @@ def on_step(self, record=None, logbook=None, estimator=None):
"At least one of record or logbook parameters must be provided"
)

def __call__(self, record=None, logbook=None, estimator=None):
return self.on_step(record, logbook, estimator)


class ConsecutiveStopping(BaseCallback):
"""
Expand Down Expand Up @@ -133,9 +130,6 @@ def on_step(self, record=None, logbook=None, estimator=None):
else:
raise ValueError("logbook parameter must be provided")

def __call__(self, record=None, logbook=None, estimator=None):
return self.on_step(record, logbook, estimator)


class TimerStopping(BaseCallback):
"""
Expand All @@ -161,6 +155,3 @@ def on_step(self, record=None, logbook=None, estimator=None):
print(f"INFO: {self.__class__.__name__} callback met its criteria")
return True
return False

def __call__(self, record=None, logbook=None, estimator=None):
return self.on_step(record, logbook, estimator)
26 changes: 5 additions & 21 deletions sklearn_genetic/callbacks/tests/test_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,36 +60,20 @@ def test_check_callback():


def test_wrong_base_callback():
class MyDummyCallback(BaseCallback):
def __init__(self, metric):
self.metric = metric

def validate(self):
print(self.metric)

with pytest.raises(Exception) as excinfo:
callback = MyDummyCallback()
assert (
str(excinfo.value)
== "Can't instantiate abstract class MyDummyCallback with abstract methods __call__, on_step"
)


def test_base_callback_call():
possible_messages = [
"Can't instantiate abstract class MyDummyCallback with abstract methods __call__",
"Can't instantiate abstract class MyDummyCallback with abstract method __call__",
"Can't instantiate abstract class MyDummyCallback with abstract methods on_step",
"Can't instantiate abstract class MyDummyCallback with abstract method on_step",
]

class MyDummyCallback(BaseCallback):
def __init__(self, metric):
self.metric = metric

def on_step(self, record=None, logbook=None, estimator=None):
print(record)
def validate(self):
print(self.metric)

with pytest.raises(Exception) as excinfo:
callback = MyDummyCallback(metric="fitness")
MyDummyCallback()

assert any([str(excinfo.value) == i for i in possible_messages])

Expand Down
74 changes: 70 additions & 4 deletions sklearn_genetic/genetic_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def __init__(
self.log_config = log_config
self._initial_training_time = None

# Check that the estimator is compatible with scikit-learn
if not is_classifier(self.estimator) and not is_regressor(self.estimator):
raise ValueError(
f"{self.estimator} is not a valid Sklearn classifier or regressor"
Expand All @@ -224,11 +225,13 @@ def __init__(
raise ValueError(
f"Criteria must be one of {Criteria.list()}, got {criteria} instead"
)
# Minimization is handled as a maximization problem by flipping the sign of the score
elif criteria == Criteria.max.value:
self.criteria_sign = 1
elif criteria == Criteria.min.value:
self.criteria_sign = -1

# Saves the param_grid and computes some extra properties in the same object
self.space = Space(param_grid)

super(GASearchCV, self).__init__(
Expand All @@ -243,12 +246,17 @@ def __init__(
)

def _register(self):
"""
This function is responsible for registering the necessary DEAP methods
and creating the other objects that hold the hof, logbook and stats.
"""

self.creator.create("FitnessMax", base.Fitness, weights=[1.0])
self.creator.create("Individual", list, fitness=creator.FitnessMax)

attributes = []

# Assign all the parameters defined in the param_grid
# It uses the distribution parameter to set the sampling function
for parameter, dimension in self.space.param_grid.items():
self.toolbox.register(f"{parameter}", dimension.sample)
attributes.append(getattr(self.toolbox, parameter))
Expand Down Expand Up @@ -290,22 +298,52 @@ def _register(self):
self.logbook = tools.Logbook()

def mutate(self, individual):
"""
This function is responsible for changing a randomly selected parameter of an individual
Parameters
----------
individual: Individual object
The individual (set of hyperparameters) that is being generated
Returns
-------
Mutated individual
"""

# Randomly select one of the hyperparameters
gen = random.randrange(0, len(self.space))
parameter_idx = self.space.parameters[gen]
parameter = self.space[parameter_idx]

# Using the defined distribution from the param_grid value
# Make a random sample of the parameter
individual[gen] = parameter.sample()

return [individual]

def evaluate(self, individual):
"""
Compute the cross-validation scores and record the logbook and mlflow (if specified)
Parameters
----------
individual: Individual object
The individual (set of hyperparameters) that is being evaluated
Returns
-------
The fitness value of the estimator candidate, corresponding to the cv-score with the criteria sign
"""

# Dictionary representation of the individual with key-> hyperparameter name, value -> value
current_generation_params = {
key: individual[n] for n, key in enumerate(self.space.parameters)
}

local_estimator = clone(self.estimator)
local_estimator.set_params(**current_generation_params)

# Compute the cv-score
cv_scores = cross_val_score(
local_estimator,
self.X_,
Expand All @@ -319,6 +357,7 @@ def evaluate(self, individual):

score = np.mean(cv_scores)

# Uses the log config to save in remote log server (e.g. MLflow)
if self.log_config is not None:
self.log_config.create_run(
parameters=current_generation_params,
Expand All @@ -328,6 +367,7 @@ def evaluate(self, individual):

current_generation_params["score"] = score

# Log the hyperparameters and the cv-score
self.logbook.record(parameters=current_generation_params)

return [self.criteria_sign * score]
Expand All @@ -353,24 +393,29 @@ def fit(self, X, y, callbacks=None):
The callback is evaluated after fitting the estimators from the generation 1.
"""
scorer = check_scoring(self.estimator, scoring=self.scoring)

self.X_ = X
self.y_ = y

# Make sure the callbacks are valid
self.callbacks = check_callback(callbacks)
self.scorer_ = check_scoring(
self.estimator, scoring=self.scoring)
self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

# Set the DEAPs necessary methods
self._register()

self._initial_training_time = datetime.utcnow()

# Optimization routine from the selected evolutionary algorithm
pop, log, n_gen = self._select_algorithm(
pop=self._pop, stats=self._stats, hof=self._hof
)

# Update the _n_iterations value as the algorithm could stop earlier due to a callback
self._n_iterations = n_gen

# hof keeps the best params according to the fitness value
# The best one is in the position 0
self.best_params_ = {
key: self._hof[0][n] for n, key in enumerate(self.space.parameters)
}
Expand All @@ -388,6 +433,7 @@ def fit(self, X, y, callbacks=None):
"fitness_min": log.select("fitness_min"),
}

# Imitate the logic of scikit-learn refit parameter
if self.refit:
self.estimator.set_params(**self.best_params_)
self.estimator.fit(self.X_, self.y_)
Expand All @@ -399,6 +445,26 @@ def fit(self, X, y, callbacks=None):
return self

def _select_algorithm(self, pop, stats, hof):
"""
It selects the algorithm to run from the sklearn_genetic.algorithms module
based on the parameter self.algorithm.
Parameters
----------
pop: pop object from DEAP
stats: stats object from DEAP
hof: hof object from DEAP
Returns
-------
pop: pop object
The last evaluated population
log: Logbook object
It contains the calculated metrics {'fitness', 'fitness_std', 'fitness_max', 'fitness_min'}
the number of generations and the number of evaluated individuals per generation
n_gen: int
The number of generations that the evolutionary algorithm ran
"""

if self.algorithm == Algorithms.eaSimple.value:

Expand Down
1 change: 1 addition & 0 deletions sklearn_genetic/space/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def check_space(param_grid: dict = None):
if not param_grid:
raise ValueError(f"param_grid can not be empty")

# Make sure that each of the param_grid values is defined using one of the available Space objects
for key, value in param_grid.items():
if not isinstance(value, BaseDimension):
raise ValueError(
Expand Down
5 changes: 5 additions & 0 deletions sklearn_genetic/space/space_parameters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import enum

"""
This module contains all the possible random distribution names
that can be set in each of the Space variables
"""


class ExtendedEnum(enum.Enum):
@classmethod
Expand Down
8 changes: 6 additions & 2 deletions sklearn_genetic/tests/test_genetic_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def test_expected_ga_results():
assert len(evolved_estimator.decision_function(X_test)) == len(X_test)
assert len(evolved_estimator.predict_proba(X_test)) == len(X_test)
assert len(evolved_estimator.predict_log_proba(X_test)) == len(X_test)
assert evolved_estimator.score(X_test, y_test) == accuracy_score(y_test, evolved_estimator.predict(X_test))
assert evolved_estimator.score(X_test, y_test) == accuracy_score(
y_test, evolved_estimator.predict(X_test)
)
assert bool(evolved_estimator.get_params())
assert len(evolved_estimator.hof) == evolved_estimator.keep_top_k
assert "gen" in evolved_estimator[0]
Expand Down Expand Up @@ -148,7 +150,9 @@ def test_expected_algorithms_callbacks(algorithm, callback):
assert len(evolved_estimator.decision_function(X_test)) == len(X_test)
assert len(evolved_estimator.predict_proba(X_test)) == len(X_test)
assert len(evolved_estimator.predict_log_proba(X_test)) == len(X_test)
assert evolved_estimator.score(X_test, y_test) == accuracy_score(y_test, evolved_estimator.predict(X_test))
assert evolved_estimator.score(X_test, y_test) == accuracy_score(
y_test, evolved_estimator.predict(X_test)
)
assert bool(evolved_estimator.get_params())
assert len(evolved_estimator.hof) <= evolved_estimator.keep_top_k
assert "gen" in evolved_estimator[0]
Expand Down

0 comments on commit 36953e8

Please sign in to comment.