-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ENH] Allow list of hyperparameter options for tuning (#47)
* [WIP] Multiple hyperparam grids * fix linter * increase coverage * add more tests * More tests for merger * add news fragment
- Loading branch information
Showing
10 changed files
with
965 additions
and
201 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add support for multiple grids in hyperparameter tuning by `Fede Raimondo`_ |
102 changes: 102 additions & 0 deletions
102
examples/03_complex_models/run_hyperparameter_multiple_grids.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
"""
Tuning Multiple Hyperparameter Grids
====================================

This example uses the 'fmri' dataset, performs simple binary classification
using a Support Vector Machine classifier while tuning multiple hyperparameter
grids at the same time.

References
----------
Waskom, M.L., Frank, M.C., Wagner, A.D. (2016). Adaptive engagement of
cognitive control in context-dependent decision-making. Cerebral Cortex.

.. include:: ../../links.inc
"""
# Authors: Federico Raimondo <f.raimondo@fz-juelich.de>
#
# License: AGPL
import numpy as np
from seaborn import load_dataset

from julearn import run_cross_validation
from julearn.utils import configure_logging
from julearn.pipeline import PipelineCreator

###############################################################################
# Set the logging level to info to see extra information.
configure_logging(level="INFO")

###############################################################################
# Set the random seed to always have the same example.
np.random.seed(42)

###############################################################################
# Load the dataset (one row per subject/timepoint/event/region, long format).
df_fmri = load_dataset("fmri")
print(df_fmri.head())

###############################################################################
# Set the dataframe in the right format: pivot so that each brain region
# ("frontal", "parietal") becomes a feature column.
df_fmri = df_fmri.pivot(
    index=["subject", "timepoint", "event"], columns="region", values="signal"
)

df_fmri = df_fmri.reset_index()
print(df_fmri.head())

# Features are the two region signals; the target is the event type.
X = ["frontal", "parietal"]
y = "event"

###############################################################################
# Lets do a first attempt and use a linear SVM with the default parameters.

creator = PipelineCreator(problem_type="classification")
creator.add("zscore")
creator.add("svm", kernel="linear")

scores = run_cross_validation(X=X, y=y, data=df_fmri, model=creator)

print(scores["test_score"].mean())

###############################################################################
# Now lets tune a bit this SVM. We will use a grid search to tune the
# regularization parameter C and the kernel. We will also tune the gamma.
# But since the gamma is only used for the rbf kernel, we will use a
# different grid for the rbf kernel.
#
# To specify two different sets of parameters for the same step, we can
# explicitly specify the name of the step. This is done by passing the
# ``name`` parameter to the ``add`` method. Adding the same step name twice
# creates two separate hyperparameter grids for that step.
creator = PipelineCreator(problem_type="classification")
creator.add("zscore")
creator.add("svm", kernel="linear", C=[0.01, 0.1], name="svm")
creator.add(
    "svm",
    kernel="rbf",
    C=[0.01, 0.1],
    gamma=["scale", "auto", 1e-2, 1e-3],
    name="svm",
)

# Searcher configuration passed to run_cross_validation.
search_params = {
    "kind": "grid",
    "cv": 2,  # to speed up the example
}

scores, estimator = run_cross_validation(
    X=X,
    y=y,
    data=df_fmri,
    model=creator,
    search_params=search_params,
    return_estimator="final",
)

print(scores["test_score"].mean())
###############################################################################
# It seems that we might have found a better model, but which one is it?
print(estimator.best_params_)
# NOTE(review): ``_gamma`` is a private sklearn SVC attribute holding the
# resolved numeric gamma — presumably used here to show the actual value
# behind "scale"/"auto"; confirm it is stable across sklearn versions.
print(estimator.best_estimator_["svm"]._gamma)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
"""Module to merge multiple pipelines into a single one.""" | ||
|
||
# Authors: Federico Raimondo <f.raimondo@fz-juelich.de> |  |
# License: AGPL | ||
|
||
from typing import Dict | ||
|
||
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV | ||
from sklearn.pipeline import Pipeline | ||
|
||
from ..utils.logging import raise_error | ||
from ..utils.typing import EstimatorLike | ||
from .pipeline_creator import _prepare_hyperparameter_tuning | ||
from ..prepare import prepare_search_params | ||
|
||
|
||
def _equal_estimators(first: EstimatorLike, second: EstimatorLike) -> bool:
    """Check whether two estimators are interchangeable.

    Two estimators are considered equal when they are the exact same object,
    or instances of the same class configured with equal (shallow)
    parameters. Nested estimator parameters compare by identity, so distinct
    but identically-configured nested estimators are conservatively treated
    as different — this only results in a redundant grid entry, never a
    missing one.

    Parameters
    ----------
    first : EstimatorLike
        First estimator to compare.
    second : EstimatorLike
        Second estimator to compare.

    Returns
    -------
    bool
        True if the estimators are considered equal, False otherwise.
    """
    if first is second:
        return True
    if type(first) is not type(second):
        return False
    try:
        return first.get_params(deep=False) == second.get_params(deep=False)
    except AttributeError:
        # Not a sklearn-compatible estimator; be conservative.
        return False


def merge_pipelines(
    *pipelines: EstimatorLike, search_params: Dict
) -> Pipeline:
    """Merge multiple pipelines into a single one.

    All pipelines (or searchers wrapping a pipeline) must share the same
    named steps. Steps that differ across pipelines become hyperparameter
    options, and a single searcher covering all resulting
    grids/distributions is returned.

    Parameters
    ----------
    pipelines : List[EstimatorLike]
        List of estimators that will be merged.
    search_params : Dict
        Dictionary with the search parameters.

    Returns
    -------
    merged : BaseSearchCV
        The merged pipeline as a searcher.

    Raises
    ------
    ValueError
        If an element is not a pipeline or searcher, if a searcher's kind
        does not match ``search_params``, if a searcher does not wrap a
        pipeline, or if the pipelines do not share the same named steps.
    """
    # Check that we only merge pipelines and searchers. And if there is a
    # searcher, they are all of the same kind and match the search params.
    search_params = prepare_search_params(search_params)

    for p in pipelines:
        if not isinstance(p, (Pipeline, GridSearchCV, RandomizedSearchCV)):
            raise_error(
                "Only pipelines and searchers are supported. "
                f"Found {type(p)} instead.")
        if isinstance(p, GridSearchCV):
            if search_params["kind"] != "grid":
                raise_error(
                    "At least one of the pipelines to merge is a "
                    "GridSearchCV, but the search params do not specify a "
                    "grid search. These pipelines cannot be merged."
                )
        elif isinstance(p, RandomizedSearchCV):
            if search_params["kind"] != "random":
                raise_error(
                    "At least one of the pipelines to merge is a "
                    "RandomizedSearchCV, but the search params do not specify "
                    "a random search. These pipelines cannot be merged."
                )

    # Check that all estimators have the same named steps in their pipelines.
    reference_pipeline = pipelines[0]
    if isinstance(reference_pipeline, (GridSearchCV, RandomizedSearchCV)):
        reference_pipeline = reference_pipeline.estimator

    step_names = reference_pipeline.named_steps.keys()

    for p in pipelines:
        if isinstance(p, (GridSearchCV, RandomizedSearchCV)):
            p = p.estimator
        if not isinstance(p, Pipeline):
            raise_error("All searchers must use a pipeline.")
        if step_names != p.named_steps.keys():
            raise_error("All pipelines must have the same named steps.")

    # The idea behind the merge is to create a list of parameter
    # grids/distributions from a list of pipeline and searchers, to then
    # wrap them into a single searcher. Since all the searchers have the same
    # steps, this is possible. We just need to concatenate the
    # grids/distributions from all searchers. If one of the pipelines is not
    # a searcher, then this means that it has no hyperparameters to tune, but
    # the pipeline is one of the hyperparameter options.

    different_steps = []
    for t_step_name in step_names:
        # Get the transformer/model of the first element.
        t = reference_pipeline.named_steps[t_step_name]

        # Check that all searchers have the same transformer/model. As soon
        # as one pipeline differs in this step, the step must be exposed as
        # a hyperparameter option.
        for s in pipelines[1:]:
            t_pipeline = (
                s.estimator
                if isinstance(s, (GridSearchCV, RandomizedSearchCV))
                else s
            )
            if not _equal_estimators(t, t_pipeline.named_steps[t_step_name]):
                different_steps.append(t_step_name)
                break

    # Then, we will update the grid of the searchers that have different
    # transformer/model: each pipeline's own step object becomes the (single)
    # candidate value for that step in its grid.
    all_grids = []
    for s in pipelines:
        if isinstance(s, GridSearchCV):
            t_grid = s.param_grid.copy()
        elif isinstance(s, RandomizedSearchCV):
            t_grid = s.param_distributions.copy()
        else:
            t_grid = {}
        for t_name in different_steps:
            if isinstance(s, (GridSearchCV, RandomizedSearchCV)):
                t_grid[t_name] = [s.estimator.named_steps[t_name]]
            else:
                t_grid[t_name] = [s.named_steps[t_name]]
        all_grids.append(t_grid)

    # Finally, we will concatenate the grids and create a new searcher.
    new_searcher = _prepare_hyperparameter_tuning(
        all_grids, search_params, reference_pipeline
    )
    return new_searcher
Oops, something went wrong.