From a435085809a44d5c589ef4d7071d7b38584535d9 Mon Sep 17 00:00:00 2001
From: Programador Artificial
Date: Sat, 13 Jan 2024 16:59:27 -0300
Subject: [PATCH 1/5] Add to tuner return best model

---
 docs/tutorials/10-Hyperparameter Tuning.ipynb |  4 +--
 src/pytorch_tabular/tabular_model_sweep.py    |  7 +++-
 src/pytorch_tabular/tabular_model_tuner.py    | 32 ++++++++++++++++++-
 tests/test_common.py                          |  2 +-
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/docs/tutorials/10-Hyperparameter Tuning.ipynb b/docs/tutorials/10-Hyperparameter Tuning.ipynb
index 17b76a97..3fb9e433 100644
--- a/docs/tutorials/10-Hyperparameter Tuning.ipynb
+++ b/docs/tutorials/10-Hyperparameter Tuning.ipynb
@@ -1379,7 +1379,7 @@
     ")\n",
     "with warnings.catch_warnings():\n",
     "    warnings.simplefilter(\"ignore\")\n",
-    "    result = tuner.tune(\n",
+    "    result, best_model = tuner.tune(\n",
     "        train=train,\n",
     "        validation=test,\n",
     "        search_space=search_space,\n",
@@ -1877,7 +1877,7 @@
     ")\n",
     "with warnings.catch_warnings():\n",
     "    warnings.simplefilter(\"ignore\")\n",
-    "    result = tuner.tune(\n",
+    "    result, best_model = tuner.tune(\n",
     "        train=train,\n",
     "        validation=test,  # Need not give validation if we use CV\n",
     "        search_space=search_space,\n",
diff --git a/src/pytorch_tabular/tabular_model_sweep.py b/src/pytorch_tabular/tabular_model_sweep.py
index 047806f7..f033e8ab 100644
--- a/src/pytorch_tabular/tabular_model_sweep.py
+++ b/src/pytorch_tabular/tabular_model_sweep.py
@@ -197,6 +197,11 @@ def model_sweep(
         verbose (bool, optional): If True, will print the progress. Defaults to True.
 
         suppress_lightning_logger (bool, optional): If True, will suppress the lightning logger. Defaults to True.
+
+    Returns:
+        results: Training results.
+
+        best_model: If return_best_model is True, return best_model; otherwise return None.
     """
     _validate_args(
         task=task,
@@ -342,4 +347,4 @@ def _init_tabular_model(m):
     if return_best_model:
         return results, best_model
     else:
-        return results
+        return results, None
diff --git a/src/pytorch_tabular/tabular_model_tuner.py b/src/pytorch_tabular/tabular_model_tuner.py
index 19640970..ddd1ce0c 100644
--- a/src/pytorch_tabular/tabular_model_tuner.py
+++ b/src/pytorch_tabular/tabular_model_tuner.py
@@ -153,6 +153,7 @@ def tune(
         cv: Optional[Union[int, Iterable, BaseCrossValidator]] = None,
         cv_agg_func: Optional[Callable] = np.mean,
         cv_kwargs: Optional[Dict] = {},
+        return_best_model: bool = True,
         verbose: bool = False,
         progress_bar: bool = True,
         random_state: Optional[int] = 42,
@@ -199,6 +200,8 @@ def tune(
             cv_kwargs (Optional[Dict], optional): Additional keyword arguments to be passed to the cross
                 validation method. Defaults to {}.
 
+            return_best_model (bool, optional): If True, will return the best model. Defaults to True.
+
             verbose (bool, optional): Whether to print the results of each trial. Defaults to False.
 
             progress_bar (bool, optional): Whether to show a progress bar. Defaults to True.
@@ -214,6 +217,8 @@ def tune(
                 trials_df (DataFrame): A dataframe with the results of each trial
                 best_params (Dict): The best parameters found
                 best_score (float): The best score found
+
+            best_model: If return_best_model is True, return best_model; otherwise return None.
""" assert strategy in self.ALLOWABLE_STRATEGIES, f"tuner must be one of {self.ALLOWABLE_STRATEGIES}" assert mode in ["max", "min"], "mode must be one of ['max', 'min']" @@ -269,6 +274,8 @@ def tune( metric_str = metric.__name__ del temp_tabular_model trials = [] + best_model = None + best_score = 0.0 for i, params in enumerate(iterator): # Copying the configs as a base # Make sure all default parameters that you want to be set for all @@ -333,6 +340,20 @@ def tune( else: result = tabular_model_t.evaluate(validation, verbose=False) params.update({k.replace("test_", ""): v for k, v in result[0].items()}) + + if best_model is None: + best_model = deepcopy(tabular_model_t) + best_score = params[metric_str] + else: + if mode == "min": + if params[metric_str] < best_score: + best_model = deepcopy(tabular_model_t) + best_score = params[metric_str] + elif mode == "max": + if params[metric_str] > best_score: + best_model = deepcopy(tabular_model_t) + best_score = params[metric_str] + params.update({"trial_id": i}) trials.append(params) if verbose: @@ -348,4 +369,13 @@ def tune( best_params = trials_df.iloc[best_idx].to_dict() best_score = best_params.pop(metric_str) trials_df.insert(0, "trial_id", trials) - return self.OUTPUT(trials_df, best_params, best_score) + + if verbose: + logger.info("Model Tuner Finished" +) + logger.info(f"Best Score ({metric_str}): {best_score}") + + if return_best_model: + return self.OUTPUT(trials_df, best_params, best_score), best_model + else: + return self.OUTPUT(trials_df, best_params, best_score), None diff --git a/tests/test_common.py b/tests/test_common.py index b30cb1b2..8443c004 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -862,7 +862,7 @@ def test_tuner( "trainer_config__batch_size": randint(32, 64), "optimizer_config__optimizer": ["RAdam", "AdamW"], } - result = tuner.tune( + result, best_model = tuner.tune( train=train, validation=test, search_space=search_space, From 9ca29c5e798fc7325ac7f301d2b83bd175b05a05 Mon Sep 17 00:00:00 2001 From: Programador Artificial Date: Sat, 13 Jan 2024 17:00:08 -0300 Subject: [PATCH 2/5] Fix bug with progress bar in tuner --- src/pytorch_tabular/tabular_model_tuner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pytorch_tabular/tabular_model_tuner.py b/src/pytorch_tabular/tabular_model_tuner.py index ddd1ce0c..064d4362 100644 --- a/src/pytorch_tabular/tabular_model_tuner.py +++ b/src/pytorch_tabular/tabular_model_tuner.py @@ -88,6 +88,8 @@ def __init__( if trainer_config.fast_dev_run: warnings.warn("fast_dev_run is turned on. Tuning results won't be accurate.") if trainer_config.progress_bar != "none": + # If config and tuner have progress bar enabled, it will result in a bug within the library (rich.progress) + trainer_config.progress_bar = "none" warnings.warn("Turning off progress bar. 
Set progress_bar='none' in TrainerConfig to disable this warning.") trainer_config.trainer_kwargs.update({"enable_model_summary": False}) self.data_config = data_config From 46c0b08be12d632350f03110c9095f69e546fbbe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 20:03:08 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pytorch_tabular/tabular_model_tuner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pytorch_tabular/tabular_model_tuner.py b/src/pytorch_tabular/tabular_model_tuner.py index 064d4362..1143771a 100644 --- a/src/pytorch_tabular/tabular_model_tuner.py +++ b/src/pytorch_tabular/tabular_model_tuner.py @@ -373,8 +373,7 @@ def tune( trials_df.insert(0, "trial_id", trials) if verbose: - logger.info("Model Tuner Finished" -) + logger.info("Model Tuner Finished") logger.info(f"Best Score ({metric_str}): {best_score}") if return_best_model: From f6d9863c95303222369f4cc5795b55586d6a0e03 Mon Sep 17 00:00:00 2001 From: Programador Artificial Date: Sun, 14 Jan 2024 09:56:43 -0300 Subject: [PATCH 4/5] Remove datamodule before deepcopy and change tuner output --- docs/tutorials/10-Hyperparameter Tuning.ipynb | 14 ++++---- src/pytorch_tabular/tabular_model_tuner.py | 35 ++++++++++--------- tests/test_common.py | 2 +- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/docs/tutorials/10-Hyperparameter Tuning.ipynb b/docs/tutorials/10-Hyperparameter Tuning.ipynb index 3fb9e433..78d6cbbc 100644 --- a/docs/tutorials/10-Hyperparameter Tuning.ipynb +++ b/docs/tutorials/10-Hyperparameter Tuning.ipynb @@ -1379,7 +1379,7 @@ ")\n", "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", - " result, best_model = tuner.tune(\n", + " result = tuner.tune(\n", " train=train,\n", " validation=test,\n", " search_space=search_space,\n", @@ -1396,11 +1396,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Result is a namedtuple with trials_df, best_params, and best_score\\\n", + "Result is a namedtuple with trials_df, best_params, best_score and best_model\\\n", "\n", "- trials_df: A dataframe with all the hyperparameter combinations and their corresponding scores\n", "- best_params: The best hyperparameter combination\n", - "- best_score: The best score" + "- best_score: The best score\n", + "- best_model: If return_best_model is True, return best_model otherwise return None" ] }, { @@ -1877,7 +1878,7 @@ ")\n", "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", - " result, best_model = tuner.tune(\n", + " result = tuner.tune(\n", " train=train,\n", " validation=test, # Need not give validation is we use CV\n", " search_space=search_space,\n", @@ -1895,11 +1896,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Result is a namedtuple with trials_df, best_params, and best_score\\\n", + "Result is a namedtuple with trials_df, best_params, best_score and best_model\\\n", "\n", "- trials_df: A dataframe with all the hyperparameter combinations and their corresponding scores\n", "- best_params: The best hyperparameter combination\n", - "- best_score: The best score" + "- best_score: The best score\n", + "- best_model: If return_best_model is True, return best_model otherwise return None" ] }, { diff --git a/src/pytorch_tabular/tabular_model_tuner.py b/src/pytorch_tabular/tabular_model_tuner.py index 1143771a..444d25c4 100644 --- 
a/src/pytorch_tabular/tabular_model_tuner.py +++ b/src/pytorch_tabular/tabular_model_tuner.py @@ -35,7 +35,7 @@ class TabularModelTuner: """ ALLOWABLE_STRATEGIES = ["grid_search", "random_search"] - OUTPUT = namedtuple("OUTPUT", ["trials_df", "best_params", "best_score"]) + OUTPUT = namedtuple("OUTPUT", ["trials_df", "best_params", "best_score", "best_model"]) def __init__( self, @@ -343,18 +343,20 @@ def tune( result = tabular_model_t.evaluate(validation, verbose=False) params.update({k.replace("test_", ""): v for k, v in result[0].items()}) - if best_model is None: - best_model = deepcopy(tabular_model_t) - best_score = params[metric_str] - else: - if mode == "min": - if params[metric_str] < best_score: - best_model = deepcopy(tabular_model_t) - best_score = params[metric_str] - elif mode == "max": - if params[metric_str] > best_score: - best_model = deepcopy(tabular_model_t) - best_score = params[metric_str] + if return_best_model: + tabular_model_t.datamodule = None + if best_model is None: + best_model = deepcopy(tabular_model_t) + best_score = params[metric_str] + else: + if mode == "min": + if params[metric_str] < best_score: + best_model = deepcopy(tabular_model_t) + best_score = params[metric_str] + elif mode == "max": + if params[metric_str] > best_score: + best_model = deepcopy(tabular_model_t) + best_score = params[metric_str] params.update({"trial_id": i}) trials.append(params) @@ -376,7 +378,8 @@ def tune( logger.info("Model Tuner Finished") logger.info(f"Best Score ({metric_str}): {best_score}") - if return_best_model: - return self.OUTPUT(trials_df, best_params, best_score), best_model + if return_best_model and best_model is not None: + best_model.datamodule = datamodule + return self.OUTPUT(trials_df, best_params, best_score, best_model) else: - return self.OUTPUT(trials_df, best_params, best_score), None + return self.OUTPUT(trials_df, best_params, best_score, None) diff --git a/tests/test_common.py b/tests/test_common.py index 8443c004..b30cb1b2 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -862,7 +862,7 @@ def test_tuner( "trainer_config__batch_size": randint(32, 64), "optimizer_config__optimizer": ["RAdam", "AdamW"], } - result, best_model = tuner.tune( + result = tuner.tune( train=train, validation=test, search_space=search_space, From cd1a72b89f9245c332e390f6a2ab3f62a3924271 Mon Sep 17 00:00:00 2001 From: Programador Artificial Date: Sun, 14 Jan 2024 10:24:46 -0300 Subject: [PATCH 5/5] Update documentation --- src/pytorch_tabular/tabular_model_tuner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pytorch_tabular/tabular_model_tuner.py b/src/pytorch_tabular/tabular_model_tuner.py index 444d25c4..10293977 100644 --- a/src/pytorch_tabular/tabular_model_tuner.py +++ b/src/pytorch_tabular/tabular_model_tuner.py @@ -219,8 +219,7 @@ def tune( trials_df (DataFrame): A dataframe with the results of each trial best_params (Dict): The best parameters found best_score (float): The best score found - - best_model: If return_best_model is True, return best_model otherwise return None. + best_model (TabularModel or None): If return_best_model is True, return best_model otherwise return None """ assert strategy in self.ALLOWABLE_STRATEGIES, f"tuner must be one of {self.ALLOWABLE_STRATEGIES}" assert mode in ["max", "min"], "mode must be one of ['max', 'min']"
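Usage sketch (illustrative, not part of the patches above): with the full series applied, tune() returns a single four-field OUTPUT namedtuple instead of a tuple. This is a minimal example under stated assumptions: the four config objects and the train/test dataframes are assumed to be built as in the hyperparameter-tuning tutorial, the search_space entry is borrowed from test_common.py above, and the metric name "accuracy" is an assumed placeholder.

    from pytorch_tabular.tabular_model_tuner import TabularModelTuner

    tuner = TabularModelTuner(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    result = tuner.tune(
        train=train,
        validation=test,
        search_space={"optimizer_config__optimizer": ["RAdam", "AdamW"]},
        strategy="grid_search",
        metric="accuracy",  # assumed metric name; mode declares whether higher is better
        mode="max",
        return_best_model=True,  # default; with False, result.best_model is None
    )

    # OUTPUT now carries trials_df, best_params, best_score, and best_model
    print(result.best_params, result.best_score)
    if result.best_model is not None:
        # Patch 4 strips the datamodule before deepcopy (to keep copies cheap)
        # and re-attaches it on return, so the best model is ready for inference.
        pred_df = result.best_model.predict(test)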