[fix] Hotfix debug no training in simple intensifier (automl#370)

* [fix] Fix the no-training issue when using the simple intensifier
* [test] Add a test for the modification
* [fix] Modify the default budget so that it is compatible with the budget type. Since the previous version did not consider the provided budget_type when determining the default budget, I modified this part so that the default budgets for epochs and runtime are no longer mixed up. Note that since the default pipeline config defines epochs as the default budget, I also followed this rule when taking the default value.
* [fix] Fix a mypy error
* [fix] Change the total runtime for the single-configuration example. Since the training sometimes does not finish in time, I increased the total runtime for the training so that we can accommodate the training in the given amount of time.
* [fix] [refactor] Fix the SMAC requirement and refactor some conditions
ravinkohli · Apr 12, 2022 · f9cd838 · f9cd838
1 parent 26ac628
commit f9cd838
Show file tree

Hide file tree

Showing 4 changed files with 63 additions and 23 deletions.
diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py
@@ -201,6 +201,23 @@ def __init__(
 
         self.search_space_updates = search_space_updates
 
+    def _check_and_get_default_budget(self) -> float:  # Return the default budget for the effective budget type
+        budget_type_choices = ('epochs', 'runtime')
+        budget_choices = {
+            budget_type: float(self.pipeline_config.get(budget_type, np.inf))  # np.inf when the key is absent
+            for budget_type in budget_type_choices
+        }
+
+        # Start from pipeline_config['budget_type'] (falling back to 'epochs'); self.budget_type overrides it when set
+        budget_type = str(self.pipeline_config.get('budget_type', 'epochs'))
+        if self.budget_type is not None:
+            budget_type = self.budget_type
+
+        if budget_type not in budget_type_choices:
+            raise ValueError(f"budget type must be in {budget_type_choices}, but got {budget_type}")  # unsupported type
+        else:
+            return budget_choices[budget_type]
+
     def run_wrapper(
         self,
         run_info: RunInfo,
@@ -218,26 +235,19 @@ def run_wrapper(
             RunValue:
                 Contains information about the status/performance of config
         """
-        if self.budget_type is None:
-            if run_info.budget != 0:
-                raise ValueError(
-                    'If budget_type is None, budget must be.0, but is %f' % run_info.budget
-                )
-        else:
-            if run_info.budget == 0:
-                # SMAC can return budget zero for intensifiers that don't have a concept
-                # of budget, for example a simple bayesian optimization intensifier.
-                # Budget determines how our pipeline trains, which can be via runtime or epochs
-                epochs_budget = self.pipeline_config.get('epochs', np.inf)
-                runtime_budget = self.pipeline_config.get('runtime', np.inf)
-                run_info = run_info._replace(budget=min(epochs_budget, runtime_budget))
-            elif run_info.budget <= 0:
-                raise ValueError('Illegal value for budget, must be greater than zero but is %f' %
-                                 run_info.budget)
-            if self.budget_type not in ('epochs', 'runtime'):
-                raise ValueError("Illegal value for budget type, must be one of "
-                                 "('epochs', 'runtime'), but is : %s" %
-                                 self.budget_type)
+        # SMAC returns non-zero budget for intensification
+        # In other words, SMAC returns budget=0 for a simple intensifier (i.e. no intensification)
+        is_intensified = (run_info.budget != 0)
+        default_budget = self._check_and_get_default_budget()
+
+        if self.budget_type is None and is_intensified:
+            raise ValueError(f'budget must be 0 (=no intensification) for budget_type=None, but got {run_info.budget}')
+        if self.budget_type is not None and run_info.budget < 0:
+            raise ValueError(f'budget must be greater than zero but got {run_info.budget}')
+
+        if self.budget_type is not None and not is_intensified:
+            # The budget will be provided in train evaluator when budget_type is None
+            run_info = run_info._replace(budget=default_budget)
 
         remaining_time = self.stats.get_remaing_time_budget()
 
@@ -261,6 +271,10 @@ def run_wrapper(
 
         self.logger.info("Starting to evaluate configuration %s" % run_info.config.config_id)
         run_info, run_value = super().run_wrapper(run_info=run_info)
+
+        if not is_intensified:  # It is required for the SMAC compatibility
+            run_info = run_info._replace(budget=0.0)
+
         return run_info, run_value
 
     def run(

diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py
@@ -66,8 +66,8 @@
 pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset,
                                                                 configuration=configuration,
                                                                 budget_type='epochs',
-                                                                budget=10,
-                                                                run_time_limit_secs=100
+                                                                budget=5,
+                                                                run_time_limit_secs=75
                                                                 )
 
 # The fit_pipeline command also returns a named tuple with the pipeline constraints

diff --git a/requirements.txt b/requirements.txt
@@ -10,7 +10,7 @@ imgaug>=0.4.0
 ConfigSpace>=0.4.14,<0.5
 pynisher>=0.6.3
 pyrfr>=0.7,<0.9
-smac==0.14.0
+smac>=0.14.0
 dask
 distributed>=2.2.0
 catboost

diff --git a/test/test_evaluation/test_evaluation.py b/test/test_evaluation/test_evaluation.py
@@ -394,6 +394,32 @@ def test_silent_exception_in_target_function(self):
         self.assertNotIn('exit_status', info[1].additional_info)
         self.assertNotIn('traceback', info[1])
 
+    def test_eval_with_simple_intensification(self):  # run_wrapper must hand back the budget it was given
+        config = unittest.mock.Mock(spec=int)
+        config.config_id = 198
+
+        ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1,
+                                    stats=self.stats,
+                                    memory_limit=3072,
+                                    metric=accuracy,
+                                    cost_for_crash=get_cost_of_crash(accuracy),
+                                    abort_on_first_run_crash=False,
+                                    logger_port=self.logger_port,
+                                    pynisher_context='fork',
+                                    budget_type='runtime'
+                                    )
+        ta.pynisher_logger = unittest.mock.Mock()
+        run_info = RunInfo(config=config, cutoff=3000, instance=None,
+                           instance_specific=None, seed=1, capped=False)
+
+        for budget in [0.0, 50.0]:
+            # budget=0.0 is what simple intensification (i.e. no intensification) passes in;
+            # a non-zero budget such as 50.0 stands for the other intensifiers
+            self.stats.submitted_ta_runs += 1
+            run_info = run_info._replace(budget=budget)
+            run_info_out, _ = ta.run_wrapper(run_info)
+            self.assertEqual(run_info_out.budget, budget)  # the returned RunInfo keeps the input budget
+
 
 @pytest.mark.parametrize("metric,expected", [(accuracy, 1.0), (log_loss, MAXINT)])
 def test_get_cost_of_crash(metric, expected):