
Commit 86636ba

benchmark/experiment_runner: save accelerator_model on error (#6218)
This commit can be seen as a partial revert of "63455e0cd Unify the way in which result files are dumped (#6162)". In that commit we missed that `experiment_cfg` does not have the `accelerator_model` record. Thus, when a benchmark fails we do not include that record in the JSONL file, and therefore resuming a run doesn't work because the failing entry is not recognized (note that when checking whether to resume we compare the JSONL entry against `benchmark_experiment`, which does have `accelerator_model`). We could fix this in two ways: (1) always save `benchmark_experiment`, not only on success, or (2) add `accelerator_model` to `experiment_config`. I've chosen (1) since that is what we were doing before 63455e0.
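As a rough illustration (not the actual runner code; every field other than `accelerator_model` is invented for the example), the resume check can be thought of as comparing the saved JSONL entry against the full `benchmark_experiment` dictionary, which is why a record saved from `experiment_cfg` is never recognized:

# Hypothetical illustration of the resume mismatch; field names other than
# accelerator_model are made up for this example.
saved_from_experiment_cfg = {"accelerator": "cuda", "test": "train"}
benchmark_experiment_dict = {
    "accelerator": "cuda",
    "accelerator_model": "A100",
    "test": "train",
}

def already_ran(jsonl_entry, experiment):
  # Simplified stand-in for the resume check: an entry counts as "already run"
  # only if every field of the experiment matches the saved entry.
  return all(jsonl_entry.get(k) == v for k, v in experiment.items())

# The record written on error from experiment_cfg lacks accelerator_model, so
# it never matches and the failed config gets re-run on resume.
assert not already_ran(saved_from_experiment_cfg, benchmark_experiment_dict)

# Saving benchmark_experiment.to_dict() on error keeps the record complete.
assert already_ran(benchmark_experiment_dict, benchmark_experiment_dict)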
1 parent dc8948d commit 86636ba

File tree

2 files changed: +11 −8 lines


benchmarks/benchmark_experiment.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def list_experiment_configs(self):
         "test": ["eval", "train"],
     }
 
-    # Apply command line chocies.
+    # Apply command line choices.
     if self._args.accelerator:
       config_choices["accelerator"] = list(set(self._args.accelerator))
     if self._args.xla:

benchmarks/experiment_runner.py

Lines changed: 10 additions & 7 deletions
@@ -115,7 +115,8 @@ def generate_and_run_all_configs(self):
       if not self.model_loader.is_compatible(benchmark_model,
                                              benchmark_experiment):
         logger.warning("SKIP incompatible model and experiment configs.")
-        self._save_results(experiment_cfg, model_cfg, {"error": "SKIP"})
+        self._save_results(benchmark_experiment.to_dict(),
+                           benchmark_model.to_dict(), {"error": "SKIP"})
         continue
 
       # Compose child process environment.
@@ -159,17 +160,21 @@ def generate_and_run_all_configs(self):
       except subprocess.TimeoutExpired as e:
         self._fwd_captured_stdout_stderr(e.stdout, e.stderr)
         logger.error("TIMEOUT")
-        self._save_results(experiment_cfg, model_cfg, {"error": str(e)})
+        self._save_results(benchmark_experiment.to_dict(),
+                           benchmark_model.to_dict(), {"error": str(e)})
       except subprocess.CalledProcessError as e:
         self._fwd_captured_stdout_stderr(e.stdout, e.stderr)
         logger.error("ERROR in subprocess")
-        self._save_results(experiment_cfg, model_cfg, {"error": e.stderr})
+        self._save_results(benchmark_experiment.to_dict(),
+                           benchmark_model.to_dict(), {"error": e.stderr})
       except subprocess.SubprocessError as e:
         logger.error("ERROR when launching child process")
-        self._save_results(experiment_cfg, model_cfg, {"error": str(e)})
+        self._save_results(benchmark_experiment.to_dict(),
+                           benchmark_model.to_dict(), {"error": str(e)})
       except ValueError as e:
         logger.error(f"ERROR {e}")
-        self._save_results(experiment_cfg, model_cfg, {"error": str(e)})
+        self._save_results(benchmark_experiment.to_dict(),
+                           benchmark_model.to_dict(), {"error": str(e)})
 
   # TODO: Use `_unique_basename` instead.
   def _get_config_fingerprint(self, experiment_config: OrderedDict,
@@ -212,8 +217,6 @@ def run_single_config(self):
         accumulated_metrics[k] = []
       accumulated_metrics[k].append(v)
 
-    # TODO: Use `experiment_config` and `model_config` when env vars are no
-    # longer included.
     self._save_results(benchmark_experiment.to_dict(),
                        benchmark_model.to_dict(), accumulated_metrics)
 
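With this change the error paths write the same kind of record as the success path in `run_single_config`, so a resumed run recognizes failed entries the same way it recognizes completed ones. A purely illustrative sketch of such a record follows; the actual layout produced by `_save_results` may nest fields differently, and all keys and values shown here are hypothetical:

import json

# Hypothetical JSONL record written on an error path after this change;
# the real _save_results output may use a different structure.
record = {
    "experiment": {"accelerator": "cuda", "accelerator_model": "A100"},
    "model": {"model_name": "resnet50"},
    "metrics": {"error": "SKIP"},
}
print(json.dumps(record))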
