From 795dc5748f750a7d633a9c57712de942be4365f8 Mon Sep 17 00:00:00 2001
From: Emilio Cota
Date: Tue, 19 Dec 2023 23:20:35 -0500
Subject: [PATCH] benchmark/experiment_runner: save accelerator_model on error

This commit can be seen as a partial revert of "63455e0cd Unify the way
in which result files are dumped (#6162)".

In that commit we missed that `experiment_cfg` does not have the
`accelerator_model` record. Thus, when a benchmark fails we do not
include that record in the JSONL file, and therefore resuming a run
doesn't work because the failing entry is not recognized (note that when
checking whether to resume we compare the JSONL entry against
`benchmark_experiment`, which does have `accelerator_model`).

We could fix this two ways: (1) always save `benchmark_experiment`, not
only on success, or (2) add `accelerator_model` to experiment_config.
I've chosen to go with (1) since that's what we were doing before
63455e0cd.
---
 benchmarks/benchmark_experiment.py |  2 +-
 benchmarks/experiment_runner.py    | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py
index e32fc4d633c4..2b48c7e03de4 100644
--- a/benchmarks/benchmark_experiment.py
+++ b/benchmarks/benchmark_experiment.py
@@ -25,7 +25,7 @@ def list_experiment_configs(self):
         "test": ["eval", "train"],
     }
 
-    # Apply command line chocies.
+    # Apply command line choices.
     if self._args.accelerator:
       config_choices["accelerator"] = list(set(self._args.accelerator))
     if self._args.xla:
diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py
index 2fb618b72d50..54ed8a5bc130 100644
--- a/benchmarks/experiment_runner.py
+++ b/benchmarks/experiment_runner.py
@@ -113,7 +113,8 @@ def generate_and_run_all_configs(self):
         if not self.model_loader.is_compatible(benchmark_model,
                                                benchmark_experiment):
           logger.warning("SKIP incompatible model and experiment configs.")
-          self._save_results(experiment_cfg, model_cfg, {"error": "SKIP"})
+          self._save_results(benchmark_experiment.to_dict(),
+                             benchmark_model.to_dict(), {"error": "SKIP"})
           continue
 
         # Compose child process environment.
@@ -157,17 +158,21 @@ def generate_and_run_all_configs(self):
         except subprocess.TimeoutExpired as e:
           self._fwd_captured_stdout_stderr(e.stdout, e.stderr)
           logger.error("TIMEOUT")
-          self._save_results(experiment_cfg, model_cfg, {"error": str(e)})
+          self._save_results(benchmark_experiment.to_dict(),
+                             benchmark_model.to_dict(), {"error": str(e)})
         except subprocess.CalledProcessError as e:
           self._fwd_captured_stdout_stderr(e.stdout, e.stderr)
           logger.error("ERROR in subprocess")
-          self._save_results(experiment_cfg, model_cfg, {"error": e.stderr})
+          self._save_results(benchmark_experiment.to_dict(),
+                             benchmark_model.to_dict(), {"error": e.stderr})
         except subprocess.SubprocessError as e:
           logger.error("ERROR when launching child process")
-          self._save_results(experiment_cfg, model_cfg, {"error": str(e)})
+          self._save_results(benchmark_experiment.to_dict(),
+                             benchmark_model.to_dict(), {"error": str(e)})
         except ValueError as e:
           logger.error(f"ERROR {e}")
-          self._save_results(experiment_cfg, model_cfg, {"error": str(e)})
+          self._save_results(benchmark_experiment.to_dict(),
+                             benchmark_model.to_dict(), {"error": str(e)})
 
   # TODO: Use `_unique_basename` instead.
   def _get_config_fingerprint(self, experiment_config: OrderedDict,
@@ -210,8 +215,6 @@ def run_single_config(self):
         accumulated_metrics[k] = []
       accumulated_metrics[k].append(v)
 
-    # TODO: Use `experiment_config` and `model_config` when env vars are no
-    # longer included.
     self._save_results(benchmark_experiment.to_dict(),
                        benchmark_model.to_dict(), accumulated_metrics)
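
Note appended for clarity (not part of the patch): the resume failure described in the
commit message reduces to comparing the current experiment dict against a saved JSONL
entry that lacks `accelerator_model`. The sketch below illustrates that mismatch under
assumed names; `entry_matches` and the concrete field values are hypothetical stand-ins
for the runner's resume check, while `benchmark_experiment.to_dict()`, `experiment_cfg`,
and `accelerator_model` are the names used in the patch.

# Illustrative sketch only; entry_matches and the field values below are
# hypothetical, not the runner's actual API.

# What the current run builds in memory: benchmark_experiment.to_dict()
# includes accelerator_model.
current_experiment = {
    "accelerator": "cuda",
    "accelerator_model": "NVIDIA A100-SXM4-40GB",
    "xla": "PJRT",
    "test": "train",
}

# Entry saved from experiment_cfg on error: accelerator_model is missing.
saved_on_error = {
    k: v for k, v in current_experiment.items() if k != "accelerator_model"
}


def entry_matches(saved: dict, current: dict) -> bool:
  # Hypothetical resume check: every field of the current experiment must
  # appear with the same value in the saved JSONL entry.
  return all(saved.get(k) == v for k, v in current.items())


print(entry_matches(dict(current_experiment), current_experiment))  # True
print(entry_matches(saved_on_error, current_experiment))  # False: entry not recognized

With the change above, error results are also saved from `benchmark_experiment.to_dict()`,
so the failed entry carries `accelerator_model` and the resume check can recognize it.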