From 9bbd933344fb7ea2a5eb54f124268deb67cebaba Mon Sep 17 00:00:00 2001 From: runame Date: Fri, 3 Nov 2023 17:34:44 +0100 Subject: [PATCH 1/5] Remove tabulate requirement --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 4c2d9e6d3..a7ce5ebb2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,6 @@ install_requires = absl-py==1.4.0 numpy>=1.23 pandas>=2.0.1 - tabulate==0.9.0 tensorflow==2.12.0 tensorflow-datasets==4.9.2 tensorflow-probability==0.20.0 From ea2e7fcf4d5555a4bf6eb17cad16ce6cae9ff9d9 Mon Sep 17 00:00:00 2001 From: runame Date: Fri, 3 Nov 2023 17:37:21 +0100 Subject: [PATCH 2/5] Test warnings in get_experiment_df --- scoring/test_scoring_utils.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/scoring/test_scoring_utils.py b/scoring/test_scoring_utils.py index b766a04d7..fbb21958c 100644 --- a/scoring/test_scoring_utils.py +++ b/scoring/test_scoring_utils.py @@ -1,8 +1,11 @@ from absl.testing import absltest -import scoring_utils -TEST_LOGFILE = 'test_data/adamw_fastmri_jax_04-18-2023-13-10-58.log' -TEST_DIR = 'test_data/experiment_dir' +from scoring import scoring_utils +from scoring.scoring import NUM_TRIALS +from scoring.scoring import NUM_WORKLOADS + +TEST_LOGFILE = 'scoring/test_data/adamw_fastmri_jax_04-18-2023-13-10-58.log' +TEST_DIR = 'scoring/test_data/experiment_dir' NUM_EVALS = 18 @@ -14,8 +17,7 @@ def test_get_trials_dict(self): def test_get_trials_df_dict(self): trials_dict = scoring_utils.get_trials_df_dict(TEST_LOGFILE) - for trial in trials_dict: - df = trials_dict[trial] + for df in trials_dict.values(): self.assertEqual(len(df.index), NUM_EVALS) def test_get_trials_df(self): @@ -24,7 +26,18 @@ def test_get_trials_df(self): self.assertEqual(len(df.at['1', column]), NUM_EVALS) def test_get_experiment_df(self): - df = scoring_utils.get_experiment_df(TEST_DIR) + _ = scoring_utils.get_experiment_df(TEST_DIR) + self.assertWarnsRegex( + Warning, + f'There 
should be {NUM_WORKLOADS} workloads but there are 1.', + scoring_utils.get_experiment_df, + TEST_DIR) + self.assertWarnsRegex( + Warning, + f'There should be {NUM_TRIALS} trials for workload mnist_jax but there ' + 'are only 1.', + scoring_utils.get_experiment_df, + TEST_DIR) if __name__ == '__main__': From 74b961b8a01029c0e7b771b7f6965c528a6b57b2 Mon Sep 17 00:00:00 2001 From: runame Date: Fri, 3 Nov 2023 17:38:42 +0100 Subject: [PATCH 3/5] Add warnings when not all workloads or trials are present --- scoring/scoring_utils.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index 37db73dd4..1a15db2f5 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -1,10 +1,14 @@ import json import os import re +import warnings from absl import logging import pandas as pd +from scoring.scoring import NUM_TRIALS +from scoring.scoring import NUM_WORKLOADS + TRIAL_LINE_REGEX = '(.*) --- Tuning run (\d+)/(\d+) ---' METRICS_LINE_REGEX = '(.*) Metrics: ({.*})' TRIAL_DIR_REGEX = 'trial_(\d+)' @@ -103,8 +107,7 @@ def get_trials_df_dict(logfile): """ trials_dict = get_trials_dict(logfile) trials_df_dict = {} - for trial in trials_dict.keys(): - metrics = trials_dict[trial] + for trial, metrics in trials_dict.items(): trials_df_dict[trial] = pd.DataFrame(metrics) return trials_df_dict @@ -156,6 +159,10 @@ def get_experiment_df(experiment_dir): """ df = pd.DataFrame() workload_dirs = os.listdir(experiment_dir) + num_workloads = len(workload_dirs) + if num_workloads != NUM_WORKLOADS: + warnings.warn(f'There should be {NUM_WORKLOADS} workloads but there are ' + f'{num_workloads}.') for workload in workload_dirs: data = { 'workload': workload, @@ -164,6 +171,7 @@ def get_experiment_df(experiment_dir): t for t in os.listdir(os.path.join(experiment_dir, workload)) if re.match(TRIAL_DIR_REGEX, t) ] + workload_df = pd.DataFrame() for trial in trial_dirs: eval_measurements_filepath = 
os.path.join( experiment_dir, @@ -173,7 +181,7 @@ def get_experiment_df(experiment_dir): ) try: trial_df = pd.read_csv(eval_measurements_filepath) - except FileNotFoundError as e: + except FileNotFoundError: logging.info(f'Could not read {eval_measurements_filepath}') continue data['trial'] = trial @@ -181,5 +189,10 @@ def get_experiment_df(experiment_dir): values = trial_df[column].to_numpy() data[column] = values trial_df = pd.DataFrame([data]) - df = pd.concat([df, trial_df], ignore_index=True) + workload_df = pd.concat([workload_df, trial_df], ignore_index=True) + num_trials = len(workload_df) + if num_trials != NUM_TRIALS: + warnings.warn(f'There should be {NUM_TRIALS} trials for workload ' + f'{workload} but there are only {num_trials}.') + df = pd.concat([df, workload_df], ignore_index=True) return df From c3a6f43428619622238257e3b3b5817086d10a04 Mon Sep 17 00:00:00 2001 From: runame Date: Fri, 3 Nov 2023 17:39:42 +0100 Subject: [PATCH 4/5] Fix bugs in scoring calculation --- scoring/scoring.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/scoring/scoring.py b/scoring/scoring.py index 7e52bd08c..dba254233 100644 --- a/scoring/scoring.py +++ b/scoring/scoring.py @@ -40,6 +40,12 @@ WORKLOADS = workloads_registry.WORKLOADS WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)' BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/' +# These global variables have to be set according to the current set of +# workloads and rules for the scoring to be correct. +# We do not use the workload registry since it contains test and development +# workloads as well. +NUM_WORKLOADS = 8 +NUM_TRIALS = 5 MIN_EVAL_METRICS = [ 'ce_loss', @@ -133,9 +139,10 @@ def get_index_that_reaches_target(workload_df, # Remove trials that never reach the target target_reached = target_reached[target_reached.apply(np.any)] - # If we have no trials that have reached the target, return -1. Else, return - # the eval index of the earliest point the target is reached. 
- if target_reached.empty: + # If fewer than 3 trials reach the target, the submission will be scored as + # missing the target on this workload; return -1. Else, return the eval index + # of the earliest point the target is reached. + if len(target_reached) < 3: return -1, -1 else: index_reached = target_reached.apply(np.argmax) @@ -287,7 +294,7 @@ def compute_performance_profiles(results, np.log10(min_tau), np.log10(max_tau), num=num_points, base=10.0) def rho(r, tau): - return (r <= tau).sum(axis=1) / len(r.columns) + return (r <= tau).sum(axis=1) / NUM_WORKLOADS perf_df = pd.concat([rho(df, tau) for tau in points], axis=1) From 4151e09c43d28a7431f2844603161a74a5469e3f Mon Sep 17 00:00:00 2001 From: runame Date: Fri, 3 Nov 2023 17:40:19 +0100 Subject: [PATCH 5/5] Fix imports --- scoring/score_submission.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scoring/score_submission.py b/scoring/score_submission.py index 42a605dac..e8a6ac010 100644 --- a/scoring/score_submission.py +++ b/scoring/score_submission.py @@ -5,8 +5,7 @@ from absl import logging import scoring_utils -from algorithmic_efficiency import workloads -import scoring +from scoring import scoring flags.DEFINE_string( 'experiment_path',