From c4c72324b307af4becb4f1c5428c8bc3293cda69 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Sat, 4 Nov 2023 01:04:09 +0000 Subject: [PATCH 1/8] rename scoring module to fix import errors --- scoring/{scoring.py => score_profile.py} | 24 ++++++++++----- scoring/score_submission.py | 38 ++++++++++++++---------- scoring/scoring_utils.py | 19 +++++------- setup.cfg | 1 + 4 files changed, 47 insertions(+), 35 deletions(-) rename scoring/{scoring.py => score_profile.py} (94%) diff --git a/scoring/scoring.py b/scoring/score_profile.py similarity index 94% rename from scoring/scoring.py rename to scoring/score_profile.py index dba254233..713ac87d2 100644 --- a/scoring/scoring.py +++ b/scoring/score_profile.py @@ -25,7 +25,7 @@ The keys in this dictionary should match the workload identifiers used in the dictionary of submissions. """ - +from absl import logging import itertools import operator import os @@ -153,7 +153,8 @@ def get_index_that_reaches_target(workload_df, def get_times_for_submission(submission, submission_tag, time_col='global_step', - verbosity=1): + verbosity=1, + self_tuning_ruleset=False): """Get times to target for each workload in a submission. Args: @@ -168,8 +169,15 @@ def get_times_for_submission(submission, """ workloads = [] submission_name = submission_tag.split('.')[1] - + num_workloads = len(submission.groupby('workload')) + if num_workloads != NUM_WORKLOADS: + logging.warning(f'Expecting {NUM_WORKLOADS} workloads ' + f'but found {num_workloads} workloads.') for workload, group in submission.groupby('workload'): + num_trials = len(group) + if num_trials != NUM_TRIALS and not self_tuning_ruleset: + logging.warning(f'Expecting {NUM_TRIALS} trials for workload ' + f'{workload} but found {num_trials} trials.') workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1) framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2) workload_metadata = WORKLOADS[workload_name] @@ -250,21 +258,21 @@ def compute_performance_profiles(results, dfs = [] for submission_tag, result in results.items(): - print(f'\nComputing performance profile with respect to `{time_col}` for ' + logging.info(f'\nComputing performance profile with respect to `{time_col}` for ' f'{submission_tag}') dfs.append( get_times_for_submission(result, submission_tag, time_col, verbosity)) df = pd.concat(dfs) if verbosity > 0: - print(f'\n`{time_col}` to reach target:') + logging.info('\n`{time_col}` to reach target:') with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000): - print(df) + logging.info(df) # Divide by the fastest. if reference_submission_tag is None: @@ -273,14 +281,14 @@ def compute_performance_profiles(results, df.update(df.div(df.loc[reference_submission_tag, :], axis=1)) if verbosity > 0: - print(f'\n`{time_col}` to reach target normalized to best:') + logging.info('\n`{time_col}` to reach target normalized to best:') with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000): - print(df) + logging.info(df) # If no max_tau is supplied, choose the value of tau that would plot all non # inf or nan data. 
diff --git a/scoring/score_submission.py b/scoring/score_submission.py index e8a6ac010..211de3db6 100644 --- a/scoring/score_submission.py +++ b/scoring/score_submission.py @@ -5,7 +5,8 @@ from absl import logging import scoring_utils -from scoring import scoring +from scoring import score_profile +import tabulate flags.DEFINE_string( 'experiment_path', @@ -15,6 +16,9 @@ flags.DEFINE_string('output_dir', 'scoring_results', 'Path to save performance profile table and plot.') +flags.DEFINE_boolean('compute_performance_profiles', + True, + 'Whether or not to compute the performance profiles.') FLAGS = flags.FLAGS @@ -23,21 +27,23 @@ def main(_): results = { FLAGS.submission_tag: df, } - performance_profile_df = scoring.compute_performance_profiles( - results, - time_col='score', - min_tau=1.0, - max_tau=None, - reference_submission_tag=None, - num_points=100, - scale='linear', - verbosity=0) - if not os.path.exists(FLAGS.output_dir): - os.mkdir(FLAGS.output_dir) - scoring.plot_performance_profiles( - performance_profile_df, 'score', save_dir=FLAGS.output_dir) - - logging.info(performance_profile_df) + + if FLAGS.compute_performance_profiles: + performance_profile_df = score_profile.compute_performance_profiles( + results, + time_col='score', + min_tau=1.0, + max_tau=None, + reference_submission_tag=None, + num_points=100, + scale='linear', + verbosity=0) + if not os.path.exists(FLAGS.output_dir): + os.mkdir(FLAGS.output_dir) + score_profile.plot_performance_profiles( + performance_profile_df, 'score', save_dir=FLAGS.output_dir) + + logging.info(performance_profile_df) if __name__ == '__main__': diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index 1a15db2f5..86b0c36b6 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -1,13 +1,14 @@ import json import os import re -import warnings from absl import logging import pandas as pd -from scoring.scoring import NUM_TRIALS -from scoring.scoring import NUM_WORKLOADS + +from algorithmic_efficiency import spec +from scoring.score_profile import NUM_TRIALS +from scoring.score_profile import NUM_WORKLOADS TRIAL_LINE_REGEX = '(.*) --- Tuning run (\d+)/(\d+) ---' METRICS_LINE_REGEX = '(.*) Metrics: ({.*})' @@ -137,7 +138,7 @@ def get_trials_df(logfile): def get_experiment_df(experiment_dir): """Gets a df of per trial results from an experiment dir. The output df can be provided as input to - scoring.compute_performance_profiles. + score_profilecompute_performance_profiles. Args: experiment_dir: path to experiment directory containing results for workloads. 
@@ -160,9 +161,9 @@ def get_experiment_df(experiment_dir): df = pd.DataFrame() workload_dirs = os.listdir(experiment_dir) num_workloads = len(workload_dirs) - if num_workloads != NUM_WORKLOADS: - warnings.warn(f'There should be {NUM_WORKLOADS} workloads but there are ' - f'{num_workloads}.') + # if num_workloads != NUM_WORKLOADS: + # warnings.warn(f'There should be {NUM_WORKLOADS} workloads but there are ' + # f'{num_workloads}.') for workload in workload_dirs: data = { 'workload': workload, @@ -190,9 +191,5 @@ def get_experiment_df(experiment_dir): data[column] = values trial_df = pd.DataFrame([data]) workload_df = pd.concat([workload_df, trial_df], ignore_index=True) - num_trials = len(workload_df) - if num_trials != NUM_TRIALS: - warnings.warn(f'There should be {NUM_TRIALS} trials for workload ' - f'{workload} but there are only {num_trials}.') df = pd.concat([df, workload_df], ignore_index=True) return df diff --git a/setup.cfg b/setup.cfg index a7ce5ebb2..9aa4ffb5f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,6 +45,7 @@ install_requires = psutil==5.9.5 clu==0.0.7 matplotlib>=3.7.2 + tabulate==0.9.0 python_requires = >=3.8 From 26f962c2780578992e5ead74c41dd93537660e99 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Sat, 4 Nov 2023 02:06:35 +0000 Subject: [PATCH 2/8] refactor scoring --- scoring/score_profile.py | 20 +++----------------- scoring/score_submission.py | 15 +++++++++++---- scoring/scoring_utils.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/scoring/score_profile.py b/scoring/score_profile.py index 713ac87d2..bf6f01298 100644 --- a/scoring/score_profile.py +++ b/scoring/score_profile.py @@ -34,6 +34,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +import scoring_utils import algorithmic_efficiency.workloads.workloads as workloads_registry @@ -178,23 +179,8 @@ def get_times_for_submission(submission, if num_trials != NUM_TRIALS and not self_tuning_ruleset: logging.warning(f'Expecting {NUM_TRIALS} trials for workload ' f'{workload} but found {num_trials} trials.') - workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1) - framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2) - workload_metadata = WORKLOADS[workload_name] - - # Extend path according to framework. 
- workload_metadata['workload_path'] = os.path.join( - BASE_WORKLOADS_DIR, - workload_metadata['workload_path'] + f'{framework}', - 'workload.py') - workload_init_kwargs = {} - workload_obj = workloads_registry.import_workload( - workload_path=workload_metadata['workload_path'], - workload_class_name=workload_metadata['workload_class_name'], - workload_init_kwargs=workload_init_kwargs) - metric_name = workload_obj.target_metric_name - validation_metric = f'validation/{metric_name}' - validation_target = workload_obj.validation_target_value + validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload) + validation_target = validation_target trial_idx, time_idx = get_index_that_reaches_target( group, validation_metric, validation_target) diff --git a/scoring/score_submission.py b/scoring/score_submission.py index 211de3db6..bd8e44f3b 100644 --- a/scoring/score_submission.py +++ b/scoring/score_submission.py @@ -4,9 +4,8 @@ from absl import flags from absl import logging import scoring_utils - +from tabulate import tabulate from scoring import score_profile -import tabulate flags.DEFINE_string( 'experiment_path', @@ -27,6 +26,14 @@ def main(_): results = { FLAGS.submission_tag: df, } + table = tabulate(df, headers='keys', tablefmt='psql') + logging.info(df) + for workload, group in df.groupby('workload'): + target_metric_name, target_metric_value = scoring_utils.get_workload_validation_target(workload) + print(target_metric_name) + print(target_metric_value) + # print(workload) + # print(group) if FLAGS.compute_performance_profiles: performance_profile_df = score_profile.compute_performance_profiles( @@ -42,8 +49,8 @@ def main(_): os.mkdir(FLAGS.output_dir) score_profile.plot_performance_profiles( performance_profile_df, 'score', save_dir=FLAGS.output_dir) - - logging.info(performance_profile_df) + perf_df = tabulate(performance_profile_df.T, headers='keys', tablefmt='psql') + logging.info(f'Performance profile:\n {perf_df}') if __name__ == '__main__': diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index 86b0c36b6..bc0703a4c 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -1,6 +1,7 @@ import json import os import re +import copy from absl import logging import pandas as pd @@ -9,12 +10,16 @@ from algorithmic_efficiency import spec from scoring.score_profile import NUM_TRIALS from scoring.score_profile import NUM_WORKLOADS +import algorithmic_efficiency.workloads.workloads as workloads_registry TRIAL_LINE_REGEX = '(.*) --- Tuning run (\d+)/(\d+) ---' METRICS_LINE_REGEX = '(.*) Metrics: ({.*})' TRIAL_DIR_REGEX = 'trial_(\d+)' MEASUREMENTS_FILENAME = 'eval_measurements.csv' +WORKLOADS = workloads_registry.WORKLOADS +WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)' +BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/' #### File IO helper functions ### def get_logfile_paths(logdir): @@ -193,3 +198,30 @@ def get_experiment_df(experiment_dir): workload_df = pd.concat([workload_df, trial_df], ignore_index=True) df = pd.concat([df, workload_df], ignore_index=True) return df + + +## Get workload properties +def get_workload_validation_target(workload): + """Returns workload target metric name and value. + """ + print(workload) + workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1) + framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2) + workload_metadata = copy.copy(WORKLOADS[workload_name]) + print(workload_metadata) + + # Extend path according to framework. 
+ workload_metadata['workload_path'] = os.path.join( + BASE_WORKLOADS_DIR, + workload_metadata['workload_path'] + f'{framework}', + 'workload.py') + workload_init_kwargs = {} + print(workload_metadata['workload_path']) + workload_obj = workloads_registry.import_workload( + workload_path=workload_metadata['workload_path'], + workload_class_name=workload_metadata['workload_class_name'], + workload_init_kwargs=workload_init_kwargs) + metric_name = workload_obj.target_metric_name + validation_metric = f'validation/{metric_name}' + validation_target = workload_obj.validation_target_value + return validation_metric, validation_target \ No newline at end of file From aa75f3eaeb4ca71957311b4d6c66f8a325398b98 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Wed, 8 Nov 2023 01:03:34 +0000 Subject: [PATCH 3/8] add scoring table --- scoring/score_profile.py | 368 ------------------------------------ scoring/score_submission.py | 59 ++++-- scoring/scoring_utils.py | 13 +- 3 files changed, 49 insertions(+), 391 deletions(-) delete mode 100644 scoring/score_profile.py diff --git a/scoring/score_profile.py b/scoring/score_profile.py deleted file mode 100644 index bf6f01298..000000000 --- a/scoring/score_profile.py +++ /dev/null @@ -1,368 +0,0 @@ -"""Performance and scoring code. - -The three primary methods exposed by the `scoring` module are: -- `compute_performance_profiles`: generates performance profiles for a set of - submissions over all workloads as defined in the scoring rules: - https://github.com/mlcommons/algorithmic-efficiency/blob/main/RULES.md -- `compute_leaderboard_score`: computes final scores from performance profiles. -- `plot_performance_profiles`: plot performance profiles for a set of - submissions. - -The two primary inputs to `compute_performance_profiles` are -1. A dictionary of pandas DataFrames, where each key is a globally unique - identifier for a submission and each value is a DataFrame containing one row - per trial per workload in that submission. At minimum, this DataFrame should - include a column of np.arrays indicating time (e.g., 'global_step'), a column - of np.arrays indicating performance (e.g., 'validation/accuracy') for each - workload and a column 'workload' that indicates the workload identifier. -2. A dictionary of workload metadata describing each workload in the form: - { - 'workload_identifier': { - 'target': VALUE, - 'metric': 'validation/error_rate', - } - } - The keys in this dictionary should match the workload identifiers used in - the dictionary of submissions. -""" -from absl import logging -import itertools -import operator -import os -import re - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import scoring_utils - -import algorithmic_efficiency.workloads.workloads as workloads_registry - -WORKLOADS = workloads_registry.WORKLOADS -WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)' -BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/' -# These global variables have to be set according to the current set of -# workloads and rules for the scoring to be correct. -# We do not use the workload registry since it contains test and development -# workloads as well. 
-NUM_WORKLOADS = 8 -NUM_TRIALS = 5 - -MIN_EVAL_METRICS = [ - 'ce_loss', - 'error_rate', - 'ctc_loss', - 'wer', - 'l1_loss', - 'loss', -] - -MAX_EVAL_METRICS = ['mean_average_precision', 'ssim', 'accuracy', 'bleu'] - - -def generate_eval_cols(metrics): - splits = ['train', 'validation'] - return [f'{split}/{col}' for split, col in itertools.product(splits, metrics)] - - -MINIMIZE_REGISTRY = {k: True for k in generate_eval_cols(MIN_EVAL_METRICS)} -MINIMIZE_REGISTRY.update( - {k: False for k in generate_eval_cols(MAX_EVAL_METRICS)}) -MINIMIZE_REGISTRY['train_cost'] = True - - -def check_if_minimized(col_name): - """Guess if the eval metric column name should be minimized or not.""" - for prefix in ['best_', 'final_']: - col_name = col_name.replace(prefix, '') - for col in MINIMIZE_REGISTRY: - if col in col_name: - return MINIMIZE_REGISTRY[col] - - raise ValueError(f'Column {col_name} not found in `MINIMIZE_REGISTRY` as ' - 'either a column name or a substring of a column name.') - - -def get_index_that_reaches_best(workload_df, metric_col): - """Get the eval index in which a workload reaches the best on metric_col. - - Args: - workload_df: A subset of a submission's trials DataFrame that - includes only the trials in a single workload. - metric_col: Name of array column in workload_df - (e.g., `validation/l1_loss`). - - Returns: - Tuple of trial index, time index, and best value where the workload - reached the best metric_col. Return (-1, -1, -1) if no undiverged trials. - """ - is_minimized = check_if_minimized(metric_col) - series = workload_df[metric_col] - - series = series[series != np.nan] - - op = np.min if is_minimized else np.max - best = series.apply(op) - - op_idx = np.argmin if is_minimized else np.argmax - best_idx = series.apply(op_idx) - - if best.empty: - return -1, -1, -1 - else: - trial = best.idxmin() if is_minimized else best.idxmax() - return trial, best_idx[trial], best[trial] - - -def get_index_that_reaches_target(workload_df, - validation_metric, - validation_target): - """Get the eval index in which a workload reaches the target metric_col. - - Args: - workload_df: A subset of a submission's trials DataFrame that - includes only the trials in a single workload. - metric_col: Name of array column in workload_df (e.g. `validation/l1_loss`). - target: Target value for metric_col. - - Returns: - Tuple of trial index and time index where the workload reached the target - metric_col. Return (-1, -1) if not reached. - """ - is_minimized = check_if_minimized(validation_metric) - validation_series = workload_df[validation_metric] - validation_series = validation_series[validation_series != np.nan] - - op = operator.le if is_minimized else operator.ge - validation_target_reached = validation_series.apply( - lambda x: op(x, validation_target)) - target_reached = pd.Series(validation_target_reached) - # Remove trials that never reach the target - target_reached = target_reached[target_reached.apply(np.any)] - - # If less than 3 trials reach the target, the submission will be scored as - # missing the target on this workload; return -1. Else, return the eval index - # of the earliest point the target is reached. - if len(target_reached) < 3: - return -1, -1 - else: - index_reached = target_reached.apply(np.argmax) - trial = index_reached.idxmin() - return trial, index_reached[trial] - - -def get_times_for_submission(submission, - submission_tag, - time_col='global_step', - verbosity=1, - self_tuning_ruleset=False): - """Get times to target for each workload in a submission. 
- - Args: - submission: A DataFrame containing one row for each trial in each workload - for a given submission. - submission_tag: Globally unique identified for a submission. - time_col: A string indicating which column to use for time. - verbosity: Debug level of information; choice of (1, 2, 3). - - Returns: - DataFrame with columns `submission`, `workload`, and time_col. - """ - workloads = [] - submission_name = submission_tag.split('.')[1] - num_workloads = len(submission.groupby('workload')) - if num_workloads != NUM_WORKLOADS: - logging.warning(f'Expecting {NUM_WORKLOADS} workloads ' - f'but found {num_workloads} workloads.') - for workload, group in submission.groupby('workload'): - num_trials = len(group) - if num_trials != NUM_TRIALS and not self_tuning_ruleset: - logging.warning(f'Expecting {NUM_TRIALS} trials for workload ' - f'{workload} but found {num_trials} trials.') - validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload) - validation_target = validation_target - - trial_idx, time_idx = get_index_that_reaches_target( - group, validation_metric, validation_target) - if time_idx > -1: - time_val = group[time_col].loc[trial_idx][time_idx] - else: - time_val = float('inf') - - workloads.append({ - 'submission': submission_name, - 'workload': workload, - time_col: time_val, - }) - - if verbosity > 0: - print(' hparams:') - if time_idx > -1: - hparams = group.loc[trial_idx, 'hparams'] - for key, val in hparams.items(): - print(f' - {key}: {val}') - else: - print('Submission did not reach target') - df = pd.DataFrame.from_records(workloads) - df = df.pivot(index='submission', columns='workload', values=time_col) - - return df - - -def compute_performance_profiles(results, - time_col='global_step', - min_tau=1.0, - max_tau=None, - reference_submission_tag=None, - num_points=100, - scale='linear', - verbosity=0): - """Compute performance profiles for a set of submission by some time column. - - Args: - results: Dict where keys are submission names and values are a DataFrame of - trials where each row is a trial and each column is a field for a given - trial. Results should contain keys for each workload's metric, time_col, - 'workload'. See file header comment for more details. - time_col: A string indicating which column to use for time. - min_tau: Minimum tau to use for plotting. - max_tau: Maximum tau to use for plotting. - reference_submission_tag: If specified, must be an element of - `submission_tags`. Used as the denominator for computing tau. Otherwise, - the minimum time to target is computed per-workload and used as the - denominator for tau. - num_points: Number of points to use for plotting. - scale: Linear or log scale for the x-axis. - verbosity: Debug level of information; choice of (1, 2, 3). - - Returns: - A DataFrame of performance profiles for the set of submissions given in - `results` based on `time_col`. Each row represents a submission and each - column represents rho(tau) for some value of tau (df.volumns are the - different values of tau). 
- """ - dfs = [] - - for submission_tag, result in results.items(): - logging.info(f'\nComputing performance profile with respect to `{time_col}` for ' - f'{submission_tag}') - dfs.append( - get_times_for_submission(result, submission_tag, time_col, verbosity)) - df = pd.concat(dfs) - - if verbosity > 0: - logging.info('\n`{time_col}` to reach target:') - with pd.option_context('display.max_rows', - None, - 'display.max_columns', - None, - 'display.width', - 1000): - logging.info(df) - - # Divide by the fastest. - if reference_submission_tag is None: - df.update(df.div(df.min(axis=0), axis=1)) - else: - df.update(df.div(df.loc[reference_submission_tag, :], axis=1)) - - if verbosity > 0: - logging.info('\n`{time_col}` to reach target normalized to best:') - with pd.option_context('display.max_rows', - None, - 'display.max_columns', - None, - 'display.width', - 1000): - logging.info(df) - - # If no max_tau is supplied, choose the value of tau that would plot all non - # inf or nan data. - if max_tau is None: - max_tau = df.replace(float('inf'), -1).replace(np.nan, -1).values.max() - - if scale == 'linear': - points = np.linspace(min_tau, max_tau, num=num_points) - elif scale == 'log': - points = np.logspace( - np.log10(min_tau), np.log10(max_tau), num=num_points, base=10.0) - - def rho(r, tau): - return (r <= tau).sum(axis=1) / NUM_WORKLOADS - - perf_df = pd.concat([rho(df, tau) for tau in points], axis=1) - - cols = points - if scale == 'log': - cols = np.log10(points) - perf_df.columns = cols - - return perf_df - - -def compute_leaderboard_score(df, normalize=False): - """Compute leaderboard score by taking integral of performance profile. - - Args: - df: pd.DataFrame returned from `compute_performance_profiles`. - normalize: divide by the range of the performance profile's tau. - - Returns: - pd.DataFrame with one column of scores indexed by submission. - """ - scores = np.trapz(df, x=df.columns) - if normalize: - scores /= df.columns.max() - df.columns.min() - return pd.DataFrame(scores, columns=['score'], index=df.index) - - -def maybe_save_figure(save_dir, name, ext='pdf'): - """Maybe save the current matplotlib.pyplot figure.""" - if save_dir: - path = os.path.join(save_dir, f'{name}.{ext}') - with open(path, 'wb') as fout: - plt.savefig(fout, format=ext) - - -def maybe_save_df_to_csv(save_dir, df, path, **to_csv_kwargs): - if save_dir: - path = os.path.join(save_dir, path) - with open(path, 'w') as fout: - df.to_csv(fout, **to_csv_kwargs) - - -def plot_performance_profiles(perf_df, - df_col, - scale='linear', - save_dir=None, - figsize=(30, 10), - font_size=18): - """Plot performance profiles. - - Args: - perf_df: A DataFrame of performance profiles where each row represents a - submission and each column represents rho(tau) for some value of tau - (df.volumns are the different values of tau). - df_col: The column in the original submission results DataFrame used to - compute the performance profile. This argument is only used for axis - and file naming. - scale: Whether or not the data in perf_df is on a linear or log scale. This - argument is only used for axis and file naming. - save_dir: If a valid directory is provided, save both the plot and perf_df - to the provided directory. - figsize: The size of the plot. - font_size: The font size to use for the legend. - - Returns: - None. If a valid save_dir is provided, save both the plot and perf_df. 
- """ - fig = perf_df.T.plot(figsize=figsize) - df_col_display = f'log10({df_col})' if scale == 'log' else df_col - fig.set_xlabel( - f'Ratio of `{df_col_display}` to best submission', size=font_size) - fig.set_ylabel('Proportion of workloads', size=font_size) - fig.legend(prop={'size': font_size}, bbox_to_anchor=(1.0, 1.0)) - maybe_save_figure(save_dir, f'performance_profile_by_{df_col_display}') - maybe_save_df_to_csv(save_dir, - perf_df, - f'performance_profile_{df_col_display}.csv') diff --git a/scoring/score_submission.py b/scoring/score_submission.py index bd8e44f3b..ad6573589 100644 --- a/scoring/score_submission.py +++ b/scoring/score_submission.py @@ -3,9 +3,14 @@ from absl import app from absl import flags from absl import logging +import pandas as pd +import numpy as np +import operator + import scoring_utils from tabulate import tabulate -from scoring import score_profile +from scoring import performance_profile +from scoring.performance_profile import check_if_minimized flags.DEFINE_string( 'experiment_path', @@ -16,27 +21,54 @@ 'scoring_results', 'Path to save performance profile table and plot.') flags.DEFINE_boolean('compute_performance_profiles', - True, - 'Whether or not to compute the performance profiles.') + False, + 'Whether or not to compute the performance profiles.') FLAGS = flags.FLAGS +def get_summary_df(workload, workload_df): + validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload) + is_minimized = check_if_minimized(validation_metric) + target_op = operator.le if is_minimized else operator.ge + best_op = min if is_minimized else max + idx_op = np.argmin if is_minimized else np.argmax + + summary_df = pd.DataFrame() + summary_df['workload'] = workload_df['workload'] + summary_df['trial'] = workload_df['trial'] + summary_df['target metric name'] = validation_metric + summary_df['target metric value'] = validation_target + + summary_df['target reached'] = workload_df[validation_metric].apply( + lambda x: target_op(x, validation_target)).apply(np.any) + summary_df['best target'] = workload_df[validation_metric].apply( + lambda x: best_op(x)) + workload_df['index best eval'] = workload_df[validation_metric].apply( + lambda x: idx_op(x)) + summary_df['submission time'] = workload_df.apply( + lambda x: x['accumulated_submission_time'][x['index best eval']], axis=1) + summary_df['score'] = summary_df.apply( + lambda x: x['submission time'] if x['target reached'] else np.inf, axis=1) + + return summary_df + + def main(_): df = scoring_utils.get_experiment_df(FLAGS.experiment_path) results = { FLAGS.submission_tag: df, } - table = tabulate(df, headers='keys', tablefmt='psql') - logging.info(df) + + dfs = [] for workload, group in df.groupby('workload'): - target_metric_name, target_metric_value = scoring_utils.get_workload_validation_target(workload) - print(target_metric_name) - print(target_metric_value) - # print(workload) - # print(group) + summary_df = get_summary_df(workload, group) + dfs.append(summary_df) + + df = pd.concat(dfs) + print(tabulate(df, headers='keys', tablefmt='psql')) if FLAGS.compute_performance_profiles: - performance_profile_df = score_profile.compute_performance_profiles( + performance_profile_df = performance_profile.compute_performance_profiles( results, time_col='score', min_tau=1.0, @@ -47,9 +79,10 @@ def main(_): verbosity=0) if not os.path.exists(FLAGS.output_dir): os.mkdir(FLAGS.output_dir) - score_profile.plot_performance_profiles( + performance_profile.plot_performance_profiles( 
performance_profile_df, 'score', save_dir=FLAGS.output_dir) - perf_df = tabulate(performance_profile_df.T, headers='keys', tablefmt='psql') + perf_df = tabulate( + performance_profile_df.T, headers='keys', tablefmt='psql') logging.info(f'Performance profile:\n {perf_df}') diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index bc0703a4c..99a58747a 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -6,10 +6,7 @@ from absl import logging import pandas as pd - from algorithmic_efficiency import spec -from scoring.score_profile import NUM_TRIALS -from scoring.score_profile import NUM_WORKLOADS import algorithmic_efficiency.workloads.workloads as workloads_registry TRIAL_LINE_REGEX = '(.*) --- Tuning run (\d+)/(\d+) ---' @@ -21,6 +18,7 @@ WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)' BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/' + #### File IO helper functions ### def get_logfile_paths(logdir): """Gets all files ending in .log in logdir @@ -166,9 +164,6 @@ def get_experiment_df(experiment_dir): df = pd.DataFrame() workload_dirs = os.listdir(experiment_dir) num_workloads = len(workload_dirs) - # if num_workloads != NUM_WORKLOADS: - # warnings.warn(f'There should be {NUM_WORKLOADS} workloads but there are ' - # f'{num_workloads}.') for workload in workload_dirs: data = { 'workload': workload, @@ -203,12 +198,10 @@ def get_experiment_df(experiment_dir): ## Get workload properties def get_workload_validation_target(workload): """Returns workload target metric name and value. - """ - print(workload) + """ workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1) framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2) workload_metadata = copy.copy(WORKLOADS[workload_name]) - print(workload_metadata) # Extend path according to framework. 
workload_metadata['workload_path'] = os.path.join( @@ -224,4 +217,4 @@ def get_workload_validation_target(workload): metric_name = workload_obj.target_metric_name validation_metric = f'validation/{metric_name}' validation_target = workload_obj.validation_target_value - return validation_metric, validation_target \ No newline at end of file + return validation_metric, validation_target From 217e5041b3b39fd7649f6b8dd19136fd2b14b84e Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Wed, 8 Nov 2023 01:16:28 +0000 Subject: [PATCH 4/8] linting --- scoring/score_submission.py | 6 +++--- scoring/scoring_utils.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scoring/score_submission.py b/scoring/score_submission.py index ad6573589..f284044a3 100644 --- a/scoring/score_submission.py +++ b/scoring/score_submission.py @@ -1,14 +1,14 @@ +import operator import os from absl import app from absl import flags from absl import logging -import pandas as pd import numpy as np -import operator - +import pandas as pd import scoring_utils from tabulate import tabulate + from scoring import performance_profile from scoring.performance_profile import check_if_minimized diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index 99a58747a..70fb9b05f 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -1,7 +1,7 @@ +import copy import json import os import re -import copy from absl import logging import pandas as pd From 6a9048d47f8941192388f01e04ddbccc8d49e747 Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Wed, 8 Nov 2023 01:51:49 +0000 Subject: [PATCH 5/8] remove unused import --- scoring/scoring_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index 70fb9b05f..45665c011 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -6,7 +6,6 @@ from absl import logging import pandas as pd -from algorithmic_efficiency import spec import algorithmic_efficiency.workloads.workloads as workloads_registry TRIAL_LINE_REGEX = '(.*) --- Tuning run (\d+)/(\d+) ---' From c668c1cee4a81a6b362dadb8d9d8585f6ed1a24a Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Wed, 8 Nov 2023 20:02:34 +0000 Subject: [PATCH 6/8] add performance_profile.py --- scoring/performance_profile.py | 369 +++++++++++++++++++++++++++++++++ 1 file changed, 369 insertions(+) create mode 100644 scoring/performance_profile.py diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py new file mode 100644 index 000000000..3aa531e26 --- /dev/null +++ b/scoring/performance_profile.py @@ -0,0 +1,369 @@ +"""Performance and scoring code. + +The three primary methods exposed by the `scoring` module are: +- `compute_performance_profiles`: generates performance profiles for a set of + submissions over all workloads as defined in the scoring rules: + https://github.com/mlcommons/algorithmic-efficiency/blob/main/RULES.md +- `compute_leaderboard_score`: computes final scores from performance profiles. +- `plot_performance_profiles`: plot performance profiles for a set of + submissions. + +The two primary inputs to `compute_performance_profiles` are +1. A dictionary of pandas DataFrames, where each key is a globally unique + identifier for a submission and each value is a DataFrame containing one row + per trial per workload in that submission. 
At minimum, this DataFrame should + include a column of np.arrays indicating time (e.g., 'global_step'), a column + of np.arrays indicating performance (e.g., 'validation/accuracy') for each + workload and a column 'workload' that indicates the workload identifier. +2. A dictionary of workload metadata describing each workload in the form: + { + 'workload_identifier': { + 'target': VALUE, + 'metric': 'validation/error_rate', + } + } + The keys in this dictionary should match the workload identifiers used in + the dictionary of submissions. +""" +import itertools +import operator +import os +import re + +from absl import logging +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import scoring_utils + +import algorithmic_efficiency.workloads.workloads as workloads_registry + +WORKLOADS = workloads_registry.WORKLOADS +WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)' +BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/' +# These global variables have to be set according to the current set of +# workloads and rules for the scoring to be correct. +# We do not use the workload registry since it contains test and development +# workloads as well. +NUM_WORKLOADS = 8 +NUM_TRIALS = 5 + +MIN_EVAL_METRICS = [ + 'ce_loss', + 'error_rate', + 'ctc_loss', + 'wer', + 'l1_loss', + 'loss', +] + +MAX_EVAL_METRICS = ['mean_average_precision', 'ssim', 'accuracy', 'bleu'] + + +def generate_eval_cols(metrics): + splits = ['train', 'validation'] + return [f'{split}/{col}' for split, col in itertools.product(splits, metrics)] + + +MINIMIZE_REGISTRY = {k: True for k in generate_eval_cols(MIN_EVAL_METRICS)} +MINIMIZE_REGISTRY.update( + {k: False for k in generate_eval_cols(MAX_EVAL_METRICS)}) +MINIMIZE_REGISTRY['train_cost'] = True + + +def check_if_minimized(col_name): + """Guess if the eval metric column name should be minimized or not.""" + for prefix in ['best_', 'final_']: + col_name = col_name.replace(prefix, '') + for col in MINIMIZE_REGISTRY: + if col in col_name: + return MINIMIZE_REGISTRY[col] + + raise ValueError(f'Column {col_name} not found in `MINIMIZE_REGISTRY` as ' + 'either a column name or a substring of a column name.') + + +def get_index_that_reaches_best(workload_df, metric_col): + """Get the eval index in which a workload reaches the best on metric_col. + + Args: + workload_df: A subset of a submission's trials DataFrame that + includes only the trials in a single workload. + metric_col: Name of array column in workload_df + (e.g., `validation/l1_loss`). + + Returns: + Tuple of trial index, time index, and best value where the workload + reached the best metric_col. Return (-1, -1, -1) if no undiverged trials. + """ + is_minimized = check_if_minimized(metric_col) + series = workload_df[metric_col] + + series = series[series != np.nan] + + op = np.min if is_minimized else np.max + best = series.apply(op) + + op_idx = np.argmin if is_minimized else np.argmax + best_idx = series.apply(op_idx) + + if best.empty: + return -1, -1, -1 + else: + trial = best.idxmin() if is_minimized else best.idxmax() + return trial, best_idx[trial], best[trial] + + +def get_index_that_reaches_target(workload_df, + validation_metric, + validation_target): + """Get the eval index in which a workload reaches the target metric_col. + + Args: + workload_df: A subset of a submission's trials DataFrame that + includes only the trials in a single workload. + metric_col: Name of array column in workload_df (e.g. `validation/l1_loss`). + target: Target value for metric_col. 
+ + Returns: + Tuple of trial index and time index where the workload reached the target + metric_col. Return (-1, -1) if not reached. + """ + is_minimized = check_if_minimized(validation_metric) + validation_series = workload_df[validation_metric] + validation_series = validation_series[validation_series != np.nan] + + op = operator.le if is_minimized else operator.ge + validation_target_reached = validation_series.apply( + lambda x: op(x, validation_target)) + target_reached = pd.Series(validation_target_reached) + # Remove trials that never reach the target + target_reached = target_reached[target_reached.apply(np.any)] + + # If less than 3 trials reach the target, the submission will be scored as + # missing the target on this workload; return -1. Else, return the eval index + # of the earliest point the target is reached. + if len(target_reached) < 3: + return -1, -1 + else: + index_reached = target_reached.apply(np.argmax) + trial = index_reached.idxmin() + return trial, index_reached[trial] + + +def get_times_for_submission(submission, + submission_tag, + time_col='global_step', + verbosity=1, + self_tuning_ruleset=False): + """Get times to target for each workload in a submission. + + Args: + submission: A DataFrame containing one row for each trial in each workload + for a given submission. + submission_tag: Globally unique identified for a submission. + time_col: A string indicating which column to use for time. + verbosity: Debug level of information; choice of (1, 2, 3). + + Returns: + DataFrame with columns `submission`, `workload`, and time_col. + """ + workloads = [] + submission_name = submission_tag.split('.')[1] + num_workloads = len(submission.groupby('workload')) + if num_workloads != NUM_WORKLOADS: + logging.warning(f'Expecting {NUM_WORKLOADS} workloads ' + f'but found {num_workloads} workloads.') + for workload, group in submission.groupby('workload'): + num_trials = len(group) + if num_trials != NUM_TRIALS and not self_tuning_ruleset: + logging.warning(f'Expecting {NUM_TRIALS} trials for workload ' + f'{workload} but found {num_trials} trials.') + validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload) + validation_target = validation_target + + trial_idx, time_idx = get_index_that_reaches_target( + group, validation_metric, validation_target) + if time_idx > -1: + time_val = group[time_col].loc[trial_idx][time_idx] + else: + time_val = float('inf') + + workloads.append({ + 'submission': submission_name, + 'workload': workload, + time_col: time_val, + }) + + if verbosity > 0: + print(' hparams:') + if time_idx > -1: + hparams = group.loc[trial_idx, 'hparams'] + for key, val in hparams.items(): + print(f' - {key}: {val}') + else: + print('Submission did not reach target') + df = pd.DataFrame.from_records(workloads) + df = df.pivot(index='submission', columns='workload', values=time_col) + + return df + + +def compute_performance_profiles(results, + time_col='global_step', + min_tau=1.0, + max_tau=None, + reference_submission_tag=None, + num_points=100, + scale='linear', + verbosity=0): + """Compute performance profiles for a set of submission by some time column. + + Args: + results: Dict where keys are submission names and values are a DataFrame of + trials where each row is a trial and each column is a field for a given + trial. Results should contain keys for each workload's metric, time_col, + 'workload'. See file header comment for more details. + time_col: A string indicating which column to use for time. 
+ min_tau: Minimum tau to use for plotting. + max_tau: Maximum tau to use for plotting. + reference_submission_tag: If specified, must be an element of + `submission_tags`. Used as the denominator for computing tau. Otherwise, + the minimum time to target is computed per-workload and used as the + denominator for tau. + num_points: Number of points to use for plotting. + scale: Linear or log scale for the x-axis. + verbosity: Debug level of information; choice of (1, 2, 3). + + Returns: + A DataFrame of performance profiles for the set of submissions given in + `results` based on `time_col`. Each row represents a submission and each + column represents rho(tau) for some value of tau (df.volumns are the + different values of tau). + """ + dfs = [] + + for submission_tag, result in results.items(): + logging.info( + f'\nComputing performance profile with respect to `{time_col}` for ' + f'{submission_tag}') + dfs.append( + get_times_for_submission(result, submission_tag, time_col, verbosity)) + df = pd.concat(dfs) + + if verbosity > 0: + logging.info('\n`{time_col}` to reach target:') + with pd.option_context('display.max_rows', + None, + 'display.max_columns', + None, + 'display.width', + 1000): + logging.info(df) + + # Divide by the fastest. + if reference_submission_tag is None: + df.update(df.div(df.min(axis=0), axis=1)) + else: + df.update(df.div(df.loc[reference_submission_tag, :], axis=1)) + + if verbosity > 0: + logging.info('\n`{time_col}` to reach target normalized to best:') + with pd.option_context('display.max_rows', + None, + 'display.max_columns', + None, + 'display.width', + 1000): + logging.info(df) + + # If no max_tau is supplied, choose the value of tau that would plot all non + # inf or nan data. + if max_tau is None: + max_tau = df.replace(float('inf'), -1).replace(np.nan, -1).values.max() + + if scale == 'linear': + points = np.linspace(min_tau, max_tau, num=num_points) + elif scale == 'log': + points = np.logspace( + np.log10(min_tau), np.log10(max_tau), num=num_points, base=10.0) + + def rho(r, tau): + return (r <= tau).sum(axis=1) / NUM_WORKLOADS + + perf_df = pd.concat([rho(df, tau) for tau in points], axis=1) + + cols = points + if scale == 'log': + cols = np.log10(points) + perf_df.columns = cols + + return perf_df + + +def compute_leaderboard_score(df, normalize=False): + """Compute leaderboard score by taking integral of performance profile. + + Args: + df: pd.DataFrame returned from `compute_performance_profiles`. + normalize: divide by the range of the performance profile's tau. + + Returns: + pd.DataFrame with one column of scores indexed by submission. + """ + scores = np.trapz(df, x=df.columns) + if normalize: + scores /= df.columns.max() - df.columns.min() + return pd.DataFrame(scores, columns=['score'], index=df.index) + + +def maybe_save_figure(save_dir, name, ext='pdf'): + """Maybe save the current matplotlib.pyplot figure.""" + if save_dir: + path = os.path.join(save_dir, f'{name}.{ext}') + with open(path, 'wb') as fout: + plt.savefig(fout, format=ext) + + +def maybe_save_df_to_csv(save_dir, df, path, **to_csv_kwargs): + if save_dir: + path = os.path.join(save_dir, path) + with open(path, 'w') as fout: + df.to_csv(fout, **to_csv_kwargs) + + +def plot_performance_profiles(perf_df, + df_col, + scale='linear', + save_dir=None, + figsize=(30, 10), + font_size=18): + """Plot performance profiles. 
+ + Args: + perf_df: A DataFrame of performance profiles where each row represents a + submission and each column represents rho(tau) for some value of tau + (df.volumns are the different values of tau). + df_col: The column in the original submission results DataFrame used to + compute the performance profile. This argument is only used for axis + and file naming. + scale: Whether or not the data in perf_df is on a linear or log scale. This + argument is only used for axis and file naming. + save_dir: If a valid directory is provided, save both the plot and perf_df + to the provided directory. + figsize: The size of the plot. + font_size: The font size to use for the legend. + + Returns: + None. If a valid save_dir is provided, save both the plot and perf_df. + """ + fig = perf_df.T.plot(figsize=figsize) + df_col_display = f'log10({df_col})' if scale == 'log' else df_col + fig.set_xlabel( + f'Ratio of `{df_col_display}` to best submission', size=font_size) + fig.set_ylabel('Proportion of workloads', size=font_size) + fig.legend(prop={'size': font_size}, bbox_to_anchor=(1.0, 1.0)) + maybe_save_figure(save_dir, f'performance_profile_by_{df_col_display}') + maybe_save_df_to_csv(save_dir, + perf_df, + f'performance_profile_{df_col_display}.csv') From 11e70c0f91915eb8d4f5a9059967941fbcb1820c Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 9 Nov 2023 21:06:56 +0000 Subject: [PATCH 7/8] style fixes --- scoring/performance_profile.py | 3 +-- scoring/score_submission.py | 5 ++--- scoring/scoring_utils.py | 6 ++---- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index 3aa531e26..01241f95d 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -34,7 +34,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -import scoring_utils +from scoring import scoring_utils import algorithmic_efficiency.workloads.workloads as workloads_registry @@ -180,7 +180,6 @@ def get_times_for_submission(submission, logging.warning(f'Expecting {NUM_TRIALS} trials for workload ' f'{workload} but found {num_trials} trials.') validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload) - validation_target = validation_target trial_idx, time_idx = get_index_that_reaches_target( group, validation_metric, validation_target) diff --git a/scoring/score_submission.py b/scoring/score_submission.py index f284044a3..0dd84ff55 100644 --- a/scoring/score_submission.py +++ b/scoring/score_submission.py @@ -10,7 +10,6 @@ from tabulate import tabulate from scoring import performance_profile -from scoring.performance_profile import check_if_minimized flags.DEFINE_string( 'experiment_path', @@ -28,7 +27,7 @@ def get_summary_df(workload, workload_df): validation_metric, validation_target = scoring_utils.get_workload_validation_target(workload) - is_minimized = check_if_minimized(validation_metric) + is_minimized = performance_profile.check_if_minimized(validation_metric) target_op = operator.le if is_minimized else operator.ge best_op = min if is_minimized else max idx_op = np.argmin if is_minimized else np.argmax @@ -65,7 +64,7 @@ def main(_): dfs.append(summary_df) df = pd.concat(dfs) - print(tabulate(df, headers='keys', tablefmt='psql')) + logging.info(tabulate(df, headers='keys', tablefmt='psql')) if FLAGS.compute_performance_profiles: performance_profile_df = performance_profile.compute_performance_profiles( diff --git a/scoring/scoring_utils.py 
b/scoring/scoring_utils.py index 45665c011..d10617896 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -140,7 +140,7 @@ def get_trials_df(logfile): def get_experiment_df(experiment_dir): """Gets a df of per trial results from an experiment dir. The output df can be provided as input to - score_profilecompute_performance_profiles. + performance_profile.compute_performance_profiles. Args: experiment_dir: path to experiment directory containing results for workloads. @@ -196,8 +196,7 @@ def get_experiment_df(experiment_dir): ## Get workload properties def get_workload_validation_target(workload): - """Returns workload target metric name and value. - """ + """Returns workload target metric name and value.""" workload_name = re.match(WORKLOAD_NAME_PATTERN, workload).group(1) framework = re.match(WORKLOAD_NAME_PATTERN, workload).group(2) workload_metadata = copy.copy(WORKLOADS[workload_name]) @@ -208,7 +207,6 @@ def get_workload_validation_target(workload): workload_metadata['workload_path'] + f'{framework}', 'workload.py') workload_init_kwargs = {} - print(workload_metadata['workload_path']) workload_obj = workloads_registry.import_workload( workload_path=workload_metadata['workload_path'], workload_class_name=workload_metadata['workload_class_name'], From dca6a26f7c50cccbce661db74626a82dea15d4ff Mon Sep 17 00:00:00 2001 From: Priya Kasimbeg Date: Thu, 9 Nov 2023 21:22:14 +0000 Subject: [PATCH 8/8] import fix --- scoring/performance_profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index 01241f95d..e62e8e18e 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -34,9 +34,9 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from scoring import scoring_utils import algorithmic_efficiency.workloads.workloads as workloads_registry +from scoring import scoring_utils WORKLOADS = workloads_registry.WORKLOADS WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)'
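For reference, below is a minimal usage sketch (not part of any patch in this series) showing how the modules fit together once all eight patches are applied. It mirrors the flow of `scoring/score_submission.py` after the refactor, but calls the library functions directly; the experiment directory and submission tag are hypothetical placeholders, and the `compute_leaderboard_score` call at the end is optional — the patched script itself stops at the performance-profile table and plot.

from tabulate import tabulate

from scoring import performance_profile
from scoring import scoring_utils

# Hypothetical inputs; substitute a real experiment directory and tag.
# The tag is split on '.' internally, so it should contain a dot.
experiment_dir = 'experiments/my_experiment'
submission_tag = 'my_experiment.my_submission'

# One row per trial per workload, parsed from each trial's
# eval_measurements.csv under the experiment directory.
df = scoring_utils.get_experiment_df(experiment_dir)
results = {submission_tag: df}

# Performance profiles: one row per submission, one column per value of tau
# (same arguments as the call in score_submission.py).
profile_df = performance_profile.compute_performance_profiles(
    results,
    time_col='score',
    min_tau=1.0,
    max_tau=None,
    reference_submission_tag=None,
    num_points=100,
    scale='linear',
    verbosity=0)
print(tabulate(profile_df.T, headers='keys', tablefmt='psql'))

# The leaderboard score is the (optionally normalized) integral of the
# performance profile over tau.
print(performance_profile.compute_leaderboard_score(profile_df, normalize=True))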