3 changes: 1 addition & 2 deletions scoring/score_submission.py
@@ -5,8 +5,7 @@
 from absl import logging
 import scoring_utils
 
-from algorithmic_efficiency import workloads
-import scoring
+from scoring import scoring
 
 flags.DEFINE_string(
     'experiment_path',
15 changes: 11 additions & 4 deletions scoring/scoring.py
@@ -40,6 +40,12 @@
 WORKLOADS = workloads_registry.WORKLOADS
 WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)'
 BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/'
+# These global variables have to be set according to the current set of
+# workloads and rules for the scoring to be correct.
+# We do not use the workload registry since it contains test and development
+# workloads as well.
+NUM_WORKLOADS = 8
+NUM_TRIALS = 5
 
 MIN_EVAL_METRICS = [
     'ce_loss',
@@ -133,9 +139,10 @@ def get_index_that_reaches_target(workload_df,
   # Remove trials that never reach the target
   target_reached = target_reached[target_reached.apply(np.any)]
 
-  # If we have no trials that have reached the target, return -1. Else, return
-  # the eval index of the earliest point the target is reached.
-  if target_reached.empty:
+  # If less than 3 trials reach the target, the submission will be scored as
+  # missing the target on this workload; return -1. Else, return the eval index
+  # of the earliest point the target is reached.
+  if len(target_reached) < 3:
     return -1, -1
   else:
     index_reached = target_reached.apply(np.argmax)
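A minimal, self-contained sketch of the new rule above, on invented eval data (only the `np.any` filter and the fewer-than-three check mirror the diff; the trial names and Series values are made up):

import numpy as np
import pandas as pd

# One boolean array per tuning trial: True wherever an eval hit the target.
target_reached = pd.Series({
    'trial_1': np.array([False, False, True, True]),  # first hits at eval 2
    'trial_2': np.array([False, True, True, True]),  # first hits at eval 1
    'trial_3': np.array([False, False, False, False]),  # never hits
    'trial_4': np.array([False, False, False, True]),  # first hits at eval 3
})

# Drop trials that never reach the target, as in the diff.
target_reached = target_reached[target_reached.apply(np.any)]

# Previously any surviving trial counted (target_reached.empty); now fewer
# than 3 qualifying trials scores the workload as missed.
if len(target_reached) < 3:
  print('scored as missing the target: (-1, -1)')
else:
  # np.argmax over booleans returns the index of the first True.
  index_reached = target_reached.apply(np.argmax)
  print(index_reached)  # trial_1 -> 2, trial_2 -> 1, trial_4 -> 3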
@@ -287,7 +294,7 @@ def compute_performance_profiles(results,
       np.log10(min_tau), np.log10(max_tau), num=num_points, base=10.0)
 
   def rho(r, tau):
-    return (r <= tau).sum(axis=1) / len(r.columns)
+    return (r <= tau).sum(axis=1) / NUM_WORKLOADS
 
   perf_df = pd.concat([rho(df, tau) for tau in points], axis=1)
 
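The one-line `rho` change above is where the new `NUM_WORKLOADS` constant bites: a performance profile value rho(tau) is the fraction of workloads on which a submission's runtime is within a factor tau of the best. With `len(r.columns)`, the denominator shrank to however many workloads actually produced results, so missing workloads silently inflated the score; the fixed denominator makes them count as misses. A sketch with invented runtime ratios (workload names and numbers are illustrative only):

import pandas as pd

NUM_WORKLOADS = 8  # fixed denominator from scoring/scoring.py

# Invented runtime ratios (submission time / best time) for a submission
# that only produced results on 2 of the 8 workloads.
r = pd.DataFrame({'fastmri': [1.0], 'wmt': [2.5]}, index=['my_submission'])

tau = 2.0
old_rho = (r <= tau).sum(axis=1) / len(r.columns)  # 1/2 = 0.50, inflated
new_rho = (r <= tau).sum(axis=1) / NUM_WORKLOADS  # 1/8 = 0.125, misses count
print(float(old_rho.iloc[0]), float(new_rho.iloc[0]))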
21 changes: 17 additions & 4 deletions scoring/scoring_utils.py
@@ -1,10 +1,14 @@
 import json
 import os
 import re
+import warnings
 
 from absl import logging
 import pandas as pd
 
+from scoring.scoring import NUM_TRIALS
+from scoring.scoring import NUM_WORKLOADS
+
 TRIAL_LINE_REGEX = '(.*) --- Tuning run (\d+)/(\d+) ---'
 METRICS_LINE_REGEX = '(.*) Metrics: ({.*})'
 TRIAL_DIR_REGEX = 'trial_(\d+)'
@@ -103,8 +107,7 @@ def get_trials_df_dict(logfile):
   """
   trials_dict = get_trials_dict(logfile)
   trials_df_dict = {}
-  for trial in trials_dict.keys():
-    metrics = trials_dict[trial]
+  for trial, metrics in trials_dict.items():
     trials_df_dict[trial] = pd.DataFrame(metrics)
   return trials_df_dict
 
@@ -156,6 +159,10 @@ def get_experiment_df(experiment_dir):
   """
   df = pd.DataFrame()
   workload_dirs = os.listdir(experiment_dir)
+  num_workloads = len(workload_dirs)
+  if num_workloads != NUM_WORKLOADS:
+    warnings.warn(f'There should be {NUM_WORKLOADS} workloads but there are '
+                  f'{num_workloads}.')
   for workload in workload_dirs:
     data = {
         'workload': workload,
@@ -164,6 +171,7 @@
         t for t in os.listdir(os.path.join(experiment_dir, workload))
         if re.match(TRIAL_DIR_REGEX, t)
     ]
+    workload_df = pd.DataFrame()
     for trial in trial_dirs:
       eval_measurements_filepath = os.path.join(
           experiment_dir,
@@ -173,13 +181,18 @@
       )
       try:
         trial_df = pd.read_csv(eval_measurements_filepath)
-      except FileNotFoundError as e:
+      except FileNotFoundError:
         logging.info(f'Could not read {eval_measurements_filepath}')
         continue
       data['trial'] = trial
       for column in trial_df.columns:
         values = trial_df[column].to_numpy()
         data[column] = values
       trial_df = pd.DataFrame([data])
-      df = pd.concat([df, trial_df], ignore_index=True)
+      workload_df = pd.concat([workload_df, trial_df], ignore_index=True)
+    num_trials = len(workload_df)
+    if num_trials != NUM_TRIALS:
+      warnings.warn(f'There should be {NUM_TRIALS} trials for workload '
+                    f'{workload} but there are only {num_trials}.')
+    df = pd.concat([df, workload_df], ignore_index=True)
   return df
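Since the new completeness checks in `get_experiment_df` go through `warnings.warn` rather than raising, scoring still runs to completion on a partial experiment directory. A sketch of how a caller might surface them (the directory path is a placeholder, not repo data):

import warnings

from scoring import scoring_utils

# Placeholder path to an experiment directory with missing workloads/trials.
with warnings.catch_warnings(record=True) as caught:
  warnings.simplefilter('always')
  df = scoring_utils.get_experiment_df('experiments/my_partial_run')

for w in caught:
  print(w.message)
  # e.g. 'There should be 8 workloads but there are 1.'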
25 changes: 19 additions & 6 deletions scoring/test_scoring_utils.py
@@ -1,8 +1,11 @@
 from absl.testing import absltest
-import scoring_utils
 
-TEST_LOGFILE = 'test_data/adamw_fastmri_jax_04-18-2023-13-10-58.log'
-TEST_DIR = 'test_data/experiment_dir'
+from scoring import scoring_utils
+from scoring.scoring import NUM_TRIALS
+from scoring.scoring import NUM_WORKLOADS
+
+TEST_LOGFILE = 'scoring/test_data/adamw_fastmri_jax_04-18-2023-13-10-58.log'
+TEST_DIR = 'scoring/test_data/experiment_dir'
 NUM_EVALS = 18
 
 
@@ -14,8 +17,7 @@ def test_get_trials_dict(self):
 
   def test_get_trials_df_dict(self):
     trials_dict = scoring_utils.get_trials_df_dict(TEST_LOGFILE)
-    for trial in trials_dict:
-      df = trials_dict[trial]
+    for df in trials_dict.values():
       self.assertEqual(len(df.index), NUM_EVALS)
 
   def test_get_trials_df(self):
@@ -24,7 +26,18 @@ def test_get_trials_df(self):
       self.assertEqual(len(df.at['1', column]), NUM_EVALS)
 
   def test_get_experiment_df(self):
-    df = scoring_utils.get_experiment_df(TEST_DIR)
+    _ = scoring_utils.get_experiment_df(TEST_DIR)
+    self.assertWarnsRegex(
+        Warning,
+        f'There should be {NUM_WORKLOADS} workloads but there are 1.',
+        scoring_utils.get_experiment_df,
+        TEST_DIR)
+    self.assertWarnsRegex(
+        Warning,
+        f'There should be {NUM_TRIALS} trials for workload mnist_jax but there '
+        'are only 1.',
+        scoring_utils.get_experiment_df,
+        TEST_DIR)
 
 
 if __name__ == '__main__':
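The tests above use the callable-and-args form of `unittest`'s `assertWarnsRegex(expected_warning, regex, callable, *args)`, which invokes the callable itself and asserts that a warning matching the regex is emitted. A standalone sketch of that pattern (the helper function is hypothetical, not repo code):

import warnings

from absl.testing import absltest


def check_workload_count(n, expected=8):
  # Hypothetical stand-in for get_experiment_df's completeness check.
  if n != expected:
    warnings.warn(f'There should be {expected} workloads but there are {n}.')


class WarnsRegexTest(absltest.TestCase):

  def test_warns_on_incomplete_count(self):
    # assertWarnsRegex calls check_workload_count(1) itself and matches the
    # emitted UserWarning (a Warning subclass) against the regex.
    self.assertWarnsRegex(Warning,
                          'There should be 8 workloads but there are 1.',
                          check_workload_count,
                          1)


if __name__ == '__main__':
  absltest.main()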
1 change: 0 additions & 1 deletion setup.cfg
@@ -37,7 +37,6 @@ install_requires =
     absl-py==1.4.0
     numpy>=1.23
     pandas>=2.0.1
-    tabulate==0.9.0
     tensorflow==2.12.0
     tensorflow-datasets==4.9.2
     tensorflow-probability==0.20.0