-
Notifications
You must be signed in to change notification settings - Fork 62
/
base_experiment_data.py
165 lines (137 loc) · 5.19 KB
/
base_experiment_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base interface for accessing the stored benchmark experiment data."""
from abc import ABCMeta, abstractmethod
from distutils.util import strtobool # pylint: disable=deprecated-module
from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple
import pandas
from mlos_bench.storage.base_tunable_config_data import TunableConfigData
if TYPE_CHECKING:
from mlos_bench.storage.base_trial_data import TrialData
from mlos_bench.storage.base_tunable_config_trial_group_data import (
TunableConfigTrialGroupData,
)
class ExperimentData(metaclass=ABCMeta):
    """
    Base interface for accessing the stored experiment benchmark data.

    An experiment groups together a set of trials that are run with a given set of
    scripts and mlos_bench configuration files.
    """

    # Column name prefixes matching the naming scheme described in `results_df`.
    RESULT_COLUMN_PREFIX = "result."
    CONFIG_COLUMN_PREFIX = "config."

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        experiment_id: str,
        description: str,
        root_env_config: str,
        git_repo: str,
        git_commit: str,
    ):
        """
        Store the identifying attributes of the experiment.

        Parameters
        ----------
        experiment_id : str
            ID of the experiment.
        description : str
            Description of the experiment.
        root_env_config : str
            Root environment configuration of the experiment.
        git_repo : str
            Git repo reported together with the root environment configuration.
        git_commit : str
            Git commit reported together with the root environment configuration.
        """
        self._experiment_id = experiment_id
        self._description = description
        self._root_env_config = root_env_config
        self._git_repo = git_repo
        self._git_commit = git_commit

    @staticmethod
    def _is_defaults_flag_set(value: object) -> bool:
        """
        Interpret a trial metadata value as a boolean flag.

        The value is parsed locally instead of via ``distutils.util.strtobool``:
        ``distutils`` was removed from the standard library in Python 3.12, and
        ``strtobool`` raises ``ValueError`` for any value it does not recognize
        (e.g., a stored ``None`` or an empty string).

        Parameters
        ----------
        value : object
            The raw metadata value (e.g., a bool, an int, or a string).

        Returns
        -------
        bool
            True if the (case-insensitive, whitespace-trimmed) string form of the
            value is one of ``y``, ``yes``, ``t``, ``true``, ``on``, ``1``.
            False for everything else, including unrecognized values.
        """
        return str(value).strip().lower() in {"y", "yes", "t", "true", "on", "1"}

    @property
    def experiment_id(self) -> str:
        """ID of the experiment."""
        return self._experiment_id

    @property
    def description(self) -> str:
        """Description of the experiment."""
        return self._description

    @property
    def root_env_config(self) -> Tuple[str, str, str]:
        """
        Root environment configuration.

        Returns
        -------
        root_env_config : Tuple[str, str, str]
            A tuple of (root_env_config, git_repo, git_commit) for the root environment.
        """
        return (self._root_env_config, self._git_repo, self._git_commit)

    def __repr__(self) -> str:
        return f"Experiment :: {self._experiment_id}: '{self._description}'"

    @property
    @abstractmethod
    def objectives(self) -> Dict[str, Literal["min", "max"]]:
        """
        Retrieve the experiment's objectives data from the storage.

        Returns
        -------
        objectives : Dict[str, objective]
            A dictionary of the experiment's objective names (optimization_targets)
            and their directions (e.g., min or max).
        """

    @property
    @abstractmethod
    def trials(self) -> Dict[int, "TrialData"]:
        """
        Retrieve the experiment's trials' data from the storage.

        Returns
        -------
        trials : Dict[int, TrialData]
            A dictionary of the trials' data, keyed by trial id.
        """

    @property
    @abstractmethod
    def tunable_configs(self) -> Dict[int, "TunableConfigData"]:
        """
        Retrieve the experiment's (tunable) configs' data from the storage.

        Returns
        -------
        tunable_configs : Dict[int, TunableConfigData]
            A dictionary of the configs' data, keyed by (tunable) config id.
        """

    @property
    @abstractmethod
    def tunable_config_trial_groups(self) -> Dict[int, "TunableConfigTrialGroupData"]:
        """
        Retrieve the Experiment's (Tunable) Config Trial Group data from the storage.

        Returns
        -------
        tunable_config_trial_groups : Dict[int, TunableConfigTrialGroupData]
            A dictionary of the trial groups' data, keyed by (tunable) config id.
        """

    @property
    def default_tunable_config_id(self) -> Optional[int]:
        """
        Retrieves the (tunable) config id for the default tunable values for this
        experiment.

        Note: this is by *default* the first trial executed for this experiment.
        However, it is currently possible that the user changed the tunables config
        in between resumptions of an experiment.

        Returns
        -------
        default_tunable_config_id : Optional[int]
            The config id of the first trial (in trial id order) whose metadata
            marks it as "is_defaults"; otherwise the config id of the trial with
            the smallest trial id; or None if the experiment has no trials.
        """
        # Note: this implementation is quite inefficient and may be better
        # reimplemented by subclasses.

        # Trial ids are unique dict keys, so sorting the items orders by trial id.
        trials_items = sorted(self.trials.items())
        if not trials_items:
            return None
        # Check to see if we included it in trial metadata.
        for _trial_id, trial in trials_items:
            # Take the first config id marked as "defaults" when it was instantiated.
            # Missing or unparsable flags count as "not defaults" instead of raising.
            if self._is_defaults_flag_set(trial.metadata_dict.get("is_defaults", False)):
                return trial.tunable_config_id
        # Fallback (min trial_id)
        return trials_items[0][1].tunable_config_id

    @property
    @abstractmethod
    def results_df(self) -> pandas.DataFrame:
        """
        Retrieve all experimental results as a single DataFrame.

        Returns
        -------
        results : pandas.DataFrame
            A DataFrame with configurations and results from all trials of the experiment.
            Has columns
            [trial_id, tunable_config_id, tunable_config_trial_group_id, ts_start, ts_end, status]
            followed by tunable config parameters (prefixed with "config.") and
            trial results (prefixed with "result."). The latter can be NULLs if the
            trial was not successful.
        """