Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 35 additions & 27 deletions dvc/commands/experiments/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def _collect_names(all_experiments, **kwargs):
def _collect_rows(
base_rev,
experiments,
all_headers,
metric_headers,
param_headers,
Comment on lines +68 to +70
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to just use TabularData.row_from_dict() here? The parameters are getting complicated here.

Copy link
Contributor Author

@daavoo daavoo Feb 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried adding a change using row_from_dict but it only saves us from passing all_headers.

Unfortunately, we still need metric_headers and param_headers later in this function because of the bug being fixed.

The overall handling of metrics and params could be simplified but feels out of the scope of this fix.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Thanks.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should simplify these parameters soon though, as it is getting complicated as exp show is becoming complex.
May get more confusing in the future. Anyway, thanks for looking into it. πŸ™‚

metric_names,
param_names,
deps_names,
Expand All @@ -87,24 +90,26 @@ def _collect_rows(

new_checkpoint = True
for i, (rev, results) in enumerate(experiments.items()):
fill_value = FILL_VALUE_ERRORED if results.get("error") else fill_value
row_dict = {k: fill_value for k in all_headers}

exp = results.get("data", {})

if exp.get("running"):
state = "Running"
elif exp.get("queued"):
state = "Queued"
else:
state = fill_value
executor = exp.get("executor", fill_value)

is_baseline = rev == "baseline"

if is_baseline:
name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
else:
name_rev = rev[:7]

exp_name = exp.get("name", "")
tip = exp.get("checkpoint_tip")

parent_rev = exp.get("checkpoint_parent", "")
parent_exp = experiments.get(parent_rev, {}).get("data", {})
parent_tip = parent_exp.get("checkpoint_tip")
Expand All @@ -130,26 +135,28 @@ def _collect_rows(
if not is_baseline:
new_checkpoint = not (tip and tip == parent_tip)

row = [
exp_name,
name_rev,
typ,
_format_time(exp.get("timestamp"), fill_value, iso),
parent,
state,
executor,
]
fill_value = FILL_VALUE_ERRORED if results.get("error") else fill_value
row_dict["Experiment"] = exp.get("name", "")
row_dict["rev"] = name_rev
row_dict["typ"] = typ
row_dict["Created"] = _format_time(
exp.get("timestamp"), fill_value, iso
)
row_dict["parent"] = parent
row_dict["State"] = state
row_dict["Executor"] = exp.get("executor", fill_value)
Comment on lines +138 to +146
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now this requires us to keep headers in sync in two places. (we did that before by forcing an order anyway though).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true but, as you mentioned, the sync was required before. By using dict keys, at least the sync is explicit (and prevents bug being fixed)

Comment on lines +138 to +146
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a nitpicky suggestion, what do you think of using row_dict.update() here? I'm okay with how it is as well though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean like the following?

Suggested change
row_dict["Experiment"] = exp.get("name", "")
row_dict["rev"] = name_rev
row_dict["typ"] = typ
row_dict["Created"] = _format_time(
exp.get("timestamp"), fill_value, iso
)
row_dict["parent"] = parent
row_dict["State"] = state
row_dict["Executor"] = exp.get("executor", fill_value)
row_dict.update(
Experiment=exp.get("name", ""),
rev=name_rev,
typ=typ,
Created=_format_time(
exp.get("timestamp"), fill_value, iso
),
parent=parent,
State=state,
Executor=exp.get("executor", fill_value)
)

No strong preference, is update more efficient or is there another reason?

Copy link
Collaborator

@skshetry skshetry Feb 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@daavoo, I meant it more like following, to use dict arg than as kwargs:

row_dict.update({
    "Experiment": exp.get("name", ""),
    "rev": name_rev,
    "typ": typ,
    "Created": _format_time(exp.get("timestamp"), fill_value, iso),
    "parent": parent,
    "State": state,
    "Executor": exp.get("executor", fill_value),
    ...
})

It's a matter of personal preference, I guess. As I said, I am fine as-is too.


_extend_row(
row,
row_dict,
metric_names,
metric_headers,
exp.get("metrics", {}).items(),
precision,
fill_value=fill_value,
)
_extend_row(
row,
row_dict,
param_names,
param_headers,
exp.get("params", {}).items(),
precision,
fill_value=fill_value,
Expand All @@ -158,8 +165,8 @@ def _collect_rows(
hash_info = exp.get("deps", {}).get(dep, {}).get("hash")
if hash_info is not None:
hash_info = hash_info[:7]
row.append(hash_info or fill_value)
yield row
row_dict[dep] = hash_info
yield list(row_dict.values())


def _sort_column(sort_by, metric_names, param_names):
Expand Down Expand Up @@ -225,13 +232,9 @@ def _format_time(datetime_obj, fill_value=FILL_VALUE, iso=False):
return datetime_obj.strftime(fmt)


def _extend_row(row, names, items, precision, fill_value=FILL_VALUE):
def _extend_row(row, names, headers, items, precision, fill_value=FILL_VALUE):
from dvc.compare import _format_field, with_value

if not items:
row.extend(fill_value for keys in names.values() for _ in keys)
return

for fname, data in items:
item = data.get("data", {})
item = flatten(item) if isinstance(item, dict) else {fname: item}
Expand All @@ -243,7 +246,11 @@ def _extend_row(row, names, items, precision, fill_value=FILL_VALUE):
# wrap field data in ui.rich_text, otherwise rich may
# interpret unescaped braces from list/dict types as rich
# markup tags
row.append(ui.rich_text(str(_format_field(value, precision))))
value = ui.rich_text(str(_format_field(value, precision)))
if name in headers:
row[name] = value
else:
row[f"{fname}:{name}"] = value


def experiments_table(
Expand All @@ -264,14 +271,15 @@ def experiments_table(

from dvc.compare import TabularData

td = TabularData(
lconcat(headers, metric_headers, param_headers, deps_names),
fill_value=fill_value,
)
all_headers = lconcat(headers, metric_headers, param_headers, deps_names)
td = TabularData(all_headers, fill_value=fill_value)
for base_rev, experiments in all_experiments.items():
rows = _collect_rows(
base_rev,
experiments,
all_headers,
metric_headers,
param_headers,
metric_names,
param_names,
deps_names,
Expand Down
79 changes: 70 additions & 9 deletions tests/func/experiments/test_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,34 @@ def test_show_outs(tmp_dir, dvc, scm):
deps=["copy.py"],
outs=["out"],
)

scm.commit("init")

outs = dvc.experiments.show()["workspace"]["baseline"]["data"]["outs"]
assert outs == {
"out": {
"hash": ANY,
"size": ANY,
"nfiles": None,
}
}


def test_metrics_renaming(tmp_dir, dvc, scm, capsys):
tmp_dir.gen("copy.py", COPY_SCRIPT)
params_file = tmp_dir / "params.yaml"
params_data = {
"foo": 1,
}
(tmp_dir / params_file).dump(params_data)

dvc.run(
cmd="python copy.py params.yaml metrics.yaml",
metrics_no_cache=["metrics.yaml"],
params=["foo"],
name="copy-file",
deps=["copy.py"],
)
scm.add(
[
"dvc.yaml",
Expand All @@ -650,16 +678,49 @@ def test_show_outs(tmp_dir, dvc, scm):
".gitignore",
]
)
scm.commit("init")

outs = dvc.experiments.show()["workspace"]["baseline"]["data"]["outs"]
assert outs == {
"out": {
"hash": ANY,
"size": ANY,
"nfiles": None,
}
}
scm.commit("metrics.yaml")
metrics_rev = scm.get_rev()

dvc.run(
cmd="python copy.py params.yaml scores.yaml",
metrics_no_cache=["scores.yaml"],
params=["foo"],
name="copy-file",
deps=["copy.py"],
)
scm.add(
[
"dvc.yaml",
"dvc.lock",
"params.yaml",
"scores.yaml",
]
)
scm.commit("scores.yaml")
scores_rev = scm.get_rev()

capsys.readouterr()
assert main(["exp", "show", "--csv", "-A"]) == 0
cap = capsys.readouterr()

def _get_rev_isotimestamp(rev):
return datetime.fromtimestamp(
scm.gitpython.repo.rev_parse(rev).committed_date
).isoformat()

assert (
"master,{},baseline,{},,1,,1".format(
scores_rev[:7], _get_rev_isotimestamp(scores_rev)
)
in cap.out
)
assert (
",{},baseline,{},,,1,1".format(
metrics_rev[:7], _get_rev_isotimestamp(metrics_rev)
)
in cap.out
)


def test_show_sorted_deps(tmp_dir, dvc, scm, capsys):
Expand Down