-
Notifications
You must be signed in to change notification settings - Fork 1.3k
dump: deterministic lockfile dump #3711
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,182 @@ | ||
| import os | ||
| from collections import OrderedDict | ||
| from operator import itemgetter | ||
| from textwrap import dedent | ||
|
|
||
| import pytest | ||
| import yaml | ||
| from dvc.dvcfile import PIPELINE_LOCK | ||
| from dvc.serialize import get_params_deps | ||
| from dvc.utils.fs import remove | ||
| from dvc.utils.stage import parse_stage_for_update | ||
|
|
||
| from tests.func.test_run_multistage import supported_params | ||
|
|
||
|
|
||
| FS_STRUCTURE = { | ||
| "foo": "bar\nfoobar", | ||
| "bar": "foo\nfoobar", | ||
| "foobar": "foobar\nbar", | ||
| "params.yaml": yaml.dump(supported_params), | ||
| "params2.yaml": yaml.dump(supported_params), | ||
| } | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def run_head(tmp_dir, dvc): | ||
| """Output first line of each file to different file with '-1' appended.""" | ||
| tmp_dir.gen( | ||
| "head.py", | ||
| dedent( | ||
| """ | ||
| import sys | ||
| for file in sys.argv[1:]: | ||
| with open(file) as f, open(file +"-1","w+") as w: | ||
| w.write(f.readline()) | ||
| """ | ||
| ), | ||
| ) | ||
|
|
||
| def run(*args, **run_kwargs): | ||
| return dvc.run( | ||
| cmd="python head.py {}".format(" ".join(args)), | ||
| outs=[dep + "-1" for dep in args], | ||
| deps=args, | ||
| **run_kwargs | ||
| ) | ||
|
|
||
| return run | ||
|
|
||
|
|
||
| def read_lock_file(file=PIPELINE_LOCK): | ||
| with open(file) as f: | ||
| data = parse_stage_for_update(f.read(), file) | ||
| assert isinstance(data, OrderedDict) | ||
| return data | ||
|
|
||
|
|
||
| def assert_eq_lockfile(previous, new): | ||
| for content in (previous, new): | ||
| assert isinstance(content, OrderedDict) | ||
|
|
||
| # if they both are OrderedDict, then `==` will also check for order | ||
| assert previous == new | ||
|
|
||
|
|
||
| def test_deps_outs_are_sorted_by_path(tmp_dir, dvc, run_head): | ||
| tmp_dir.gen(FS_STRUCTURE) | ||
| deps = ["foo", "bar", "foobar"] | ||
| run_head(*deps, name="copy-first-line") | ||
|
|
||
| initial_content = read_lock_file() | ||
| lock = initial_content["copy-first-line"] | ||
|
|
||
| # lock stage key order: | ||
| assert list(lock.keys()) == ["cmd", "deps", "outs"] | ||
|
|
||
| # `path` key appear first and then the `md5` | ||
| assert all(list(dep.keys()) == ["path", "md5"] for dep in lock["deps"]) | ||
| assert all(list(out.keys()) == ["path", "md5"] for out in lock["outs"]) | ||
|
|
||
| # deps are always sorted by the file path naming | ||
| assert list(map(itemgetter("path"), lock["deps"])) == sorted(deps) | ||
|
|
||
| # outs are too | ||
| assert list( | ||
| map(itemgetter("path"), initial_content["copy-first-line"]["outs"]) | ||
| ) == [d + "-1" for d in sorted(deps)] | ||
|
|
||
|
|
||
| def test_order_is_preserved_when_pipeline_order_changes( | ||
| tmp_dir, dvc, run_head | ||
| ): | ||
| tmp_dir.gen(FS_STRUCTURE) | ||
| deps = ["foo", "bar", "foobar"] | ||
| stage = run_head(*deps, name="copy-first-line") | ||
|
|
||
| initial_content = read_lock_file() | ||
| # reverse order of stage.outs and dump to the pipeline file | ||
| # then, again change stage.deps and dump to the pipeline file | ||
| reversal = stage.outs.reverse, stage.deps.reverse | ||
| for reverse_items in reversal: | ||
| reverse_items() | ||
| stage.dvcfile._dump_pipeline_file(stage) | ||
|
|
||
| # we only changed the order, should not reproduce | ||
| assert not dvc.reproduce(stage.addressing) | ||
|
|
||
| new_lock_content = read_lock_file() | ||
| assert_eq_lockfile(new_lock_content, initial_content) | ||
|
|
||
| (tmp_dir / PIPELINE_LOCK).unlink() | ||
| assert dvc.reproduce(stage.addressing) == [stage] | ||
| new_lock_content = read_lock_file() | ||
| assert_eq_lockfile(new_lock_content, initial_content) | ||
|
|
||
|
|
||
| def test_cmd_changes_other_orders_are_preserved(tmp_dir, dvc, run_head): | ||
| tmp_dir.gen(FS_STRUCTURE) | ||
| deps = ["foo", "bar", "foobar"] | ||
| stage = run_head(*deps, name="copy-first-line") | ||
|
|
||
| initial_content = read_lock_file() | ||
| # let's change cmd in pipeline file | ||
| # it should only change "cmd", otherwise it should be | ||
| # structurally same as cmd | ||
| stage.cmd = " ".join(stage.cmd.split()) | ||
| stage.dvcfile._dump_pipeline_file(stage) | ||
|
|
||
| initial_content["copy-first-line"]["cmd"] = stage.cmd | ||
|
|
||
| assert dvc.reproduce(stage.addressing) == [stage] | ||
|
|
||
| new_lock_content = read_lock_file() | ||
| assert_eq_lockfile(new_lock_content, initial_content) | ||
|
|
||
|
|
||
| def test_params_dump(tmp_dir, dvc, run_head): | ||
| tmp_dir.gen(FS_STRUCTURE) | ||
|
|
||
| stage = run_head( | ||
| "foo", | ||
| "bar", | ||
| "foobar", | ||
| name="copy-first-line", | ||
| params=[ | ||
| "params2.yaml:answer,lists,name", | ||
| "params.yaml:lists,floats,nested.nested1,nested.nested1.nested2", | ||
| ], | ||
| ) | ||
|
|
||
| initial_content = read_lock_file() | ||
| lock = initial_content["copy-first-line"] | ||
|
|
||
| # lock stage key order: | ||
| assert list(lock.keys()) == ["cmd", "deps", "params", "outs"] | ||
| assert list(lock["params"].keys()) == ["params.yaml", "params2.yaml"] | ||
|
|
||
| # # params keys are always sorted by the name | ||
| assert list(lock["params"]["params.yaml"].keys()) == [ | ||
| "floats", | ||
| "lists", | ||
| "nested.nested1", | ||
| "nested.nested1.nested2", | ||
| ] | ||
| assert list(lock["params"]["params2.yaml"]) == ["answer", "lists", "name"] | ||
|
|
||
| assert not dvc.reproduce(stage.addressing) | ||
|
|
||
| # let's change the order of params and dump them in pipeline file | ||
| params, _ = get_params_deps(stage) | ||
| for param in params: | ||
| param.params.reverse() | ||
|
|
||
| stage.dvcfile._dump_pipeline_file(stage) | ||
| assert not dvc.reproduce(stage.addressing) | ||
|
|
||
| (tmp_dir / PIPELINE_LOCK).unlink() | ||
| # XXX: temporary workaround due to lack of params support in build cache | ||
| remove(os.path.join(dvc.cache.local.cache_dir, "stages")) | ||
|
|
||
| assert dvc.reproduce(stage.addressing) == [stage] | ||
| assert_eq_lockfile(initial_content, read_lock_file()) |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.