From 45d751294b5582a2545e02c558a7d8418b21c8e5 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Tue, 28 Apr 2020 15:46:42 +0900 Subject: [PATCH 1/4] tests: ensure repro --downstream preserves dependency order --- tests/func/test_repro.py | 77 ++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index 186c0a8290..ef89f081fc 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -1586,40 +1586,49 @@ def _hide_md5(text): return re.sub(r"\b[a-f0-9]{32}\b", "", text) -class TestReproDownstream(TestDvc): - def test(self): - # The dependency graph should look like this: - # - # E - # / \ - # D F - # / \ \ - # B C G - # \ / - # A - # - assert main(["run", "-o", "A", "echo A>A"]) == 0 - assert main(["run", "-d", "A", "-o", "B", "echo B>B"]) == 0 - assert main(["run", "-d", "A", "-o", "C", "echo C>C"]) == 0 - assert main(["run", "-d", "B", "-d", "C", "-o", "D", "echo D>D"]) == 0 - assert main(["run", "-o", "G", "echo G>G"]) == 0 - assert main(["run", "-d", "G", "-o", "F", "echo F>F"]) == 0 - assert main(["run", "-d", "D", "-d", "F", "-o", "E", "echo E>E"]) == 0 - - # We want the evaluation to move from B to E - # - # E - # / - # D - # / - # B - # - evaluation = self.dvc.reproduce("B.dvc", downstream=True, force=True) - - assert len(evaluation) == 3 - assert evaluation[0].relpath == "B.dvc" - assert evaluation[1].relpath == "D.dvc" - assert evaluation[2].relpath == "E.dvc" +def test_repro_downstream(dvc): + # The dependency graph should look like this: + # + # E + # / \ + # D F + # / \ \ + # B C G + # \ / + # A + # + assert main(["run", "-o", "A", "echo A>A"]) == 0 + assert main(["run", "-d", "A", "-o", "B", "echo B>B"]) == 0 + assert main(["run", "-d", "A", "-o", "C", "echo C>C"]) == 0 + assert main(["run", "-d", "B", "-d", "C", "-o", "D", "echo D>D"]) == 0 + assert main(["run", "-o", "G", "echo G>G"]) == 0 + assert main(["run", "-d", "G", "-o", "F", "echo F>F"]) == 0 + assert main(["run", "-d", "D", "-d", "F", "-o", "E", "echo E>E"]) == 0 + + # We want the evaluation to move from B to E + # + # E + # / + # D + # / + # B + # + evaluation = dvc.reproduce("B.dvc", downstream=True, force=True) + + assert len(evaluation) == 3 + assert evaluation[0].relpath == "B.dvc" + assert evaluation[1].relpath == "D.dvc" + assert evaluation[2].relpath == "E.dvc" + + # B, C should be run (in any order) before D + # See https://github.com/iterative/dvc/issues/3602 + evaluation = dvc.reproduce("A.dvc", downstream=True, force=True) + + assert len(evaluation) == 5 + assert evaluation[0].relpath == "A.dvc" + assert {evaluation[1].relpath, evaluation[2].relpath} == {"B.dvc", "C.dvc"} + assert evaluation[3].relpath == "D.dvc" + assert evaluation[4].relpath == "E.dvc" @pytest.mark.skipif( From 7350952020d6d6f6ae59a4795ba5ea23cdb2e1a7 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Tue, 28 Apr 2020 15:49:18 +0900 Subject: [PATCH 2/4] repo: use reverse post-order DFS in repro --downstream - Fixes #3602 --- dvc/repo/reproduce.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index e77934e89e..288bdd39aa 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -126,7 +126,8 @@ def _reproduce_stages( is to derive the evaluation starting from the given stage up to the ancestors. However, the `networkx.ancestors` returns a set, without any guarantee of any order, so we are going to reverse the graph and - use a pre-ordered search using the given stage as a starting point. + use a reverse post-ordered search using the given stage as a starting + point. E A / \ / \ @@ -154,9 +155,10 @@ def _reproduce_stages( # itself, and then reverse it, instead of using # graph.reverse() directly because it calls `deepcopy` # underneath -- unless copy=False is specified. - all_pipelines += nx.dfs_preorder_nodes( + nodes = nx.dfs_postorder_nodes( G.copy().reverse(copy=False), stage ) + all_pipelines += reversed(list(nodes)) else: all_pipelines += nx.dfs_postorder_nodes(G, stage) From 7fa7bc451fd3f3951fea9922274cd935c3975970 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Tue, 28 Apr 2020 16:21:35 +0900 Subject: [PATCH 3/4] make test_repro and test_repro_multistage consistent --- tests/func/test_repro.py | 2 +- tests/func/test_repro_multistage.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index ef89f081fc..ce13ddd9fc 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -1586,7 +1586,7 @@ def _hide_md5(text): return re.sub(r"\b[a-f0-9]{32}\b", "", text) -def test_repro_downstream(dvc): +def test_downstream(dvc): # The dependency graph should look like this: # # E diff --git a/tests/func/test_repro_multistage.py b/tests/func/test_repro_multistage.py index 08d059326f..08e42b7989 100644 --- a/tests/func/test_repro_multistage.py +++ b/tests/func/test_repro_multistage.py @@ -259,6 +259,34 @@ def test_downstream(tmp_dir, dvc): and evaluation[2].relpath == "E.dvc" ) + # B, C should be run (in any order) before D + # See https://github.com/iterative/dvc/issues/3602 + evaluation = dvc.reproduce("Dvcfile:A-gen", downstream=True, force=True) + + assert len(evaluation) == 5 + assert ( + isinstance(evaluation[0], PipelineStage) + and evaluation[0].relpath == "Dvcfile" + and evaluation[0].name == "A-gen" + ) + names = set() + for stage in evaluation[1:3]: + if isinstance(stage, PipelineStage): + assert stage.relpath == "Dvcfile" + names.add(stage.name) + else: + names.add(stage.relpath) + assert names == {"B-gen", "C.dvc"} + assert ( + isinstance(evaluation[3], PipelineStage) + and evaluation[3].relpath == "Dvcfile" + and evaluation[3].name == "D-gen" + ) + assert ( + not isinstance(evaluation[4], PipelineStage) + and evaluation[4].relpath == "E.dvc" + ) + def test_repro_when_cmd_changes(tmp_dir, dvc, run_copy): from dvc.dvcfile import PipelineFile From 71f68bd6a9052f6a839215ce9db0d6d1dc4f5734 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Tue, 28 Apr 2020 21:22:46 +0900 Subject: [PATCH 4/4] rebase master, update multistage test --- tests/func/test_repro_multistage.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/func/test_repro_multistage.py b/tests/func/test_repro_multistage.py index 08e42b7989..529f1f6589 100644 --- a/tests/func/test_repro_multistage.py +++ b/tests/func/test_repro_multistage.py @@ -261,25 +261,27 @@ def test_downstream(tmp_dir, dvc): # B, C should be run (in any order) before D # See https://github.com/iterative/dvc/issues/3602 - evaluation = dvc.reproduce("Dvcfile:A-gen", downstream=True, force=True) + evaluation = dvc.reproduce( + PIPELINE_FILE + ":A-gen", downstream=True, force=True + ) assert len(evaluation) == 5 assert ( isinstance(evaluation[0], PipelineStage) - and evaluation[0].relpath == "Dvcfile" + and evaluation[0].relpath == PIPELINE_FILE and evaluation[0].name == "A-gen" ) names = set() for stage in evaluation[1:3]: if isinstance(stage, PipelineStage): - assert stage.relpath == "Dvcfile" + assert stage.relpath == PIPELINE_FILE names.add(stage.name) else: names.add(stage.relpath) assert names == {"B-gen", "C.dvc"} assert ( isinstance(evaluation[3], PipelineStage) - and evaluation[3].relpath == "Dvcfile" + and evaluation[3].relpath == PIPELINE_FILE and evaluation[3].name == "D-gen" ) assert (