Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions dvc/repo/reproduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ def _reproduce_stages(
is to derive the evaluation starting from the given stage up to the
ancestors. However, the `networkx.ancestors` returns a set, without
any guarantee of any order, so we are going to reverse the graph and
use a pre-ordered search using the given stage as a starting point.
use a reverse post-ordered search using the given stage as a starting
point.

E A
/ \ / \
Expand Down Expand Up @@ -154,9 +155,10 @@ def _reproduce_stages(
# itself, and then reverse it, instead of using
# graph.reverse() directly because it calls `deepcopy`
# underneath -- unless copy=False is specified.
all_pipelines += nx.dfs_preorder_nodes(
nodes = nx.dfs_postorder_nodes(
G.copy().reverse(copy=False), stage
)
all_pipelines += reversed(list(nodes))
else:
all_pipelines += nx.dfs_postorder_nodes(G, stage)

Expand Down
77 changes: 43 additions & 34 deletions tests/func/test_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -1586,40 +1586,49 @@ def _hide_md5(text):
return re.sub(r"\b[a-f0-9]{32}\b", "<md5>", text)


class TestReproDownstream(TestDvc):
    """Check the stage order produced by a downstream reproduction."""

    def test(self):
        # Dependency graph built below (edges point from a stage down to
        # the stages it depends on):
        #
        #         E
        #        / \
        #       D   F
        #      / \   \
        #     B   C   G
        #      \ /
        #       A
        #
        stage_commands = [
            ["run", "-o", "A", "echo A>A"],
            ["run", "-d", "A", "-o", "B", "echo B>B"],
            ["run", "-d", "A", "-o", "C", "echo C>C"],
            ["run", "-d", "B", "-d", "C", "-o", "D", "echo D>D"],
            ["run", "-o", "G", "echo G>G"],
            ["run", "-d", "G", "-o", "F", "echo F>F"],
            ["run", "-d", "D", "-d", "F", "-o", "E", "echo E>E"],
        ]
        for argv in stage_commands:
            assert main(argv) == 0

        # Reproducing downstream from B must walk upwards only:
        #
        #         E
        #        /
        #       D
        #      /
        #     B
        #
        evaluation = self.dvc.reproduce("B.dvc", downstream=True, force=True)

        expected_order = ("B.dvc", "D.dvc", "E.dvc")
        assert len(evaluation) == 3
        for stage, relpath in zip(evaluation, expected_order):
            assert stage.relpath == relpath
def test_downstream(dvc):
    """Verify the evaluation order of ``reproduce(..., downstream=True)``."""
    # Dependency graph built below (edges point from a stage down to the
    # stages it depends on):
    #
    #         E
    #        / \
    #       D   F
    #      / \   \
    #     B   C   G
    #      \ /
    #       A
    #
    stage_commands = [
        ["run", "-o", "A", "echo A>A"],
        ["run", "-d", "A", "-o", "B", "echo B>B"],
        ["run", "-d", "A", "-o", "C", "echo C>C"],
        ["run", "-d", "B", "-d", "C", "-o", "D", "echo D>D"],
        ["run", "-o", "G", "echo G>G"],
        ["run", "-d", "G", "-o", "F", "echo F>F"],
        ["run", "-d", "D", "-d", "F", "-o", "E", "echo E>E"],
    ]
    for argv in stage_commands:
        assert main(argv) == 0

    # Starting from B, the evaluation has to climb from B to E:
    #
    #         E
    #        /
    #       D
    #      /
    #     B
    #
    from_b = dvc.reproduce("B.dvc", downstream=True, force=True)

    assert len(from_b) == 3
    assert [stage.relpath for stage in from_b] == ["B.dvc", "D.dvc", "E.dvc"]

    # Starting from A, both B and C must run (in either order) before D.
    # See https://github.com/iterative/dvc/issues/3602
    from_a = dvc.reproduce("A.dvc", downstream=True, force=True)

    assert len(from_a) == 5
    relpaths = [stage.relpath for stage in from_a]
    assert relpaths[0] == "A.dvc"
    assert set(relpaths[1:3]) == {"B.dvc", "C.dvc"}
    assert relpaths[3:] == ["D.dvc", "E.dvc"]


@pytest.mark.skipif(
Expand Down
30 changes: 30 additions & 0 deletions tests/func/test_repro_multistage.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,36 @@ def test_downstream(tmp_dir, dvc):
and evaluation[2].relpath == "E.dvc"
)

# B, C should be run (in any order) before D
# See https://github.com/iterative/dvc/issues/3602
evaluation = dvc.reproduce(
PIPELINE_FILE + ":A-gen", downstream=True, force=True
)

assert len(evaluation) == 5
assert (
isinstance(evaluation[0], PipelineStage)
and evaluation[0].relpath == PIPELINE_FILE
and evaluation[0].name == "A-gen"
)
names = set()
for stage in evaluation[1:3]:
if isinstance(stage, PipelineStage):
assert stage.relpath == PIPELINE_FILE
names.add(stage.name)
else:
names.add(stage.relpath)
assert names == {"B-gen", "C.dvc"}
assert (
isinstance(evaluation[3], PipelineStage)
and evaluation[3].relpath == PIPELINE_FILE
and evaluation[3].name == "D-gen"
)
assert (
not isinstance(evaluation[4], PipelineStage)
and evaluation[4].relpath == "E.dvc"
)


def test_repro_when_cmd_changes(tmp_dir, dvc, run_copy):
from dvc.dvcfile import PipelineFile
Expand Down