From e93504668d29b638e8cb6fd1e673668adc1d1d54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 8 Jan 2020 14:16:32 +0100 Subject: [PATCH 1/4] repro: tests: migrate to dir helpers --- tests/func/test_repro.py | 199 +++++++++++++++++---------------------- 1 file changed, 88 insertions(+), 111 deletions(-) diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index e7ea6e7356..51bed1721c 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -2,7 +2,6 @@ import getpass import os import posixpath -import pathlib import re import shutil import uuid @@ -17,6 +16,7 @@ from google.cloud import storage as gc from mock import patch +from dvc.compat import fspath from dvc.exceptions import CyclicGraphError from dvc.exceptions import ReproductionError from dvc.exceptions import StagePathAsOutputError @@ -1372,83 +1372,83 @@ def test(self): @pytest.fixture -def repro_dir(dvc_repo, repo_dir): - repo_dir.dname = "dir" - os.mkdir(repo_dir.dname) - repo_dir.emptydname = "emptydir" - os.mkdir(repo_dir.emptydname) - subdname = os.path.join(repo_dir.dname, "subdir") - os.mkdir(subdname) - - repo_dir.source = "source" - repo_dir.source_stage = repo_dir.source + ".dvc" - stage = dvc_repo.run( - fname=repo_dir.source_stage, - outs=[repo_dir.source], - deps=[repo_dir.FOO], +def repro_dir(tmp_dir, dvc, run_copy): + tmp_dir.gen( + { + "foo": "foo content", + "bar": "bar content", + "data_dir": {"dir_file": "dir file content"}, + } + ) + (tmp_dir / "dir").mkdir() + subdname = os.path.join("dir", "subdir") + (tmp_dir / "dir" / "subdir").mkdir() + + tmp_dir.source = "source" + tmp_dir.source_stage = tmp_dir.source + ".dvc" + stage = dvc.run( + fname=tmp_dir.source_stage, outs=[tmp_dir.source], deps=["foo"] ) assert stage is not None - assert filecmp.cmp(repo_dir.source, repo_dir.FOO, shallow=False) - - repo_dir.unrelated1 = "unrelated1" - repo_dir.unrelated1_stage = repo_dir.unrelated1 + ".dvc" - stage = dvc_repo.run( - fname=repo_dir.unrelated1_stage, - outs=[repo_dir.unrelated1], - deps=[repo_dir.source], + assert filecmp.cmp(tmp_dir.source, "foo", shallow=False) + + tmp_dir.unrelated1 = "unrelated1" + tmp_dir.unrelated1_stage = tmp_dir.unrelated1 + ".dvc" + stage = dvc.run( + fname=tmp_dir.unrelated1_stage, + outs=[tmp_dir.unrelated1], + deps=[tmp_dir.source], ) assert stage is not None - repo_dir.unrelated2 = "unrelated2" - repo_dir.unrelated2_stage = repo_dir.unrelated2 + ".dvc" - stage = dvc_repo.run( - fname=repo_dir.unrelated2_stage, - outs=[repo_dir.unrelated2], - deps=[repo_dir.DATA], + tmp_dir.unrelated2 = "unrelated2" + tmp_dir.unrelated2_stage = tmp_dir.unrelated2 + ".dvc" + stage = dvc.run( + fname=tmp_dir.unrelated2_stage, + outs=[tmp_dir.unrelated2], + deps=[fspath(tmp_dir / "data_dir" / "dir_file")], ) assert stage is not None - repo_dir.first = os.path.join(repo_dir.dname, "first") - repo_dir.first_stage = repo_dir.first + ".dvc" + tmp_dir.first = os.path.join("dir", "first") + tmp_dir.first_stage = tmp_dir.first + ".dvc" + + stage = run_copy(tmp_dir.source, tmp_dir.first, fname=tmp_dir.first_stage) - stage = dvc_repo.run( - fname=repo_dir.first_stage, - deps=[repo_dir.source], - outs=[repo_dir.first], - cmd="python {} {} {}".format( - repo_dir.CODE, repo_dir.source, repo_dir.first - ), - ) assert stage is not None - assert filecmp.cmp(repo_dir.first, repo_dir.FOO, shallow=False) - - repo_dir.second = os.path.join(subdname, "second") - repo_dir.second_stage = repo_dir.second + ".dvc" - stage = dvc_repo.run( - fname=repo_dir.second_stage, - outs=[repo_dir.second], - deps=[repo_dir.DATA], + assert filecmp.cmp(tmp_dir.first, "foo", shallow=False) + + tmp_dir.second = os.path.join(subdname, "second") + tmp_dir.second_stage = tmp_dir.second + ".dvc" + stage = dvc.run( + fname=tmp_dir.second_stage, + outs=[tmp_dir.second], + deps=[fspath(tmp_dir / "data_dir" / "dir_file")], ) assert stage is not None - assert filecmp.cmp(repo_dir.second, repo_dir.DATA, shallow=False) + assert filecmp.cmp( + tmp_dir.second, + fspath(tmp_dir / "data_dir" / "dir_file"), + shallow=False, + ) - repo_dir.third_stage = os.path.join(repo_dir.dname, "Dvcfile") - stage = dvc_repo.run( - fname=repo_dir.third_stage, deps=[repo_dir.first, repo_dir.second] + tmp_dir.third_stage = os.path.join("dir", "Dvcfile") + stage = dvc.run( + fname=tmp_dir.third_stage, deps=[tmp_dir.first, tmp_dir.second] ) assert stage is not None - yield repo_dir + yield tmp_dir -def test_recursive_repro_default(dvc_repo, repro_dir): +def test_recursive_repro_default(dvc, repro_dir): """ Test recursive repro on dir after a dep outside this dir has changed. """ - os.unlink(repro_dir.FOO) - shutil.copyfile(repro_dir.BAR, repro_dir.FOO) + os.unlink("foo") + shutil.copyfile("bar", "foo") - stages = dvc_repo.reproduce(repro_dir.dname, recursive=True) + stages = dvc.reproduce("dir", recursive=True) # Check that the dependency ("source") and the dependent stages # inside the folder have been reproduced ("first", "third") assert len(stages) == 3 @@ -1456,24 +1456,23 @@ def test_recursive_repro_default(dvc_repo, repro_dir): assert repro_dir.source_stage in names assert repro_dir.first_stage in names assert repro_dir.third_stage in names - assert filecmp.cmp(repro_dir.source, repro_dir.BAR, shallow=False) - assert filecmp.cmp(repro_dir.first, repro_dir.BAR, shallow=False) + assert filecmp.cmp(repro_dir.source, "bar", shallow=False) + assert filecmp.cmp(repro_dir.first, "bar", shallow=False) -def test_recursive_repro_single(dvc_repo, repro_dir): +def test_recursive_repro_single(dvc, repro_dir): """ Test recursive single-item repro on dir after a dep outside this dir has changed. """ - os.unlink(repro_dir.FOO) - shutil.copyfile(repro_dir.BAR, repro_dir.FOO) + os.unlink("foo") + shutil.copyfile("bar", "foo") - os.unlink(repro_dir.DATA) - shutil.copyfile(repro_dir.BAR, repro_dir.DATA) + # os.unlink(repro_dir.DATA) + (repro_dir / "data_dir" / "dir_file").unlink() + shutil.copyfile("bar", fspath(repro_dir / "data_dir" / "dir_file")) - stages = dvc_repo.reproduce( - repro_dir.dname, recursive=True, single_item=True - ) + stages = dvc.reproduce("dir", recursive=True, single_item=True) # Check that just stages inside given dir # with changed direct deps have been reproduced. # This means that "first" stage should not be reproduced @@ -1482,17 +1481,15 @@ def test_recursive_repro_single(dvc_repo, repro_dir): assert len(stages) == 2 assert repro_dir.second_stage == stages[0].relpath assert repro_dir.third_stage == stages[1].relpath - assert filecmp.cmp(repro_dir.second, repro_dir.BAR, shallow=False) + assert filecmp.cmp(repro_dir.second, "bar", shallow=False) -def test_recursive_repro_single_force(dvc_repo, repro_dir): +def test_recursive_repro_single_force(dvc, repro_dir): """ Test recursive single-item force repro on dir without any dependencies changing. """ - stages = dvc_repo.reproduce( - repro_dir.dname, recursive=True, single_item=True, force=True - ) + stages = dvc.reproduce("dir", recursive=True, single_item=True, force=True) assert len(stages) == 3 names = [stage.relpath for stage in stages] # Check that all stages inside given dir have been reproduced @@ -1509,73 +1506,53 @@ def test_recursive_repro_single_force(dvc_repo, repro_dir): ) -def test_recursive_repro_empty_dir(dvc_repo, repro_dir): +def test_recursive_repro_empty_dir(tmp_dir, dvc): """ Test recursive repro on an empty directory """ - stages = dvc_repo.reproduce( - repro_dir.emptydname, recursive=True, force=True - ) + (tmp_dir / "emptydir").mkdir() + + stages = dvc.reproduce("emptydir", recursive=True, force=True) assert len(stages) == 0 -def test_recursive_repro_recursive_missing_file(dvc_repo): +def test_recursive_repro_recursive_missing_file(dvc): """ Test recursive repro on a missing file """ with pytest.raises(StageFileDoesNotExistError): - dvc_repo.reproduce("notExistingStage.dvc", recursive=True) + dvc.reproduce("notExistingStage.dvc", recursive=True) with pytest.raises(StageFileDoesNotExistError): - dvc_repo.reproduce("notExistingDir/", recursive=True) + dvc.reproduce("notExistingDir/", recursive=True) -def test_recursive_repro_on_stage_file(dvc_repo, repro_dir): +def test_recursive_repro_on_stage_file(dvc, repro_dir): """ Test recursive repro on a stage file instead of directory """ - stages = dvc_repo.reproduce( - repro_dir.first_stage, recursive=True, force=True - ) + stages = dvc.reproduce(repro_dir.first_stage, recursive=True, force=True) assert len(stages) == 2 names = [stage.relpath for stage in stages] assert repro_dir.source_stage in names assert repro_dir.first_stage in names -@pytest.fixture -def foo_copy(repo_dir, dvc_repo): - stages = dvc_repo.add(repo_dir.FOO) - assert len(stages) == 1 - foo_stage = stages[0] - assert foo_stage is not None - - fname = "foo_copy" - stage_fname = fname + ".dvc" - dvc_repo.run( - fname=stage_fname, - outs=[fname], - deps=[repo_dir.FOO, repo_dir.CODE], - cmd="python {} {} {}".format(repo_dir.CODE, repo_dir.FOO, fname), - ) - return {"fname": fname, "stage_fname": stage_fname} - - -def test_dvc_formatting_retained(dvc_repo, foo_copy): - root = pathlib.Path(dvc_repo.root_dir) - stage_file = root / foo_copy["stage_fname"] +def test_dvc_formatting_retained(tmp_dir, dvc, run_copy): + tmp_dir.dvc_gen("foo", "foo content") + stage = run_copy("foo", "foo_copy", fname="foo_copy.dvc") + stage_path = tmp_dir / stage.relpath # Add comments and custom formatting to DVC-file - lines = list(map(_format_dvc_line, stage_file.read_text().splitlines())) + lines = list(map(_format_dvc_line, stage_path.read_text().splitlines())) lines.insert(0, "# Starting comment") stage_text = "".join(l + "\n" for l in lines) - stage_file.write_text(stage_text) + stage_path.write_text(stage_text) # Rewrite data source and repro - (root / "foo").write_text("new_foo") - dvc_repo.reproduce(foo_copy["stage_fname"]) + (tmp_dir / "foo").write_text("new foo") + dvc.reproduce("foo_copy.dvc", force=True) - # All differences should be only about md5 - assert _hide_md5(stage_text) == _hide_md5(stage_file.read_text()) + assert _hide_md5(stage_text) == _hide_md5(stage_path.read_text()) def _format_dvc_line(line): @@ -1630,7 +1607,7 @@ def test(self): assert evaluation[2].relpath == "E.dvc" -def test_ssh_dir_out(dvc_repo): +def test_ssh_dir_out(dvc): if not _should_test_ssh(): pytest.skip() @@ -1643,7 +1620,7 @@ def test_ssh_dir_out(dvc_repo): assert main(["config", "cache.ssh", "sshcache"]) == 0 # Recreating to reread configs - repo = DvcRepo(dvc_repo.root_dir) + repo = DvcRepo(dvc.root_dir) url_info = URLInfo(remote_url) mkdir_cmd = "mkdir dir-out;cd dir-out;echo 1 > 1.txt; echo 2 > 2.txt" From 25437ca6006ce617b3e7853bd72648c613488b1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 8 Jan 2020 18:26:38 +0100 Subject: [PATCH 2/4] repro: tests: refactor for readibility --- tests/func/test_repro.py | 187 +++++++++++++++++++++------------------ 1 file changed, 100 insertions(+), 87 deletions(-) diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index 51bed1721c..2d05c80f7f 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -5,6 +5,7 @@ import re import shutil import uuid +from pathlib import Path from subprocess import PIPE from subprocess import Popen from urllib.parse import urljoin @@ -1373,70 +1374,68 @@ def test(self): @pytest.fixture def repro_dir(tmp_dir, dvc, run_copy): + # Creates repo with following structure: + # data_dir/dir_file origin_data + # | | | + # | | origin_copy.dvc + # unrelated2.dvc | | | + # | | unrelated1.dvc + # dir/subdir/dir_file_copy.dvc | + # | | + # | dir/origin_copy_2.dvc + # | | + # \ / + # \ / + # dir/Dvcfile tmp_dir.gen( { - "foo": "foo content", - "bar": "bar content", + "origin_data": "origin data content", "data_dir": {"dir_file": "dir file content"}, + "dir": {"subdir": {}}, } ) - (tmp_dir / "dir").mkdir() - subdname = os.path.join("dir", "subdir") - (tmp_dir / "dir" / "subdir").mkdir() - - tmp_dir.source = "source" - tmp_dir.source_stage = tmp_dir.source + ".dvc" - stage = dvc.run( - fname=tmp_dir.source_stage, outs=[tmp_dir.source], deps=["foo"] - ) - assert stage is not None - assert filecmp.cmp(tmp_dir.source, "foo", shallow=False) - - tmp_dir.unrelated1 = "unrelated1" - tmp_dir.unrelated1_stage = tmp_dir.unrelated1 + ".dvc" - stage = dvc.run( - fname=tmp_dir.unrelated1_stage, - outs=[tmp_dir.unrelated1], - deps=[tmp_dir.source], - ) - assert stage is not None - - tmp_dir.unrelated2 = "unrelated2" - tmp_dir.unrelated2_stage = tmp_dir.unrelated2 + ".dvc" - stage = dvc.run( - fname=tmp_dir.unrelated2_stage, - outs=[tmp_dir.unrelated2], - deps=[fspath(tmp_dir / "data_dir" / "dir_file")], - ) - assert stage is not None - - tmp_dir.first = os.path.join("dir", "first") - tmp_dir.first_stage = tmp_dir.first + ".dvc" - stage = run_copy(tmp_dir.source, tmp_dir.first, fname=tmp_dir.first_stage) - - assert stage is not None - assert filecmp.cmp(tmp_dir.first, "foo", shallow=False) - - tmp_dir.second = os.path.join(subdname, "second") - tmp_dir.second_stage = tmp_dir.second + ".dvc" - stage = dvc.run( - fname=tmp_dir.second_stage, - outs=[tmp_dir.second], - deps=[fspath(tmp_dir / "data_dir" / "dir_file")], + tmp_dir.origin_copy = "origin_copy" + assert run_copy("origin_data", tmp_dir.origin_copy) is not None + assert (tmp_dir / tmp_dir.origin_copy).read_text() == "origin data content" + + # Unrelated are to verify that reproducing `dir` will not trigger them too + assert run_copy(tmp_dir.origin_copy, "unrelated1") is not None + dir_file_path = tmp_dir / "data_dir" / "dir_file" + assert run_copy(fspath(dir_file_path), "unrelated2") is not None + + tmp_dir.origin_copy_2 = os.path.join("dir", "origin_copy_2") + assert ( + run_copy( + tmp_dir.origin_copy, + tmp_dir.origin_copy_2, + fname=tmp_dir.origin_copy_2 + ".dvc", + ) + is not None ) - assert stage is not None - assert filecmp.cmp( - tmp_dir.second, - fspath(tmp_dir / "data_dir" / "dir_file"), - shallow=False, + assert ( + tmp_dir / tmp_dir.origin_copy_2 + ).read_text() == "origin data content" + + tmp_dir.dir_file_copy = os.path.join("dir", "subdir", "dir_file_copy") + assert ( + run_copy( + fspath(dir_file_path), + tmp_dir.dir_file_copy, + fname=tmp_dir.dir_file_copy + ".dvc", + ) + is not None ) + assert (tmp_dir / tmp_dir.dir_file_copy).read_text() == "dir file content" - tmp_dir.third_stage = os.path.join("dir", "Dvcfile") - stage = dvc.run( - fname=tmp_dir.third_stage, deps=[tmp_dir.first, tmp_dir.second] + tmp_dir.last_stage = os.path.join("dir", "Dvcfile") + assert ( + dvc.run( + fname=tmp_dir.last_stage, + deps=[tmp_dir.origin_copy_2, tmp_dir.dir_file_copy], + ) + is not None ) - assert stage is not None yield tmp_dir @@ -1445,19 +1444,25 @@ def test_recursive_repro_default(dvc, repro_dir): """ Test recursive repro on dir after a dep outside this dir has changed. """ - os.unlink("foo") - shutil.copyfile("bar", "foo") + origin = Path("origin_data") + origin.unlink() + origin.write_text("new origin data content") stages = dvc.reproduce("dir", recursive=True) + # Check that the dependency ("source") and the dependent stages # inside the folder have been reproduced ("first", "third") - assert len(stages) == 3 - names = [stage.relpath for stage in stages] - assert repro_dir.source_stage in names - assert repro_dir.first_stage in names - assert repro_dir.third_stage in names - assert filecmp.cmp(repro_dir.source, "bar", shallow=False) - assert filecmp.cmp(repro_dir.first, "bar", shallow=False) + names = [s.relpath for s in stages] + assert len(names) == 3 + assert set(names) == { + repro_dir.origin_copy + ".dvc", + repro_dir.origin_copy_2 + ".dvc", + repro_dir.last_stage, + } + assert Path(repro_dir.origin_copy).read_text() == "new origin data content" + assert ( + Path(repro_dir.origin_copy_2).read_text() == "new origin data content" + ) def test_recursive_repro_single(dvc, repro_dir): @@ -1465,23 +1470,26 @@ def test_recursive_repro_single(dvc, repro_dir): Test recursive single-item repro on dir after a dep outside this dir has changed. """ - os.unlink("foo") - shutil.copyfile("bar", "foo") + origin_data = Path("origin_data") + origin_data.unlink() + origin_data.write_text("new origin content") - # os.unlink(repro_dir.DATA) - (repro_dir / "data_dir" / "dir_file").unlink() - shutil.copyfile("bar", fspath(repro_dir / "data_dir" / "dir_file")) + dir_file = repro_dir / "data_dir" / "dir_file" + dir_file.unlink() + dir_file.write_text("new dir file content") stages = dvc.reproduce("dir", recursive=True, single_item=True) # Check that just stages inside given dir # with changed direct deps have been reproduced. # This means that "first" stage should not be reproduced # since it depends on "source". - # Also check that "second" stage was reproduced before "third" stage + # Also check that "dir_file_copy" stage was reproduced before "third" stage assert len(stages) == 2 - assert repro_dir.second_stage == stages[0].relpath - assert repro_dir.third_stage == stages[1].relpath - assert filecmp.cmp(repro_dir.second, "bar", shallow=False) + assert [s.relpath for s in stages] == [ + repro_dir.dir_file_copy + ".dvc", + repro_dir.last_stage, + ] + assert Path(repro_dir.dir_file_copy).read_text() == "new dir file content" def test_recursive_repro_single_force(dvc, repro_dir): @@ -1491,26 +1499,28 @@ def test_recursive_repro_single_force(dvc, repro_dir): """ stages = dvc.reproduce("dir", recursive=True, single_item=True, force=True) assert len(stages) == 3 - names = [stage.relpath for stage in stages] # Check that all stages inside given dir have been reproduced - # Also check that "second" stage was reproduced before "third" stage + # Also check that "dir_file_copy" stage was reproduced before "third" stage # and that "first" stage was reproduced before "third" stage - assert repro_dir.first_stage in names - assert repro_dir.second_stage in names - assert repro_dir.third_stage in names - assert names.index(repro_dir.first_stage) < names.index( - repro_dir.third_stage + names = [s.relpath for s in stages] + assert { + repro_dir.origin_copy_2 + ".dvc", + repro_dir.dir_file_copy + ".dvc", + repro_dir.last_stage, + } == set(names) + assert names.index(repro_dir.origin_copy_2 + ".dvc") < names.index( + repro_dir.last_stage ) - assert names.index(repro_dir.second_stage) < names.index( - repro_dir.third_stage + assert names.index(repro_dir.dir_file_copy + ".dvc") < names.index( + repro_dir.last_stage ) -def test_recursive_repro_empty_dir(tmp_dir, dvc): +def test_recursive_repro_empty_dir(dvc, repro_dir): """ Test recursive repro on an empty directory """ - (tmp_dir / "emptydir").mkdir() + (repro_dir / "emptydir").mkdir() stages = dvc.reproduce("emptydir", recursive=True, force=True) assert len(stages) == 0 @@ -1530,11 +1540,14 @@ def test_recursive_repro_on_stage_file(dvc, repro_dir): """ Test recursive repro on a stage file instead of directory """ - stages = dvc.reproduce(repro_dir.first_stage, recursive=True, force=True) + stages = dvc.reproduce( + repro_dir.origin_copy_2 + ".dvc", recursive=True, force=True + ) assert len(stages) == 2 - names = [stage.relpath for stage in stages] - assert repro_dir.source_stage in names - assert repro_dir.first_stage in names + assert [ + repro_dir.origin_copy + ".dvc", + repro_dir.origin_copy_2 + ".dvc", + ] == [stage.relpath for stage in stages] def test_dvc_formatting_retained(tmp_dir, dvc, run_copy): From 93c54763fecc1f55bfd24e7406b051c4286fbc7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 9 Jan 2020 11:10:41 +0100 Subject: [PATCH 3/4] repro: tests: yield dict of stages from repro_dir --- tests/func/test_repro.py | 167 ++++++++++++++++++--------------------- 1 file changed, 79 insertions(+), 88 deletions(-) diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index 2d05c80f7f..ff3e4eaebd 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -1395,73 +1395,75 @@ def repro_dir(tmp_dir, dvc, run_copy): } ) - tmp_dir.origin_copy = "origin_copy" - assert run_copy("origin_data", tmp_dir.origin_copy) is not None - assert (tmp_dir / tmp_dir.origin_copy).read_text() == "origin data content" + stages = {} + + origin_copy = "origin_copy" + stage = run_copy("origin_data", origin_copy) + assert stage is not None + assert (tmp_dir / origin_copy).read_text() == "origin data content" + stages["origin_copy"] = stage + + origin_copy_2 = os.path.join("dir", "origin_copy_2") + stage = run_copy(origin_copy, origin_copy_2, fname=origin_copy_2 + ".dvc") + assert stage is not None + assert (tmp_dir / origin_copy_2).read_text() == "origin data content" + stages["origin_copy_2"] = stage - # Unrelated are to verify that reproducing `dir` will not trigger them too - assert run_copy(tmp_dir.origin_copy, "unrelated1") is not None dir_file_path = tmp_dir / "data_dir" / "dir_file" + dir_file_copy = os.path.join("dir", "subdir", "dir_file_copy") + stage = run_copy( + fspath(dir_file_path), dir_file_copy, fname=dir_file_copy + ".dvc" + ) + assert stage is not None + assert (tmp_dir / dir_file_copy).read_text() == "dir file content" + stages["dir_file_copy"] = stage + + last_stage = os.path.join("dir", "Dvcfile") + stage = dvc.run(fname=last_stage, deps=[origin_copy_2, dir_file_copy]) + assert stage is not None + stages["last_stage"] = stage + + # Unrelated are to verify that reproducing `dir` will not trigger them too + assert run_copy(origin_copy, "unrelated1") is not None assert run_copy(fspath(dir_file_path), "unrelated2") is not None - tmp_dir.origin_copy_2 = os.path.join("dir", "origin_copy_2") - assert ( - run_copy( - tmp_dir.origin_copy, - tmp_dir.origin_copy_2, - fname=tmp_dir.origin_copy_2 + ".dvc", - ) - is not None - ) - assert ( - tmp_dir / tmp_dir.origin_copy_2 - ).read_text() == "origin data content" + yield stages - tmp_dir.dir_file_copy = os.path.join("dir", "subdir", "dir_file_copy") - assert ( - run_copy( - fspath(dir_file_path), - tmp_dir.dir_file_copy, - fname=tmp_dir.dir_file_copy + ".dvc", - ) - is not None - ) - assert (tmp_dir / tmp_dir.dir_file_copy).read_text() == "dir file content" - tmp_dir.last_stage = os.path.join("dir", "Dvcfile") - assert ( - dvc.run( - fname=tmp_dir.last_stage, - deps=[tmp_dir.origin_copy_2, tmp_dir.dir_file_copy], - ) - is not None - ) +def _rewrite_file(path_elements, new_content): + if isinstance(path_elements, str): + path_elements = [path_elements] + file = Path(os.sep.join(path_elements)) + file.unlink() + file.write_text(new_content) - yield tmp_dir + +def _out_path(stage): + return Path(stage.outs[0].fspath) def test_recursive_repro_default(dvc, repro_dir): """ Test recursive repro on dir after a dep outside this dir has changed. """ - origin = Path("origin_data") - origin.unlink() - origin.write_text("new origin data content") + _rewrite_file("origin_data", "new origin data content") stages = dvc.reproduce("dir", recursive=True) - # Check that the dependency ("source") and the dependent stages - # inside the folder have been reproduced ("first", "third") - names = [s.relpath for s in stages] - assert len(names) == 3 - assert set(names) == { - repro_dir.origin_copy + ".dvc", - repro_dir.origin_copy_2 + ".dvc", - repro_dir.last_stage, - } - assert Path(repro_dir.origin_copy).read_text() == "new origin data content" + # Check that the dependency ("origin_copy") and the dependent stages + # inside the folder have been reproduced ("origin_copy_2", "last_stage") + assert stages == [ + repro_dir["origin_copy"], + repro_dir["origin_copy_2"], + repro_dir["last_stage"], + ] + assert ( + _out_path(repro_dir["origin_copy"]).read_text() + == "new origin data content" + ) assert ( - Path(repro_dir.origin_copy_2).read_text() == "new origin data content" + _out_path(repro_dir["origin_copy_2"]).read_text() + == "new origin data content" ) @@ -1470,26 +1472,20 @@ def test_recursive_repro_single(dvc, repro_dir): Test recursive single-item repro on dir after a dep outside this dir has changed. """ - origin_data = Path("origin_data") - origin_data.unlink() - origin_data.write_text("new origin content") - - dir_file = repro_dir / "data_dir" / "dir_file" - dir_file.unlink() - dir_file.write_text("new dir file content") + _rewrite_file("origin_data", "new origin content") + _rewrite_file(["data_dir", "dir_file"], "new dir file content") stages = dvc.reproduce("dir", recursive=True, single_item=True) # Check that just stages inside given dir # with changed direct deps have been reproduced. - # This means that "first" stage should not be reproduced - # since it depends on "source". - # Also check that "dir_file_copy" stage was reproduced before "third" stage - assert len(stages) == 2 - assert [s.relpath for s in stages] == [ - repro_dir.dir_file_copy + ".dvc", - repro_dir.last_stage, - ] - assert Path(repro_dir.dir_file_copy).read_text() == "new dir file content" + # This means that "origin_copy_2" stage should not be reproduced + # since it depends on "origin_copy". + # Also check that "dir_file_copy" stage was reproduced before "last_stage" + assert stages == [repro_dir["dir_file_copy"], repro_dir["last_stage"]] + assert ( + _out_path(repro_dir["dir_file_copy"]).read_text() + == "new dir file content" + ) def test_recursive_repro_single_force(dvc, repro_dir): @@ -1498,32 +1494,31 @@ def test_recursive_repro_single_force(dvc, repro_dir): without any dependencies changing. """ stages = dvc.reproduce("dir", recursive=True, single_item=True, force=True) - assert len(stages) == 3 # Check that all stages inside given dir have been reproduced - # Also check that "dir_file_copy" stage was reproduced before "third" stage - # and that "first" stage was reproduced before "third" stage - names = [s.relpath for s in stages] - assert { - repro_dir.origin_copy_2 + ".dvc", - repro_dir.dir_file_copy + ".dvc", - repro_dir.last_stage, - } == set(names) - assert names.index(repro_dir.origin_copy_2 + ".dvc") < names.index( - repro_dir.last_stage + # Also check that "dir_file_copy" stage was reproduced before "last_stage" + # and that "origin_copy" stage was reproduced before "last_stage" stage + assert len(stages) == 3 + assert set(stages) == { + repro_dir["origin_copy_2"], + repro_dir["dir_file_copy"], + repro_dir["last_stage"], + } + assert stages.index(repro_dir["origin_copy_2"]) < stages.index( + repro_dir["last_stage"] ) - assert names.index(repro_dir.dir_file_copy + ".dvc") < names.index( - repro_dir.last_stage + assert stages.index(repro_dir["dir_file_copy"]) < stages.index( + repro_dir["last_stage"] ) -def test_recursive_repro_empty_dir(dvc, repro_dir): +def test_recursive_repro_empty_dir(tmp_dir, dvc, repro_dir): """ Test recursive repro on an empty directory """ - (repro_dir / "emptydir").mkdir() + (tmp_dir / "emptydir").mkdir() stages = dvc.reproduce("emptydir", recursive=True, force=True) - assert len(stages) == 0 + assert stages == [] def test_recursive_repro_recursive_missing_file(dvc): @@ -1541,13 +1536,9 @@ def test_recursive_repro_on_stage_file(dvc, repro_dir): Test recursive repro on a stage file instead of directory """ stages = dvc.reproduce( - repro_dir.origin_copy_2 + ".dvc", recursive=True, force=True + repro_dir["origin_copy_2"].relpath, recursive=True, force=True ) - assert len(stages) == 2 - assert [ - repro_dir.origin_copy + ".dvc", - repro_dir.origin_copy_2 + ".dvc", - ] == [stage.relpath for stage in stages] + assert stages == [repro_dir["origin_copy"], repro_dir["origin_copy_2"]] def test_dvc_formatting_retained(tmp_dir, dvc, run_copy): From 0554cbfe9c49dc6c9b8a4ffc1a3a5f20404f95d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Thu, 9 Jan 2020 11:21:47 +0100 Subject: [PATCH 4/4] repro: tests: use Path-like objects in repro_dir fixture --- tests/func/test_repro.py | 58 +++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index ff3e4eaebd..4d96457438 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -1397,34 +1397,43 @@ def repro_dir(tmp_dir, dvc, run_copy): stages = {} - origin_copy = "origin_copy" - stage = run_copy("origin_data", origin_copy) + origin_copy = tmp_dir / "origin_copy" + stage = run_copy("origin_data", fspath(origin_copy)) assert stage is not None - assert (tmp_dir / origin_copy).read_text() == "origin data content" + assert origin_copy.read_text() == "origin data content" stages["origin_copy"] = stage - origin_copy_2 = os.path.join("dir", "origin_copy_2") - stage = run_copy(origin_copy, origin_copy_2, fname=origin_copy_2 + ".dvc") + origin_copy_2 = tmp_dir / "dir" / "origin_copy_2" + stage = run_copy( + fspath(origin_copy), + fspath(origin_copy_2), + fname=fspath(origin_copy_2) + ".dvc", + ) assert stage is not None - assert (tmp_dir / origin_copy_2).read_text() == "origin data content" + assert origin_copy_2.read_text() == "origin data content" stages["origin_copy_2"] = stage dir_file_path = tmp_dir / "data_dir" / "dir_file" - dir_file_copy = os.path.join("dir", "subdir", "dir_file_copy") + dir_file_copy = tmp_dir / "dir" / "subdir" / "dir_file_copy" stage = run_copy( - fspath(dir_file_path), dir_file_copy, fname=dir_file_copy + ".dvc" + fspath(dir_file_path), + fspath(dir_file_copy), + fname=fspath(dir_file_copy) + ".dvc", ) assert stage is not None - assert (tmp_dir / dir_file_copy).read_text() == "dir file content" + assert dir_file_copy.read_text() == "dir file content" stages["dir_file_copy"] = stage - last_stage = os.path.join("dir", "Dvcfile") - stage = dvc.run(fname=last_stage, deps=[origin_copy_2, dir_file_copy]) + last_stage = tmp_dir / "dir" / "Dvcfile" + stage = dvc.run( + fname=fspath(last_stage), + deps=[fspath(origin_copy_2), fspath(dir_file_copy)], + ) assert stage is not None stages["last_stage"] = stage # Unrelated are to verify that reproducing `dir` will not trigger them too - assert run_copy(origin_copy, "unrelated1") is not None + assert run_copy(fspath(origin_copy), "unrelated1") is not None assert run_copy(fspath(dir_file_path), "unrelated2") is not None yield stages @@ -1438,8 +1447,8 @@ def _rewrite_file(path_elements, new_content): file.write_text(new_content) -def _out_path(stage): - return Path(stage.outs[0].fspath) +def _read_out(stage): + return Path(stage.outs[0].fspath).read_text() def test_recursive_repro_default(dvc, repro_dir): @@ -1457,14 +1466,8 @@ def test_recursive_repro_default(dvc, repro_dir): repro_dir["origin_copy_2"], repro_dir["last_stage"], ] - assert ( - _out_path(repro_dir["origin_copy"]).read_text() - == "new origin data content" - ) - assert ( - _out_path(repro_dir["origin_copy_2"]).read_text() - == "new origin data content" - ) + assert _read_out(repro_dir["origin_copy"]) == "new origin data content" + assert _read_out(repro_dir["origin_copy_2"]) == "new origin data content" def test_recursive_repro_single(dvc, repro_dir): @@ -1482,10 +1485,7 @@ def test_recursive_repro_single(dvc, repro_dir): # since it depends on "origin_copy". # Also check that "dir_file_copy" stage was reproduced before "last_stage" assert stages == [repro_dir["dir_file_copy"], repro_dir["last_stage"]] - assert ( - _out_path(repro_dir["dir_file_copy"]).read_text() - == "new dir file content" - ) + assert _read_out(repro_dir["dir_file_copy"]) == "new dir file content" def test_recursive_repro_single_force(dvc, repro_dir): @@ -1511,7 +1511,7 @@ def test_recursive_repro_single_force(dvc, repro_dir): ) -def test_recursive_repro_empty_dir(tmp_dir, dvc, repro_dir): +def test_recursive_repro_empty_dir(tmp_dir, dvc): """ Test recursive repro on an empty directory """ @@ -1611,10 +1611,12 @@ def test(self): assert evaluation[2].relpath == "E.dvc" -def test_ssh_dir_out(dvc): +def test_ssh_dir_out(tmp_dir, dvc): if not _should_test_ssh(): pytest.skip() + tmp_dir.gen({"foo": "foo content"}) + # Set up remote and cache remote_url = get_ssh_url() assert main(["remote", "add", "upstream", remote_url]) == 0