diff --git a/tests/conftest.py b/tests/conftest.py index 0cdb204b88..46b059d5ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,13 +7,13 @@ from git import Repo from git.exc import GitCommandNotFound -from .basic_env import TestDirFixture -from .basic_env import TestDvcGitFixture -from .basic_env import TestGitFixture from dvc.remote.config import RemoteConfig from dvc.remote.ssh.connection import SSHConnection from dvc.repo import Repo as DvcRepo from dvc.utils.compat import cast_bytes_py2 +from .basic_env import TestDirFixture, TestDvcGitFixture, TestGitFixture +from .dir_helpers import * # noqa + # Prevent updater and analytics from running their processes os.environ[cast_bytes_py2("DVC_TEST")] = cast_bytes_py2("true") diff --git a/tests/dir_helpers.py b/tests/dir_helpers.py new file mode 100644 index 0000000000..1d97897904 --- /dev/null +++ b/tests/dir_helpers.py @@ -0,0 +1,229 @@ +from __future__ import unicode_literals + +import os +import pytest + +from funcy.py3 import lmap, retry + +from dvc.utils import makedirs +from dvc.utils.compat import basestring, is_py2, pathlib, fspath, fspath_py35 + + +__all__ = ["tmp_dir", "scm", "dvc", "repo_template", "run_copy", "erepo_dir"] +REPO_TEMPLATE = { + "foo": "foo", + "bar": "bar", + "dir": { + "data": "dir/data text", + "subdir": {"subdata": "dir/subdir/subdata text"}, + }, +} + + +class TmpDir(pathlib.Path): + def __new__(cls, *args, **kwargs): + if cls is TmpDir: + cls = WindowsTmpDir if os.name == "nt" else PosixTmpDir + self = cls._from_parts(args, init=False) + if not self._flavour.is_supported: + raise NotImplementedError( + "cannot instantiate %r on your system" % (cls.__name__,) + ) + self._init() + return self + + # Not needed in Python 3.6+ + def __fspath__(self): + return str(self) + + def _require(self, name): + if not hasattr(self, name): + raise TypeError( + "Can't use {name} for this temporary dir. " + 'Did you forget to use "{name}" fixture?'.format(name=name) + ) + + def gen(self, struct, text=""): + if isinstance(struct, basestring): + struct = {struct: text} + + self._gen(struct) + return struct.keys() + + def _gen(self, struct, prefix=None): + for name, contents in struct.items(): + path = (prefix or self) / name + + if isinstance(contents, dict): + self._gen(contents, prefix=path) + else: + makedirs(path.parent, exist_ok=True) + if is_py2 and isinstance(contents, str): + path.write_bytes(contents) + else: + path.write_text(contents) + + def dvc_gen(self, struct, text="", commit=None): + paths = self.gen(struct, text) + return self.dvc_add(paths, commit=commit) + + def scm_gen(self, struct, text="", commit=None): + paths = self.gen(struct, text) + return self.scm_add(paths, commit=commit) + + def dvc_add(self, filenames, commit=None): + self._require("dvc") + filenames = _coerce_filenames(filenames) + + stages = self.dvc.add(filenames) + if commit: + stage_paths = [s.path for s in stages] + self.scm_add(stage_paths, commit=commit) + + return stages + + def scm_add(self, filenames, commit=None): + self._require("scm") + filenames = _coerce_filenames(filenames) + + self.scm.add(filenames) + if commit: + self.scm.commit(commit) + + # Introspection methods + def list(self): + return [p.name for p in self.iterdir()] + + +def _coerce_filenames(filenames): + if isinstance(filenames, (basestring, pathlib.PurePath)): + filenames = [filenames] + return lmap(fspath, filenames) + + +class WindowsTmpDir(TmpDir, pathlib.PureWindowsPath): + pass + + +class PosixTmpDir(TmpDir, pathlib.PurePosixPath): + pass + + +@pytest.fixture +def tmp_dir(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + return TmpDir(fspath_py35(tmp_path)) + + +@pytest.fixture +def scm(tmp_dir, request): + # Use dvc.scm if available + if "dvc" in request.fixturenames: + dvc = request.getfixturevalue("dvc") + tmp_dir.scm = dvc.scm + yield dvc.scm + + else: + from dvc.scm.git import Git + + _git_init() + try: + scm = tmp_dir.scm = Git(fspath(tmp_dir)) + yield scm + finally: + scm.close() + + +def _git_init(): + from git import Repo + from git.exc import GitCommandNotFound + + # NOTE: handles EAGAIN error on BSD systems (osx in our case). + # Otherwise when running tests you might get this exception: + # + # GitCommandNotFound: Cmd('git') not found due to: + # OSError('[Errno 35] Resource temporarily unavailable') + git = retry(5, GitCommandNotFound)(Repo.init)() + git.close() + + +@pytest.fixture +def dvc(tmp_dir, request): + from dvc.repo import Repo + + if "scm" in request.fixturenames: + if not hasattr(tmp_dir, "scm"): + _git_init() + + dvc = Repo.init(fspath(tmp_dir)) + dvc.scm.commit("init dvc") + else: + dvc = Repo.init(fspath(tmp_dir), no_scm=True) + + try: + tmp_dir.dvc = dvc + yield dvc + finally: + dvc.close() + + +@pytest.fixture +def repo_template(tmp_dir): + tmp_dir.gen(REPO_TEMPLATE) + + +@pytest.fixture +def run_copy(tmp_dir, dvc, request): + tmp_dir.gen( + "copy.py", + "import sys, shutil\nshutil.copyfile(sys.argv[1], sys.argv[2])", + ) + + # Do we need this? + if "scm" in request.fixturenames: + request.getfixturevalue("scm") + tmp_dir.git_add("copy.py", commit="add copy.py") + + def run_copy(src, dst, **run_kwargs): + return dvc.run( + cmd="python copy.py {} {}".format(src, dst), + outs=[dst], + deps=[src, "copy.py"], + **run_kwargs + ) + + return run_copy + + +@pytest.fixture +def erepo_dir(tmp_path_factory, monkeypatch): + from dvc.repo import Repo + from dvc.remote.config import RemoteConfig + + path = TmpDir(fspath_py35(tmp_path_factory.mktemp("erepo"))) + + # Chdir for git and dvc to work locally + monkeypatch.chdir(fspath_py35(path)) + + _git_init() + path.dvc = Repo.init() + path.scm = path.dvc.scm + path.dvc_gen(REPO_TEMPLATE, commit="init repo") + + rconfig = RemoteConfig(path.dvc.config) + rconfig.add("upstream", path.dvc.cache.local.cache_dir, default=True) + path.scm_add([path.dvc.config.config_file], commit="add remote") + + path.dvc_gen("version", "master") + path.scm_add([".gitignore", "version.dvc"], commit="master") + + path.scm.checkout("branch", create_new=True) + (path / "version").unlink() # For mac ??? + path.dvc_gen("version", "branch") + path.scm_add([".gitignore", "version.dvc"], commit="branch") + + path.scm.checkout("master") + path.dvc.close() + monkeypatch.undo() # Undo chdir + + return path diff --git a/tests/func/test_add.py b/tests/func/test_add.py index 6d51f233ca..b0f77de350 100644 --- a/tests/func/test_add.py +++ b/tests/func/test_add.py @@ -59,7 +59,7 @@ def test_unicode(self): self.assertTrue(os.path.isfile(stage.path)) -class TestAddUnSupportedFile(TestDvc): +class TestAddUnsupportedFile(TestDvc): def test(self): with self.assertRaises(DvcException): self.dvc.add("unsupported://unsupported") @@ -132,15 +132,11 @@ def test(self): self.assertEqual(os.path.abspath("directory.dvc"), stage.path) -def test_add_tracked_file(git, dvc_repo, repo_dir): - fname = "tracked_file" - repo_dir.create(fname, "tracked file contents") - - dvc_repo.scm.add([fname]) - dvc_repo.scm.commit("add {}".format(fname)) +def test_add_tracked_file(tmp_dir, scm, dvc): + tmp_dir.scm_gen("tracked_file", "...", commit="add tracked file") with pytest.raises(OutputAlreadyTrackedError): - dvc_repo.add(fname) + dvc.add("tracked_file") class TestAddDirWithExistingCache(TestDvc): @@ -474,23 +470,18 @@ def test(self): self.assertFalse(os.path.exists("foo.dvc")) -def test_should_cleanup_after_failed_add(git, dvc_repo, repo_dir): - stages = dvc_repo.add(repo_dir.FOO) - assert len(stages) == 1 - - foo_stage_file = repo_dir.FOO + Stage.STAGE_FILE_SUFFIX - - # corrupt stage file - repo_dir.create(foo_stage_file, "this will break yaml structure") +def test_should_cleanup_after_failed_add(tmp_dir, scm, dvc, repo_template): + # Add and corrupt a stage file + dvc.add("foo") + tmp_dir.gen("foo.dvc", "- broken\nyaml") with pytest.raises(StageFileCorruptedError): - dvc_repo.add(repo_dir.BAR) + dvc.add("bar") - bar_stage_file = repo_dir.BAR + Stage.STAGE_FILE_SUFFIX - assert not os.path.exists(bar_stage_file) + assert not os.path.exists("bar.dvc") gitignore_content = get_gitignore_content() - assert "/" + repo_dir.BAR not in gitignore_content + assert "/bar" not in gitignore_content class TestShouldNotTrackGitInternalFiles(TestDvc): @@ -634,7 +625,7 @@ def test_should_protect_on_repeated_add(link, dvc_repo, repo_dir): assert not os.access(repo_dir.FOO, os.W_OK) -def test_escape_gitignore_entries(git, dvc_repo, repo_dir): +def test_escape_gitignore_entries(tmp_dir, scm, dvc): fname = "file!with*weird#naming_[1].t?t" ignored_fname = r"/file\!with\*weird\#naming_\[1\].t\?t" @@ -644,8 +635,5 @@ def test_escape_gitignore_entries(git, dvc_repo, repo_dir): fname = "file!with_weird#naming_[1].txt" ignored_fname = r"/file\!with_weird\#naming_\[1\].txt" - os.rename(repo_dir.FOO, fname) - - dvc_repo.add(fname) - + tmp_dir.dvc_gen(fname, "...") assert ignored_fname in get_gitignore_content() diff --git a/tests/func/test_api.py b/tests/func/test_api.py index b0e0085e31..54623c080d 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import os import shutil @@ -27,51 +29,51 @@ def run_dvc(*argv): @pytest.mark.parametrize("remote_url", remote_params, indirect=True) -def test_get_url(remote_url, repo_dir, dvc_repo): +def test_get_url(remote_url, tmp_dir, dvc, repo_template): run_dvc("remote", "add", "-d", "upstream", remote_url) - dvc_repo.add(repo_dir.FOO) + dvc.add("foo") expected_url = URLInfo(remote_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8" - assert api.get_url(repo_dir.FOO) == expected_url + assert api.get_url("foo") == expected_url @pytest.mark.parametrize("remote_url", remote_params, indirect=True) -def test_get_url_external(remote_url, dvc_repo, erepo): - _set_remote_url_and_commit(erepo.dvc, remote_url) +def test_get_url_external(remote_url, erepo_dir): + _set_remote_url_and_commit(erepo_dir.dvc, remote_url) # Using file url to force clone to tmp repo - repo_url = "file://" + erepo.dvc.root_dir + repo_url = "file://{}".format(erepo_dir) expected_url = URLInfo(remote_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8" - assert api.get_url(erepo.FOO, repo=repo_url) == expected_url + assert api.get_url("foo", repo=repo_url) == expected_url @pytest.mark.parametrize("remote_url", all_remote_params, indirect=True) -def test_open(remote_url, repo_dir, dvc_repo): +def test_open(remote_url, tmp_dir, dvc): run_dvc("remote", "add", "-d", "upstream", remote_url) - dvc_repo.add(repo_dir.FOO) + tmp_dir.dvc_gen("foo", "foo-text") run_dvc("push") # Remove cache to force download - shutil.rmtree(dvc_repo.cache.local.cache_dir) + shutil.rmtree(dvc.cache.local.cache_dir) - with api.open(repo_dir.FOO) as fd: - assert fd.read() == repo_dir.FOO_CONTENTS + with api.open("foo") as fd: + assert fd.read() == "foo-text" @pytest.mark.parametrize("remote_url", all_remote_params, indirect=True) -def test_open_external(remote_url, dvc_repo, erepo): - erepo.dvc.scm.checkout("branch") - _set_remote_url_and_commit(erepo.dvc, remote_url) - erepo.dvc.scm.checkout("master") - _set_remote_url_and_commit(erepo.dvc, remote_url) +def test_open_external(remote_url, erepo_dir): + erepo_dir.scm.checkout("branch") + _set_remote_url_and_commit(erepo_dir.dvc, remote_url) + erepo_dir.scm.checkout("master") + _set_remote_url_and_commit(erepo_dir.dvc, remote_url) - erepo.dvc.push(all_branches=True) + erepo_dir.dvc.push(all_branches=True) # Remove cache to force download - shutil.rmtree(erepo.dvc.cache.local.cache_dir) + shutil.rmtree(erepo_dir.dvc.cache.local.cache_dir) # Using file url to force clone to tmp repo - repo_url = "file://" + erepo.dvc.root_dir + repo_url = "file://{}".format(erepo_dir) with api.open("version", repo=repo_url) as fd: assert fd.read() == "master" @@ -79,15 +81,15 @@ def test_open_external(remote_url, dvc_repo, erepo): @pytest.mark.parametrize("remote_url", all_remote_params, indirect=True) -def test_missing(remote_url, repo_dir, dvc_repo): - run_dvc("add", repo_dir.FOO) +def test_missing(remote_url, tmp_dir, dvc): + tmp_dir.dvc_gen("foo", "foo") run_dvc("remote", "add", "-d", "upstream", remote_url) # Remove cache to make foo missing - shutil.rmtree(dvc_repo.cache.local.cache_dir) + shutil.rmtree(dvc.cache.local.cache_dir) with pytest.raises(FileMissingError): - api.read(repo_dir.FOO) + api.read("foo") def _set_remote_url_and_commit(repo, remote_url): @@ -97,6 +99,7 @@ def _set_remote_url_and_commit(repo, remote_url): repo.scm.commit("modify remote") +# FIXME: this test doesn't use scm ;D def test_open_scm_controlled(dvc_repo, repo_dir): stage, = dvc_repo.add(repo_dir.FOO) @@ -105,13 +108,14 @@ def test_open_scm_controlled(dvc_repo, repo_dir): assert fd.read() == stage_content -def test_open_not_cached(dvc_repo): +# TODO: simplify, we shouldn't need run. +def test_open_not_cached(dvc): metric_file = "metric.txt" metric_content = "0.6" metric_code = "open('{}', 'w').write('{}')".format( metric_file, metric_content ) - dvc_repo.run( + dvc.run( metrics_no_cache=[metric_file], cmd=('python -c "{}"'.format(metric_code)), ) diff --git a/tests/func/test_commit.py b/tests/func/test_commit.py index 9bc97ed688..c3238e6add 100644 --- a/tests/func/test_commit.py +++ b/tests/func/test_commit.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import pytest from dvc.stage import StageCommitError @@ -41,29 +43,22 @@ def test_commit_force(dvc_repo, repo_dir): assert dvc_repo.status([stage.path]) == {} -def test_commit_with_deps(dvc_repo, repo_dir): - stages = dvc_repo.add(repo_dir.FOO, no_commit=True) - assert len(stages) == 1 - foo_stage = stages[0] +def test_commit_with_deps(tmp_dir, dvc, run_copy): + tmp_dir.gen("foo", "foo") + foo_stage, = dvc.add("foo", no_commit=True) assert foo_stage is not None assert len(foo_stage.outs) == 1 - fname = "file" - stage = dvc_repo.run( - cmd="python {} {} {}".format(repo_dir.CODE, repo_dir.FOO, fname), - outs=[fname], - deps=[repo_dir.FOO, repo_dir.CODE], - no_commit=True, - ) + stage = run_copy("foo", "file", no_commit=True) assert stage is not None assert len(stage.outs) == 1 - with dvc_repo.state: + with dvc.state: assert foo_stage.outs[0].changed_cache() assert stage.outs[0].changed_cache() - dvc_repo.commit(stage.path, with_deps=True) - with dvc_repo.state: + dvc.commit(stage.path, with_deps=True) + with dvc.state: assert not foo_stage.outs[0].changed_cache() assert not stage.outs[0].changed_cache() diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index 116234eb8d..7d06cb9a38 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -9,9 +9,7 @@ from dvc.exceptions import CollectCacheError from dvc.main import main from dvc.repo import Repo as DvcRepo -from dvc.utils.compat import pathlib -from tests.basic_env import TestDir -from tests.basic_env import TestDvcGit +from tests.basic_env import TestDir, TestDvcGit class TestGC(TestDvcGit): @@ -179,51 +177,32 @@ def test(self): self._check_cache(2) -def test_all_commits(git, dvc_repo): - def add_and_commit(): - stages = dvc_repo.add(str(testfile)) - dvc_repo.scm.add([s.relpath for s in stages]) - dvc_repo.scm.commit("message") +def test_all_commits(tmp_dir, scm, dvc): + tmp_dir.dvc_gen("testfile", "uncommitted") + tmp_dir.dvc_gen("testfile", "committed", commit="committed") + tmp_dir.dvc_gen("testfile", "modified", commit="modified") + tmp_dir.dvc_gen("testfile", "workspace") - cache_dir = os.path.join(dvc_repo.root_dir, ".dvc", "cache") - testfile = pathlib.Path("testfile") - - testfile.write_text("uncommitted") - dvc_repo.add(str(testfile)) - - testfile.write_text("committed") - add_and_commit() - - testfile.write_text("modified") - add_and_commit() - - testfile.write_text("workspace") - dvc_repo.add(str(testfile)) - - n = _count_files(cache_dir) - - dvc_repo.gc(all_commits=True) + n = _count_files(dvc.cache.local.cache_dir) + dvc.gc(all_commits=True) # Only one uncommitted file should go away - assert _count_files(cache_dir) == n - 1 + assert _count_files(dvc.cache.local.cache_dir) == n - 1 -def _count_files(path): - return sum(len(files) for _, _, files in os.walk(path)) - - -def test_gc_no_dir_cache(repo_dir, dvc_repo): - dvc_repo.add(repo_dir.FOO) - dvc_repo.add(repo_dir.BAR) - dir_stage, = dvc_repo.add(repo_dir.DATA_DIR) +def test_gc_no_dir_cache(tmp_dir, dvc, repo_template): + dvc.add(["foo", "bar"]) + dir_stage, = dvc.add("dir") os.unlink(dir_stage.outs[0].cache_path) with pytest.raises(CollectCacheError): - dvc_repo.gc() + dvc.gc() - assert _count_files(dvc_repo.cache.local.cache_dir) == 4 + assert _count_files(dvc.cache.local.cache_dir) == 4 + dvc.gc(force=True) + assert _count_files(dvc.cache.local.cache_dir) == 2 - dvc_repo.gc(force=True) - assert _count_files(dvc_repo.cache.local.cache_dir) == 2 +def _count_files(path): + return sum(len(files) for _, _, files in os.walk(path)) diff --git a/tests/func/test_metrics.py b/tests/func/test_metrics.py index dc1ef19ac4..c92ad9b81c 100644 --- a/tests/func/test_metrics.py +++ b/tests/func/test_metrics.py @@ -10,7 +10,6 @@ from dvc.main import main from dvc.repo import Repo as DvcRepo from dvc.repo.metrics.show import NO_METRICS_FILE_AT_REFERENCE_WARNING -from dvc.stage import Stage from dvc.utils import relpath from tests.basic_env import TestDvcGit @@ -791,80 +790,49 @@ def _do_show(self, file_name, xpath): self.assertSequenceEqual(ret[branch][file_name], branch) -class TestShouldDisplayMetricsEvenIfMetricIsMissing(object): - BRANCH_MISSING_METRIC = "missing_metric_branch" - METRIC_FILE = "metric" - METRIC_FILE_STAGE = METRIC_FILE + Stage.STAGE_FILE_SUFFIX +def test_display_missing_metrics(tmp_dir, scm, dvc, caplog): + scm.branch("branch") - def _write_metric(self): - with open(self.METRIC_FILE, "w+") as fd: - fd.write("0.5") - fd.flush() - - def _commit_metric(self, dvc, branch): - dvc.scm.add([self.METRIC_FILE_STAGE]) - dvc.scm.commit("{} commit".format(branch)) - - def setUp(self, dvc): - dvc.scm.branch(self.BRANCH_MISSING_METRIC) - - self._write_metric() - - ret = main(["run", "-m", self.METRIC_FILE]) - assert 0 == ret - - self._commit_metric(dvc, "master") - - def test(self, git, dvc_repo, caplog): - self.setUp(dvc_repo) - - dvc_repo.scm.checkout(self.BRANCH_MISSING_METRIC) - - self._write_metric() - ret = main(["run", "-M", self.METRIC_FILE]) - assert 0 == ret - - self._commit_metric(dvc_repo, self.BRANCH_MISSING_METRIC) - os.remove(self.METRIC_FILE) - - ret = main(["metrics", "show", "-a"]) - - assert ( - NO_METRICS_FILE_AT_REFERENCE_WARNING.format( - self.METRIC_FILE, self.BRANCH_MISSING_METRIC - ) - in caplog.text - ) - assert 0 == ret + # Create a metric in master + tmp_dir.gen("metric", "0.5") + assert 0 == main(["run", "-m", "metric"]) + tmp_dir.scm_add("metric.dvc", commit="master commit") + # Create a metric in branch + scm.checkout("branch") + tmp_dir.gen("metric", "0.5") + assert 0 == main(["run", "-M", "metric"]) + tmp_dir.scm_add("metric.dvc", commit="branch commit") -def test_show_xpath_should_override_stage_xpath(dvc_repo): - metric_file = "metric" - metric = {"m1": 0.1, "m2": 0.2} - with open(metric_file, "w") as fobj: - json.dump(metric, fobj) + os.remove("metric") + assert 0 == main(["metrics", "show", "-a"]) + assert ( + NO_METRICS_FILE_AT_REFERENCE_WARNING.format("metric", "branch") + in caplog.text + ) - dvc_repo.run(cmd="", overwrite=True, metrics=[metric_file]) - dvc_repo.metrics.modify(metric_file, typ="json", xpath="m2") - result = dvc_repo.metrics.show(xpath="m1") - assert result == {"": {metric_file: [0.1]}} +def test_show_xpath_should_override_stage_xpath(tmp_dir, dvc): + tmp_dir.gen("metric", json.dumps({"m1": 0.1, "m2": 0.2})) + dvc.run(cmd="", overwrite=True, metrics=["metric"]) + dvc.metrics.modify("metric", typ="json", xpath="m2") -def test_show_multiple_outputs(dvc_repo, caplog): - with open("1.json", "w") as fobj: - json.dump({"AUC": 1}, fobj) + assert dvc.metrics.show(xpath="m1") == {"": {"metric": [0.1]}} - with open("2.json", "w") as fobj: - json.dump({"AUC": 2}, fobj) - os.mkdir("metrics") - with open("metrics/3.json", "w") as fobj: - json.dump({"AUC": 3}, fobj) +def test_show_multiple_outputs(tmp_dir, dvc, caplog): + tmp_dir.gen( + { + "1.json": json.dumps({"AUC": 1}), + "2.json": json.dumps({"AUC": 2}), + "metrics/3.json": json.dumps({"AUC": 3}), + } + ) - dvc_repo.run(cmd="", overwrite=True, metrics=["1.json"]) - dvc_repo.run(cmd="", overwrite=True, metrics=["2.json"]) - dvc_repo.run(cmd="", overwrite=True, metrics=["metrics/3.json"]) + dvc.run(cmd="", overwrite=True, metrics=["1.json"]) + dvc.run(cmd="", overwrite=True, metrics=["2.json"]) + dvc.run(cmd="", overwrite=True, metrics=["metrics/3.json"]) with caplog.at_level(logging.INFO, logger="dvc"): assert 0 == main(["metrics", "show", "1.json", "2.json"])