From e505c38f203e7de972b52272b49d3317f4090ecb Mon Sep 17 00:00:00 2001 From: pared Date: Sat, 6 Apr 2019 13:58:11 +0200 Subject: [PATCH 1/3] cache: make reflink, copy default links --- dvc/remote/local.py | 6 ++++-- tests/test_cache.py | 5 +++++ tests/test_checkout.py | 21 ++++++++++++++------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 1123363657..04c043d3a6 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +from copy import copy + from dvc.utils.compat import str, makedirs import os @@ -49,7 +51,7 @@ class RemoteLOCAL(RemoteBase): PARAM_RELPATH = "relpath" MD5_DIR_SUFFIX = ".dir" - CACHE_TYPES = ["reflink", "hardlink", "symlink", "copy"] + DEFAULT_CACHE_TYPES = ["reflink", "copy"] CACHE_TYPE_MAP = { "copy": shutil.copyfile, "symlink": System.symlink, @@ -74,7 +76,7 @@ def __init__(self, repo, config): types = [t.strip() for t in types.split(",")] self.cache_types = types else: - self.cache_types = self.CACHE_TYPES + self.cache_types = copy(self.DEFAULT_CACHE_TYPES) if self.cache_dir is not None and not os.path.exists(self.cache_dir): os.mkdir(self.cache_dir) diff --git a/tests/test_cache.py b/tests/test_cache.py index 9fbba5fcdd..9e0bd1dadb 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -168,3 +168,8 @@ def test_relative_path(self): self.assertEqual(len(subdirs), 1) files = os.listdir(os.path.join(tmpdir, subdirs[0])) self.assertEqual(len(files), 1) + + +class TestShouldCacheBeReflinkOrCopyByDefault(TestDvc): + def test(self): + self.assertEqual(self.dvc.cache.local.cache_types, ["reflink", "copy"]) diff --git a/tests/test_checkout.py b/tests/test_checkout.py index f987f13760..6abe2de982 100644 --- a/tests/test_checkout.py +++ b/tests/test_checkout.py @@ -12,7 +12,7 @@ from dvc import progress from dvc.repo import Repo as DvcRepo from dvc.system import System -from dvc.utils import walk_files +from dvc.utils import walk_files, load_stage_file from tests.basic_env import TestDvc from tests.test_repro import TestRepro from dvc.stage import Stage, StageFileBadNameError, StageFileDoesNotExistError @@ -134,7 +134,9 @@ def outs_info(self, stage): FileInfo = collections.namedtuple("FileInfo", "path inode") paths = [ - path for output in stage.outs for path in walk_files(output.path) + path + for output in stage["outs"] + for path in walk_files(output["path"]) ] return [ @@ -210,14 +212,19 @@ def test(self): ret = main(["config", "cache.type", "copy"]) self.assertEqual(ret, 0) - stages = self.dvc.add(self.DATA_DIR) - self.assertEqual(len(stages), 1) - stage = stages[0] + ret = main(["add", self.DATA_DIR]) + self.assertEqual(0, ret) + + stage_path = self.DATA_DIR + Stage.STAGE_FILE_SUFFIX + stage = load_stage_file(stage_path) staged_files = self.outs_info(stage) - os.remove(staged_files[0].path) + # move instead of remove, to lock inode assigned to stage_files[0].path + # if we were to use remove, we might end up with same inode assigned to + # newly checked out file + shutil.move(staged_files[0].path, "random_name") - ret = main(["checkout", "--force", stage.relpath]) + ret = main(["checkout", "--force", stage_path]) self.assertEqual(ret, 0) checkedout_files = self.outs_info(stage) From 904947541274fcedd20cb625318643735b4c0c49 Mon Sep 17 00:00:00 2001 From: pared Date: Fri, 12 Apr 2019 22:10:18 +0200 Subject: [PATCH 2/3] test_repro: make cache use hardlink --- tests/test_repro.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_repro.py b/tests/test_repro.py index 0cb82aa528..3ae890cf50 100644 --- a/tests/test_repro.py +++ b/tests/test_repro.py @@ -909,6 +909,9 @@ def test(self, mock_prompt): ret = main(["remote", "modify", remote_name, "type", "hardlink"]) self.assertEqual(ret, 0) + ret = main(["config", "cache.type", "hardlink"]) + self.assertEqual(0, ret) + self.dvc = DvcRepo(".") foo_key = remote_key + self.sep + self.FOO From 074b6eaa9eb66c86f36d111e9270b8c91fa4113a Mon Sep 17 00:00:00 2001 From: pared Date: Mon, 15 Apr 2019 12:55:50 +0200 Subject: [PATCH 3/3] test: move corrupted hardlink cache test test_repro -> test_run --- tests/test_repro.py | 29 ----------------------------- tests/test_run.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/tests/test_repro.py b/tests/test_repro.py index 3ae890cf50..ed2e79648f 100644 --- a/tests/test_repro.py +++ b/tests/test_repro.py @@ -854,30 +854,6 @@ def check_already_cached(self, stage): mock_download.assert_not_called() mock_checkout.assert_called_once() - def corrupted_cache(self): - os.unlink("bar.dvc") - - stage = self.dvc.run( - deps=[self.FOO], outs=[self.BAR], cmd="echo bar > bar" - ) - - with open(self.BAR, "w") as fd: - fd.write("corrupting the cache") - - patch_checkout = patch.object( - stage.outs[0], "checkout", wraps=stage.outs[0].checkout - ) - - patch_run = patch.object(stage, "_run", wraps=stage._run) - - with self.dvc.state: - with patch_checkout as mock_checkout: - with patch_run as mock_run: - stage.run() - - mock_run.assert_called_once() - mock_checkout.assert_not_called() - @patch("dvc.prompt.confirm", return_value=True) def test(self, mock_prompt): if not self.should_test(): @@ -909,9 +885,6 @@ def test(self, mock_prompt): ret = main(["remote", "modify", remote_name, "type", "hardlink"]) self.assertEqual(ret, 0) - ret = main(["config", "cache.type", "hardlink"]) - self.assertEqual(0, ret) - self.dvc = DvcRepo(".") foo_key = remote_key + self.sep + self.FOO @@ -980,8 +953,6 @@ def test(self, mock_prompt): self.dvc.checkout(cmd_stage.path, force=True) self.assertEqual(self.dvc.status(cmd_stage.path), {}) - self.corrupted_cache() - class TestReproExternalS3(TestReproExternalBase): def should_test(self): diff --git a/tests/test_run.py b/tests/test_run.py index 26a194541f..c771fa463a 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -9,6 +9,7 @@ from dvc.main import main from dvc.output import OutputBase +from dvc.repo import Repo as DvcRepo from dvc.utils import file_md5, load_stage_file from dvc.system import System from dvc.stage import Stage, StagePathNotFoundError, StagePathNotDirectoryError @@ -920,3 +921,33 @@ def test(self): @property def _outs_command(self): return "--outs-persist" + + +class TestShouldNotCheckoutUponCorruptedLocalHardlinkCache(TestDvc): + def setUp(self): + super( + TestShouldNotCheckoutUponCorruptedLocalHardlinkCache, self + ).setUp() + ret = main(["config", "cache.type", "hardlink"]) + self.assertEqual(ret, 0) + self.dvc = DvcRepo(".") + + def test(self): + cmd = "cp {} {}".format(self.FOO, self.BAR) + stage = self.dvc.run(deps=[self.FOO], outs=[self.BAR], cmd=cmd) + + with open(self.BAR, "w") as fd: + fd.write("corrupting the output cache") + + patch_checkout = mock.patch.object( + stage.outs[0], "checkout", wraps=stage.outs[0].checkout + ) + patch_run = mock.patch.object(stage, "_run", wraps=stage._run) + + with self.dvc.state: + with patch_checkout as mock_checkout: + with patch_run as mock_run: + stage.run() + + mock_run.assert_called_once() + mock_checkout.assert_not_called()