From b46e2fb2857a04fd8d1072a370936096e1805dd7 Mon Sep 17 00:00:00 2001 From: Sujith H Date: Sat, 4 Jan 2020 18:25:58 +0530 Subject: [PATCH 1/2] dvc: update gc to remove unpacked dir In the local remote, it was found that gc does not remove the unpacked dir. This change set removes it. Signed-off-by: Sujith H --- dvc/remote/base.py | 5 +++++ dvc/remote/local.py | 4 ++++ tests/func/test_gc.py | 13 +++++++++++++ 3 files changed, 22 insertions(+) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 32e14069e8..1c316416b2 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -697,6 +697,8 @@ def gc(self, named_cache): if checksum in used: continue path_info = self.checksum_to_path_info(checksum) + if self.is_dir_checksum(checksum): + self._remove_unpacked_dir(checksum) self.remove(path_info) removed = True return removed @@ -1009,3 +1011,6 @@ def _changed_unpacked_dir(self, checksum): def _update_unpacked_dir(self, checksum): pass + + def _remove_unpacked_dir(self, checksum): + pass diff --git a/dvc/remote/local.py b/dvc/remote/local.py index f7a1ff4c80..1117ca9939 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -464,6 +464,10 @@ def _get_unpacked_dir_path_info(self, checksum): info = self.checksum_to_path_info(checksum) return info.with_name(info.name + self.UNPACKED_DIR_SUFFIX) + def _remove_unpacked_dir(self, checksum): + path_info = self._get_unpacked_dir_path_info(checksum) + self.remove(path_info) + def _path_info_changed(self, path_info): if self.exists(path_info) and self.state.get(path_info): return False diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index 386f168cfb..e35cf3ce01 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -7,6 +7,7 @@ from dvc.exceptions import CollectCacheError from dvc.main import main from dvc.repo import Repo as DvcRepo +from dvc.remote.local import RemoteLOCAL from tests.basic_env import TestDir, TestDvcGit @@ -204,3 +205,15 @@ def test_gc_no_dir_cache(tmp_dir, dvc, 
repo_template): def _count_files(path): return sum(len(files) for _, _, files in os.walk(path)) + +def test_gc_no_unpacked_dir(tmp_dir, dvc, repo_template): + dir_stages = dvc.add("dir") + dvc.status() + + os.remove("dir.dvc") + unpackeddir = dir_stages[0].outs[0].cache_path + RemoteLOCAL.UNPACKED_DIR_SUFFIX + + assert os.path.exists(unpackeddir) + + dvc.gc(force=True) + assert not os.path.exists(unpackeddir) \ No newline at end of file From 9a0bcdbd63ac0393019797c55e1ddd170cfbed23 Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Mon, 6 Jan 2020 16:39:06 +0000 Subject: [PATCH 2/2] Restyled by black --- tests/func/test_gc.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index e35cf3ce01..688f113968 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -206,14 +206,17 @@ def test_gc_no_dir_cache(tmp_dir, dvc, repo_template): def _count_files(path): return sum(len(files) for _, _, files in os.walk(path)) + def test_gc_no_unpacked_dir(tmp_dir, dvc, repo_template): dir_stages = dvc.add("dir") dvc.status() os.remove("dir.dvc") - unpackeddir = dir_stages[0].outs[0].cache_path + RemoteLOCAL.UNPACKED_DIR_SUFFIX + unpackeddir = ( + dir_stages[0].outs[0].cache_path + RemoteLOCAL.UNPACKED_DIR_SUFFIX + ) assert os.path.exists(unpackeddir) dvc.gc(force=True) - assert not os.path.exists(unpackeddir) \ No newline at end of file + assert not os.path.exists(unpackeddir)