From 1e3abe2f383b93a6c219f225c7fd1a684665b123 Mon Sep 17 00:00:00 2001 From: Sujith H Date: Sat, 4 Jan 2020 18:25:58 +0530 Subject: [PATCH] dvc: update gc to remove unpacked dir In the local remote, it was found that gc does not remove the unpacked dir. This change set helps to remove it. Signed-off-by: Sujith H --- dvc/remote/base.py | 5 +++++ dvc/remote/local.py | 4 ++++ tests/func/test_gc.py | 16 ++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 32e14069e8..1c316416b2 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -697,6 +697,8 @@ def gc(self, named_cache): if checksum in used: continue path_info = self.checksum_to_path_info(checksum) + if self.is_dir_checksum(checksum): + self._remove_unpacked_dir(checksum) self.remove(path_info) removed = True return removed @@ -1009,3 +1011,6 @@ def _changed_unpacked_dir(self, checksum): def _update_unpacked_dir(self, checksum): pass + + def _remove_unpacked_dir(self, checksum): + pass diff --git a/dvc/remote/local.py b/dvc/remote/local.py index f7a1ff4c80..1117ca9939 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -464,6 +464,10 @@ def _get_unpacked_dir_path_info(self, checksum): info = self.checksum_to_path_info(checksum) return info.with_name(info.name + self.UNPACKED_DIR_SUFFIX) + def _remove_unpacked_dir(self, checksum): + path_info = self._get_unpacked_dir_path_info(checksum) + self.remove(path_info) + def _path_info_changed(self, path_info): if self.exists(path_info) and self.state.get(path_info): return False diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index 386f168cfb..688f113968 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -7,6 +7,7 @@ from dvc.exceptions import CollectCacheError from dvc.main import main from dvc.repo import Repo as DvcRepo +from dvc.remote.local import RemoteLOCAL from tests.basic_env import TestDir, TestDvcGit @@ -204,3 +205,18 @@ def test_gc_no_dir_cache(tmp_dir, dvc, 
repo_template): def _count_files(path): return sum(len(files) for _, _, files in os.walk(path)) + + +def test_gc_no_unpacked_dir(tmp_dir, dvc, repo_template): + dir_stages = dvc.add("dir") + dvc.status() + + os.remove("dir.dvc") + unpackeddir = ( + dir_stages[0].outs[0].cache_path + RemoteLOCAL.UNPACKED_DIR_SUFFIX + ) + + assert os.path.exists(unpackeddir) + + dvc.gc(force=True) + assert not os.path.exists(unpackeddir)