diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 0563f28e0b..2cb3dad0c2 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -1,11 +1,16 @@ import os +import logging +import colorama from dvc.repo.scm_context import scm_context from dvc.stage import Stage -from dvc.utils import walk_files +from dvc.utils import walk_files, LARGE_DIR_SIZE from dvc.exceptions import RecursiveAddingWhileUsingFilename +logger = logging.getLogger(__name__) + + @scm_context def add(repo, target, recursive=False, no_commit=False, fname=None): if recursive and fname: @@ -13,6 +18,19 @@ def add(repo, target, recursive=False, no_commit=False, fname=None): targets = _find_all_targets(repo, target, recursive) + if os.path.isdir(target) and len(targets) > LARGE_DIR_SIZE: + logger.warning( + "You are adding a large directory '{target}' recursively," + " consider tracking it as a whole instead.\n" + "{purple}HINT:{nc} Remove the generated stage files and then" + " run {cyan}dvc add {target}{nc}".format( + purple=colorama.Fore.MAGENTA, + cyan=colorama.Fore.CYAN, + nc=colorama.Style.RESET_ALL, + target=target, + ) + ) + stages = _create_stages(repo, targets, fname, no_commit) repo.check_dag(repo.stages() + stages) diff --git a/dvc/utils/compat.py b/dvc/utils/compat.py index 088c43a1f1..e0ee72078f 100644 --- a/dvc/utils/compat.py +++ b/dvc/utils/compat.py @@ -108,6 +108,7 @@ def _makedirs(name, mode=0o777, exist_ok=False): input = raw_input # noqa: F821 cast_bytes_py2 = cast_bytes makedirs = _makedirs + range = xrange # noqa: F821 import StringIO import io @@ -148,3 +149,4 @@ def __exit__(self, *args): input = input # noqa: F821 open = open # noqa: F821 cast_bytes_py2 = no_code + range = range # noqa: F821 diff --git a/tests/func/test_add.py b/tests/func/test_add.py index 4e7e66ad5e..fb54d7ef7d 100644 --- a/tests/func/test_add.py +++ b/tests/func/test_add.py @@ -7,13 +7,16 @@ import shutil import filecmp import posixpath +import logging +import colorama from dvc.system import System from mock import patch from dvc.main import main -from dvc.utils import file_md5 +from dvc.utils import file_md5, LARGE_DIR_SIZE from dvc.utils.stage import load_stage_file +from dvc.utils.compat import range from dvc.stage import Stage from dvc.exceptions import DvcException, RecursiveAddingWhileUsingFilename from dvc.output.base import OutputAlreadyTrackedError @@ -87,6 +90,30 @@ def test(self): ret = main(["add", "--recursive", self.DATA_DIR]) self.assertEqual(ret, 0) + def test_warn_about_large_directories(self): + warning = ( + "You are adding a large directory 'large-dir' recursively," + " consider tracking it as a whole instead.\n" + "{purple}HINT:{nc} Remove the generated stage files and then" + " run {cyan}dvc add large-dir{nc}".format( + purple=colorama.Fore.MAGENTA, + cyan=colorama.Fore.CYAN, + nc=colorama.Style.RESET_ALL, + ) + ) + + os.mkdir("large-dir") + + # Create a lot of files + for iteration in range(LARGE_DIR_SIZE + 1): + path = os.path.join("large-dir", str(iteration)) + with open(path, "w") as fobj: + fobj.write(path) + + with self._caplog.at_level(logging.WARNING, logger="dvc"): + assert main(["add", "--recursive", "large-dir"]) == 0 + assert warning in self._caplog.text + class TestAddDirectoryWithForwardSlash(TestDvc): def test(self):