diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index f0ff9df065..df35e45235 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -1,13 +1,18 @@ from __future__ import unicode_literals import copy +import os from contextlib import contextmanager +from dvc.utils.compat import FileNotFoundError from funcy import merge from .local import DependencyLOCAL from dvc.external_repo import external_repo from dvc.utils.compat import str +from dvc.exceptions import OutputNotFoundError +from dvc.exceptions import PathMissingError +from dvc.utils.fs import fs_copy class DependencyREPO(DependencyLOCAL): @@ -72,10 +77,42 @@ def fetch(self): return out + @staticmethod + def _is_git_file(repo, path): + if not os.path.isabs(path): + try: + output = repo.find_out_by_relpath(path) + if not output.use_cache: + return True + except OutputNotFoundError: + return True + return False + + def _copy_if_git_file(self, to_path): + src_path = self.def_path + with self._make_repo( + cache_dir=self.repo.cache.local.cache_dir + ) as repo: + if not self._is_git_file(repo, src_path): + return False + + src_full_path = os.path.join(repo.root_dir, src_path) + dst_full_path = os.path.abspath(to_path) + fs_copy(src_full_path, dst_full_path) + return True + def download(self, to): - out = self.fetch() - to.info = copy.copy(out.info) - to.checkout() + try: + if self._copy_if_git_file(to.fspath): + return + + out = self.fetch() + to.info = copy.copy(out.info) + to.checkout() + except (FileNotFoundError): + raise PathMissingError( + self.def_path, self.def_repo[self.PARAM_URL] + ) def update(self): with self._make_repo(rev_lock=None) as repo: diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 1beb8d6e1c..7151e2e2fa 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -344,3 +344,12 @@ def __init__(self, path, external_repo_path, external_repo_url): class HTTPError(DvcException): def __init__(self, code, reason): super(HTTPError, self).__init__("'{} {}'".format(code, 
reason)) + + +class PathMissingError(DvcException): + def __init__(self, path, repo): + msg = ( + "The path '{}' does not exist in the target repository '{}'" + " neither as an output nor a git-handled file." + ) + super(PathMissingError, self).__init__(msg.format(path, repo)) diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 60c4c2e4ac..3c7e1e68b5 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -1,6 +1,5 @@ import logging import os -import shutil from dvc.utils.compat import FileNotFoundError import shortuuid @@ -10,12 +9,14 @@ NotDvcRepoError, OutputNotFoundError, UrlNotDvcRepoError, + PathMissingError, ) from dvc.external_repo import external_repo from dvc.path_info import PathInfo from dvc.stage import Stage from dvc.utils import resolve_output from dvc.utils.fs import remove +from dvc.utils.fs import fs_copy logger = logging.getLogger(__name__) @@ -28,15 +29,6 @@ def __init__(self): ) -class PathMissingError(DvcException): - def __init__(self, path, repo): - msg = ( - "The path '{}' does not exist in the target repository '{}'" - " neighther as an output nor a git-handled file." 
- ) - super(PathMissingError, self).__init__(msg.format(path, repo)) - - @staticmethod def get(url, path, out=None, rev=None): out = resolve_output(path, out) @@ -76,7 +68,7 @@ def get(url, path, out=None, rev=None): if os.path.isabs(path): raise FileNotFoundError - _copy(os.path.join(repo.root_dir, path), out) + fs_copy(os.path.join(repo.root_dir, path), out) except (OutputNotFoundError, FileNotFoundError): raise PathMissingError(path, url) @@ -94,10 +86,3 @@ def _get_cached(repo, output, out): # This might happen when pull haven't really pulled all the files if failed: raise FileNotFoundError - - -def _copy(src, dst): - if os.path.isdir(src): - shutil.copytree(src, dst) - else: - shutil.copy2(src, dst) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 1b51576164..26ee05506e 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -22,6 +22,13 @@ logger = logging.getLogger(__name__) +def fs_copy(src, dst): + if os.path.isdir(src): + shutil.copytree(src, dst) + else: + shutil.copy2(src, dst) + + def get_inode(path): inode = System.inode(path) logger.debug("Path {} inode {}".format(path, inode)) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 20e40fde77..9a54b726f1 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -8,10 +8,13 @@ from mock import patch from dvc.config import Config -from dvc.exceptions import DownloadError, NoOutputInExternalRepoError +from dvc.exceptions import DownloadError +from dvc.exceptions import PathMissingError +from dvc.exceptions import NoOutputInExternalRepoError from dvc.stage import Stage from dvc.system import System from dvc.utils import makedirs +from dvc.utils.compat import fspath from tests.utils import trees_equal @@ -27,6 +30,34 @@ def test_import(git, dvc_repo, erepo): assert git.git.check_ignore(dst) +def test_import_git_file(erepo_dir, tmp_dir, dvc, scm): + src = "some_file" + dst = "some_file_imported" + + erepo_dir.scm_gen({src: "hello"}, commit="add a regular file") + + 
tmp_dir.dvc.imp(fspath(erepo_dir), src, dst) + + assert (tmp_dir / dst).is_file() + assert filecmp.cmp( + fspath(erepo_dir / src), fspath(tmp_dir / dst), shallow=False + ) + assert tmp_dir.scm.repo.git.check_ignore(fspath(tmp_dir / dst)) + + +def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): + src = "some_directory" + dst = "some_directory_imported" + + erepo_dir.scm_gen({src: {"file.txt": "hello"}}, commit="add a dir") + + tmp_dir.dvc.imp(fspath(erepo_dir), src, dst) + + assert (tmp_dir / dst).is_dir() + trees_equal(fspath(erepo_dir / src), fspath(tmp_dir / dst)) + assert tmp_dir.scm.repo.git.check_ignore(fspath(tmp_dir / dst)) + + def test_import_dir(git, dvc_repo, erepo): src = erepo.DATA_DIR dst = erepo.DATA_DIR + "_imported" @@ -39,6 +70,28 @@ def test_import_dir(git, dvc_repo, erepo): assert git.git.check_ignore(dst) +def test_import_non_cached(erepo_dir, tmp_dir, dvc, scm): + src = "non_cached_output" + dst = src + "_imported" + + erepo_dir.dvc.run( + cmd="echo hello > {}".format(src), + outs_no_cache=[src], + cwd=fspath(erepo_dir), + ) + + erepo_dir.scm.add([fspath(erepo_dir / src)]) + erepo_dir.scm.commit("add a non-cached output") + + tmp_dir.dvc.imp(fspath(erepo_dir), src, dst) + + assert (tmp_dir / dst).is_file() + assert filecmp.cmp( + fspath(erepo_dir / src), fspath(tmp_dir / dst), shallow=False + ) + assert tmp_dir.scm.repo.git.check_ignore(dst) + + def test_import_rev(git, dvc_repo, erepo): src = "version" dst = src @@ -156,6 +209,9 @@ def test_pull_non_workspace(git, dvc_repo, erepo): assert os.path.exists(stage.outs[0].cache_path) -def test_import_non_existing(dvc_repo, erepo): +def test_import_non_existing(erepo_dir, tmp_dir, dvc): + with pytest.raises(PathMissingError): + tmp_dir.dvc.imp(fspath(erepo_dir), "invalid_output") + # https://github.com/iterative/dvc/pull/2837#discussion_r352123053 with pytest.raises(NoOutputInExternalRepoError): - dvc_repo.imp(erepo.root_dir, "invalid_output") + tmp_dir.dvc.imp(fspath(erepo_dir), "/root/", "root")