From 8109cade7206e18ecf3c5071026ccd75ad950953 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sun, 15 Sep 2019 13:09:15 -0500 Subject: [PATCH 01/13] wip - pull is working --- dvc/data_cloud.py | 5 +++++ dvc/dependency/repo.py | 11 ++++++++--- dvc/output/base.py | 6 ++++-- dvc/repo/__init__.py | 5 +++++ dvc/repo/fetch.py | 10 +++++++--- 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/dvc/data_cloud.py b/dvc/data_cloud.py index 3568ae4cbb..f952e4f74a 100644 --- a/dvc/data_cloud.py +++ b/dvc/data_cloud.py @@ -99,6 +99,11 @@ def pull(self, targets, jobs=None, remote=None, show_checksums=False): show_checksums (bool): show checksums instead of file names in information messages. """ + # XXX: returning earlier to prevent `get_remote` from + # raising a ConfigError + if not targets: + return 0 + return self.repo.cache.local.pull( targets, jobs=jobs, diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 30d49fdc5f..b56bc8008d 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -61,7 +61,7 @@ def save(self): def dumpd(self): return {self.PARAM_PATH: self.def_path, self.PARAM_REPO: self.def_repo} - def download(self, to, resume=False): + def fetch(self): with self._make_repo( cache_dir=self.repo.cache.local.cache_dir ) as repo: @@ -70,8 +70,13 @@ def download(self, to, resume=False): out = repo.find_out_by_relpath(self.def_path) with repo.state: repo.cloud.pull(out.get_used_cache()) - to.info = copy.copy(out.info) - to.checkout() + + return out + + def download(self, to): + out = self.fetch() + to.info = copy.copy(out.info) + to.checkout() def update(self): with self._make_repo(rev_lock=None) as repo: diff --git a/dvc/output/base.py b/dvc/output/base.py index 48fd5bebbd..44890739f6 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -393,8 +393,10 @@ def get_used_cache(self, **kwargs): include the `info` of its files. """ - if self.stage.is_repo_import: - return [] + # XXX: There's no test for this one, it might not be needed + # + # if self.stage.is_repo_import: + # return [] if not self.use_cache: return [] diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 16317a68fc..7fd5f21d51 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -214,6 +214,7 @@ def used_cache( cache["hdfs"] = [] cache["ssh"] = [] cache["azure"] = [] + cache["repo"] = [] for branch in self.brancher( all_branches=all_branches, all_tags=all_tags @@ -231,6 +232,10 @@ def used_cache( stages = self.stages() for stage in stages: + if stage.is_repo_import: + cache["repo"].append(stage) + continue + for out in stage.outs: scheme = out.path_info.scheme used_cache = out.get_used_cache( diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 3132e30d33..ec9894f3e6 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -22,7 +22,11 @@ def fetch( remote=remote, jobs=jobs, recursive=recursive, - )["local"] - return self.cloud.pull( - used, jobs, remote=remote, show_checksums=show_checksums ) + + for stage in used["repo"]: + stage.reproduce() + + return self.cloud.pull( + used["local"], jobs, remote=remote, show_checksums=show_checksums + ) + len(used["repo"]) From 81536b9cf81f616edc11328f383ad310777af14e Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 16 Sep 2019 12:23:58 -0500 Subject: [PATCH 02/13] use fetch instead of reproduce --- dvc/repo/__init__.py | 2 +- dvc/repo/fetch.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 7fd5f21d51..31eca793e0 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -233,7 +233,7 @@ def used_cache( for stage in stages: if stage.is_repo_import: - cache["repo"].append(stage) + cache["repo"] += stage.deps continue for out in stage.outs: diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index ec9894f3e6..cfaada2608 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -24,8 +24,8 @@ def fetch( recursive=recursive, ) - for stage in used["repo"]: - stage.reproduce() + for dep in used["repo"]: + dep.fetch() return self.cloud.pull( used["local"], jobs, remote=remote, show_checksums=show_checksums From a865cfc24344620997e4d848af03d9365c440a05 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 16 Sep 2019 18:47:44 -0500 Subject: [PATCH 03/13] test pull and fetch --- tests/func/test_import.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index b097f98450..9156fb62c0 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -3,6 +3,8 @@ from tests.utils import trees_equal +from dvc.stage import Stage + def test_import(repo_dir, git, dvc_repo, erepo): src = erepo.FOO @@ -39,3 +41,37 @@ def test_import_rev(repo_dir, git, dvc_repo, erepo): with open(dst, "r+") as fobj: assert fobj.read() == "branch" assert git.git.check_ignore(dst) + + +def test_fetching_imported_stage(dvc_repo, erepo): + src = erepo.FOO + dst = erepo.FOO + "_imported" + + dvc_repo.imp(erepo.root_dir, src, dst) + + dst_stage = Stage.load(dvc_repo, 'foo_imported.dvc') + dst_cache = dst_stage.outs[0].cache_path + + os.remove(dst_cache) + + dvc_repo.fetch(['foo_imported.dvc']) + + assert os.path.isfile(dst_cache) + + +def test_pulling_imported_stage(dvc_repo, erepo): + src = erepo.FOO + dst = erepo.FOO + "_imported" + + dvc_repo.imp(erepo.root_dir, src, dst) + + dst_stage = Stage.load(dvc_repo, 'foo_imported.dvc') + dst_cache = dst_stage.outs[0].cache_path + + os.remove(dst) + os.remove(dst_cache) + + dvc_repo.pull(['foo_imported.dvc']) + + assert os.path.isfile(dst) + assert os.path.isfile(dst_cache) From 6b524c9c4bca2afb08ab54e054f8db6cabae2528 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 16 Sep 2019 18:53:56 -0500 Subject: [PATCH 04/13] style changes --- tests/func/test_import.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 9156fb62c0..1e9093f943 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -49,12 +49,12 @@ def test_fetching_imported_stage(dvc_repo, erepo): dvc_repo.imp(erepo.root_dir, src, dst) - dst_stage = Stage.load(dvc_repo, 'foo_imported.dvc') + dst_stage = Stage.load(dvc_repo, "foo_imported.dvc") dst_cache = dst_stage.outs[0].cache_path os.remove(dst_cache) - dvc_repo.fetch(['foo_imported.dvc']) + dvc_repo.fetch(["foo_imported.dvc"]) assert os.path.isfile(dst_cache) @@ -65,13 +65,13 @@ def test_pulling_imported_stage(dvc_repo, erepo): dvc_repo.imp(erepo.root_dir, src, dst) - dst_stage = Stage.load(dvc_repo, 'foo_imported.dvc') + dst_stage = Stage.load(dvc_repo, "foo_imported.dvc") dst_cache = dst_stage.outs[0].cache_path os.remove(dst) os.remove(dst_cache) - dvc_repo.pull(['foo_imported.dvc']) + dvc_repo.pull(["foo_imported.dvc"]) assert os.path.isfile(dst) assert os.path.isfile(dst_cache) From 2b9422356be6b9ea35b865fee563b9197dd55dc5 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 18 Sep 2019 23:43:52 -0500 Subject: [PATCH 05/13] fetch: catch config error on fetch instead of get_remote --- dvc/data_cloud.py | 5 ----- dvc/repo/fetch.py | 14 +++++++++++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/dvc/data_cloud.py b/dvc/data_cloud.py index f952e4f74a..3568ae4cbb 100644 --- a/dvc/data_cloud.py +++ b/dvc/data_cloud.py @@ -99,11 +99,6 @@ def pull(self, targets, jobs=None, remote=None, show_checksums=False): show_checksums (bool): show checksums instead of file names in information messages. """ - # XXX: returning earlier to prevent `get_remote` from - # raising a ConfigError - if not targets: - return 0 - return self.repo.cache.local.pull( targets, jobs=jobs, diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index cfaada2608..745c6679ff 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -1,4 +1,5 @@ from __future__ import unicode_literals +from dvc.config import ConfigError def fetch( @@ -27,6 +28,13 @@ def fetch( for dep in used["repo"]: dep.fetch() - return self.cloud.pull( - used["local"], jobs, remote=remote, show_checksums=show_checksums - ) + len(used["repo"]) + try: + return self.cloud.pull( + used["local"], + jobs, + remote=remote, + show_checksums=show_checksums, + ) + len(used["repo"]) + except ConfigError: + if not used["repo"]: + raise From ee1cf0c16219a459322ad3e31ed09b42c8d567cf Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Thu, 19 Sep 2019 10:21:49 -0500 Subject: [PATCH 06/13] output: remove unused guard clause --- dvc/output/base.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dvc/output/base.py b/dvc/output/base.py index 5df4a73ec3..455f8951d6 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -402,11 +402,6 @@ def get_used_cache(self, **kwargs): include the `info` of its files. """ - # XXX: There's no test for this one, it might not be needed - # - # if self.stage.is_repo_import: - # return [] - if not self.use_cache: return [] From 34ed75f254ff9ed51129594f8ea4c3fe59c77795 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Thu, 19 Sep 2019 20:03:54 -0500 Subject: [PATCH 07/13] fetch: define a more specific error instead of ConfigError --- dvc/config.py | 12 ++++++++++++ dvc/data_cloud.py | 11 +++-------- dvc/repo/fetch.py | 17 +++++++++++------ 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/dvc/config.py b/dvc/config.py index c7925aec21..1e1190a119 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -31,6 +31,18 @@ def __init__(self, msg, cause=None): ) +class NoRemoteRepositoryError(ConfigError): + def __init__(self, command, cause=None): + msg = ( + "No remote repository specified. Setup default repository with\n" + " dvc config core.remote \n" + "or use:\n" + " dvc {} -r \n".format(command) + ) + + super(NoRemoteRepositoryError, self).__init__(msg, cause=cause) + + def supported_cache_type(types): """Checks if link type config option has a valid value. diff --git a/dvc/data_cloud.py b/dvc/data_cloud.py index 3568ae4cbb..ad5f48dd54 100644 --- a/dvc/data_cloud.py +++ b/dvc/data_cloud.py @@ -4,7 +4,7 @@ import logging -from dvc.config import Config, ConfigError +from dvc.config import Config, NoRemoteRepositoryError from dvc.remote import Remote from dvc.remote.s3 import RemoteS3 from dvc.remote.gs import RemoteGS @@ -27,7 +27,7 @@ class DataCloud(object): we are working on. Raises: - config.ConfigError: thrown when config has invalid format. + config.NoRemoteRepositoryErro: thrown when config has invalid format. """ CLOUD_MAP = { @@ -60,12 +60,7 @@ def get_remote(self, remote=None, command=""): if remote: return self._init_remote(remote) - raise ConfigError( - "No remote repository specified. Setup default repository with\n" - " dvc config core.remote \n" - "or use:\n" - " dvc {} -r \n".format(command) - ) + raise NoRemoteRepositoryError(command) def _init_remote(self, remote): return Remote(self.repo, name=remote) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 745c6679ff..70a76098df 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -1,5 +1,5 @@ from __future__ import unicode_literals -from dvc.config import ConfigError +from dvc.config import NoRemoteRepositoryError def fetch( @@ -25,16 +25,21 @@ def fetch( recursive=recursive, ) - for dep in used["repo"]: - dep.fetch() + downloaded_files = 0 try: - return self.cloud.pull( + downloaded_files += self.cloud.pull( used["local"], jobs, remote=remote, show_checksums=show_checksums, - ) + len(used["repo"]) - except ConfigError: + ) + except NoRemoteRepositoryError: if not used["repo"]: raise + + for dep in used["repo"]: + dep.fetch() + downloaded_files += 1 + + return downloaded_files From 3de1b2bf547af11162a2aa5d0f46e50e67b00430 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 20 Sep 2019 19:21:44 -0500 Subject: [PATCH 08/13] fetch: display number of errors correctly --- dvc/config.py | 6 +++--- dvc/data_cloud.py | 6 +++--- dvc/exceptions.py | 18 ++++++++++++++++++ dvc/remote/local/__init__.py | 9 ++++----- dvc/repo/fetch.py | 36 +++++++++++++++++++++++++++++------- tests/func/test_import.py | 19 ++----------------- 6 files changed, 59 insertions(+), 35 deletions(-) diff --git a/dvc/config.py b/dvc/config.py index 1e1190a119..d89b2fc6f0 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -31,16 +31,16 @@ def __init__(self, msg, cause=None): ) -class NoRemoteRepositoryError(ConfigError): +class NoRemoteError(ConfigError): def __init__(self, command, cause=None): msg = ( - "No remote repository specified. Setup default repository with\n" + "no remote specified. Setup default remote with\n" " dvc config core.remote \n" "or use:\n" " dvc {} -r \n".format(command) ) - super(NoRemoteRepositoryError, self).__init__(msg, cause=cause) + super(NoRemoteError, self).__init__(msg, cause=cause) def supported_cache_type(types): diff --git a/dvc/data_cloud.py b/dvc/data_cloud.py index ad5f48dd54..841c8ffa80 100644 --- a/dvc/data_cloud.py +++ b/dvc/data_cloud.py @@ -4,7 +4,7 @@ import logging -from dvc.config import Config, NoRemoteRepositoryError +from dvc.config import Config, NoRemoteError from dvc.remote import Remote from dvc.remote.s3 import RemoteS3 from dvc.remote.gs import RemoteGS @@ -27,7 +27,7 @@ class DataCloud(object): we are working on. Raises: - config.NoRemoteRepositoryErro: thrown when config has invalid format. + config.ConfigError: thrown when config has invalid format. """ CLOUD_MAP = { @@ -60,7 +60,7 @@ def get_remote(self, remote=None, command=""): if remote: return self._init_remote(remote) - raise NoRemoteRepositoryError(command) + raise NoRemoteError(command) def _init_remote(self, remote): return Remote(self.repo, name=remote) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index f7b6739a33..5697720ff2 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -303,3 +303,21 @@ def __init__(self, hook_name): "https://man.dvc.org/install " "for more info.".format(hook_name) ) + + +class FailedDownloadError(DvcException): + def __init__(self, amount): + self.amount = amount + + super(FailedDownloadError, self).__init__( + "{amount} files failed to download".format(amount=amount) + ) + + +class FailedUploadError(DvcException): + def __init__(self, amount): + self.amount = amount + + super(FailedUploadError, self).__init__( + "{amount} files failed to upload".format(amount=amount) + ) diff --git a/dvc/remote/local/__init__.py b/dvc/remote/local/__init__.py index 1c1256efd7..debdee60bd 100644 --- a/dvc/remote/local/__init__.py +++ b/dvc/remote/local/__init__.py @@ -30,7 +30,7 @@ makedirs, ) from dvc.config import Config -from dvc.exceptions import DvcException +from dvc.exceptions import DvcException, FailedDownloadError, FailedUploadError from dvc.progress import Tqdm, TqdmThreadPoolExecutor from dvc.path_info import PathInfo @@ -388,10 +388,9 @@ def _process( fails = sum(map(func, *plans)) if fails: - msg = "{} files failed to {}" - raise DvcException( - msg.format(fails, "download" if download else "upload") - ) + if download: + raise FailedDownloadError(fails) + raise FailedUploadError(fails) return len(plans[0]) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 70a76098df..806b9be9fb 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals -from dvc.config import NoRemoteRepositoryError +from dvc.config import NoRemoteError +from dvc.exceptions import FailedDownloadError def fetch( @@ -13,6 +14,18 @@ def fetch( all_tags=False, recursive=False, ): + """Download data items from a cloud and imported repositories + + Returns: + int: number of succesfully downloaded files + + Raises: + FailedDownloadError: thrown when there are failed downloads, either + during `cloud.pull` or trying to fetch imported files + + config.NoRemoteError: thrown when downloading only local files and no + remote is configured + """ with self.state: used = self.used_cache( targets, @@ -25,21 +38,30 @@ def fetch( recursive=recursive, ) - downloaded_files = 0 + downloaded = 0 + failed = 0 try: - downloaded_files += self.cloud.pull( + downloaded += self.cloud.pull( used["local"], jobs, remote=remote, show_checksums=show_checksums, ) - except NoRemoteRepositoryError: + except NoRemoteError as exc: if not used["repo"]: raise + except FailedDownloadError as exc: + failed += exc.amount for dep in used["repo"]: - dep.fetch() - downloaded_files += 1 + try: + out = dep.fetch() + downloaded += out.get_files_number() + except FailedDownloadError as exc: + failed += exc.amount + + if failed: + raise FailedDownloadError(failed) - return downloaded_files + return downloaded diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 1e9093f943..3e0b234532 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -1,5 +1,6 @@ import os import filecmp +import shutil from tests.utils import trees_equal @@ -43,23 +44,7 @@ def test_import_rev(repo_dir, git, dvc_repo, erepo): assert git.git.check_ignore(dst) -def test_fetching_imported_stage(dvc_repo, erepo): - src = erepo.FOO - dst = erepo.FOO + "_imported" - - dvc_repo.imp(erepo.root_dir, src, dst) - - dst_stage = Stage.load(dvc_repo, "foo_imported.dvc") - dst_cache = dst_stage.outs[0].cache_path - - os.remove(dst_cache) - - dvc_repo.fetch(["foo_imported.dvc"]) - - assert os.path.isfile(dst_cache) - - -def test_pulling_imported_stage(dvc_repo, erepo): +def test_pull_imported_stage(dvc_repo, erepo): src = erepo.FOO dst = erepo.FOO + "_imported" From 76b643de6aeef8894843863571457af41f97b813 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 20 Sep 2019 23:50:41 -0500 Subject: [PATCH 09/13] exceptions: remove "Failed" prefix to download/upload --- dvc/exceptions.py | 8 ++++---- dvc/remote/local/__init__.py | 6 +++--- dvc/repo/fetch.py | 13 +++++++------ tests/func/test_import.py | 1 - 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 5697720ff2..eb6697068f 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -305,19 +305,19 @@ def __init__(self, hook_name): ) -class FailedDownloadError(DvcException): +class DownloadError(DvcException): def __init__(self, amount): self.amount = amount - super(FailedDownloadError, self).__init__( + super(DownloadError, self).__init__( "{amount} files failed to download".format(amount=amount) ) -class FailedUploadError(DvcException): +class UploadError(DvcException): def __init__(self, amount): self.amount = amount - super(FailedUploadError, self).__init__( + super(UploadError, self).__init__( "{amount} files failed to upload".format(amount=amount) ) diff --git a/dvc/remote/local/__init__.py b/dvc/remote/local/__init__.py index debdee60bd..7592ecb274 100644 --- a/dvc/remote/local/__init__.py +++ b/dvc/remote/local/__init__.py @@ -30,7 +30,7 @@ makedirs, ) from dvc.config import Config -from dvc.exceptions import DvcException, FailedDownloadError, FailedUploadError +from dvc.exceptions import DvcException, DownloadError, UploadError from dvc.progress import Tqdm, TqdmThreadPoolExecutor from dvc.path_info import PathInfo @@ -389,8 +389,8 @@ def _process( if fails: if download: - raise FailedDownloadError(fails) - raise FailedUploadError(fails) + raise DownloadError(fails) + raise UploadError(fails) return len(plans[0]) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 806b9be9fb..deb1d53ea3 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals from dvc.config import NoRemoteError -from dvc.exceptions import FailedDownloadError +from dvc.exceptions import DownloadError def fetch( @@ -20,7 +20,7 @@ def fetch( int: number of succesfully downloaded files Raises: - FailedDownloadError: thrown when there are failed downloads, either + DownloadError: thrown when there are failed downloads, either during `cloud.pull` or trying to fetch imported files config.NoRemoteError: thrown when downloading only local files and no @@ -48,20 +48,21 @@ def fetch( remote=remote, show_checksums=show_checksums, ) - except NoRemoteError as exc: + except NoRemoteError: if not used["repo"]: raise - except FailedDownloadError as exc: + + except DownloadError as exc: failed += exc.amount for dep in used["repo"]: try: out = dep.fetch() downloaded += out.get_files_number() - except FailedDownloadError as exc: + except DownloadError as exc: failed += exc.amount if failed: - raise FailedDownloadError(failed) + raise DownloadError(failed) return downloaded diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 3e0b234532..f5b8b5bbc0 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -1,6 +1,5 @@ import os import filecmp -import shutil from tests.utils import trees_equal From f099981df3dd98d8d89369214fd461a1984d10a5 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 21 Sep 2019 13:04:04 -0500 Subject: [PATCH 10/13] fetch: catch other exceptions that might halt the download --- dvc/repo/fetch.py | 12 ++++++++---- tests/func/test_import.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index deb1d53ea3..2b692336e4 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from dvc.config import NoRemoteError -from dvc.exceptions import DownloadError +from dvc.exceptions import DownloadError, OutputNotFoundError +from dvc.scm.base import CloneError def fetch( @@ -49,7 +50,7 @@ def fetch( show_checksums=show_checksums, ) except NoRemoteError: - if not used["repo"]: + if not used["repo"] and used["local"]: raise except DownloadError as exc: @@ -59,8 +60,11 @@ def fetch( try: out = dep.fetch() downloaded += out.get_files_number() - except DownloadError as exc: - failed += exc.amount + except (DownloadError, CloneError, OutputNotFoundError) as exc: + if hasattr(exc, "amount"): + failed += exc.amount + else: + failed += 1 if failed: raise DownloadError(failed) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index f5b8b5bbc0..b5b99d596a 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -1,9 +1,12 @@ import os import filecmp +import shutil +import pytest from tests.utils import trees_equal from dvc.stage import Stage +from dvc.exceptions import DownloadError def test_import(repo_dir, git, dvc_repo, erepo): @@ -59,3 +62,20 @@ def test_pull_imported_stage(dvc_repo, erepo): assert os.path.isfile(dst) assert os.path.isfile(dst_cache) + + +def test_download_error_pulling_imported_stage(dvc_repo, erepo): + src = erepo.FOO + dst = erepo.FOO + "_imported" + + dvc_repo.imp(erepo.root_dir, src, dst) + + dst_stage = Stage.load(dvc_repo, "foo_imported.dvc") + dst_cache = dst_stage.outs[0].cache_path + + shutil.rmtree(erepo.root_dir) + os.remove(dst) + os.remove(dst_cache) + + with pytest.raises(DownloadError): + dvc_repo.pull(["foo_imported.dvc"]) From 7ef764347c4cf253ee35ad35ab028fca8c614462 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 21 Sep 2019 14:05:23 -0500 Subject: [PATCH 11/13] fetch: log catched exceptions --- dvc/repo/fetch.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 2b692336e4..c26bce90d4 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -1,9 +1,15 @@ from __future__ import unicode_literals + +import logging + from dvc.config import NoRemoteError from dvc.exceptions import DownloadError, OutputNotFoundError from dvc.scm.base import CloneError +logger = logging.getLogger(__name__) + + def fetch( self, targets=None, @@ -60,11 +66,11 @@ def fetch( try: out = dep.fetch() downloaded += out.get_files_number() - except (DownloadError, CloneError, OutputNotFoundError) as exc: - if hasattr(exc, "amount"): - failed += exc.amount - else: - failed += 1 + except DownloadError as exc: + failed += exc.amount + except (CloneError, OutputNotFoundError) as exc: + failed += 1 + logger.exception(str(exc)) if failed: raise DownloadError(failed) From a8caeef6597609c70a296ff8927cacf09a6eabd1 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 21 Sep 2019 16:06:18 -0500 Subject: [PATCH 12/13] fetch: reformat exception Co-Authored-By: Ruslan Kuprieiev --- dvc/repo/fetch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index c26bce90d4..f4098cb972 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -70,7 +70,7 @@ def fetch( failed += exc.amount except (CloneError, OutputNotFoundError) as exc: failed += 1 - logger.exception(str(exc)) + logger.exception("failed to fetch data for '{}'".format(dep.stage.outs[0])) if failed: raise DownloadError(failed) From ac70ff8c50deea7f247ab37bb9f341e59dd64647 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 21 Sep 2019 16:43:15 -0500 Subject: [PATCH 13/13] fix formatting --- dvc/repo/fetch.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index f4098cb972..ee58c15423 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -68,9 +68,11 @@ def fetch( downloaded += out.get_files_number() except DownloadError as exc: failed += exc.amount - except (CloneError, OutputNotFoundError) as exc: + except (CloneError, OutputNotFoundError): failed += 1 - logger.exception("failed to fetch data for '{}'".format(dep.stage.outs[0])) + logger.exception( + "failed to fetch data for '{}'".format(dep.stage.outs[0]) + ) if failed: raise DownloadError(failed)