From a9b9d52853409ae370347e78916cceb532771ed5 Mon Sep 17 00:00:00 2001 From: n3hrox Date: Sun, 27 Oct 2019 17:59:04 +0100 Subject: [PATCH 01/12] updater: detect conda --- dvc/updater.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/dvc/updater.py b/dvc/updater.py index 21251e4dab..b987d3edc6 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import subprocess import sys import os import time @@ -134,6 +135,7 @@ def _get_update_instructions(self): "2. Go to {blue}https://dvc.org{reset}\n" "3. Download and install new binary" ), + "conda": "Run {yellow}conda{reset} {update}update{reset} dvc", None: ( "Find the latest release at\n{blue}" "https://github.com/iterative/dvc/releases/latest" @@ -145,10 +147,20 @@ def _get_update_instructions(self): return instructions[package_manager] + def _get_dvc_path(self, system): + if system in ("linux", "darwin"): + output = subprocess.run(["which", "dvc"], capture_output=True) + else: + output = subprocess.run(["where", "dvc"], capture_output=True) + + return output.stdout.decode("utf-8").lower() + def _get_linux(self): import distro if not is_binary(): + if "conda" in self._get_dvc_path("linux"): + return "conda" return "pip" package_managers = { @@ -168,6 +180,8 @@ def _get_darwin(self): if __file__.startswith("/usr/local/Cellar"): return "formula" else: + if "conda" in self._get_dvc_path("darwin"): + return "conda" return "pip" # NOTE: both pkg and cask put dvc binary into /usr/local/bin, @@ -180,7 +194,13 @@ def _get_darwin(self): return None def _get_windows(self): - return None if is_binary() else "pip" + if is_binary(): + return None + + if "conda" in self._get_dvc_path("linux"): + return "conda" + + return "pip" def _get_package_manager(self): import platform From cf9037932d731cfe5c690a6e57e8ab762c6d424c Mon Sep 17 00:00:00 2001 From: n3hrox Date: Sun, 27 Oct 2019 18:03:33 +0100 Subject: [PATCH 02/12] change method to static --- dvc/updater.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dvc/updater.py b/dvc/updater.py index b987d3edc6..14c2f8f4ec 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -147,7 +147,8 @@ def _get_update_instructions(self): return instructions[package_manager] - def _get_dvc_path(self, system): + @staticmethod + def _get_dvc_path(system): if system in ("linux", "darwin"): output = subprocess.run(["which", "dvc"], capture_output=True) else: From 795db730feac1e06c12bbcd17548d04172afe449 Mon Sep 17 00:00:00 2001 From: n3hrox Date: Sun, 27 Oct 2019 18:15:16 +0100 Subject: [PATCH 03/12] switch subprocess to popen --- dvc/updater.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/dvc/updater.py b/dvc/updater.py index 14c2f8f4ec..9fa55777d8 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import subprocess import sys import os import time @@ -150,11 +149,11 @@ def _get_update_instructions(self): @staticmethod def _get_dvc_path(system): if system in ("linux", "darwin"): - output = subprocess.run(["which", "dvc"], capture_output=True) + output = os.popen("which dvc") else: - output = subprocess.run(["where", "dvc"], capture_output=True) + output = os.popen("where dvc") - return output.stdout.decode("utf-8").lower() + return output.read().lower() def _get_linux(self): import distro @@ -177,22 +176,23 @@ def _get_linux(self): return package_managers.get(distro.id()) def _get_darwin(self): - if not is_binary(): - if __file__.startswith("/usr/local/Cellar"): - return "formula" - else: - if "conda" in self._get_dvc_path("darwin"): - return "conda" - return "pip" - - # NOTE: both pkg and cask put dvc binary into /usr/local/bin, - # so in order to know which method of installation was used, - # we need to actually call `brew cask` - ret = os.system("brew cask ls dvc") - if ret == 0: - return "cask" - - return None + if is_binary(): + # NOTE: both pkg and cask put dvc binary into /usr/local/bin, + # so in order to know which method of installation was used, + # we need to actually call `brew cask` + ret = os.system("brew cask ls dvc") + if ret == 0: + return "cask" + + return None + + if __file__.startswith("/usr/local/Cellar"): + return "formula" + + if "conda" in self._get_dvc_path("darwin"): + return "conda" + + return "pip" def _get_windows(self): if is_binary(): From 225d538069eae92539b8c3de876c3c51caf2b4e2 Mon Sep 17 00:00:00 2001 From: n3hrox Date: Sun, 27 Oct 2019 18:19:45 +0100 Subject: [PATCH 04/12] reduce amount of returns --- dvc/updater.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dvc/updater.py b/dvc/updater.py index 9fa55777d8..87b2ea4c7c 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -186,13 +186,15 @@ def _get_darwin(self): return None + package_manager = None + if __file__.startswith("/usr/local/Cellar"): - return "formula" + package_manager = "formula" if "conda" in self._get_dvc_path("darwin"): - return "conda" + package_manager = "conda" - return "pip" + return package_manager or "pip" def _get_windows(self): if is_binary(): From 03a2f284e2dd05d7a5ee766a420035d8f32bd388 Mon Sep 17 00:00:00 2001 From: n3hrox Date: Sun, 27 Oct 2019 18:28:38 +0100 Subject: [PATCH 05/12] refactor --- dvc/updater.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dvc/updater.py b/dvc/updater.py index 87b2ea4c7c..2fd885e74d 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -155,13 +155,16 @@ def _get_dvc_path(system): return output.read().lower() + @staticmethod + def _get_conda(path): + return "conda" if "conda" in path else None + def _get_linux(self): import distro if not is_binary(): - if "conda" in self._get_dvc_path("linux"): - return "conda" - return "pip" + dvc_path = self._get_dvc_path("linux") + return self._get_conda(dvc_path) or "pip" package_managers = { "rhel": "yum", @@ -191,8 +194,8 @@ def _get_darwin(self): if __file__.startswith("/usr/local/Cellar"): package_manager = "formula" - if "conda" in self._get_dvc_path("darwin"): - package_manager = "conda" + dvc_path = self._get_dvc_path("darwin") + package_manager = package_manager or self._get_conda(dvc_path) return package_manager or "pip" @@ -200,10 +203,8 @@ def _get_windows(self): if is_binary(): return None - if "conda" in self._get_dvc_path("linux"): - return "conda" - - return "pip" + dvc_path = self._get_dvc_path("windows") + return self._get_conda(dvc_path) or "pip" def _get_package_manager(self): import platform From 752e584a9b156b69d8b27c84c9adb39fcfb816cd Mon Sep 17 00:00:00 2001 From: n3hrox Date: Sun, 27 Oct 2019 22:25:27 +0100 Subject: [PATCH 06/12] refactor function and add test --- dvc/updater.py | 14 +++++++++----- tests/func/test_updater.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/dvc/updater.py b/dvc/updater.py index 2fd885e74d..629b2e7463 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -156,15 +156,15 @@ def _get_dvc_path(system): return output.read().lower() @staticmethod - def _get_conda(path): - return "conda" if "conda" in path else None + def _is_conda(path): + return "conda" in path def _get_linux(self): import distro if not is_binary(): dvc_path = self._get_dvc_path("linux") - return self._get_conda(dvc_path) or "pip" + return "conda" if self._is_conda(dvc_path) else "pip" package_managers = { "rhel": "yum", @@ -195,7 +195,8 @@ def _get_darwin(self): package_manager = "formula" dvc_path = self._get_dvc_path("darwin") - package_manager = package_manager or self._get_conda(dvc_path) + if self._is_conda(dvc_path): + package_manager = "conda" return package_manager or "pip" @@ -204,7 +205,10 @@ def _get_windows(self): return None dvc_path = self._get_dvc_path("windows") - return self._get_conda(dvc_path) or "pip" + if self._is_conda(dvc_path): + return "conda" + + return "pip" def _get_package_manager(self): import platform diff --git a/tests/func/test_updater.py b/tests/func/test_updater.py index 3cd2758041..0ae1263ab0 100644 --- a/tests/func/test_updater.py +++ b/tests/func/test_updater.py @@ -49,3 +49,13 @@ def test_check_version_outdated(updater): updater.current = "0.20.8" assert updater._is_outdated() + + +@mock.patch("dvc.updater.Updater._is_conda") +def test_check_dvc_from_conda(mocked_is_conda, updater): + mocked_is_conda.return_value = True + updater.latest = "0.21.0" + updater.current = "0.20.8" + + msg = "Run {yellow}conda{reset} {update}update{reset} dvc" + assert updater._get_update_instructions() == msg From 82b69a5abd5278af24b0817092a35ea24f936e4b Mon Sep 17 00:00:00 2001 From: n3hrox Date: Sun, 27 Oct 2019 22:31:25 +0100 Subject: [PATCH 07/12] remove cask check --- dvc/updater.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/dvc/updater.py b/dvc/updater.py index 629b2e7463..d2cc16fc4c 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -180,13 +180,6 @@ def _get_linux(self): def _get_darwin(self): if is_binary(): - # NOTE: both pkg and cask put dvc binary into /usr/local/bin, - # so in order to know which method of installation was used, - # we need to actually call `brew cask` - ret = os.system("brew cask ls dvc") - if ret == 0: - return "cask" - return None package_manager = None From 952fee57c13f31056796faeb30ce0a84ab585ef7 Mon Sep 17 00:00:00 2001 From: n3hrox Date: Mon, 4 Nov 2019 20:52:24 +0100 Subject: [PATCH 08/12] resolve conflicts --- .github/PULL_REQUEST_TEMPLATE.md | 24 +++- .github/release-drafter.yml | 5 + .gitignore | 1 + .restyled.yaml | 11 ++ dvc/command/destroy.py | 2 +- dvc/command/gc.py | 2 +- dvc/command/imp_url.py | 2 +- dvc/command/init.py | 2 +- dvc/command/remove.py | 2 +- dvc/command/run.py | 6 +- dvc/command/version.py | 6 +- dvc/exceptions.py | 6 +- dvc/lock.py | 11 ++ dvc/logger.py | 27 ++-- dvc/main.py | 4 + dvc/output/base.py | 2 +- dvc/output/local.py | 2 +- dvc/path_info.py | 169 +++++++++++------------ dvc/progress.py | 23 --- dvc/remote/local.py | 7 +- dvc/remote/s3.py | 7 +- dvc/remote/slow_link_detection.py | 2 +- dvc/remote/ssh/__init__.py | 3 +- dvc/remote/ssh/connection.py | 21 ++- dvc/repo/checkout.py | 6 - dvc/repo/fetch.py | 28 ++-- dvc/repo/get.py | 1 - dvc/repo/get_url.py | 3 +- dvc/repo/imp.py | 1 - dvc/repo/init.py | 4 +- dvc/repo/pull.py | 7 +- dvc/scm/git/__init__.py | 8 +- dvc/stage.py | 12 +- dvc/state.py | 8 +- dvc/updater.py | 81 +---------- dvc/utils/__init__.py | 2 +- dvc/utils/fs.py | 6 +- dvc/utils/pkg.py | 51 +++++++ dvc/version.py | 2 +- setup.py | 6 +- tests/func/test_import.py | 18 +++ tests/func/test_remote.py | 2 +- tests/func/test_repro.py | 4 +- tests/unit/command/test_imp_url.py | 4 +- tests/unit/remote/ssh/test_connection.py | 6 + tests/unit/remote/test_local.py | 2 +- tests/unit/remote/test_s3.py | 17 +++ tests/unit/test_logger.py | 38 ++--- tests/unit/utils/test_fs.py | 18 +++ 49 files changed, 360 insertions(+), 322 deletions(-) create mode 100644 .github/release-drafter.yml create mode 100644 .restyled.yaml create mode 100644 dvc/utils/pkg.py diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 74975611dc..92da7b4003 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,9 +1,19 @@ -* [ ] Have you followed the guidelines in our - [Contributing document](https://dvc.org/doc/user-guide/contributing/core)? +* [ ] ❗ Have you followed the guidelines in the + [Contributing to DVC](https://dvc.org/doc/user-guide/contributing/core) + list? -* [ ] Does your PR affect documented changes or does it add new functionality - that should be documented? If yes, have you created a PR for - [dvc.org](https://github.com/iterative/dvc.org) documenting it or at - least opened an issue for it? If so, please add a link to it. +* [ ] 📖 Check this box if this PR **does not** require + [documentation](https://dvc.org/doc) updates, or if it does **and** you + have created a separate PR in + [dvc.org](https://github.com/iterative/dvc.org) + with such updates (or at least opened an issue about it in that repo). + Please link below to your PR (or issue) in the + [dvc.org](https://github.com/iterative/dvc.org) repo. + +* [ ] ❌ Have you checked DeepSource, CodeClimate, and other sanity checks + below? We consider their findings recommendatory and don't expect + everything to be addresses. Please review them carefully and fix those + that actually improve code or fix bugs. + +Thank you for the contribution - we'll try to review it as soon as possible. 🙏 ------ diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml new file mode 100644 index 0000000000..abf195c69e --- /dev/null +++ b/.github/release-drafter.yml @@ -0,0 +1,5 @@ +# Config for https://github.com/apps/release-drafter +branches: + - master +template: | + $CHANGES diff --git a/.gitignore b/.gitignore index d2a206fe3d..33be5cb20c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ neatlynx/__pycache__ cache *.pyc .env/ +.env2.7/ cache .dvc.conf.lock diff --git a/.restyled.yaml b/.restyled.yaml new file mode 100644 index 0000000000..d5eeec7157 --- /dev/null +++ b/.restyled.yaml @@ -0,0 +1,11 @@ +--- +restylers: + - name: black + image: restyled/restyler-black:v19.3b0 + command: + - black + arguments: [] + include: + - "**/*.py" + interpreters: + - python diff --git a/dvc/command/destroy.py b/dvc/command/destroy.py index ca739de4ae..3dc2513875 100644 --- a/dvc/command/destroy.py +++ b/dvc/command/destroy.py @@ -24,7 +24,7 @@ def run(self): if not self.args.force and not prompt.confirm(statement): raise DvcException( "cannot destroy without a confirmation from the user." - " Use '-f' to force." + " Use `-f` to force." ) self.repo.destroy() diff --git a/dvc/command/gc.py b/dvc/command/gc.py index 699f88304b..45d774f6d4 100644 --- a/dvc/command/gc.py +++ b/dvc/command/gc.py @@ -83,7 +83,7 @@ def add_parser(subparsers, parent_parser): "--all-commits", action="store_true", default=False, - help="Keep data files for all commits.", + help=argparse.SUPPRESS, ) gc_parser.add_argument( "-c", diff --git a/dvc/command/imp_url.py b/dvc/command/imp_url.py index 012ea80c0e..1b0b1cf87c 100644 --- a/dvc/command/imp_url.py +++ b/dvc/command/imp_url.py @@ -19,7 +19,7 @@ def run(self): except DvcException: logger.exception( "failed to import {}. You could also try downloading " - "it manually and adding it with `dvc add` command.".format( + "it manually, and adding it with `dvc add`.".format( self.args.url ) ) diff --git a/dvc/command/init.py b/dvc/command/init.py index 0e9bdf83f8..075e3be4c0 100644 --- a/dvc/command/init.py +++ b/dvc/command/init.py @@ -53,7 +53,7 @@ def add_parser(subparsers, parent_parser): action="store_true", default=False, help=( - "Overwrite existing '.dvc' directory. " + "Overwrite existing '.dvc/' directory. " "This operation removes local cache." ), ) diff --git a/dvc/command/remove.py b/dvc/command/remove.py index 814adcfae3..a004480290 100644 --- a/dvc/command/remove.py +++ b/dvc/command/remove.py @@ -28,7 +28,7 @@ def _is_outs_only(self, target): raise DvcException( "Cannot purge without a confirmation from the user." - " Use '-f' to force." + " Use `-f` to force." ) def run(self): diff --git a/dvc/command/run.py b/dvc/command/run.py index b0761435f3..c51f10baba 100644 --- a/dvc/command/run.py +++ b/dvc/command/run.py @@ -27,9 +27,9 @@ def run(self): ] ): # pragma: no cover logger.error( - "too few arguments. Specify at least one: '-d', '-o', '-O'," - " '-m', '-M', '--outs-persist', '--outs-persist-no-cache'," - " 'command'." + "too few arguments. Specify at least one: `-d`, `-o`, `-O`, " + "`-m`, `-M`, `--outs-persist`, `--outs-persist-no-cache`, " + "`command`." ) return 1 diff --git a/dvc/command/version.py b/dvc/command/version.py index 078f1fdec1..7cf00c7516 100644 --- a/dvc/command/version.py +++ b/dvc/command/version.py @@ -18,7 +18,7 @@ from dvc.version import __version__ from dvc.exceptions import DvcException, NotDvcRepoError from dvc.system import System - +from dvc.utils.pkg import get_package_manager logger = logging.getLogger(__name__) @@ -31,17 +31,19 @@ def run(self): python_version = platform.python_version() platform_type = platform.platform() binary = is_binary() - + package_manager = get_package_manager() info = ( "DVC version: {dvc_version}\n" "Python version: {python_version}\n" "Platform: {platform_type}\n" "Binary: {binary}\n" + "Package manager: {package_manager}\n" ).format( dvc_version=dvc_version, python_version=python_version, platform_type=platform_type, binary=binary, + package_manager=package_manager, ) try: diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 2309bcdbf8..16b8b837b3 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -179,7 +179,7 @@ class ConfirmRemoveError(DvcException): def __init__(self, path): super(ConfirmRemoveError, self).__init__( "unable to remove '{}' without a confirmation from the user. Use " - "'-f' to force.".format(path) + "`-f` to force.".format(path) ) @@ -209,7 +209,7 @@ class NoMetricsError(DvcException): def __init__(self): super(NoMetricsError, self).__init__( "no metric files in this repository. " - "Use 'dvc metrics add' to add a metric file to track." + "Use `dvc metrics add` to add a metric file to track." ) @@ -247,7 +247,7 @@ def __init__(self, out_1, out_2): class CheckoutErrorSuggestGit(DvcException): def __init__(self, target, cause): super(CheckoutErrorSuggestGit, self).__init__( - "Did you mean 'git checkout {}'?".format(target), cause=cause + "Did you mean `git checkout {}`?".format(target), cause=cause ) diff --git a/dvc/lock.py b/dvc/lock.py index 8859ff9e72..d3bc83d199 100644 --- a/dvc/lock.py +++ b/dvc/lock.py @@ -87,6 +87,17 @@ def _set_claimfile(self, pid=None): self._tmp_dir, filename + ".lock" ) + # Fix for __del__ bug in flufl.lock [1] which is causing errors on + # Python shutdown [2]. + # [1] https://gitlab.com/warsaw/flufl.lock/issues/7 + # [2] https://github.com/iterative/dvc/issues/2573 + def __del__(self): + try: + if self._owned: + self.finalize() + except ImportError: + pass + else: import zc.lockfile diff --git a/dvc/logger.py b/dvc/logger.py index 71f9d9147c..f26ef31cee 100644 --- a/dvc/logger.py +++ b/dvc/logger.py @@ -11,6 +11,17 @@ import colorama +FOOTER = ( + "\n{yellow}Having any troubles?{nc}" + " Hit us up at {blue}https://dvc.org/support{nc}," + " we are always happy to help!" +).format( + blue=colorama.Fore.BLUE, + nc=colorama.Fore.RESET, + yellow=colorama.Fore.YELLOW, +) + + class LoggingException(Exception): def __init__(self, record): msg = "failed to log {}".format(str(record)) @@ -48,16 +59,6 @@ class ColorFormatter(logging.Formatter): "CRITICAL": colorama.Fore.RED, } - footer = ( - "{yellow}Having any troubles?{nc}" - " Hit us up at {blue}https://dvc.org/support{nc}," - " we are always happy to help!" - ).format( - blue=colorama.Fore.BLUE, - nc=colorama.Fore.RESET, - yellow=colorama.Fore.YELLOW, - ) - def format(self, record): if record.levelname == "INFO": return record.msg @@ -66,10 +67,7 @@ def format(self, record): exception, stack_trace = self._parse_exc(record.exc_info) return ( - "{color}{levelname}{nc}: {description}" - "{stack_trace}\n" - "\n" - "{footer}" + "{color}{levelname}{nc}: {description}" "{stack_trace}\n" ).format( color=self.color_code.get(record.levelname, ""), nc=colorama.Fore.RESET, @@ -77,7 +75,6 @@ def format(self, record): description=self._description(record.msg, exception), msg=record.msg, stack_trace=stack_trace, - footer=self.footer, ) return "{color}{levelname}{nc}: {msg}".format( diff --git a/dvc/main.py b/dvc/main.py index f8b94b8d31..5d3f01193d 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -6,6 +6,7 @@ from dvc.cli import parse_args from dvc.lock import LockError +from dvc.logger import FOOTER from dvc.config import ConfigError from dvc.analytics import Analytics from dvc.exceptions import NotDvcRepoError, DvcParserError @@ -75,6 +76,9 @@ def main(argv=None): # so won't be reused by any other subsequent run anyway. clean_repos() + if ret != 0: + logger.info(FOOTER) + Analytics().send_cmd(cmd, args, ret) return ret diff --git a/dvc/output/base.py b/dvc/output/base.py index f940d3f52e..8fa3ad367c 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -334,7 +334,7 @@ def unprotect(self): self.remote.unprotect(self.path_info) def _collect_used_dir_cache(self, remote=None, force=False, jobs=None): - """Get a list of `info`s retaled to the given directory. + """Get a list of `info`s related to the given directory. - Pull the directory entry from the remote cache if it was changed. diff --git a/dvc/output/local.py b/dvc/output/local.py index 777ac37569..cbd9c9503b 100644 --- a/dvc/output/local.py +++ b/dvc/output/local.py @@ -26,7 +26,7 @@ def _parse_path(self, remote, path): # so we should expect both posix and windows style paths. # PathInfo accepts both, i.e. / works everywhere, \ only on win. # - # FIXME: if we have Windows path containig / or posix one with \ + # FIXME: if we have Windows path containing / or posix one with \ # then we have #2059 bug and can't really handle that. p = self.REMOTE.path_cls(path) if not p.is_absolute(): diff --git a/dvc/path_info.py b/dvc/path_info.py index 3db44a4393..6707c8d81c 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -8,13 +8,12 @@ from dvc.utils.compat import str, builtin_str, basestring, is_py2 from dvc.utils.compat import pathlib, urlparse +from dvc.utils import relpath # On Python 2.7/Windows sys.getfilesystemencoding() is set to mbcs, # which is lossy, thus we can't use that, # see https://github.com/mcmtroffaes/pathlib2/issues/56. -from dvc.utils import relpath - if is_py2: fs_encoding = "utf-8" @@ -112,52 +111,80 @@ class PosixPathInfo(PathInfo, pathlib.PurePosixPath): pass +class _URLPathInfo(PosixPathInfo): + def __str__(self): + return self.__fspath__() + + __unicode__ = __str__ + + class _URLPathParents(object): - def __init__(self, pathcls, scheme, netloc, path): - self._scheme = scheme - self._netloc = netloc - self._parents = path.parents - self._pathcls = pathcls + def __init__(self, src): + self.src = src + self._parents = self.src._path.parents def __len__(self): return len(self._parents) def __getitem__(self, idx): - return self._pathcls.from_parts( - scheme=self._scheme, - netloc=self._netloc, - path=self._parents[idx].fspath, - ) + return self.src.replace(path=self._parents[idx]) def __repr__(self): - return "<{}.parents>".format(self._pathcls.__name__) + return "<{}.parents>".format(self.src) class URLInfo(object): DEFAULT_PORTS = {"http": 80, "https": 443, "ssh": 22, "hdfs": 0} def __init__(self, url): - self.parsed = urlparse(url) - assert self.parsed.scheme != "remote" + p = urlparse(url) + assert not p.query and not p.params and not p.fragment + assert p.password is None + + self.fill_parts(p.scheme, p.hostname, p.username, p.port, p.path) @classmethod def from_parts( - cls, scheme=None, netloc=None, host=None, user=None, port=None, path="" + cls, scheme=None, host=None, user=None, port=None, path="", netloc=None ): - assert scheme and (bool(host) ^ bool(netloc)) + assert bool(host) ^ bool(netloc) + + if netloc is not None: + return cls("{}://{}{}".format(scheme, netloc, path)) + + obj = cls.__new__(cls) + obj.fill_parts(scheme, host, user, port, path) + return obj + + def fill_parts(self, scheme, host, user, port, path): + assert scheme != "remote" + assert isinstance(path, (basestring, _URLPathInfo)) + + self.scheme, self.host, self.user = scheme, host, user + self.port = int(port) if port else self.DEFAULT_PORTS.get(self.scheme) + + if isinstance(path, _URLPathInfo): + self._spath = builtin_str(path) + self._path = path + else: + if path and path[0] != "/": + path = "/" + path + self._spath = path + + @property + def _base_parts(self): + return (self.scheme, self.host, self.user, self.port) + + @property + def parts(self): + return self._base_parts + self._path.parts - if netloc is None: - netloc = host - if user: - netloc = user + "@" + host - if port: - netloc += ":" + str(port) - return cls("{}://{}{}".format(scheme, netloc, path)) + def replace(self, path=None): + return self.from_parts(*self._base_parts, path=path) @cached_property def url(self): - p = self.parsed - return "{}://{}{}".format(p.scheme, self.netloc, p.path) + return "{}://{}{}".format(self.scheme, self.netloc, self._spath) def __str__(self): return self.url @@ -170,92 +197,60 @@ def __eq__(self, other): other = self.__class__(other) return ( self.__class__ == other.__class__ - and self.scheme == other.scheme - and self.netloc == other.netloc + and self._base_parts == other._base_parts and self._path == other._path ) def __hash__(self): - return hash(self.url) + return hash(self.parts) def __div__(self, other): - p = self.parsed - new_path = posixpath.join(p.path, str(other)) - if not new_path.startswith("/"): - new_path = "/" + new_path - new_url = "{}://{}{}".format(p.scheme, p.netloc, new_path) - return self.__class__(new_url) + return self.replace(path=posixpath.join(self._spath, other)) __truediv__ = __div__ - def __getattr__(self, name): - # When deepcopy is called, it creates and object without __init__, - # self.parsed is not initialized and it causes infinite recursion. - # More on this special casing here: - # https://stackoverflow.com/a/47300262/298182 - if name.startswith("__"): - raise AttributeError(name) - return getattr(self.parsed, name) - - @cached_property - def netloc(self): - p = self.parsed - netloc = p.hostname - if p.username: - netloc = p.username + "@" + netloc - if p.port and int(p.port) != self.DEFAULT_PORTS.get(p.scheme): - netloc += ":" + str(p.port) - return netloc - @property - def port(self): - return self.parsed.port or self.DEFAULT_PORTS.get(self.parsed.scheme) - - @property - def host(self): - return self.parsed.hostname - - @property - def user(self): - return self.parsed.username + def path(self): + return self._spath @cached_property def _path(self): - return PosixPathInfo(self.parsed.path) + return _URLPathInfo(self._spath) @property def name(self): return self._path.name - @property - def parts(self): - return (self.scheme, self.netloc) + self._path.parts + @cached_property + def netloc(self): + netloc = self.host + if self.user: + netloc = self.user + "@" + netloc + if self.port and int(self.port) != self.DEFAULT_PORTS.get(self.scheme): + netloc += ":" + str(self.port) + return netloc @property def bucket(self): - return self.parsed.netloc + return self.netloc @property def parent(self): - return self.from_parts( - scheme=self.scheme, - netloc=self.parsed.netloc, - path=self._path.parent.fspath, - ) + return self.replace(path=self._path.parent) @property def parents(self): - return _URLPathParents( - type(self), self.scheme, self.parsed.netloc, self._path - ) + return _URLPathParents(self) def relative_to(self, other): - if isinstance(other, str): - other = URLInfo(other) - if self.scheme != other.scheme or self.netloc != other.netloc: - raise ValueError( - "'{}' does not start with '{}'".format(self, other) - ) + if isinstance(other, basestring): + other = self.__class__(other) + if self.__class__ != other.__class__: + msg = "'{}' has incompatible class with '{}'".format(self, other) + raise ValueError(msg) + if self._base_parts != other._base_parts: + msg = "'{}' does not start with '{}'".format(self, other) + raise ValueError(msg) return self._path.relative_to(other._path) def isin(self, other): @@ -263,14 +258,12 @@ def isin(self, other): other = self.__class__(other) elif self.__class__ != other.__class__: return False - return ( - self.scheme == other.scheme - and self.netloc == other.netloc - and self._path.isin(other._path) + return self._base_parts == other._base_parts and self._path.isin( + other._path ) class CloudURLInfo(URLInfo): @property def path(self): - return self.parsed.path.lstrip("/") + return self._spath.lstrip("/") diff --git a/dvc/progress.py b/dvc/progress.py index 7e85b6e18c..68c6ddd594 100644 --- a/dvc/progress.py +++ b/dvc/progress.py @@ -3,35 +3,12 @@ import logging import sys from tqdm import tqdm -from concurrent.futures import ThreadPoolExecutor from funcy import merge from dvc.utils import env2bool logger = logging.getLogger(__name__) -class TqdmThreadPoolExecutor(ThreadPoolExecutor): - """ - Ensure worker progressbars are cleared away properly. - """ - - def __enter__(self): - """ - Creates a blank initial dummy progress bar if needed so that workers - are forced to create "nested" bars. - """ - blank_bar = Tqdm(bar_format="Multi-Threaded:", leave=False) - if blank_bar.pos > 0: - # already nested - don't need a placeholder bar - blank_bar.close() - self.bar = blank_bar - return super(TqdmThreadPoolExecutor, self).__enter__() - - def __exit__(self, *a, **k): - super(TqdmThreadPoolExecutor, self).__exit__(*a, **k) - self.bar.close() - - class Tqdm(tqdm): """ maximum-compatibility tqdm-based progressbars diff --git a/dvc/remote/local.py b/dvc/remote/local.py index dff4337566..7f950404ab 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -4,6 +4,7 @@ import errno import logging from functools import partial +from concurrent.futures import ThreadPoolExecutor from shortuuid import uuid @@ -29,7 +30,7 @@ ) from dvc.config import Config from dvc.exceptions import DvcException, DownloadError, UploadError -from dvc.progress import Tqdm, TqdmThreadPoolExecutor +from dvc.progress import Tqdm from dvc.path_info import PathInfo @@ -263,7 +264,7 @@ def status( # This is a performance optimization. We can safely assume that, # if the resources that we want to fetch are already cached, - # there's no need to check the remote storage for the existance of + # there's no need to check the remote storage for the existence of # those files. if download and sorted(local_exists) == sorted(md5s): remote_exists = local_exists @@ -359,7 +360,7 @@ def _process( return 0 if jobs > 1: - with TqdmThreadPoolExecutor(max_workers=jobs) as executor: + with ThreadPoolExecutor(max_workers=jobs) as executor: fails = sum(executor.map(func, *plans)) else: fails = sum(map(func, *plans)) diff --git a/dvc/remote/s3.py b/dvc/remote/s3.py index 7557caecd6..ded688b3c7 100644 --- a/dvc/remote/s3.py +++ b/dvc/remote/s3.py @@ -4,7 +4,6 @@ import os import logging -import posixpath from funcy import cached_property from dvc.progress import Tqdm @@ -145,13 +144,13 @@ def _copy(cls, s3, from_info, to_info, extra_args): # multipart. # # If an object's etag looks like '8978c98bb5a48c2fb5f2c4c905768afa', - # then it was transfered as a single part, which means that the chunk + # then it was transferred as a single part, which means that the chunk # size used to transfer it was greater or equal to the ContentLength # of that object. So to preserve that tag over the next transfer, we # could use any value >= ContentLength. # # If an object's etag looks like '50d67013a5e1a4070bef1fc8eea4d5f9-13', - # then it was transfered as a multipart, which means that the chunk + # then it was transferred as a multipart, which means that the chunk # size used to transfer it was less than ContentLength of that object. # Unfortunately, in general, it doesn't mean that the chunk size was # the same throughout the transfer, so it means that in order to @@ -273,4 +272,4 @@ def _generate_download_url(self, path_info, expires=3600): def walk_files(self, path_info, max_items=None): for fname in self._list_paths(path_info, max_items): - yield path_info / posixpath.relpath(fname, path_info.path) + yield path_info.replace(path=fname) diff --git a/dvc/remote/slow_link_detection.py b/dvc/remote/slow_link_detection.py index 0dd2285d60..bffa6e9e8a 100644 --- a/dvc/remote/slow_link_detection.py +++ b/dvc/remote/slow_link_detection.py @@ -18,7 +18,7 @@ "See {blue}https://dvc.org/doc/commands-reference/config#cache{reset} " "for more information.\n" "To disable this message, run:\n" - "'dvc config cache.slow_link_warning false'".format( + "`dvc config cache.slow_link_warning false`".format( blue=colorama.Fore.BLUE, reset=colorama.Fore.RESET ) ) diff --git a/dvc/remote/ssh/__init__.py b/dvc/remote/ssh/__init__.py index c6b39b5b5a..7230f016ab 100644 --- a/dvc/remote/ssh/__init__.py +++ b/dvc/remote/ssh/__init__.py @@ -4,7 +4,6 @@ import itertools import io import os -import posixpath import getpass import logging import threading @@ -268,7 +267,7 @@ def list_cache_paths(self): def walk_files(self, path_info): with self.ssh(path_info) as ssh: for fname in ssh.walk_files(path_info.path): - yield path_info / posixpath.relpath(fname, path_info.path) + yield path_info.replace(path=fname) def makedirs(self, path_info): with self.ssh(path_info) as ssh: diff --git a/dvc/remote/ssh/connection.py b/dvc/remote/ssh/connection.py index dcbb1727cc..b2da0b67ac 100644 --- a/dvc/remote/ssh/connection.py +++ b/dvc/remote/ssh/connection.py @@ -185,8 +185,27 @@ def download(self, src, dest, no_progress_bar=False, progress_title=None): self.sftp.get(src, dest, callback=pbar.update_to) def move(self, src, dst): + """Rename src to dst, if it is not possible (in case src and dst are + on different filesystems) and actual physical copying of data is + happening. + """ self.makedirs(posixpath.dirname(dst)) - self.sftp.rename(src, dst) + + try: + self.sftp.rename(src, dst) + except OSError: + self._atomic_copy(src, dst) + + self.remove(src) + + def _atomic_copy(self, src, dst): + tmp = tmp_fname(dst) + + try: + self.copy(src, tmp) + self.sftp.rename(tmp, dst) + finally: + self.remove(tmp) def upload(self, src, dest, no_progress_bar=False, progress_title=None): self.makedirs(posixpath.dirname(dest)) diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py index 6f8abde42b..f753c14190 100644 --- a/dvc/repo/checkout.py +++ b/dvc/repo/checkout.py @@ -50,12 +50,6 @@ def _checkout( total=total, unit="file", desc="Checkout", disable=total == 0 ) as pbar: for stage in stages: - if stage.locked: - logger.warning( - "DVC-file '{path}' is locked. Its dependencies are" - " not going to be checked out.".format(path=stage.relpath) - ) - failed.extend( stage.checkout(force=force, progress_callback=pbar.update_desc) ) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 7029461630..95eba34b10 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -26,7 +26,7 @@ def _fetch( """Download data items from a cloud and imported repositories Returns: - int: number of succesfully downloaded files + int: number of successfully downloaded files Raises: DownloadError: thrown when there are failed downloads, either @@ -76,20 +76,20 @@ def _fetch_external(self, repo_url, repo_rev, files): cache_dir = self.cache.local.cache_dir try: with external_repo(repo_url, repo_rev, cache_dir=cache_dir) as repo: - cache = NamedCache() - for name in files: - try: - out = repo.find_out_by_relpath(name) - except OutputNotFoundError: - failed += 1 - logger.exception( - "failed to fetch data for '{}'".format(name) - ) - continue - else: - cache.update(out.get_used_cache()) - with repo.state: + cache = NamedCache() + for name in files: + try: + out = repo.find_out_by_relpath(name) + except OutputNotFoundError: + failed += 1 + logger.exception( + "failed to fetch data for '{}'".format(name) + ) + continue + else: + cache.update(out.get_used_cache()) + try: return repo.cloud.pull(cache), failed except DownloadError as exc: diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 258f6d8840..9096216c9d 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -21,7 +21,6 @@ @staticmethod def get(url, path, out=None, rev=None): out = resolve_output(path, out) - path = path.lstrip("/") if Stage.is_valid_filename(out): raise GetDVCFileError() diff --git a/dvc/repo/get_url.py b/dvc/repo/get_url.py index ff191e149e..8ba1878653 100644 --- a/dvc/repo/get_url.py +++ b/dvc/repo/get_url.py @@ -12,7 +12,8 @@ def get_url(url, out=None): if os.path.exists(url): url = os.path.abspath(url) - out = os.path.abspath(out) + + out = os.path.abspath(out) dep, = dependency.loads_from(None, [url]) out, = output.loads_from(None, [out], use_cache=False) diff --git a/dvc/repo/imp.py b/dvc/repo/imp.py index 270d4caa6a..68ead389b6 100644 --- a/dvc/repo/imp.py +++ b/dvc/repo/imp.py @@ -2,6 +2,5 @@ def imp(self, url, path, out=None, rev=None): erepo = {"url": url} if rev is not None: erepo["rev"] = rev - path = path.lstrip("/") return self.imp_url(path, out=out, erepo=erepo, locked=True) diff --git a/dvc/repo/init.py b/dvc/repo/init.py index 19dc20d1bf..e2f6b4702a 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -65,7 +65,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False): if isinstance(scm, NoSCM) and not no_scm: raise InitError( "{repo} is not tracked by any supported scm tool (e.g. git). " - "Use '--no-scm' if you don't want to use any scm.".format( + "Use `--no-scm` if you don't want to use any scm.".format( repo=root_dir ) ) @@ -73,7 +73,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False): if os.path.isdir(dvc_dir): if not force: raise InitError( - "'{repo}' exists. Use '-f' to force.".format( + "'{repo}' exists. Use `-f` to force.".format( repo=relpath(dvc_dir) ) ) diff --git a/dvc/repo/pull.py b/dvc/repo/pull.py index 7c92588247..822897e292 100644 --- a/dvc/repo/pull.py +++ b/dvc/repo/pull.py @@ -1,6 +1,11 @@ from __future__ import unicode_literals -from . import locked +import logging + +from dvc.repo import locked + + +logger = logging.getLogger(__name__) @locked diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 05bb2faf10..fd4e7ad10f 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -200,10 +200,10 @@ def add(self, paths): self.repo.index.add(paths) except AssertionError: msg = ( - "failed to add '{}' to git. You can add those files" - " manually using 'git add'." - " See 'https://github.com/iterative/dvc/issues/610'" - " for more details.".format(str(paths)) + "failed to add '{}' to git. You can add those files " + "manually using `git add`. See " + "https://github.com/iterative/dvc/issues/610 for more " + "details.".format(str(paths)) ) logger.exception(msg) diff --git a/dvc/stage.py b/dvc/stage.py index 4d3b7d3d0e..94ec95c210 100644 --- a/dvc/stage.py +++ b/dvc/stage.py @@ -242,7 +242,7 @@ def _changed_deps(self): if self.is_callback: logger.warning( - "DVC-file '{fname}' is a 'callback' stage " + "DVC-file '{fname}' is a \"callback\" stage " "(has a command and no dependencies) and thus always " "considered as changed.".format(fname=self.relpath) ) @@ -451,8 +451,8 @@ def create(repo, **kwargs): if fname is not None and os.path.basename(fname) != fname: raise StageFileBadNameError( "DVC-file name '{fname}' may not contain subdirectories" - " if '-c|--cwd' (deprecated) is specified. Use '-w|--wdir'" - " along with '-f' to specify DVC-file path with working" + " if `-c|--cwd` (deprecated) is specified. Use `-w|--wdir`" + " along with `-f` to specify DVC-file path with working" " directory.".format(fname=fname) ) wdir = cwd @@ -571,8 +571,8 @@ def _fill_stage_outputs(stage, **kwargs): def _check_dvc_filename(fname): if not Stage.is_valid_filename(fname): raise StageFileBadNameError( - "bad DVC-file name '{}'. DVC-files should be named" - " 'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format( + "bad DVC-file name '{}'. DVC-files should be named " + "'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format( relpath(fname), os.path.basename(fname) ) ) @@ -736,7 +736,7 @@ def check_can_commit(self, force): if not force and not prompt.confirm(msg): raise StageCommitError( "unable to commit changed '{}'. Use `-f|--force` to " - "force.`".format(self.relpath) + "force.".format(self.relpath) ) self.save() diff --git a/dvc/state.py b/dvc/state.py index 61f82549d9..dacc07693c 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -369,14 +369,12 @@ def save(self, path_info, checksum): """ assert path_info.scheme == "local" assert checksum is not None - - path = fspath_py35(path_info) - assert os.path.exists(path) + assert os.path.exists(fspath_py35(path_info)) actual_mtime, actual_size = get_mtime_and_size( - path, self.repo.dvcignore + path_info, self.repo.dvcignore ) - actual_inode = get_inode(path) + actual_inode = get_inode(path_info) existing_record = self.get_state_record_for_inode(actual_inode) if not existing_record: diff --git a/dvc/updater.py b/dvc/updater.py index d2cc16fc4c..8b13754e85 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -9,8 +9,8 @@ from dvc import __version__ from dvc.lock import Lock, LockError -from dvc.utils import is_binary, boxify, env2bool - +from dvc.utils import boxify, env2bool +from dvc.utils.pkg import get_package_manager logger = logging.getLogger(__name__) @@ -123,7 +123,6 @@ def _get_update_instructions(self): "yum": "Run {yellow}yum{reset} update dvc", "yay": "Run {yellow}yay{reset} {blue}-S{reset} dvc", "formula": "Run {yellow}brew{reset} upgrade dvc", - "cask": "Run {yellow}brew cask{reset} upgrade dvc", "apt": ( "Run {yellow}apt-get{reset} install" " {blue}--only-upgrade{reset} dvc" @@ -142,80 +141,6 @@ def _get_update_instructions(self): ), } - package_manager = self._get_package_manager() + package_manager = get_package_manager() return instructions[package_manager] - - @staticmethod - def _get_dvc_path(system): - if system in ("linux", "darwin"): - output = os.popen("which dvc") - else: - output = os.popen("where dvc") - - return output.read().lower() - - @staticmethod - def _is_conda(path): - return "conda" in path - - def _get_linux(self): - import distro - - if not is_binary(): - dvc_path = self._get_dvc_path("linux") - return "conda" if self._is_conda(dvc_path) else "pip" - - package_managers = { - "rhel": "yum", - "centos": "yum", - "fedora": "yum", - "amazon": "yum", - "opensuse": "yum", - "ubuntu": "apt", - "debian": "apt", - } - - return package_managers.get(distro.id()) - - def _get_darwin(self): - if is_binary(): - return None - - package_manager = None - - if __file__.startswith("/usr/local/Cellar"): - package_manager = "formula" - - dvc_path = self._get_dvc_path("darwin") - if self._is_conda(dvc_path): - package_manager = "conda" - - return package_manager or "pip" - - def _get_windows(self): - if is_binary(): - return None - - dvc_path = self._get_dvc_path("windows") - if self._is_conda(dvc_path): - return "conda" - - return "pip" - - def _get_package_manager(self): - import platform - from dvc.exceptions import DvcException - - m = { - "Windows": self._get_windows, - "Darwin": self._get_darwin, - "Linux": self._get_linux, - } - - system = platform.system() - func = m.get(system) - if func is None: - raise DvcException("not supported system '{}'".format(system)) - - return func() diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 538b7a97e1..b0cb6b3fff 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -232,7 +232,7 @@ def _to_chunks_by_chunks_number(list_to_split, num_chunks): def to_chunks(list_to_split, num_chunks=None, chunk_size=None): if (num_chunks and chunk_size) or (not num_chunks and not chunk_size): raise ValueError( - "One and only one of 'num_chunks', 'chunk_size' must be defined" + "Only one among `num_chunks` or `chunk_size` must be defined." ) if chunk_size: return _split(list_to_split, chunk_size) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 811b4195a9..6ded4cefdb 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -7,7 +7,7 @@ from dvc.exceptions import DvcException from dvc.system import System -from dvc.utils import dict_md5, walk_files +from dvc.utils import dict_md5, walk_files, fspath_py35 from dvc.utils.compat import str @@ -21,7 +21,7 @@ def get_inode(path): def get_mtime_and_size(path, dvcignore): - if os.path.isdir(path): + if os.path.isdir(fspath_py35(path)): size = 0 files_mtimes = {} for file_path in walk_files(path, dvcignore): @@ -39,7 +39,7 @@ def get_mtime_and_size(path, dvcignore): # max(mtime(f) for f in non_ignored_files) mtime = dict_md5(files_mtimes) else: - base_stat = os.stat(path) + base_stat = os.stat(fspath_py35(path)) size = base_stat.st_size mtime = base_stat.st_mtime mtime = int(nanotime.timestamp(mtime)) diff --git a/dvc/utils/pkg.py b/dvc/utils/pkg.py new file mode 100644 index 0000000000..a35bd73063 --- /dev/null +++ b/dvc/utils/pkg.py @@ -0,0 +1,51 @@ +from dvc.utils import is_binary + + +def get_linux(): + import distro + + if not is_binary(): + return "pip" + + package_managers = { + "rhel": "yum", + "centos": "yum", + "fedora": "yum", + "amazon": "yum", + "opensuse": "yum", + "ubuntu": "apt", + "debian": "apt", + } + + return package_managers.get(distro.id()) + + +def get_darwin(): + if not is_binary(): + if __file__.startswith("/usr/local/Cellar"): + return "formula" + else: + return "pip" + return None + + +def get_windows(): + return None if is_binary() else "pip" + + +def get_package_manager(): + import platform + from dvc.exceptions import DvcException + + m = { + "Windows": get_windows(), + "Darwin": get_darwin(), + "Linux": get_linux(), + } + + system = platform.system() + func = m.get(system) + if func is None: + raise DvcException("not supported system '{}'".format(system)) + + return func diff --git a/dvc/version.py b/dvc/version.py index 4fc71199cc..6561d954c1 100644 --- a/dvc/version.py +++ b/dvc/version.py @@ -7,7 +7,7 @@ import subprocess -_BASE_VERSION = "0.66.0" +_BASE_VERSION = "0.66.4" def _generate_version(base_version): diff --git a/setup.py b/setup.py index 47bd6c46bb..6817b8dea0 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ def run(self): "funcy>=1.12", "pathspec>=0.6.0", "shortuuid>=0.5.0", - "tqdm>=4.35.0,<5", + "tqdm>=4.36.1,<5", "packaging>=19.0", "win-unicode-console>=0.5; sys_platform == 'win32'", ] @@ -116,7 +116,7 @@ def run(self): "flaky>=3.5.3", "mock>=3.0.0", "xmltodict>=0.11.0", - "awscli>=1.16.125", + "awscli==1.16.266", "google-compute-engine==2.8.13", "pywin32; sys_platform == 'win32'", "Pygments", # required by collective.checkdocs, @@ -127,7 +127,7 @@ def run(self): "pydocstyle<4.0", "jaraco.windows==3.9.2", "mock-ssh-server>=0.6.0", - "moto>=1.3.14.dev55", + "moto==1.3.14.dev464", "rangehttpserver==1.2.0", ] diff --git a/tests/func/test_import.py b/tests/func/test_import.py index fc00ee7b1e..4fe8857aad 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -2,6 +2,7 @@ import os import filecmp +import shutil import pytest from mock import patch @@ -67,6 +68,23 @@ def test_pull_imported_stage(dvc_repo, erepo): assert os.path.isfile(dst_cache) +def test_pull_imported_directory_stage(dvc_repo, erepo): + src = erepo.DATA_DIR + dst = erepo.DATA_DIR + "_imported" + stage_file = dst + ".dvc" + + dvc_repo.imp(erepo.root_dir, src, dst) + + shutil.rmtree(dst) + shutil.rmtree(dvc_repo.cache.local.cache_dir) + + dvc_repo.pull([stage_file]) + + assert os.path.exists(dst) + assert os.path.isdir(dst) + trees_equal(src, dst) + + def test_download_error_pulling_imported_stage(dvc_repo, erepo): src = erepo.FOO dst = erepo.FOO + "_imported" diff --git a/tests/func/test_remote.py b/tests/func/test_remote.py index b938f97f42..af9d041cf7 100644 --- a/tests/func/test_remote.py +++ b/tests/func/test_remote.py @@ -245,5 +245,5 @@ def unreliable_upload(self, from_file, to_info, name=None, **kwargs): def get_last_exc(caplog): - _, exc, _ = caplog.records[-1].exc_info + _, exc, _ = caplog.records[-2].exc_info return exc diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index 148d2a006a..f75edce04b 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -964,8 +964,8 @@ def cmd(self, i, o): return "aws s3 cp {} {}".format(i, o) def write(self, bucket, key, body): - s3 = boto3.resource("s3") - s3.Bucket(bucket).put_object(Key=key, Body=body) + s3 = boto3.client("s3") + s3.put_object(Bucket=bucket, Key=key, Body=body) class TestReproExternalGS(TestReproExternalBase): diff --git a/tests/unit/command/test_imp_url.py b/tests/unit/command/test_imp_url.py index 3ec0efbaac..50cb6e5bd5 100644 --- a/tests/unit/command/test_imp_url.py +++ b/tests/unit/command/test_imp_url.py @@ -29,7 +29,7 @@ def test_failed_import_url(mocker, caplog, dvc_repo): assert cmd.run() == 1 expected_error = ( "failed to import http://somesite.com/file_name. " - "You could also try downloading it manually and " - "adding it with `dvc add` command." + "You could also try downloading it manually, and " + "adding it with `dvc add`." ) assert expected_error in caplog.text diff --git a/tests/unit/remote/ssh/test_connection.py b/tests/unit/remote/ssh/test_connection.py index f0c12ec219..6b1300ce30 100644 --- a/tests/unit/remote/ssh/test_connection.py +++ b/tests/unit/remote/ssh/test_connection.py @@ -111,3 +111,9 @@ def test_hardlink(repo_dir, ssh): def test_copy(repo_dir, ssh): ssh.copy("foo", "link") assert filecmp.cmp("foo", "link") + + +def test_move(repo_dir, ssh): + ssh.move("foo", "copy") + assert os.path.exists("copy") + assert not os.path.exists("foo") diff --git a/tests/unit/remote/test_local.py b/tests/unit/remote/test_local.py index c9681d50ff..461e37b79f 100644 --- a/tests/unit/remote/test_local.py +++ b/tests/unit/remote/test_local.py @@ -5,7 +5,7 @@ def test_status_download_optimization(mocker): """When comparing the status to pull a remote cache, And the desired files to fetch are already on the local cache, - Don't check the existance of the desired files on the remote cache + Don't check the existence of the desired files on the remote cache """ remote = RemoteLOCAL(None, {}) diff --git a/tests/unit/remote/test_s3.py b/tests/unit/remote/test_s3.py index 7861fb5a9a..36b9d1e236 100644 --- a/tests/unit/remote/test_s3.py +++ b/tests/unit/remote/test_s3.py @@ -83,3 +83,20 @@ def test_walk_files(remote): ] assert list(remote.walk_files(remote.path_info / "data")) == files + + +def test_copy_preserve_etag_across_buckets(remote): + s3 = remote.s3 + s3.create_bucket(Bucket="another") + + another = RemoteS3(None, {"url": "s3://another", "region": "us-east-1"}) + + from_info = remote.path_info / "foo" + to_info = another.path_info / "foo" + + remote.copy(from_info, to_info) + + from_etag = RemoteS3.get_etag(s3, "bucket", "foo") + to_etag = RemoteS3.get_etag(s3, "another", "foo") + + assert from_etag == to_etag diff --git a/tests/unit/test_logger.py b/tests/unit/test_logger.py index 5560acd4ea..bcc061ff35 100644 --- a/tests/unit/test_logger.py +++ b/tests/unit/test_logger.py @@ -45,11 +45,7 @@ def test_error(self, caplog): with caplog.at_level(logging.INFO, logger="dvc"): logger.error("message") - expected = ( - "{red}ERROR{nc}: message\n" - "\n" - "{footer}".format(footer=formatter.footer, **colors) - ) + expected = "{red}ERROR{nc}: message\n".format(**colors) assert expected == formatter.format(caplog.records[0]) @@ -60,11 +56,7 @@ def test_exception(self, caplog): except Exception: logger.exception("message") - expected = ( - "{red}ERROR{nc}: message\n" - "\n" - "{footer}".format(footer=formatter.footer, **colors) - ) + expected = "{red}ERROR{nc}: message\n".format(**colors) assert expected == formatter.format(caplog.records[0]) @@ -75,11 +67,7 @@ def test_exception_with_description_and_without_message(self, caplog): except Exception: logger.exception("") - expected = ( - "{red}ERROR{nc}: description\n" - "\n" - "{footer}".format(footer=formatter.footer, **colors) - ) + expected = "{red}ERROR{nc}: description\n".format(**colors) assert expected == formatter.format(caplog.records[0]) @@ -90,10 +78,8 @@ def test_exception_with_description_and_message(self, caplog): except Exception: logger.exception("message") - expected = ( - "{red}ERROR{nc}: message - description\n" - "\n" - "{footer}".format(footer=formatter.footer, **colors) + expected = "{red}ERROR{nc}: message - description\n".format( + **colors ) assert expected == formatter.format(caplog.records[0]) @@ -110,13 +96,8 @@ def test_exception_under_verbose(self, caplog): "{red}ERROR{nc}: description\n" "{red}{line}{nc}\n" "{stack_trace}" - "{red}{line}{nc}\n" - "\n" - "{footer}".format( - footer=formatter.footer, - line="-" * 60, - stack_trace=stack_trace, - **colors + "{red}{line}{nc}\n".format( + line="-" * 60, stack_trace=stack_trace, **colors ) ) @@ -138,10 +119,7 @@ def test_nested_exceptions(self, caplog): "{red}ERROR{nc}: message - second: first\n" "{red}{line}{nc}\n" "{stack_trace}" - "{red}{line}{nc}\n" - "\n" - "{footer}".format( - footer=formatter.footer, + "{red}{line}{nc}\n".format( line="-" * 60, stack_trace="\n".join([first_traceback, second_traceback]), **colors diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 30a96b63fc..d479b10d99 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -14,6 +14,7 @@ contains_symlink_up_to, BasePathNotInCheckedPathException, get_parent_dirs_up_to, + get_inode, ) from mock import patch from tests.basic_env import TestDir @@ -149,3 +150,20 @@ def test_relpath_windows_different_drives(): rel_info = relpath(info1, info2) assert isinstance(rel_info, str) assert rel_info == path1 + + +def test_get_inode(repo_dir): + path = repo_dir.FOO + path_info = PathInfo(path) + assert get_inode(path) == get_inode(path_info) + + +@pytest.mark.parametrize("path", [TestDir.DATA, TestDir.DATA_DIR]) +def test_path_object_and_str_are_valid_types_get_mtime_and_size( + path, repo_dir +): + dvcignore = DvcIgnoreFilter(repo_dir.root_dir) + time, size = get_mtime_and_size(path, dvcignore) + object_time, object_size = get_mtime_and_size(PathInfo(path), dvcignore) + assert time == object_time + assert size == object_size From 4da42d188c0eaf15804d1c61d42361b619499e37 Mon Sep 17 00:00:00 2001 From: n3hrox Date: Mon, 4 Nov 2019 21:00:09 +0100 Subject: [PATCH 09/12] use patch heuristic to determine conda --- dvc/utils/pkg.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dvc/utils/pkg.py b/dvc/utils/pkg.py index a35bd73063..f080d1e1dc 100644 --- a/dvc/utils/pkg.py +++ b/dvc/utils/pkg.py @@ -1,9 +1,21 @@ from dvc.utils import is_binary +def is_conda(): + try: + from .build import PKG # patched during conda package build + + return PKG == "conda" + except ImportError: + return False + + def get_linux(): import distro + if is_conda(): + return "conda" + if not is_binary(): return "pip" @@ -21,6 +33,8 @@ def get_linux(): def get_darwin(): + if is_conda(): + return "conda" if not is_binary(): if __file__.startswith("/usr/local/Cellar"): return "formula" @@ -30,6 +44,8 @@ def get_darwin(): def get_windows(): + if is_conda(): + return "conda" return None if is_binary() else "pip" From e3417f78bea69425600d645ebbf24d7b0bdcb3e8 Mon Sep 17 00:00:00 2001 From: n3hrox Date: Mon, 4 Nov 2019 21:02:51 +0100 Subject: [PATCH 10/12] revert changes --- .github/PULL_REQUEST_TEMPLATE.md | 24 +--- .github/release-drafter.yml | 5 - .gitignore | 1 - .restyled.yaml | 11 -- dvc/command/destroy.py | 2 +- dvc/command/gc.py | 2 +- dvc/command/imp_url.py | 2 +- dvc/command/init.py | 2 +- dvc/command/remove.py | 2 +- dvc/command/run.py | 6 +- dvc/command/version.py | 6 +- dvc/exceptions.py | 6 +- dvc/lock.py | 11 -- dvc/logger.py | 27 ++-- dvc/main.py | 4 - dvc/output/base.py | 2 +- dvc/output/local.py | 2 +- dvc/path_info.py | 169 ++++++++++++----------- dvc/progress.py | 23 +++ dvc/remote/local.py | 7 +- dvc/remote/s3.py | 7 +- dvc/remote/slow_link_detection.py | 2 +- dvc/remote/ssh/__init__.py | 3 +- dvc/remote/ssh/connection.py | 21 +-- dvc/repo/checkout.py | 6 + dvc/repo/fetch.py | 28 ++-- dvc/repo/get.py | 1 + dvc/repo/get_url.py | 3 +- dvc/repo/imp.py | 1 + dvc/repo/init.py | 4 +- dvc/repo/pull.py | 7 +- dvc/scm/git/__init__.py | 8 +- dvc/stage.py | 12 +- dvc/state.py | 8 +- dvc/updater.py | 81 ++++++++++- dvc/utils/__init__.py | 2 +- dvc/utils/fs.py | 6 +- dvc/version.py | 2 +- setup.py | 6 +- tests/func/test_import.py | 18 --- tests/func/test_remote.py | 2 +- tests/func/test_repro.py | 4 +- tests/unit/command/test_imp_url.py | 4 +- tests/unit/remote/ssh/test_connection.py | 6 - tests/unit/remote/test_local.py | 2 +- tests/unit/remote/test_s3.py | 17 --- tests/unit/test_logger.py | 38 +++-- tests/unit/utils/test_fs.py | 18 --- 48 files changed, 322 insertions(+), 309 deletions(-) delete mode 100644 .github/release-drafter.yml delete mode 100644 .restyled.yaml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 92da7b4003..74975611dc 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,19 +1,9 @@ -* [ ] ❗ Have you followed the guidelines in the - [Contributing to DVC](https://dvc.org/doc/user-guide/contributing/core) - list? +* [ ] Have you followed the guidelines in our + [Contributing document](https://dvc.org/doc/user-guide/contributing/core)? -* [ ] 📖 Check this box if this PR **does not** require - [documentation](https://dvc.org/doc) updates, or if it does **and** you - have created a separate PR in - [dvc.org](https://github.com/iterative/dvc.org) - with such updates (or at least opened an issue about it in that repo). - Please link below to your PR (or issue) in the - [dvc.org](https://github.com/iterative/dvc.org) repo. - -* [ ] ❌ Have you checked DeepSource, CodeClimate, and other sanity checks - below? We consider their findings recommendatory and don't expect - everything to be addresses. Please review them carefully and fix those - that actually improve code or fix bugs. - -Thank you for the contribution - we'll try to review it as soon as possible. 🙏 +* [ ] Does your PR affect documented changes or does it add new functionality + that should be documented? If yes, have you created a PR for + [dvc.org](https://github.com/iterative/dvc.org) documenting it or at + least opened an issue for it? If so, please add a link to it. +----- diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml deleted file mode 100644 index abf195c69e..0000000000 --- a/.github/release-drafter.yml +++ /dev/null @@ -1,5 +0,0 @@ -# Config for https://github.com/apps/release-drafter -branches: - - master -template: | - $CHANGES diff --git a/.gitignore b/.gitignore index 33be5cb20c..d2a206fe3d 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,6 @@ neatlynx/__pycache__ cache *.pyc .env/ -.env2.7/ cache .dvc.conf.lock diff --git a/.restyled.yaml b/.restyled.yaml deleted file mode 100644 index d5eeec7157..0000000000 --- a/.restyled.yaml +++ /dev/null @@ -1,11 +0,0 @@ ---- -restylers: - - name: black - image: restyled/restyler-black:v19.3b0 - command: - - black - arguments: [] - include: - - "**/*.py" - interpreters: - - python diff --git a/dvc/command/destroy.py b/dvc/command/destroy.py index 3dc2513875..ca739de4ae 100644 --- a/dvc/command/destroy.py +++ b/dvc/command/destroy.py @@ -24,7 +24,7 @@ def run(self): if not self.args.force and not prompt.confirm(statement): raise DvcException( "cannot destroy without a confirmation from the user." - " Use `-f` to force." + " Use '-f' to force." ) self.repo.destroy() diff --git a/dvc/command/gc.py b/dvc/command/gc.py index 45d774f6d4..699f88304b 100644 --- a/dvc/command/gc.py +++ b/dvc/command/gc.py @@ -83,7 +83,7 @@ def add_parser(subparsers, parent_parser): "--all-commits", action="store_true", default=False, - help=argparse.SUPPRESS, + help="Keep data files for all commits.", ) gc_parser.add_argument( "-c", diff --git a/dvc/command/imp_url.py b/dvc/command/imp_url.py index 1b0b1cf87c..012ea80c0e 100644 --- a/dvc/command/imp_url.py +++ b/dvc/command/imp_url.py @@ -19,7 +19,7 @@ def run(self): except DvcException: logger.exception( "failed to import {}. You could also try downloading " - "it manually, and adding it with `dvc add`.".format( + "it manually and adding it with `dvc add` command.".format( self.args.url ) ) diff --git a/dvc/command/init.py b/dvc/command/init.py index 075e3be4c0..0e9bdf83f8 100644 --- a/dvc/command/init.py +++ b/dvc/command/init.py @@ -53,7 +53,7 @@ def add_parser(subparsers, parent_parser): action="store_true", default=False, help=( - "Overwrite existing '.dvc/' directory. " + "Overwrite existing '.dvc' directory. " "This operation removes local cache." ), ) diff --git a/dvc/command/remove.py b/dvc/command/remove.py index a004480290..814adcfae3 100644 --- a/dvc/command/remove.py +++ b/dvc/command/remove.py @@ -28,7 +28,7 @@ def _is_outs_only(self, target): raise DvcException( "Cannot purge without a confirmation from the user." - " Use `-f` to force." + " Use '-f' to force." ) def run(self): diff --git a/dvc/command/run.py b/dvc/command/run.py index c51f10baba..b0761435f3 100644 --- a/dvc/command/run.py +++ b/dvc/command/run.py @@ -27,9 +27,9 @@ def run(self): ] ): # pragma: no cover logger.error( - "too few arguments. Specify at least one: `-d`, `-o`, `-O`, " - "`-m`, `-M`, `--outs-persist`, `--outs-persist-no-cache`, " - "`command`." + "too few arguments. Specify at least one: '-d', '-o', '-O'," + " '-m', '-M', '--outs-persist', '--outs-persist-no-cache'," + " 'command'." ) return 1 diff --git a/dvc/command/version.py b/dvc/command/version.py index 7cf00c7516..078f1fdec1 100644 --- a/dvc/command/version.py +++ b/dvc/command/version.py @@ -18,7 +18,7 @@ from dvc.version import __version__ from dvc.exceptions import DvcException, NotDvcRepoError from dvc.system import System -from dvc.utils.pkg import get_package_manager + logger = logging.getLogger(__name__) @@ -31,19 +31,17 @@ def run(self): python_version = platform.python_version() platform_type = platform.platform() binary = is_binary() - package_manager = get_package_manager() + info = ( "DVC version: {dvc_version}\n" "Python version: {python_version}\n" "Platform: {platform_type}\n" "Binary: {binary}\n" - "Package manager: {package_manager}\n" ).format( dvc_version=dvc_version, python_version=python_version, platform_type=platform_type, binary=binary, - package_manager=package_manager, ) try: diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 16b8b837b3..2309bcdbf8 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -179,7 +179,7 @@ class ConfirmRemoveError(DvcException): def __init__(self, path): super(ConfirmRemoveError, self).__init__( "unable to remove '{}' without a confirmation from the user. Use " - "`-f` to force.".format(path) + "'-f' to force.".format(path) ) @@ -209,7 +209,7 @@ class NoMetricsError(DvcException): def __init__(self): super(NoMetricsError, self).__init__( "no metric files in this repository. " - "Use `dvc metrics add` to add a metric file to track." + "Use 'dvc metrics add' to add a metric file to track." ) @@ -247,7 +247,7 @@ def __init__(self, out_1, out_2): class CheckoutErrorSuggestGit(DvcException): def __init__(self, target, cause): super(CheckoutErrorSuggestGit, self).__init__( - "Did you mean `git checkout {}`?".format(target), cause=cause + "Did you mean 'git checkout {}'?".format(target), cause=cause ) diff --git a/dvc/lock.py b/dvc/lock.py index d3bc83d199..8859ff9e72 100644 --- a/dvc/lock.py +++ b/dvc/lock.py @@ -87,17 +87,6 @@ def _set_claimfile(self, pid=None): self._tmp_dir, filename + ".lock" ) - # Fix for __del__ bug in flufl.lock [1] which is causing errors on - # Python shutdown [2]. - # [1] https://gitlab.com/warsaw/flufl.lock/issues/7 - # [2] https://github.com/iterative/dvc/issues/2573 - def __del__(self): - try: - if self._owned: - self.finalize() - except ImportError: - pass - else: import zc.lockfile diff --git a/dvc/logger.py b/dvc/logger.py index f26ef31cee..71f9d9147c 100644 --- a/dvc/logger.py +++ b/dvc/logger.py @@ -11,17 +11,6 @@ import colorama -FOOTER = ( - "\n{yellow}Having any troubles?{nc}" - " Hit us up at {blue}https://dvc.org/support{nc}," - " we are always happy to help!" -).format( - blue=colorama.Fore.BLUE, - nc=colorama.Fore.RESET, - yellow=colorama.Fore.YELLOW, -) - - class LoggingException(Exception): def __init__(self, record): msg = "failed to log {}".format(str(record)) @@ -59,6 +48,16 @@ class ColorFormatter(logging.Formatter): "CRITICAL": colorama.Fore.RED, } + footer = ( + "{yellow}Having any troubles?{nc}" + " Hit us up at {blue}https://dvc.org/support{nc}," + " we are always happy to help!" + ).format( + blue=colorama.Fore.BLUE, + nc=colorama.Fore.RESET, + yellow=colorama.Fore.YELLOW, + ) + def format(self, record): if record.levelname == "INFO": return record.msg @@ -67,7 +66,10 @@ def format(self, record): exception, stack_trace = self._parse_exc(record.exc_info) return ( - "{color}{levelname}{nc}: {description}" "{stack_trace}\n" + "{color}{levelname}{nc}: {description}" + "{stack_trace}\n" + "\n" + "{footer}" ).format( color=self.color_code.get(record.levelname, ""), nc=colorama.Fore.RESET, @@ -75,6 +77,7 @@ def format(self, record): description=self._description(record.msg, exception), msg=record.msg, stack_trace=stack_trace, + footer=self.footer, ) return "{color}{levelname}{nc}: {msg}".format( diff --git a/dvc/main.py b/dvc/main.py index 5d3f01193d..f8b94b8d31 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -6,7 +6,6 @@ from dvc.cli import parse_args from dvc.lock import LockError -from dvc.logger import FOOTER from dvc.config import ConfigError from dvc.analytics import Analytics from dvc.exceptions import NotDvcRepoError, DvcParserError @@ -76,9 +75,6 @@ def main(argv=None): # so won't be reused by any other subsequent run anyway. clean_repos() - if ret != 0: - logger.info(FOOTER) - Analytics().send_cmd(cmd, args, ret) return ret diff --git a/dvc/output/base.py b/dvc/output/base.py index 8fa3ad367c..f940d3f52e 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -334,7 +334,7 @@ def unprotect(self): self.remote.unprotect(self.path_info) def _collect_used_dir_cache(self, remote=None, force=False, jobs=None): - """Get a list of `info`s related to the given directory. + """Get a list of `info`s retaled to the given directory. - Pull the directory entry from the remote cache if it was changed. diff --git a/dvc/output/local.py b/dvc/output/local.py index cbd9c9503b..777ac37569 100644 --- a/dvc/output/local.py +++ b/dvc/output/local.py @@ -26,7 +26,7 @@ def _parse_path(self, remote, path): # so we should expect both posix and windows style paths. # PathInfo accepts both, i.e. / works everywhere, \ only on win. # - # FIXME: if we have Windows path containing / or posix one with \ + # FIXME: if we have Windows path containig / or posix one with \ # then we have #2059 bug and can't really handle that. p = self.REMOTE.path_cls(path) if not p.is_absolute(): diff --git a/dvc/path_info.py b/dvc/path_info.py index 6707c8d81c..3db44a4393 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -8,12 +8,13 @@ from dvc.utils.compat import str, builtin_str, basestring, is_py2 from dvc.utils.compat import pathlib, urlparse -from dvc.utils import relpath # On Python 2.7/Windows sys.getfilesystemencoding() is set to mbcs, # which is lossy, thus we can't use that, # see https://github.com/mcmtroffaes/pathlib2/issues/56. +from dvc.utils import relpath + if is_py2: fs_encoding = "utf-8" @@ -111,80 +112,52 @@ class PosixPathInfo(PathInfo, pathlib.PurePosixPath): pass -class _URLPathInfo(PosixPathInfo): - def __str__(self): - return self.__fspath__() - - __unicode__ = __str__ - - class _URLPathParents(object): - def __init__(self, src): - self.src = src - self._parents = self.src._path.parents + def __init__(self, pathcls, scheme, netloc, path): + self._scheme = scheme + self._netloc = netloc + self._parents = path.parents + self._pathcls = pathcls def __len__(self): return len(self._parents) def __getitem__(self, idx): - return self.src.replace(path=self._parents[idx]) + return self._pathcls.from_parts( + scheme=self._scheme, + netloc=self._netloc, + path=self._parents[idx].fspath, + ) def __repr__(self): - return "<{}.parents>".format(self.src) + return "<{}.parents>".format(self._pathcls.__name__) class URLInfo(object): DEFAULT_PORTS = {"http": 80, "https": 443, "ssh": 22, "hdfs": 0} def __init__(self, url): - p = urlparse(url) - assert not p.query and not p.params and not p.fragment - assert p.password is None - - self.fill_parts(p.scheme, p.hostname, p.username, p.port, p.path) + self.parsed = urlparse(url) + assert self.parsed.scheme != "remote" @classmethod def from_parts( - cls, scheme=None, host=None, user=None, port=None, path="", netloc=None + cls, scheme=None, netloc=None, host=None, user=None, port=None, path="" ): - assert bool(host) ^ bool(netloc) - - if netloc is not None: - return cls("{}://{}{}".format(scheme, netloc, path)) - - obj = cls.__new__(cls) - obj.fill_parts(scheme, host, user, port, path) - return obj - - def fill_parts(self, scheme, host, user, port, path): - assert scheme != "remote" - assert isinstance(path, (basestring, _URLPathInfo)) - - self.scheme, self.host, self.user = scheme, host, user - self.port = int(port) if port else self.DEFAULT_PORTS.get(self.scheme) - - if isinstance(path, _URLPathInfo): - self._spath = builtin_str(path) - self._path = path - else: - if path and path[0] != "/": - path = "/" + path - self._spath = path - - @property - def _base_parts(self): - return (self.scheme, self.host, self.user, self.port) - - @property - def parts(self): - return self._base_parts + self._path.parts + assert scheme and (bool(host) ^ bool(netloc)) - def replace(self, path=None): - return self.from_parts(*self._base_parts, path=path) + if netloc is None: + netloc = host + if user: + netloc = user + "@" + host + if port: + netloc += ":" + str(port) + return cls("{}://{}{}".format(scheme, netloc, path)) @cached_property def url(self): - return "{}://{}{}".format(self.scheme, self.netloc, self._spath) + p = self.parsed + return "{}://{}{}".format(p.scheme, self.netloc, p.path) def __str__(self): return self.url @@ -197,60 +170,92 @@ def __eq__(self, other): other = self.__class__(other) return ( self.__class__ == other.__class__ - and self._base_parts == other._base_parts + and self.scheme == other.scheme + and self.netloc == other.netloc and self._path == other._path ) def __hash__(self): - return hash(self.parts) + return hash(self.url) def __div__(self, other): - return self.replace(path=posixpath.join(self._spath, other)) + p = self.parsed + new_path = posixpath.join(p.path, str(other)) + if not new_path.startswith("/"): + new_path = "/" + new_path + new_url = "{}://{}{}".format(p.scheme, p.netloc, new_path) + return self.__class__(new_url) __truediv__ = __div__ + def __getattr__(self, name): + # When deepcopy is called, it creates and object without __init__, + # self.parsed is not initialized and it causes infinite recursion. + # More on this special casing here: + # https://stackoverflow.com/a/47300262/298182 + if name.startswith("__"): + raise AttributeError(name) + return getattr(self.parsed, name) + + @cached_property + def netloc(self): + p = self.parsed + netloc = p.hostname + if p.username: + netloc = p.username + "@" + netloc + if p.port and int(p.port) != self.DEFAULT_PORTS.get(p.scheme): + netloc += ":" + str(p.port) + return netloc + @property - def path(self): - return self._spath + def port(self): + return self.parsed.port or self.DEFAULT_PORTS.get(self.parsed.scheme) + + @property + def host(self): + return self.parsed.hostname + + @property + def user(self): + return self.parsed.username @cached_property def _path(self): - return _URLPathInfo(self._spath) + return PosixPathInfo(self.parsed.path) @property def name(self): return self._path.name - @cached_property - def netloc(self): - netloc = self.host - if self.user: - netloc = self.user + "@" + netloc - if self.port and int(self.port) != self.DEFAULT_PORTS.get(self.scheme): - netloc += ":" + str(self.port) - return netloc + @property + def parts(self): + return (self.scheme, self.netloc) + self._path.parts @property def bucket(self): - return self.netloc + return self.parsed.netloc @property def parent(self): - return self.replace(path=self._path.parent) + return self.from_parts( + scheme=self.scheme, + netloc=self.parsed.netloc, + path=self._path.parent.fspath, + ) @property def parents(self): - return _URLPathParents(self) + return _URLPathParents( + type(self), self.scheme, self.parsed.netloc, self._path + ) def relative_to(self, other): - if isinstance(other, basestring): - other = self.__class__(other) - if self.__class__ != other.__class__: - msg = "'{}' has incompatible class with '{}'".format(self, other) - raise ValueError(msg) - if self._base_parts != other._base_parts: - msg = "'{}' does not start with '{}'".format(self, other) - raise ValueError(msg) + if isinstance(other, str): + other = URLInfo(other) + if self.scheme != other.scheme or self.netloc != other.netloc: + raise ValueError( + "'{}' does not start with '{}'".format(self, other) + ) return self._path.relative_to(other._path) def isin(self, other): @@ -258,12 +263,14 @@ def isin(self, other): other = self.__class__(other) elif self.__class__ != other.__class__: return False - return self._base_parts == other._base_parts and self._path.isin( - other._path + return ( + self.scheme == other.scheme + and self.netloc == other.netloc + and self._path.isin(other._path) ) class CloudURLInfo(URLInfo): @property def path(self): - return self._spath.lstrip("/") + return self.parsed.path.lstrip("/") diff --git a/dvc/progress.py b/dvc/progress.py index 68c6ddd594..7e85b6e18c 100644 --- a/dvc/progress.py +++ b/dvc/progress.py @@ -3,12 +3,35 @@ import logging import sys from tqdm import tqdm +from concurrent.futures import ThreadPoolExecutor from funcy import merge from dvc.utils import env2bool logger = logging.getLogger(__name__) +class TqdmThreadPoolExecutor(ThreadPoolExecutor): + """ + Ensure worker progressbars are cleared away properly. + """ + + def __enter__(self): + """ + Creates a blank initial dummy progress bar if needed so that workers + are forced to create "nested" bars. + """ + blank_bar = Tqdm(bar_format="Multi-Threaded:", leave=False) + if blank_bar.pos > 0: + # already nested - don't need a placeholder bar + blank_bar.close() + self.bar = blank_bar + return super(TqdmThreadPoolExecutor, self).__enter__() + + def __exit__(self, *a, **k): + super(TqdmThreadPoolExecutor, self).__exit__(*a, **k) + self.bar.close() + + class Tqdm(tqdm): """ maximum-compatibility tqdm-based progressbars diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 7f950404ab..dff4337566 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -4,7 +4,6 @@ import errno import logging from functools import partial -from concurrent.futures import ThreadPoolExecutor from shortuuid import uuid @@ -30,7 +29,7 @@ ) from dvc.config import Config from dvc.exceptions import DvcException, DownloadError, UploadError -from dvc.progress import Tqdm +from dvc.progress import Tqdm, TqdmThreadPoolExecutor from dvc.path_info import PathInfo @@ -264,7 +263,7 @@ def status( # This is a performance optimization. We can safely assume that, # if the resources that we want to fetch are already cached, - # there's no need to check the remote storage for the existence of + # there's no need to check the remote storage for the existance of # those files. if download and sorted(local_exists) == sorted(md5s): remote_exists = local_exists @@ -360,7 +359,7 @@ def _process( return 0 if jobs > 1: - with ThreadPoolExecutor(max_workers=jobs) as executor: + with TqdmThreadPoolExecutor(max_workers=jobs) as executor: fails = sum(executor.map(func, *plans)) else: fails = sum(map(func, *plans)) diff --git a/dvc/remote/s3.py b/dvc/remote/s3.py index ded688b3c7..7557caecd6 100644 --- a/dvc/remote/s3.py +++ b/dvc/remote/s3.py @@ -4,6 +4,7 @@ import os import logging +import posixpath from funcy import cached_property from dvc.progress import Tqdm @@ -144,13 +145,13 @@ def _copy(cls, s3, from_info, to_info, extra_args): # multipart. # # If an object's etag looks like '8978c98bb5a48c2fb5f2c4c905768afa', - # then it was transferred as a single part, which means that the chunk + # then it was transfered as a single part, which means that the chunk # size used to transfer it was greater or equal to the ContentLength # of that object. So to preserve that tag over the next transfer, we # could use any value >= ContentLength. # # If an object's etag looks like '50d67013a5e1a4070bef1fc8eea4d5f9-13', - # then it was transferred as a multipart, which means that the chunk + # then it was transfered as a multipart, which means that the chunk # size used to transfer it was less than ContentLength of that object. # Unfortunately, in general, it doesn't mean that the chunk size was # the same throughout the transfer, so it means that in order to @@ -272,4 +273,4 @@ def _generate_download_url(self, path_info, expires=3600): def walk_files(self, path_info, max_items=None): for fname in self._list_paths(path_info, max_items): - yield path_info.replace(path=fname) + yield path_info / posixpath.relpath(fname, path_info.path) diff --git a/dvc/remote/slow_link_detection.py b/dvc/remote/slow_link_detection.py index bffa6e9e8a..0dd2285d60 100644 --- a/dvc/remote/slow_link_detection.py +++ b/dvc/remote/slow_link_detection.py @@ -18,7 +18,7 @@ "See {blue}https://dvc.org/doc/commands-reference/config#cache{reset} " "for more information.\n" "To disable this message, run:\n" - "`dvc config cache.slow_link_warning false`".format( + "'dvc config cache.slow_link_warning false'".format( blue=colorama.Fore.BLUE, reset=colorama.Fore.RESET ) ) diff --git a/dvc/remote/ssh/__init__.py b/dvc/remote/ssh/__init__.py index 7230f016ab..c6b39b5b5a 100644 --- a/dvc/remote/ssh/__init__.py +++ b/dvc/remote/ssh/__init__.py @@ -4,6 +4,7 @@ import itertools import io import os +import posixpath import getpass import logging import threading @@ -267,7 +268,7 @@ def list_cache_paths(self): def walk_files(self, path_info): with self.ssh(path_info) as ssh: for fname in ssh.walk_files(path_info.path): - yield path_info.replace(path=fname) + yield path_info / posixpath.relpath(fname, path_info.path) def makedirs(self, path_info): with self.ssh(path_info) as ssh: diff --git a/dvc/remote/ssh/connection.py b/dvc/remote/ssh/connection.py index b2da0b67ac..dcbb1727cc 100644 --- a/dvc/remote/ssh/connection.py +++ b/dvc/remote/ssh/connection.py @@ -185,27 +185,8 @@ def download(self, src, dest, no_progress_bar=False, progress_title=None): self.sftp.get(src, dest, callback=pbar.update_to) def move(self, src, dst): - """Rename src to dst, if it is not possible (in case src and dst are - on different filesystems) and actual physical copying of data is - happening. - """ self.makedirs(posixpath.dirname(dst)) - - try: - self.sftp.rename(src, dst) - except OSError: - self._atomic_copy(src, dst) - - self.remove(src) - - def _atomic_copy(self, src, dst): - tmp = tmp_fname(dst) - - try: - self.copy(src, tmp) - self.sftp.rename(tmp, dst) - finally: - self.remove(tmp) + self.sftp.rename(src, dst) def upload(self, src, dest, no_progress_bar=False, progress_title=None): self.makedirs(posixpath.dirname(dest)) diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py index f753c14190..6f8abde42b 100644 --- a/dvc/repo/checkout.py +++ b/dvc/repo/checkout.py @@ -50,6 +50,12 @@ def _checkout( total=total, unit="file", desc="Checkout", disable=total == 0 ) as pbar: for stage in stages: + if stage.locked: + logger.warning( + "DVC-file '{path}' is locked. Its dependencies are" + " not going to be checked out.".format(path=stage.relpath) + ) + failed.extend( stage.checkout(force=force, progress_callback=pbar.update_desc) ) diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 95eba34b10..7029461630 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -26,7 +26,7 @@ def _fetch( """Download data items from a cloud and imported repositories Returns: - int: number of successfully downloaded files + int: number of succesfully downloaded files Raises: DownloadError: thrown when there are failed downloads, either @@ -76,20 +76,20 @@ def _fetch_external(self, repo_url, repo_rev, files): cache_dir = self.cache.local.cache_dir try: with external_repo(repo_url, repo_rev, cache_dir=cache_dir) as repo: - with repo.state: - cache = NamedCache() - for name in files: - try: - out = repo.find_out_by_relpath(name) - except OutputNotFoundError: - failed += 1 - logger.exception( - "failed to fetch data for '{}'".format(name) - ) - continue - else: - cache.update(out.get_used_cache()) + cache = NamedCache() + for name in files: + try: + out = repo.find_out_by_relpath(name) + except OutputNotFoundError: + failed += 1 + logger.exception( + "failed to fetch data for '{}'".format(name) + ) + continue + else: + cache.update(out.get_used_cache()) + with repo.state: try: return repo.cloud.pull(cache), failed except DownloadError as exc: diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 9096216c9d..258f6d8840 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -21,6 +21,7 @@ @staticmethod def get(url, path, out=None, rev=None): out = resolve_output(path, out) + path = path.lstrip("/") if Stage.is_valid_filename(out): raise GetDVCFileError() diff --git a/dvc/repo/get_url.py b/dvc/repo/get_url.py index 8ba1878653..ff191e149e 100644 --- a/dvc/repo/get_url.py +++ b/dvc/repo/get_url.py @@ -12,8 +12,7 @@ def get_url(url, out=None): if os.path.exists(url): url = os.path.abspath(url) - - out = os.path.abspath(out) + out = os.path.abspath(out) dep, = dependency.loads_from(None, [url]) out, = output.loads_from(None, [out], use_cache=False) diff --git a/dvc/repo/imp.py b/dvc/repo/imp.py index 68ead389b6..270d4caa6a 100644 --- a/dvc/repo/imp.py +++ b/dvc/repo/imp.py @@ -2,5 +2,6 @@ def imp(self, url, path, out=None, rev=None): erepo = {"url": url} if rev is not None: erepo["rev"] = rev + path = path.lstrip("/") return self.imp_url(path, out=out, erepo=erepo, locked=True) diff --git a/dvc/repo/init.py b/dvc/repo/init.py index e2f6b4702a..19dc20d1bf 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -65,7 +65,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False): if isinstance(scm, NoSCM) and not no_scm: raise InitError( "{repo} is not tracked by any supported scm tool (e.g. git). " - "Use `--no-scm` if you don't want to use any scm.".format( + "Use '--no-scm' if you don't want to use any scm.".format( repo=root_dir ) ) @@ -73,7 +73,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False): if os.path.isdir(dvc_dir): if not force: raise InitError( - "'{repo}' exists. Use `-f` to force.".format( + "'{repo}' exists. Use '-f' to force.".format( repo=relpath(dvc_dir) ) ) diff --git a/dvc/repo/pull.py b/dvc/repo/pull.py index 822897e292..7c92588247 100644 --- a/dvc/repo/pull.py +++ b/dvc/repo/pull.py @@ -1,11 +1,6 @@ from __future__ import unicode_literals -import logging - -from dvc.repo import locked - - -logger = logging.getLogger(__name__) +from . import locked @locked diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index fd4e7ad10f..05bb2faf10 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -200,10 +200,10 @@ def add(self, paths): self.repo.index.add(paths) except AssertionError: msg = ( - "failed to add '{}' to git. You can add those files " - "manually using `git add`. See " - "https://github.com/iterative/dvc/issues/610 for more " - "details.".format(str(paths)) + "failed to add '{}' to git. You can add those files" + " manually using 'git add'." + " See 'https://github.com/iterative/dvc/issues/610'" + " for more details.".format(str(paths)) ) logger.exception(msg) diff --git a/dvc/stage.py b/dvc/stage.py index 94ec95c210..4d3b7d3d0e 100644 --- a/dvc/stage.py +++ b/dvc/stage.py @@ -242,7 +242,7 @@ def _changed_deps(self): if self.is_callback: logger.warning( - "DVC-file '{fname}' is a \"callback\" stage " + "DVC-file '{fname}' is a 'callback' stage " "(has a command and no dependencies) and thus always " "considered as changed.".format(fname=self.relpath) ) @@ -451,8 +451,8 @@ def create(repo, **kwargs): if fname is not None and os.path.basename(fname) != fname: raise StageFileBadNameError( "DVC-file name '{fname}' may not contain subdirectories" - " if `-c|--cwd` (deprecated) is specified. Use `-w|--wdir`" - " along with `-f` to specify DVC-file path with working" + " if '-c|--cwd' (deprecated) is specified. Use '-w|--wdir'" + " along with '-f' to specify DVC-file path with working" " directory.".format(fname=fname) ) wdir = cwd @@ -571,8 +571,8 @@ def _fill_stage_outputs(stage, **kwargs): def _check_dvc_filename(fname): if not Stage.is_valid_filename(fname): raise StageFileBadNameError( - "bad DVC-file name '{}'. DVC-files should be named " - "'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format( + "bad DVC-file name '{}'. DVC-files should be named" + " 'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format( relpath(fname), os.path.basename(fname) ) ) @@ -736,7 +736,7 @@ def check_can_commit(self, force): if not force and not prompt.confirm(msg): raise StageCommitError( "unable to commit changed '{}'. Use `-f|--force` to " - "force.".format(self.relpath) + "force.`".format(self.relpath) ) self.save() diff --git a/dvc/state.py b/dvc/state.py index dacc07693c..61f82549d9 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -369,12 +369,14 @@ def save(self, path_info, checksum): """ assert path_info.scheme == "local" assert checksum is not None - assert os.path.exists(fspath_py35(path_info)) + + path = fspath_py35(path_info) + assert os.path.exists(path) actual_mtime, actual_size = get_mtime_and_size( - path_info, self.repo.dvcignore + path, self.repo.dvcignore ) - actual_inode = get_inode(path_info) + actual_inode = get_inode(path) existing_record = self.get_state_record_for_inode(actual_inode) if not existing_record: diff --git a/dvc/updater.py b/dvc/updater.py index 8b13754e85..d2cc16fc4c 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -9,8 +9,8 @@ from dvc import __version__ from dvc.lock import Lock, LockError -from dvc.utils import boxify, env2bool -from dvc.utils.pkg import get_package_manager +from dvc.utils import is_binary, boxify, env2bool + logger = logging.getLogger(__name__) @@ -123,6 +123,7 @@ def _get_update_instructions(self): "yum": "Run {yellow}yum{reset} update dvc", "yay": "Run {yellow}yay{reset} {blue}-S{reset} dvc", "formula": "Run {yellow}brew{reset} upgrade dvc", + "cask": "Run {yellow}brew cask{reset} upgrade dvc", "apt": ( "Run {yellow}apt-get{reset} install" " {blue}--only-upgrade{reset} dvc" @@ -141,6 +142,80 @@ def _get_update_instructions(self): ), } - package_manager = get_package_manager() + package_manager = self._get_package_manager() return instructions[package_manager] + + @staticmethod + def _get_dvc_path(system): + if system in ("linux", "darwin"): + output = os.popen("which dvc") + else: + output = os.popen("where dvc") + + return output.read().lower() + + @staticmethod + def _is_conda(path): + return "conda" in path + + def _get_linux(self): + import distro + + if not is_binary(): + dvc_path = self._get_dvc_path("linux") + return "conda" if self._is_conda(dvc_path) else "pip" + + package_managers = { + "rhel": "yum", + "centos": "yum", + "fedora": "yum", + "amazon": "yum", + "opensuse": "yum", + "ubuntu": "apt", + "debian": "apt", + } + + return package_managers.get(distro.id()) + + def _get_darwin(self): + if is_binary(): + return None + + package_manager = None + + if __file__.startswith("/usr/local/Cellar"): + package_manager = "formula" + + dvc_path = self._get_dvc_path("darwin") + if self._is_conda(dvc_path): + package_manager = "conda" + + return package_manager or "pip" + + def _get_windows(self): + if is_binary(): + return None + + dvc_path = self._get_dvc_path("windows") + if self._is_conda(dvc_path): + return "conda" + + return "pip" + + def _get_package_manager(self): + import platform + from dvc.exceptions import DvcException + + m = { + "Windows": self._get_windows, + "Darwin": self._get_darwin, + "Linux": self._get_linux, + } + + system = platform.system() + func = m.get(system) + if func is None: + raise DvcException("not supported system '{}'".format(system)) + + return func() diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index b0cb6b3fff..538b7a97e1 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -232,7 +232,7 @@ def _to_chunks_by_chunks_number(list_to_split, num_chunks): def to_chunks(list_to_split, num_chunks=None, chunk_size=None): if (num_chunks and chunk_size) or (not num_chunks and not chunk_size): raise ValueError( - "Only one among `num_chunks` or `chunk_size` must be defined." + "One and only one of 'num_chunks', 'chunk_size' must be defined" ) if chunk_size: return _split(list_to_split, chunk_size) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 6ded4cefdb..811b4195a9 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -7,7 +7,7 @@ from dvc.exceptions import DvcException from dvc.system import System -from dvc.utils import dict_md5, walk_files, fspath_py35 +from dvc.utils import dict_md5, walk_files from dvc.utils.compat import str @@ -21,7 +21,7 @@ def get_inode(path): def get_mtime_and_size(path, dvcignore): - if os.path.isdir(fspath_py35(path)): + if os.path.isdir(path): size = 0 files_mtimes = {} for file_path in walk_files(path, dvcignore): @@ -39,7 +39,7 @@ def get_mtime_and_size(path, dvcignore): # max(mtime(f) for f in non_ignored_files) mtime = dict_md5(files_mtimes) else: - base_stat = os.stat(fspath_py35(path)) + base_stat = os.stat(path) size = base_stat.st_size mtime = base_stat.st_mtime mtime = int(nanotime.timestamp(mtime)) diff --git a/dvc/version.py b/dvc/version.py index 6561d954c1..4fc71199cc 100644 --- a/dvc/version.py +++ b/dvc/version.py @@ -7,7 +7,7 @@ import subprocess -_BASE_VERSION = "0.66.4" +_BASE_VERSION = "0.66.0" def _generate_version(base_version): diff --git a/setup.py b/setup.py index 6817b8dea0..47bd6c46bb 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ def run(self): "funcy>=1.12", "pathspec>=0.6.0", "shortuuid>=0.5.0", - "tqdm>=4.36.1,<5", + "tqdm>=4.35.0,<5", "packaging>=19.0", "win-unicode-console>=0.5; sys_platform == 'win32'", ] @@ -116,7 +116,7 @@ def run(self): "flaky>=3.5.3", "mock>=3.0.0", "xmltodict>=0.11.0", - "awscli==1.16.266", + "awscli>=1.16.125", "google-compute-engine==2.8.13", "pywin32; sys_platform == 'win32'", "Pygments", # required by collective.checkdocs, @@ -127,7 +127,7 @@ def run(self): "pydocstyle<4.0", "jaraco.windows==3.9.2", "mock-ssh-server>=0.6.0", - "moto==1.3.14.dev464", + "moto>=1.3.14.dev55", "rangehttpserver==1.2.0", ] diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 4fe8857aad..fc00ee7b1e 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -2,7 +2,6 @@ import os import filecmp -import shutil import pytest from mock import patch @@ -68,23 +67,6 @@ def test_pull_imported_stage(dvc_repo, erepo): assert os.path.isfile(dst_cache) -def test_pull_imported_directory_stage(dvc_repo, erepo): - src = erepo.DATA_DIR - dst = erepo.DATA_DIR + "_imported" - stage_file = dst + ".dvc" - - dvc_repo.imp(erepo.root_dir, src, dst) - - shutil.rmtree(dst) - shutil.rmtree(dvc_repo.cache.local.cache_dir) - - dvc_repo.pull([stage_file]) - - assert os.path.exists(dst) - assert os.path.isdir(dst) - trees_equal(src, dst) - - def test_download_error_pulling_imported_stage(dvc_repo, erepo): src = erepo.FOO dst = erepo.FOO + "_imported" diff --git a/tests/func/test_remote.py b/tests/func/test_remote.py index af9d041cf7..b938f97f42 100644 --- a/tests/func/test_remote.py +++ b/tests/func/test_remote.py @@ -245,5 +245,5 @@ def unreliable_upload(self, from_file, to_info, name=None, **kwargs): def get_last_exc(caplog): - _, exc, _ = caplog.records[-2].exc_info + _, exc, _ = caplog.records[-1].exc_info return exc diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index f75edce04b..148d2a006a 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -964,8 +964,8 @@ def cmd(self, i, o): return "aws s3 cp {} {}".format(i, o) def write(self, bucket, key, body): - s3 = boto3.client("s3") - s3.put_object(Bucket=bucket, Key=key, Body=body) + s3 = boto3.resource("s3") + s3.Bucket(bucket).put_object(Key=key, Body=body) class TestReproExternalGS(TestReproExternalBase): diff --git a/tests/unit/command/test_imp_url.py b/tests/unit/command/test_imp_url.py index 50cb6e5bd5..3ec0efbaac 100644 --- a/tests/unit/command/test_imp_url.py +++ b/tests/unit/command/test_imp_url.py @@ -29,7 +29,7 @@ def test_failed_import_url(mocker, caplog, dvc_repo): assert cmd.run() == 1 expected_error = ( "failed to import http://somesite.com/file_name. " - "You could also try downloading it manually, and " - "adding it with `dvc add`." + "You could also try downloading it manually and " + "adding it with `dvc add` command." ) assert expected_error in caplog.text diff --git a/tests/unit/remote/ssh/test_connection.py b/tests/unit/remote/ssh/test_connection.py index 6b1300ce30..f0c12ec219 100644 --- a/tests/unit/remote/ssh/test_connection.py +++ b/tests/unit/remote/ssh/test_connection.py @@ -111,9 +111,3 @@ def test_hardlink(repo_dir, ssh): def test_copy(repo_dir, ssh): ssh.copy("foo", "link") assert filecmp.cmp("foo", "link") - - -def test_move(repo_dir, ssh): - ssh.move("foo", "copy") - assert os.path.exists("copy") - assert not os.path.exists("foo") diff --git a/tests/unit/remote/test_local.py b/tests/unit/remote/test_local.py index 461e37b79f..c9681d50ff 100644 --- a/tests/unit/remote/test_local.py +++ b/tests/unit/remote/test_local.py @@ -5,7 +5,7 @@ def test_status_download_optimization(mocker): """When comparing the status to pull a remote cache, And the desired files to fetch are already on the local cache, - Don't check the existence of the desired files on the remote cache + Don't check the existance of the desired files on the remote cache """ remote = RemoteLOCAL(None, {}) diff --git a/tests/unit/remote/test_s3.py b/tests/unit/remote/test_s3.py index 36b9d1e236..7861fb5a9a 100644 --- a/tests/unit/remote/test_s3.py +++ b/tests/unit/remote/test_s3.py @@ -83,20 +83,3 @@ def test_walk_files(remote): ] assert list(remote.walk_files(remote.path_info / "data")) == files - - -def test_copy_preserve_etag_across_buckets(remote): - s3 = remote.s3 - s3.create_bucket(Bucket="another") - - another = RemoteS3(None, {"url": "s3://another", "region": "us-east-1"}) - - from_info = remote.path_info / "foo" - to_info = another.path_info / "foo" - - remote.copy(from_info, to_info) - - from_etag = RemoteS3.get_etag(s3, "bucket", "foo") - to_etag = RemoteS3.get_etag(s3, "another", "foo") - - assert from_etag == to_etag diff --git a/tests/unit/test_logger.py b/tests/unit/test_logger.py index bcc061ff35..5560acd4ea 100644 --- a/tests/unit/test_logger.py +++ b/tests/unit/test_logger.py @@ -45,7 +45,11 @@ def test_error(self, caplog): with caplog.at_level(logging.INFO, logger="dvc"): logger.error("message") - expected = "{red}ERROR{nc}: message\n".format(**colors) + expected = ( + "{red}ERROR{nc}: message\n" + "\n" + "{footer}".format(footer=formatter.footer, **colors) + ) assert expected == formatter.format(caplog.records[0]) @@ -56,7 +60,11 @@ def test_exception(self, caplog): except Exception: logger.exception("message") - expected = "{red}ERROR{nc}: message\n".format(**colors) + expected = ( + "{red}ERROR{nc}: message\n" + "\n" + "{footer}".format(footer=formatter.footer, **colors) + ) assert expected == formatter.format(caplog.records[0]) @@ -67,7 +75,11 @@ def test_exception_with_description_and_without_message(self, caplog): except Exception: logger.exception("") - expected = "{red}ERROR{nc}: description\n".format(**colors) + expected = ( + "{red}ERROR{nc}: description\n" + "\n" + "{footer}".format(footer=formatter.footer, **colors) + ) assert expected == formatter.format(caplog.records[0]) @@ -78,8 +90,10 @@ def test_exception_with_description_and_message(self, caplog): except Exception: logger.exception("message") - expected = "{red}ERROR{nc}: message - description\n".format( - **colors + expected = ( + "{red}ERROR{nc}: message - description\n" + "\n" + "{footer}".format(footer=formatter.footer, **colors) ) assert expected == formatter.format(caplog.records[0]) @@ -96,8 +110,13 @@ def test_exception_under_verbose(self, caplog): "{red}ERROR{nc}: description\n" "{red}{line}{nc}\n" "{stack_trace}" - "{red}{line}{nc}\n".format( - line="-" * 60, stack_trace=stack_trace, **colors + "{red}{line}{nc}\n" + "\n" + "{footer}".format( + footer=formatter.footer, + line="-" * 60, + stack_trace=stack_trace, + **colors ) ) @@ -119,7 +138,10 @@ def test_nested_exceptions(self, caplog): "{red}ERROR{nc}: message - second: first\n" "{red}{line}{nc}\n" "{stack_trace}" - "{red}{line}{nc}\n".format( + "{red}{line}{nc}\n" + "\n" + "{footer}".format( + footer=formatter.footer, line="-" * 60, stack_trace="\n".join([first_traceback, second_traceback]), **colors diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index d479b10d99..30a96b63fc 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -14,7 +14,6 @@ contains_symlink_up_to, BasePathNotInCheckedPathException, get_parent_dirs_up_to, - get_inode, ) from mock import patch from tests.basic_env import TestDir @@ -150,20 +149,3 @@ def test_relpath_windows_different_drives(): rel_info = relpath(info1, info2) assert isinstance(rel_info, str) assert rel_info == path1 - - -def test_get_inode(repo_dir): - path = repo_dir.FOO - path_info = PathInfo(path) - assert get_inode(path) == get_inode(path_info) - - -@pytest.mark.parametrize("path", [TestDir.DATA, TestDir.DATA_DIR]) -def test_path_object_and_str_are_valid_types_get_mtime_and_size( - path, repo_dir -): - dvcignore = DvcIgnoreFilter(repo_dir.root_dir) - time, size = get_mtime_and_size(path, dvcignore) - object_time, object_size = get_mtime_and_size(PathInfo(path), dvcignore) - assert time == object_time - assert size == object_size From 1c60eea3f593964c8bb45e1cee2c67d2daf0cfaa Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Mon, 4 Nov 2019 20:03:13 +0000 Subject: [PATCH 11/12] Restyled by reorder-python-imports --- dvc/updater.py | 12 ++++++++---- tests/func/test_updater.py | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/dvc/updater.py b/dvc/updater.py index d2cc16fc4c..6bc031c00b 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -1,15 +1,19 @@ from __future__ import unicode_literals -import sys +import logging import os +import sys import time -import logging + import colorama from packaging import version from dvc import __version__ -from dvc.lock import Lock, LockError -from dvc.utils import is_binary, boxify, env2bool +from dvc.lock import Lock +from dvc.lock import LockError +from dvc.utils import boxify +from dvc.utils import env2bool +from dvc.utils import is_binary logger = logging.getLogger(__name__) diff --git a/tests/func/test_updater.py b/tests/func/test_updater.py index 0ae1263ab0..2876b6f836 100644 --- a/tests/func/test_updater.py +++ b/tests/func/test_updater.py @@ -1,4 +1,5 @@ import os + import mock import pytest From ab08f2281a7900e57ecbbf0a0e998ade48eb233a Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Mon, 4 Nov 2019 20:03:14 +0000 Subject: [PATCH 12/12] Restyled by yapf --- dvc/updater.py | 68 ++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/dvc/updater.py b/dvc/updater.py index 6bc031c00b..e7710a9b59 100644 --- a/dvc/updater.py +++ b/dvc/updater.py @@ -15,7 +15,6 @@ from dvc.utils import env2bool from dvc.utils import is_binary - logger = logging.getLogger(__name__) @@ -28,9 +27,8 @@ class Updater(object): # pragma: no cover def __init__(self, dvc_dir): self.dvc_dir = dvc_dir self.updater_file = os.path.join(dvc_dir, self.UPDATER_FILE) - self.lock = Lock( - self.updater_file + ".lock", tmp_dir=os.path.join(dvc_dir, "tmp") - ) + self.lock = Lock(self.updater_file + ".lock", + tmp_dir=os.path.join(dvc_dir, "tmp")) self.current = version.parse(__version__).base_version def _is_outdated_file(self): @@ -107,43 +105,41 @@ def _notify(self): message = ( "Update available {red}{current}{reset} -> {green}{latest}{reset}" - + "\n" - + self._get_update_instructions() - ).format( - red=colorama.Fore.RED, - reset=colorama.Fore.RESET, - green=colorama.Fore.GREEN, - yellow=colorama.Fore.YELLOW, - blue=colorama.Fore.BLUE, - current=self.current, - latest=self.latest, - ) + + "\n" + self._get_update_instructions()).format( + red=colorama.Fore.RED, + reset=colorama.Fore.RESET, + green=colorama.Fore.GREEN, + yellow=colorama.Fore.YELLOW, + blue=colorama.Fore.BLUE, + current=self.current, + latest=self.latest, + ) logger.info(boxify(message, border_color="yellow")) def _get_update_instructions(self): instructions = { - "pip": "Run {yellow}pip{reset} install dvc {blue}--upgrade{reset}", - "yum": "Run {yellow}yum{reset} update dvc", - "yay": "Run {yellow}yay{reset} {blue}-S{reset} dvc", - "formula": "Run {yellow}brew{reset} upgrade dvc", - "cask": "Run {yellow}brew cask{reset} upgrade dvc", - "apt": ( - "Run {yellow}apt-get{reset} install" - " {blue}--only-upgrade{reset} dvc" - ), - "binary": ( - "To upgrade follow this steps:\n" - "1. Uninstall dvc binary\n" - "2. Go to {blue}https://dvc.org{reset}\n" - "3. Download and install new binary" - ), - "conda": "Run {yellow}conda{reset} {update}update{reset} dvc", - None: ( - "Find the latest release at\n{blue}" - "https://github.com/iterative/dvc/releases/latest" - "{reset}" - ), + "pip": + "Run {yellow}pip{reset} install dvc {blue}--upgrade{reset}", + "yum": + "Run {yellow}yum{reset} update dvc", + "yay": + "Run {yellow}yay{reset} {blue}-S{reset} dvc", + "formula": + "Run {yellow}brew{reset} upgrade dvc", + "cask": + "Run {yellow}brew cask{reset} upgrade dvc", + "apt": ("Run {yellow}apt-get{reset} install" + " {blue}--only-upgrade{reset} dvc"), + "binary": ("To upgrade follow this steps:\n" + "1. Uninstall dvc binary\n" + "2. Go to {blue}https://dvc.org{reset}\n" + "3. Download and install new binary"), + "conda": + "Run {yellow}conda{reset} {update}update{reset} dvc", + None: ("Find the latest release at\n{blue}" + "https://github.com/iterative/dvc/releases/latest" + "{reset}"), } package_manager = self._get_package_manager()