From dfc2cf7e03b3eead1ccf59011ea05adc4f32804f Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Wed, 5 Aug 2020 15:34:51 +0900 Subject: [PATCH 1/5] tests: test importing into subdirectory --- tests/func/test_import.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 4e38f70d89..216622b42e 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -132,6 +132,23 @@ def test_import_file_from_dir(tmp_dir, scm, dvc, erepo_dir): assert (tmp_dir / "X.dvc").exists() +@pytest.mark.parametrize("dir_exists", [True, False]) +def test_import_file_from_dir_to_dir(tmp_dir, scm, dvc, erepo_dir, dir_exists): + with erepo_dir.chdir(): + erepo_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="create dir") + if dir_exists: + tmp_dir.gen({"dir": {}}) + + dvc.imp( + os.fspath(erepo_dir), + os.path.join("dir", "foo"), + out=os.path.join("dir", "foo"), + ) + assert not (tmp_dir / "foo.dvc").exists() + assert (tmp_dir / "dir" / "foo").read_text() == "foo" + assert (tmp_dir / "dir" / "foo.dvc").exists() + + def test_import_non_cached(erepo_dir, tmp_dir, dvc, scm): src = "non_cached_output" dst = src + "_imported" From d9b7e8f2dc9f9acd36f3db4180904205bf6ec22b Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Wed, 5 Aug 2020 15:37:47 +0900 Subject: [PATCH 2/5] import: fix bug when output subdir doesnt already exist --- dvc/repo/imp_url.py | 5 ++++- dvc/utils/__init__.py | 8 +++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dvc/repo/imp_url.py b/dvc/repo/imp_url.py index c20e57e653..fde70f1ac5 100644 --- a/dvc/repo/imp_url.py +++ b/dvc/repo/imp_url.py @@ -2,7 +2,7 @@ from dvc.repo.scm_context import scm_context from dvc.utils import relpath, resolve_output, resolve_paths -from dvc.utils.fs import path_isin +from dvc.utils.fs import makedirs, path_isin from ..exceptions import OutputDuplicationError from . import locked @@ -27,6 +27,9 @@ def imp_url( ): url = relpath(url, wdir) + if not os.path.exists(wdir): + makedirs(wdir) + stage = create_stage( Stage, self, diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 95e1278768..8c5811ced4 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -373,11 +373,9 @@ def resolve_paths(repo, out): # NOTE: `out` might not exist yet, so using `dirname`(aka `wdir`) to check # if it is a local path. - if ( - os.path.exists(dirname) # out might not exist yet, so - and PathInfo(abspath).isin_or_eq(repo.root_dir) - and not contains_symlink_up_to(abspath, repo.root_dir) - ): + if PathInfo(abspath).isin_or_eq( + repo.root_dir + ) and not contains_symlink_up_to(abspath, repo.root_dir): wdir = dirname out = base else: From d4de5055af5b23eec7e685308b54b5de9b473fac Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Wed, 5 Aug 2020 15:53:30 +0900 Subject: [PATCH 3/5] use urlparse to determine if output path is local, not os.path.exists --- dvc/utils/__init__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 8c5811ced4..9e6f4a137b 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -363,6 +363,7 @@ def resolve_output(inp, out): def resolve_paths(repo, out): + from urllib.parse import urlparse from ..dvcfile import DVC_FILE_SUFFIX from ..path_info import PathInfo from .fs import contains_symlink_up_to @@ -371,11 +372,11 @@ def resolve_paths(repo, out): dirname = os.path.dirname(abspath) base = os.path.basename(os.path.normpath(out)) - # NOTE: `out` might not exist yet, so using `dirname`(aka `wdir`) to check - # if it is a local path. - if PathInfo(abspath).isin_or_eq( - repo.root_dir - ) and not contains_symlink_up_to(abspath, repo.root_dir): + if ( + not urlparse(out).scheme + and PathInfo(abspath).isin_or_eq(repo.root_dir) + and not contains_symlink_up_to(abspath, repo.root_dir) + ): wdir = dirname out = base else: From 7c51b2f6be46b48cd3c10dbc0f75558316cb8b1a Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Wed, 5 Aug 2020 16:52:17 +0900 Subject: [PATCH 4/5] account for windows drive letters --- dvc/utils/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 9e6f4a137b..8fece228ff 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -368,13 +368,17 @@ def resolve_paths(repo, out): from ..path_info import PathInfo from .fs import contains_symlink_up_to - abspath = os.path.abspath(out) + abspath = PathInfo(os.path.abspath(out)) dirname = os.path.dirname(abspath) base = os.path.basename(os.path.normpath(out)) + scheme = urlparse(out).scheme + if os.name == "nt" and scheme == abspath.drive[0].lower(): + # urlparse interprets windows drive letters as URL scheme + scheme = "" if ( - not urlparse(out).scheme - and PathInfo(abspath).isin_or_eq(repo.root_dir) + not scheme + and abspath.isin_or_eq(repo.root_dir) and not contains_symlink_up_to(abspath, repo.root_dir) ): wdir = dirname From e432438a771b91fb79ca5b769dfb4739300f1efa Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 6 Aug 2020 15:03:06 +0900 Subject: [PATCH 5/5] import: error out if output subdirectory does not exist --- dvc/repo/imp_url.py | 5 +---- tests/func/test_import.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/dvc/repo/imp_url.py b/dvc/repo/imp_url.py index fde70f1ac5..c20e57e653 100644 --- a/dvc/repo/imp_url.py +++ b/dvc/repo/imp_url.py @@ -2,7 +2,7 @@ from dvc.repo.scm_context import scm_context from dvc.utils import relpath, resolve_output, resolve_paths -from dvc.utils.fs import makedirs, path_isin +from dvc.utils.fs import path_isin from ..exceptions import OutputDuplicationError from . import locked @@ -27,9 +27,6 @@ def imp_url( ): url = relpath(url, wdir) - if not os.path.exists(wdir): - makedirs(wdir) - stage = create_stage( Stage, self, diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 216622b42e..5af2247750 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -9,6 +9,7 @@ from dvc.config import NoRemoteError from dvc.dvcfile import Dvcfile from dvc.exceptions import DownloadError, PathMissingError +from dvc.stage.exceptions import StagePathNotFoundError from dvc.system import System from dvc.utils.fs import makedirs, remove @@ -132,13 +133,18 @@ def test_import_file_from_dir(tmp_dir, scm, dvc, erepo_dir): assert (tmp_dir / "X.dvc").exists() -@pytest.mark.parametrize("dir_exists", [True, False]) -def test_import_file_from_dir_to_dir(tmp_dir, scm, dvc, erepo_dir, dir_exists): +def test_import_file_from_dir_to_dir(tmp_dir, scm, dvc, erepo_dir): with erepo_dir.chdir(): erepo_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="create dir") - if dir_exists: - tmp_dir.gen({"dir": {}}) + with pytest.raises(StagePathNotFoundError): + dvc.imp( + os.fspath(erepo_dir), + os.path.join("dir", "foo"), + out=os.path.join("dir", "foo"), + ) + + tmp_dir.gen({"dir": {}}) dvc.imp( os.fspath(erepo_dir), os.path.join("dir", "foo"),