From 48495d027963073fe260f596d85c3c179fee037f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 2 Mar 2020 18:30:09 +0000 Subject: [PATCH 1/8] import-url: allow queries in URL Fixes #3424 --- dvc/path_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/path_info.py b/dvc/path_info.py index 6b4c300b5e..11fdf014f8 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -112,7 +112,7 @@ class URLInfo(_BasePath): def __init__(self, url): p = urlparse(url) - assert not p.query and not p.params and not p.fragment + assert not p.params and not p.fragment assert p.password is None self.fill_parts(p.scheme, p.hostname, p.username, p.port, p.path) From 84e41d51bf73809574957b923ac43f7186dc8ff7 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 11 Mar 2020 16:08:16 +0000 Subject: [PATCH 2/8] Revert "import-url: allow queries in URL" This reverts commit 48495d027963073fe260f596d85c3c179fee037f. --- dvc/path_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/path_info.py b/dvc/path_info.py index 11fdf014f8..6b4c300b5e 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -112,7 +112,7 @@ class URLInfo(_BasePath): def __init__(self, url): p = urlparse(url) - assert not p.params and not p.fragment + assert not p.query and not p.params and not p.fragment assert p.password is None self.fill_parts(p.scheme, p.hostname, p.username, p.port, p.path) From 650c2b4b3d570e4f6309a5a66d99a7f59ecf3a6a Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 11 Mar 2020 16:48:04 +0000 Subject: [PATCH 3/8] path_info: add HTTPURLInfo base --- dvc/path_info.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/dvc/path_info.py b/dvc/path_info.py index 6b4c300b5e..c13915b6a2 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -241,3 +241,47 @@ class CloudURLInfo(URLInfo): @property def path(self): return self._spath.lstrip("/") + + +class HTTPURLInfo(URLInfo): + def __init__(self, url): + p = urlparse(url) + stripped = p._replace(params=None, query=None, fragment=None) + super().__init__(stripped.geturl()) + self.params = p.params + self.query = p.query + self.fragment = p.fragment + + @classmethod + def from_parts( + cls, + scheme=None, + host=None, + user=None, + port=None, + path="", + netloc=None, + params=None, + query=None, + fragment=None, + ): + assert bool(host) ^ bool(netloc) + + if netloc is not None: + return cls( + "{}://{}{}{}{}{}".format( + scheme, + netloc, + path, + (";" + params) if params else "", + ("?" + query) if query else "", + ("#" + fragment) if fragment else "", + ) + ) + + obj = cls.__new__(cls) + obj.fill_parts(scheme, host, user, port, path) + obj.params = params + obj.query = query + obj.fragment = fragment + return obj From dd4d090ba060da0a77e56d802c3812f4d9a49176 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 12 Mar 2020 20:17:18 +0000 Subject: [PATCH 4/8] path_info: HTTPURLInfo complete --- dvc/path_info.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/dvc/path_info.py b/dvc/path_info.py index c13915b6a2..9fea04e4a3 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -285,3 +285,32 @@ def from_parts( obj.query = query obj.fragment = fragment return obj + + @property + def _extra_parts(self): + return (self.params, self.query, self.fragment) + + @property + def parts(self): + return self._base_parts + self._path.parts + self._extra_parts + + @cached_property + def url(self): + return "{}://{}{}{}{}{}".format( + self.scheme, + self.netloc, + self._spath, + (";" + self.params) if self.params else "", + ("?" + self.query) if self.query else "", + ("#" + self.fragment) if self.fragment else "", + ) + + def __eq__(self, other): + if isinstance(other, (str, bytes)): + other = self.__class__(other) + return ( + self.__class__ == other.__class__ + and self._base_parts == other._base_parts + and self._path == other._path + and self._extra_parts == other._extra_parts + ) From ca2d90f23f7d511b0df69bf8c0cc3f16ddb500a6 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 12 Mar 2020 21:08:44 +0000 Subject: [PATCH 5/8] remote: http(s): support params, queries, fragments Fixes #3424 Closes #3432 --- dvc/path_info.py | 3 +++ dvc/remote/http.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/dvc/path_info.py b/dvc/path_info.py index 9fea04e4a3..613a76c014 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -314,3 +314,6 @@ def __eq__(self, other): and self._path == other._path and self._extra_parts == other._extra_parts ) + + def __hash__(self): + return hash(self.parts) diff --git a/dvc/remote/http.py b/dvc/remote/http.py index d0f35029bb..fa4966685e 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -4,6 +4,7 @@ from funcy import cached_property, memoize, wrap_prop, wrap_with +from dvc.path_info import HTTPURLInfo import dvc.prompt as prompt from dvc.config import ConfigError from dvc.exceptions import DvcException, HTTPError @@ -25,6 +26,7 @@ def ask_password(host, user): class RemoteHTTP(RemoteBASE): scheme = Schemes.HTTP + path_cls = HTTPURLInfo SESSION_RETRIES = 5 SESSION_BACKOFF_FACTOR = 0.1 REQUEST_TIMEOUT = 10 From 4031ab0758e52303715b9832ac6e8bbecbeb814e Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 12 Mar 2020 22:06:43 +0000 Subject: [PATCH 6/8] tests: path_info: HTTPURLInfo --- tests/unit/test_path_info.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_path_info.py b/tests/unit/test_path_info.py index 99476ff951..875408d716 100644 --- a/tests/unit/test_path_info.py +++ b/tests/unit/test_path_info.py @@ -4,6 +4,7 @@ import pytest from dvc.path_info import CloudURLInfo +from dvc.path_info import HTTPURLInfo from dvc.path_info import PathInfo from dvc.path_info import URLInfo @@ -44,13 +45,21 @@ def test_url_info_parents(cls): ] -@pytest.mark.parametrize("cls", [URLInfo, CloudURLInfo]) +@pytest.mark.parametrize("cls", [URLInfo, CloudURLInfo, HTTPURLInfo]) def test_url_info_deepcopy(cls): u1 = cls("ssh://user@test.com:/test1/test2/test3") u2 = copy.deepcopy(u1) assert u1 == u2 +@pytest.mark.parametrize("cls", [HTTPURLInfo]) +def test_https_url_info_str(cls): + url = "https://user@test.com/test1;p=par?q=quer#frag" + u = cls("https://user@test.com/test1;p=par?q=quer#frag") + assert u.url == url + assert str(u) == u.url + + @pytest.mark.parametrize( "path, as_posix, osname", [ From b6d79300884d2114b341f22058cc4c1593fcbc23 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 12 Mar 2020 22:07:42 +0000 Subject: [PATCH 7/8] lint --- tests/unit/test_path_info.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_path_info.py b/tests/unit/test_path_info.py index 875408d716..64421d1157 100644 --- a/tests/unit/test_path_info.py +++ b/tests/unit/test_path_info.py @@ -52,10 +52,9 @@ def test_url_info_deepcopy(cls): assert u1 == u2 -@pytest.mark.parametrize("cls", [HTTPURLInfo]) -def test_https_url_info_str(cls): +def test_https_url_info_str(): url = "https://user@test.com/test1;p=par?q=quer#frag" - u = cls("https://user@test.com/test1;p=par?q=quer#frag") + u = HTTPURLInfo(url) assert u.url == url assert str(u) == u.url From 6e325d79c958f556c8f40e6f3a3fd03d67450dce Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 12 Mar 2020 22:12:20 +0000 Subject: [PATCH 8/8] tests: path_info: HTTPURLInfo Slightly more thorough testing --- tests/unit/test_path_info.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/test_path_info.py b/tests/unit/test_path_info.py index 64421d1157..0b202fa124 100644 --- a/tests/unit/test_path_info.py +++ b/tests/unit/test_path_info.py @@ -57,6 +57,9 @@ def test_https_url_info_str(): u = HTTPURLInfo(url) assert u.url == url assert str(u) == u.url + assert u.params == "p=par" + assert u.query == "q=quer" + assert u.fragment == "frag" @pytest.mark.parametrize(