Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions dvc/path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,79 @@ class CloudURLInfo(URLInfo):
@property
def path(self):
    """Return the stored path string with any leading slashes removed."""
    raw = self._spath
    return raw.lstrip("/")


class HTTPURLInfo(URLInfo):
def __init__(self, url):
p = urlparse(url)
stripped = p._replace(params=None, query=None, fragment=None)
super().__init__(stripped.geturl())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we re-stringify the URL here? We could use .from_parts() or .fill_parts() instead.

Copy link
Contributor Author

@casperdcl casperdcl Mar 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is more future-proof to call super() and reuse the parent's logic. Otherwise we'd need:

p = urlparse(url)
-stripped = p._replace(params=None, query=None, fragment=None)
-super().__init__(stripped.geturl())
+assert p.password is None
+self.fill_parts(p.scheme, p.hostname, p.username, p.port, p.path)

and not call super(), which is allowed but not great practice.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First, we can ignore all of this as long as it works. Some tech debt, but might be resolved later.

Needing to parse, re-stringify, and then parse again is a kludge.

self.params = p.params
self.query = p.query
self.fragment = p.fragment

@classmethod
def from_parts(
cls,
scheme=None,
host=None,
user=None,
port=None,
path="",
netloc=None,
params=None,
query=None,
fragment=None,
):
assert bool(host) ^ bool(netloc)

if netloc is not None:
return cls(
"{}://{}{}{}{}{}".format(
scheme,
netloc,
path,
(";" + params) if params else "",
("?" + query) if query else "",
("#" + fragment) if fragment else "",
)
)

obj = cls.__new__(cls)
obj.fill_parts(scheme, host, user, port, path)
obj.params = params
obj.query = query
obj.fragment = fragment
Comment on lines +283 to +286
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't bother overriding fill_parts as it's not used publicly anywhere else

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then don't. It is used via .replace(), .__div__(), and .parent. If the inherited implementation works, though, you may skip overriding it.

Copy link
Contributor Author

@casperdcl casperdcl Mar 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those use from_parts (which would use this overridden method). The point is nothing else explicitly uses fill_parts directly so there's no need to override fill_parts for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You keep saying you don't need to override, but you do override. I'm not sure I understand what you are up to here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean technically the correct way to do things would be:

def fill_parts(
    self,
    scheme,
    host,
    user,
    port,
    path,
    params=None,
    query=None,
    fragment=None,
):
    """Populate the base URL parts plus the HTTP-specific extras.

    The first five arguments are forwarded unchanged to the parent
    implementation; ``params``, ``query`` and ``fragment`` are stored on the
    instance. The extras default to ``None`` so that callers using the
    inherited five-argument form keep working.
    """
    # Zero-argument super() is already bound to ``self``; passing ``self``
    # again would shift every positional argument by one.
    super().fill_parts(scheme, host, user, port, path)
    self.params = params
    self.query = query
    self.fragment = fragment

but this isn't required right now as fill_parts is really a private method not used anywhere else.

return obj

@property
def _extra_parts(self):
    """Return the extra URL components as ``(params, query, fragment)``."""
    extras = (self.params, self.query, self.fragment)
    return extras

@property
def parts(self):
    """Return all components of this URL as one flat tuple.

    The result is the concatenation, in order, of ``_base_parts``, the
    ``parts`` of ``_path`` and ``_extra_parts``.
    """
    head = self._base_parts
    middle = self._path.parts
    tail = self._extra_parts
    return head + middle + tail

@cached_property
def url(self):
    """Render the full URL string, including any params, query and fragment.

    Each extra component is appended with its delimiter (``;``, ``?`` or
    ``#``) only when it is truthy.
    """
    pieces = ["{}://{}{}".format(self.scheme, self.netloc, self._spath)]
    delimited = (
        (";", self.params),
        ("?", self.query),
        ("#", self.fragment),
    )
    for marker, value in delimited:
        if value:
            pieces.append(marker + value)
    return "".join(pieces)

def __eq__(self, other):
    """Compare two URL infos, taking params, query and fragment into account.

    A ``str`` or ``bytes`` operand is first converted by passing it to this
    object's class. Objects are equal only when their classes, base parts,
    paths and extra parts all match.
    """
    if isinstance(other, (str, bytes)):
        other = self.__class__(other)
    if not (self.__class__ == other.__class__):
        return False
    if not (self._base_parts == other._base_parts):
        return False
    if not (self._path == other._path):
        return False
    return self._extra_parts == other._extra_parts

def __hash__(self):
    """Hash the object by its ``parts`` tuple."""
    components = self.parts
    return hash(components)
2 changes: 2 additions & 0 deletions dvc/remote/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from funcy import cached_property, memoize, wrap_prop, wrap_with

from dvc.path_info import HTTPURLInfo
import dvc.prompt as prompt
from dvc.config import ConfigError
from dvc.exceptions import DvcException, HTTPError
Expand All @@ -25,6 +26,7 @@ def ask_password(host, user):

class RemoteHTTP(RemoteBASE):
scheme = Schemes.HTTP
path_cls = HTTPURLInfo
SESSION_RETRIES = 5
SESSION_BACKOFF_FACTOR = 0.1
REQUEST_TIMEOUT = 10
Expand Down
13 changes: 12 additions & 1 deletion tests/unit/test_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from dvc.path_info import CloudURLInfo
from dvc.path_info import HTTPURLInfo
from dvc.path_info import PathInfo
from dvc.path_info import URLInfo

Expand Down Expand Up @@ -44,13 +45,23 @@ def test_url_info_parents(cls):
]


@pytest.mark.parametrize("cls", [URLInfo, CloudURLInfo])
@pytest.mark.parametrize("cls", [URLInfo, CloudURLInfo, HTTPURLInfo])
def test_url_info_deepcopy(cls):
    """A deep copy of a URL info object must compare equal to its source."""
    original = cls("ssh://user@test.com:/test1/test2/test3")
    duplicate = copy.deepcopy(original)
    assert original == duplicate


def test_https_url_info_str():
    """HTTPURLInfo must round-trip a URL with params, query and fragment."""
    raw = "https://user@test.com/test1;p=par?q=quer#frag"
    info = HTTPURLInfo(raw)
    assert info.url == raw
    assert str(info) == info.url
    assert (info.params, info.query, info.fragment) == (
        "p=par",
        "q=quer",
        "frag",
    )


@pytest.mark.parametrize(
"path, as_posix, osname",
[
Expand Down