Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-26378: Refactor S3/WebDAV datastores #355

Merged
merged 14 commits into from
Aug 20, 2020
24 changes: 12 additions & 12 deletions python/lsst/daf/butler/_butlerConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,22 @@ def __init__(self, other=None, searchPaths=None):
return

if isinstance(other, str):
# This will only allow supported schemes
uri = ButlerURI(other)
if uri.scheme == "file" or not uri.scheme:
# Check explicitly that we have a directory
if os.path.isdir(uri.ospath):
other = os.path.join(uri.ospath, "butler.yaml")
elif uri.scheme == "s3":
if not uri.dirLike and "." not in uri.basename():
uri = ButlerURI(other, forceDirectory=True)
uri.updateFile("butler.yaml")
other = uri.geturl()
elif uri.scheme == "https":
if not uri.dirLike and "." not in uri.basename():
uri = ButlerURI(other, forceDirectory=True)
uri.updateFile("butler.yaml")
other = uri.geturl()
other = uri.join("butler.yaml")
else:
raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")
# For generic URI assume that we have a directory
# if the basename does not have a file extension
# This heuristic is needed since we can not rely on
# external users to include the trailing / and we
# can't always check that the remote resource is a directory.
if not uri.dirLike and not uri.getExtension():
# Force to a directory and add the default config name
uri = ButlerURI(other, forceDirectory=True).join("butler.yaml")
other = uri

# Create an empty config for us to populate
super().__init__()
Expand Down
68 changes: 56 additions & 12 deletions python/lsst/daf/butler/core/_butlerUri.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,10 @@ class ButlerURI:
uri : `str` or `urllib.parse.ParseResult`
URI in string form. Can be scheme-less if referring to a local
filesystem path.
root : `str`, optional
root : `str` or `ButlerURI`, optional
When fixing up a relative path in a ``file`` scheme or if scheme-less,
use this as the root. Must be absolute. If `None` the current
working directory will be used.
working directory will be used. Can be a file URI.
forceAbsolute : `bool`, optional
If `True`, scheme-less relative URI will be converted to an absolute
path using a ``file`` scheme. If `False` scheme-less URI will remain
Expand Down Expand Up @@ -207,7 +207,7 @@ class ButlerURI:
_uri: urllib.parse.ParseResult

def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI],
root: Optional[str] = None, forceAbsolute: bool = True,
root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
forceDirectory: bool = False) -> ButlerURI:
parsed: urllib.parse.ParseResult
dirLike: bool
Expand Down Expand Up @@ -595,6 +595,17 @@ def mkdir(self) -> None:
"""
raise NotImplementedError()

def size(self) -> int:
    """Report how many bytes the resource behind this URI occupies.

    This implementation is a placeholder that always raises; the
    contract described here is the one stated for the method.

    Returns
    -------
    sz : `int`
        Size of the resource in bytes. A dir-like URI reports 0.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()

def __str__(self) -> str:
    """Return the string form of this URI, as produced by ``geturl()``."""
    url = self.geturl()
    return url

Expand All @@ -618,7 +629,7 @@ def __getnewargs__(self) -> Tuple:
return (str(self),)

@staticmethod
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
forceAbsolute: bool = False,
forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
"""Correct any issues with the supplied URI.
Expand All @@ -627,7 +638,7 @@ def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
----------
parsed : `~urllib.parse.ParseResult`
The result from parsing a URI using `urllib.parse`.
root : `str`, ignored
root : `str` or `ButlerURI`, ignored
Not used by this implementation since all URIs are
absolute except for those representing the local file system.
forceAbsolute : `bool`, ignored.
Expand Down Expand Up @@ -730,6 +741,14 @@ def exists(self) -> bool:
# to a file that no longer exists this will return False
return os.path.exists(self.ospath)

def size(self) -> int:
    """Return the size in bytes of the local file this URI points to.

    Returns
    -------
    sz : `int`
        The ``st_size`` reported by `os.stat` for ``self.ospath``, or 0
        if ``self.ospath`` is an existing directory.
    """
    # A directory on disk is reported as zero-sized rather than
    # returning the size of the directory entry itself.
    if os.path.isdir(self.ospath):
        return 0
    return os.stat(self.ospath).st_size

def remove(self) -> None:
"""Remove the resource."""
os.remove(self.ospath)
Expand Down Expand Up @@ -968,7 +987,7 @@ def transfer_from(self, src: ButlerURI, transfer: str,
os.remove(local_src)

@staticmethod
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
forceAbsolute: bool = False,
forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
"""Fix up relative paths in URI instances.
Expand All @@ -977,10 +996,10 @@ def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
----------
parsed : `~urllib.parse.ParseResult`
The result from parsing a URI using `urllib.parse`.
root : `str`, optional
root : `str` or `ButlerURI`, optional
Path to use as root when converting relative to absolute.
If `None`, it will be the current working directory. This
is a local file system path, not a URI. It is only used if
is a local file system path, or a file URI. It is only used if
a file-scheme is used incorrectly with a relative path.
forceAbsolute : `bool`, ignored
Has no effect for this subclass. ``file`` URIs are always
Expand Down Expand Up @@ -1037,6 +1056,10 @@ def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,

if root is None:
root = os.path.abspath(os.path.curdir)
elif isinstance(root, ButlerURI):
if root.scheme and root.scheme != "file":
raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
root = os.path.abspath(root.ospath)

replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

Expand Down Expand Up @@ -1071,14 +1094,22 @@ def exists(self) -> bool:
exists, _ = s3CheckFileExists(self, client=self.client)
return exists

def size(self) -> int:
    """Return the size in bytes of the S3 object, or 0 if dir-like.

    Returns
    -------
    sz : `int`
        0 when ``self.dirLike`` is set; otherwise the second element of
        the pair returned by ``s3CheckFileExists``.
    """
    # Deferred import: s3utils itself imports ButlerURI, so importing
    # it at module scope would be circular.
    from .s3utils import s3CheckFileExists

    if not self.dirLike:
        # Only the size half of the (exists, size) pair is needed here.
        return s3CheckFileExists(self, client=self.client)[1]
    return 0

def remove(self) -> None:
    """Remove the resource.

    Issues a single ``delete_object`` request for the key
    ``self.relativeToPathRoot`` in the bucket ``self.netloc``.
    """
    # https://github.com/boto/boto3/issues/507 - there is no
    # way of knowing if the file was actually deleted except
    # for checking all the keys again, response is HTTP 204 OK
    # response all the time
    #
    # The S3 client exposes ``delete_object``; it has no ``delete``
    # method, so that is the only call made here.
    self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

def read(self, size: int = -1) -> bytes:
args = {}
Expand Down Expand Up @@ -1224,6 +1255,15 @@ def exists(self) -> bool:

return True if r.status_code == 200 else False

def size(self) -> int:
    """Return the size in bytes of the remote resource, or 0 if dir-like.

    Returns
    -------
    sz : `int`
        0 when ``self.dirLike`` is set; otherwise the integer value of
        the ``Content-Length`` header from a HEAD request on this URI.

    Raises
    ------
    FileNotFoundError
        Raised if the HEAD request does not return status 200.
    """
    if self.dirLike:
        return 0

    # A HEAD request gives us the headers without transferring the body.
    response = self.session.head(self.geturl())
    if response.status_code != 200:
        raise FileNotFoundError(f"Resource {self} does not exist")
    return int(response.headers['Content-Length'])

def mkdir(self) -> None:
"""For a dir-like URI, create the directory resource if it does not
already exist.
Expand Down Expand Up @@ -1407,7 +1447,7 @@ def _force_to_file(self) -> ButlerFileURI:
return ButlerURI(uri, forceDirectory=self.dirLike) # type: ignore

@staticmethod
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
forceAbsolute: bool = False,
forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
"""Fix up relative paths for local file system.
Expand All @@ -1416,10 +1456,10 @@ def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
----------
parsed : `~urllib.parse.ParseResult`
The result from parsing a URI using `urllib.parse`.
root : `str`, optional
root : `str` or `ButlerURI`, optional
Path to use as root when converting relative to absolute.
If `None`, it will be the current working directory. This
is a local file system path, not a URI.
is a local file system path, or a file URI.
forceAbsolute : `bool`, optional
If `True`, scheme-less relative URI will be converted to an
absolute path using a ``file`` scheme. If `False` scheme-less URI
Expand Down Expand Up @@ -1454,6 +1494,10 @@ def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,

if root is None:
root = os.path.abspath(os.path.curdir)
elif isinstance(root, ButlerURI):
if root.scheme and root.scheme != "file":
raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
root = os.path.abspath(root.ospath)

# this is a local OS file path which can support tilde expansion.
# we quoted it in the constructor so unquote here
Expand Down
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,11 @@ class Config(collections.abc.MutableMapping):

Parameters
----------
other : `str` or `Config` or `dict`
other : `str` or `Config` or `dict` or `ButlerURI`
Other source of configuration, can be:

- (`str`) Treated as a path to a config file on disk. Must end with
".yaml".
- (`str` or `ButlerURI`) Treated as a URI to a config file. Must end
with ".yaml".
- (`Config`) Copies the other Config's values into this one.
- (`dict`) Copies the values from the dict into this Config.

Expand Down
8 changes: 7 additions & 1 deletion python/lsst/daf/butler/core/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ def __init__(self, datastoreRootUri: Union[ButlerURI, str], path: str):
# Internal cache of the full location as a ButlerURI
self._uri: Optional[ButlerURI] = None

# Check that the resulting URI is inside the datastore
# This can go wrong if we were given ../dir as path
pathInStore = self.uri.relative_to(self._datastoreRootUri)
if pathInStore is None:
raise ValueError(f"Unexpectedly {path} jumps out of {self._datastoreRootUri}")

def __str__(self) -> str:
return str(self.uri)

Expand Down Expand Up @@ -194,7 +200,7 @@ class LocationFactory:
be treated as a posixpath but then converted to an absolute path.
"""

def __init__(self, datastoreRoot: str):
def __init__(self, datastoreRoot: Union[ButlerURI, str]):
self._datastoreRootUri = ButlerURI(datastoreRoot, forceAbsolute=True,
forceDirectory=True)

Expand Down