Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-24290: PostgreSQL database handles case when namespace is not provided incorrectly. #247

Merged
merged 10 commits into from
May 5, 2020
4 changes: 2 additions & 2 deletions config/datastores/s3Datastore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ datastore:
cls: lsst.daf.butler.datastores.s3Datastore.S3Datastore
root: <butlerRoot>/datastore
records:
table: s3datastorerecords
table: s3_datastore_records
create: true
templates:
# valid_first and valid_last here are YYYYMMDD; we assume we'll switch to
# MJD (DM-15890) before we need more than day resolution, since that's all
# Gen2 has.
default: "{collection:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{label:?}/{abstract_filter:?}/{subfilter:?}/{physical_filter:?}/{visit:?}/{datasetType}_{component:?}_{tract:?}_{patch:?}_{label:?}_{abstract_filter:?}_{physical_filter:?}_{calibration_label:?}_{visit:?}_{exposure:?}_{detector:?}_{instrument:?}_{skymap:?}_{skypix:?}_{run}"
default: "{run:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{label:?}/{abstract_filter:?}/{subfilter:?}/{physical_filter:?}/{visit:?}/{datasetType}_{component:?}_{tract:?}_{patch:?}_{label:?}_{abstract_filter:?}_{physical_filter:?}_{calibration_label:?}_{visit:?}_{exposure:?}_{detector:?}_{instrument:?}_{skymap:?}_{skypix:?}_{run}"
formatters: !include formatters.yaml
composites: !include ../composites.yaml
20 changes: 12 additions & 8 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
from .core.repoRelocation import BUTLER_ROOT_TAG
from .core.safeFileIo import safeMakeDir
from .core.utils import transactional, getClassOf
from .core.s3utils import bucketExists
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._butlerConfig import ButlerConfig
from .registry import Registry, RegistryConfig, CollectionType
Expand Down Expand Up @@ -260,9 +261,9 @@ def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: boo

Parameters
----------
root : `str`
Filesystem path to the root of the new repository. Will be created
if it does not exist.
root : `str` or `ButlerURI`
Path or URI to the root location of the new repository. Will be
created if it does not exist.
config : `Config` or `str`, optional
Configuration to write to the repository, after setting any
root-dependent Registry or Datastore config options. Can not
Expand Down Expand Up @@ -329,15 +330,18 @@ def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: boo

# for "file" schemes we are assuming POSIX semantics for paths, for
# schemeless URIs we are assuming os.path semantics.
uri = ButlerURI(root)
uri = ButlerURI(root, forceDirectory=True)
if uri.scheme == "file" or not uri.scheme:
if not os.path.isdir(uri.ospath):
safeMakeDir(uri.ospath)
elif uri.scheme == "s3":
s3 = boto3.resource("s3")
# implies bucket exists, if not another level of checks
bucket = s3.Bucket(uri.netloc)
bucket.put_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
# bucket must already exist
if not bucketExists(uri.netloc):
raise ValueError(f"Bucket {uri.netloc} does not exist!")
s3 = boto3.client("s3")
# don't create S3 key when root is at the top-level of an Bucket
if not uri.path == "/":
s3.put_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
else:
raise ValueError(f"Unrecognized scheme: {uri.scheme}")
config = Config(config)
Expand Down
14 changes: 9 additions & 5 deletions python/lsst/daf/butler/_butlerConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
__all__ = ("ButlerConfig",)

import os.path
import posixpath

from .core import (
ButlerURI,
Expand Down Expand Up @@ -86,9 +85,9 @@ def __init__(self, other=None, searchPaths=None):
if os.path.isdir(uri.ospath):
other = os.path.join(uri.ospath, "butler.yaml")
elif uri.scheme == "s3":
head, filename = posixpath.split(uri.path)
if "." not in filename:
uri.updateFile("butler.yaml")
if not uri.dirLike and "." not in uri.basename():
uri = ButlerURI(other, forceDirectory=True)
uri.updateFile("butler.yaml")
other = uri.geturl()
else:
raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")
Expand All @@ -102,7 +101,12 @@ def __init__(self, other=None, searchPaths=None):

configFile = butlerConfig.configFile
if configFile is not None:
self.configDir = os.path.dirname(os.path.abspath(configFile))
uri = ButlerURI(configFile)
if uri.scheme == "s3":
uri.updateFile("")
self.configDir = uri.geturl()
else:
self.configDir = os.path.dirname(os.path.abspath(configFile))

# A Butler config contains defaults defined by each of the component
# configuration classes. We ask each of them to apply defaults to
Expand Down
9 changes: 4 additions & 5 deletions python/lsst/daf/butler/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import sys
from yaml.representer import Representer
import io
import posixpath
from typing import Sequence, Optional, ClassVar

try:
Expand Down Expand Up @@ -251,6 +250,7 @@ def __initFromFile(self, path):
self.__initFromYamlFile(uri.ospath)
else:
raise RuntimeError("Unhandled config file type:%s" % uri)
self.configFile = str(path)
DinoBektesevic marked this conversation as resolved.
Show resolved Hide resolved

def __initFromS3YamlFile(self, url):
"""Load a file at a given S3 Bucket uri and attempts to load it from
Expand Down Expand Up @@ -290,7 +290,6 @@ def __initFromYamlFile(self, path):
log.debug("Opening YAML config file: %s", path)
with open(path, "r") as f:
self.__initFromYaml(f)
self.configFile = path

def __initFromYaml(self, stream):
"""Loads a YAML config from any readable stream that contains one.
Expand Down Expand Up @@ -807,9 +806,9 @@ def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml",
uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
self.dumpToFile(uri.ospath, overwrite=overwrite)
elif uri.scheme == "s3":
head, filename = posixpath.split(uri.path)
if "." not in filename:
uri.updateFile(defaultFileName)
if not uri.dirLike and "." not in uri.basename():
uri = ButlerURI(uri.geturl(), forceDirectory=True)
uri.updateFile(defaultFileName)
self.dumpToS3File(uri, overwrite=overwrite)
else:
raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")
Expand Down
130 changes: 113 additions & 17 deletions python/lsst/daf/butler/core/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,24 @@ class ButlerURI:
If `True`, scheme-less relative URI will be converted to an absolute
path using a ``file`` scheme. If `False` scheme-less URI will remain
scheme-less and will not be updated to ``file`` or absolute path.
forceDirectory: `bool`, optional
If `True` forces the URI to end with a separator, otherwise given URI
is interpreted as is.
"""

def __init__(self, uri, root=None, forceAbsolute=True):
def __init__(self, uri, root=None, forceAbsolute=True, forceDirectory=False):
if isinstance(uri, str):
parsed = urllib.parse.urlparse(uri)
elif isinstance(uri, urllib.parse.ParseResult):
parsed = copy.copy(uri)
else:
raise ValueError("Supplied URI must be either string or ParseResult")

parsed = self._fixupFileUri(parsed, root=root, forceAbsolute=forceAbsolute)
parsed, dirLike = self._fixupPathUri(parsed, root=root,
forceAbsolute=forceAbsolute,
forceDirectory=forceDirectory)

self.dirLike = dirLike
self._uri = parsed

@property
Expand Down Expand Up @@ -159,7 +166,10 @@ def relativeToPathRoot(self):
p = PurePath(self.path)
else:
p = PurePosixPath(self.path)
return str(p.relative_to(p.root))
relToRoot = str(p.relative_to(p.root))
if self.dirLike and not relToRoot.endswith("/"):
relToRoot += "/"
return relToRoot

@property
def fragment(self):
Expand All @@ -186,6 +196,55 @@ def geturl(self):
"""
return self._uri.geturl()

def split(self):
"""Splits URI into head and tail. Equivalent to os.path.split where
head preserves the URI components.

Returns
-------
head: `ButlerURI`
Everything leading up to tail, expanded and normalized as per
ButlerURI rules.
tail : `str`
Last `self.path` component. Tail will be empty if path ends on a
separator. Tail will never contain separators.
"""
if self.scheme:
head, tail = posixpath.split(self.path)
else:
head, tail = os.path.split(self.path)
headuri = self._uri._replace(path=head)
return self.__class__(headuri, forceDirectory=True), tail

def basename(self):
"""Returns the base name, last element of path, of the URI. If URI ends
on a slash returns an empty string. This is the second element returned
by split().

Equivalent of os.path.basename().

Returns
-------
tail : `str`
Last part of the path attribute. Trail will be empty if path ends
on a separator.
"""
return self.split()[1]

def dirname(self):
"""Returns a ButlerURI containing all the directories of the path
attribute.

Equivalent of os.path.dirname()

Returns
-------
head : `ButlerURI`
Everything except the tail of path attribute, expanded and
normalized as per ButlerURI rules.
"""
return self.split()[0]

def replace(self, **kwargs):
"""Replace components in a URI with new values and return a new
instance.
Expand All @@ -209,6 +268,7 @@ def updateFile(self, newfile):
Notes
-----
Updates the URI in place.
Updates the ButlerURI.dirLike attribute.
"""
if self.scheme:
# POSIX
Expand All @@ -219,14 +279,15 @@ def updateFile(self, newfile):
dir, _ = pathclass.split(self.path)
newpath = pathclass.join(dir, newfile)

self.dirLike = False
self._uri = self._uri._replace(path=newpath)

def __str__(self):
return self.geturl()

@staticmethod
def _fixupFileUri(parsed, root=None, forceAbsolute=False):
"""Fix up relative paths in file URI instances.
def _fixupPathUri(parsed, root=None, forceAbsolute=False, forceDirectory=False):
"""Fix up relative paths in URI instances.

Parameters
----------
Expand All @@ -236,17 +297,23 @@ def _fixupFileUri(parsed, root=None, forceAbsolute=False):
Path to use as root when converting relative to absolute.
If `None`, it will be the current working directory. This
is a local file system path, not a URI.
forceAbsolute : `bool`
forceAbsolute : `bool`, optional
If `True`, scheme-less relative URI will be converted to an
absolute path using a ``file`` scheme. If `False` scheme-less URI
will remain scheme-less and will not be updated to ``file`` or
absolute path. URIs with a defined scheme will not be affected
by this parameter.
forceDirectory : `bool`, optional
If `True` forces the URI to end with a separator, otherwise given
URI is interpreted as is.

Returns
-------
modified : `~urllib.parse.ParseResult`
Update result if a file URI is being handled.
Update result if a URI is being handled.
dirLike : `bool`
`True` if given parsed URI has a trailing separator or
forceDirectory is True. Otherwise `False`.

Notes
-----
Expand All @@ -255,8 +322,13 @@ def _fixupFileUri(parsed, root=None, forceAbsolute=False):
to be turned into absolute paths before they can be used. This is
always done regardless of the ``forceAbsolute`` parameter.

AWS S3 differentiates between keys with trailing POSIX separators (i.e
`/dir` and `/dir/`) whereas POSIX does not neccessarily.

Scheme-less paths are normalized.
"""
# assume we are not dealing with a directory like URI
dirLike = False
if not parsed.scheme or parsed.scheme == "file":

# Replacement values for the URI
Expand All @@ -282,40 +354,63 @@ def _fixupFileUri(parsed, root=None, forceAbsolute=False):
# No change needed for relative local path staying relative
# except normalization
replacements["path"] = os.path.normpath(expandedPath)
# normalization of empty path returns "." so we are dirLike
if expandedPath == "":
dirLike = True

# normpath strips trailing "/" which makes it hard to keep
# track of directory vs file when calling replaceFile
# put it back.
# find the appropriate separator
if "scheme" in replacements:
sep = posixpath.sep
else:
sep = os.sep

if expandedPath.endswith(os.sep) and not replacements["path"].endswith(sep):
# add the trailing separator only if explicitly required or
# if it was stripped by normpath. Acknowledge that trailing
# separator exists.
endsOnSep = expandedPath.endswith(os.sep) and not replacements["path"].endswith(sep)
if (forceDirectory or endsOnSep or dirLike):
dirLike = True
replacements["path"] += sep

elif parsed.scheme == "file":
# file URI implies POSIX path separators so split as POSIX,
# then join as os, and convert to abspath. Do not handle
# home directories since "file" scheme is explicitly documented
# to not do tilde expansion.
sep = posixpath.sep
if posixpath.isabs(parsed.path):
# No change needed
return copy.copy(parsed)
if forceDirectory:
parsed = parsed._replace(path=parsed.path+sep)
dirLike = True
return copy.copy(parsed), dirLike

replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

# normpath strips trailing "/" so put it back if necessary
if parsed.path.endswith(posixpath.sep) and not replacements["path"].endswith(posixpath.sep):
replacements["path"] += posixpath.sep

# Acknowledge that trailing separator exists.
if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
replacements["path"] += sep
dirLike = True
else:
raise RuntimeError("Unexpectedly got confused by URI scheme")

# ParseResult is a NamedTuple so _replace is standard API
parsed = parsed._replace(**replacements)

return parsed
# URI is dir-like if explicitly stated or if it ends on a separator
endsOnSep = parsed.path.endswith(posixpath.sep)
if forceDirectory or endsOnSep:
dirLike = True
# only add the separator if it's not already there
if not endsOnSep:
parsed = parsed._replace(path=parsed.path+posixpath.sep)

if dirLike is None:
raise RuntimeError("ButlerURI.dirLike attribute not set successfully.")

return parsed, dirLike


class Location:
Expand All @@ -335,7 +430,7 @@ class Location:

def __init__(self, datastoreRootUri, path):
if isinstance(datastoreRootUri, str):
datastoreRootUri = ButlerURI(datastoreRootUri)
datastoreRootUri = ButlerURI(datastoreRootUri, forceDirectory=True)
elif not isinstance(datastoreRootUri, ButlerURI):
raise ValueError("Datastore root must be a ButlerURI instance")

Expand Down Expand Up @@ -454,7 +549,8 @@ class LocationFactory:
"""

def __init__(self, datastoreRoot):
self._datastoreRootUri = ButlerURI(datastoreRoot, forceAbsolute=True)
self._datastoreRootUri = ButlerURI(datastoreRoot, forceAbsolute=True,
forceDirectory=True)

def __str__(self):
return f"{self.__class__.__name__}@{self._datastoreRootUri}"
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/s3utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def s3CheckFileExists(path, bucket=None, client=None):

Parameters
----------
path : `Location`, `ButlerURI`, `str`
path : `Location`, `ButlerURI` or `str`
Location or ButlerURI containing the bucket name and filepath.
bucket : `str`, optional
Name of the bucket in which to look. If provided, path will be assumed
Expand Down