Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM 21146: Add support for PostgreSQL Registry #189

Merged
merged 13 commits into from
Aug 31, 2019
Merged
6 changes: 5 additions & 1 deletion config/registry.yaml
@@ -1,6 +1,10 @@
# Default with a sqlLite registry
registry:
cls: lsst.daf.butler.registries.sqliteRegistry.SqliteRegistry
clsMap:
sql: lsst.daf.butler.registries.sqlRegistry.SqlRegistry
sqlite: lsst.daf.butler.registries.sqliteRegistry.SqliteRegistry
postgresql: lsst.daf.butler.registries.postgresqlRegistry.PostgreSqlRegistry
oracle: lsst.daf.butler.registries.oracleRegistry.OracleRegistry
db: 'sqlite:///:memory:'
limited: false
skypix:
Expand Down
12 changes: 11 additions & 1 deletion python/lsst/daf/butler/butler.py
Expand Up @@ -42,6 +42,7 @@
from .core import deferredDatasetHandle as dDH
from .core.datastore import Datastore
from .core.registry import Registry
from .core.registryConfig import RegistryConfig
from .core.run import Run
from .core.storageClass import StorageClassFactory
from .core.config import Config, ConfigSubset
Expand Down Expand Up @@ -205,8 +206,17 @@ def makeRepo(root, config=None, standalone=False, createRegistry=True, searchPat
full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
datastoreClass = doImport(full["datastore", "cls"])
datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
registryClass = doImport(full["registry", "cls"])

# if "cls" or "db" keys exist in given config, parse them, otherwise
# parse the defaults in the expanded config
if any((config.get(("registry", "cls")), config.get(("registry", "db")))):
registryConfig = RegistryConfig(config)
registryClass = registryConfig.getRegistryClass()
else:
registryConfig = RegistryConfig(full)
registryClass = registryConfig.getRegistryClass()
registryClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)

if standalone:
config.merge(full)
config.dumpToUri(uri)
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/butlerConfig.py
Expand Up @@ -32,7 +32,7 @@
from .config import Config
from .datastore import DatastoreConfig
from .schema import SchemaConfig
from .registry import RegistryConfig
from .registryConfig import RegistryConfig
from .storageClass import StorageClassConfig
from .dimensions import DimensionConfig
from .composites import CompositesConfig
Expand Down
91 changes: 91 additions & 0 deletions python/lsst/daf/butler/core/connectionString.py
@@ -0,0 +1,91 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("DB_AUTH_ENVVAR", "DB_AUTH_PATH", "ConnectionStringFactory")

from sqlalchemy.engine import url
from lsst.daf.butler.core.dbAuth import DbAuth

DB_AUTH_ENVVAR = "LSST_DB_AUTH"
"""Default name of the environmental variable that will be used to locate DB
credentials configuration file. """

DB_AUTH_PATH = "~/.lsst/db-auth.yaml"
"""Default path at which it is expected that DB credentials are found."""


class ConnectionStringFactory:
"""Factory for `sqlalchemy.engine.url.URL` instances.

The factory constructs a connection string URL object by parsing the
connection string, the 'db' key in the registry configuration.
Username, password, host, port or database can be specified as keys in the
config explicitly. If username or password are missing a matching DB is
found in the credentials file pointed to by `DB_AUTH_ENVVAR` or
`DB_AUTH_PATH` values.
"""

keys = ('username', 'password', 'host', 'port', 'database')

@classmethod
def fromConfig(cls, registryConfig):
"""Parses the 'db' key in the config, and if they exist username,
password, host, port and database keys, and returns an connection
string object.

If no username and password are found in the connection string, or in
the config, they are retrieved from a file at `DB_AUTH_PATH` or
`DB_AUTH_ENVVAR`. Sqlite dialect does not require a password.

Parameters
----------
config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
Registry configuration

Returns
-------
connectionString : `sqlalchemy.engine.url.URL`
URL object representing the connection string.

Raises
------
DBAuthError
If the credentials file has incorrect permissions, doesn't exist at
the given location or is formatted incorrectly.
"""
# this import can not live on the top because of circular import issue
from lsst.daf.butler.core.registryConfig import RegistryConfig
regConf = RegistryConfig(registryConfig)
conStr = url.make_url(regConf['db'])

for key in cls.keys:
if getattr(conStr, key) is None:
setattr(conStr, key, regConf.get(key))

# sqlite with users and passwords not supported
if None in (conStr.username, conStr.password) and "sqlite" not in conStr.drivername:
dbAuth = DbAuth(DB_AUTH_PATH, DB_AUTH_ENVVAR)
auth = dbAuth.getAuth(conStr.drivername, conStr.username, conStr.host,
conStr.port, conStr.database)
conStr.username = auth[0]
conStr.password = auth[1]

return conStr
19 changes: 8 additions & 11 deletions python/lsst/daf/butler/core/registry.py
Expand Up @@ -19,20 +19,21 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RegistryConfig", "Registry", "disableWhenLimited",
"AmbiguousDatasetError", "ConflictingDefinitionError", "OrphanedRecordError")
__all__ = ("Registry", "disableWhenLimited", "AmbiguousDatasetError",
"ConflictingDefinitionError", "OrphanedRecordError")

from abc import ABCMeta, abstractmethod
from collections.abc import Mapping
import contextlib
import functools

from lsst.utils import doImport
from .config import Config, ConfigSubset
from .config import Config
from .dimensions import DimensionConfig, DimensionUniverse, DataId, DimensionKeyDict
from .schema import SchemaConfig
from .utils import transactional
from .dataIdPacker import DataIdPackerFactory
from .registryConfig import RegistryConfig


class AmbiguousDatasetError(Exception):
Expand Down Expand Up @@ -70,12 +71,6 @@ def inner(self, *args, **kwargs):
return inner


class RegistryConfig(ConfigSubset):
component = "registry"
requiredKeys = ("cls",)
defaultConfigFile = "registry.yaml"


class Registry(metaclass=ABCMeta):
"""Registry interface.

Expand Down Expand Up @@ -182,8 +177,10 @@ def fromConfig(registryConfig, schemaConfig=None, dimensionConfig=None, create=F
else:
raise ValueError("Incompatible Registry configuration: {}".format(registryConfig))

cls = doImport(registryConfig["cls"])
return cls(registryConfig, schemaConfig, dimensionConfig, create=create, butlerRoot=butlerRoot)
cls = registryConfig.getRegistryClass()

return cls(registryConfig, schemaConfig, dimensionConfig, create=create,
butlerRoot=butlerRoot)

def __init__(self, registryConfig, schemaConfig=None, dimensionConfig=None, create=False,
butlerRoot=None):
Expand Down
72 changes: 72 additions & 0 deletions python/lsst/daf/butler/core/registryConfig.py
@@ -0,0 +1,72 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RegistryConfig",)

from .connectionString import ConnectionStringFactory
from .config import ConfigSubset
from lsst.utils import doImport


class RegistryConfig(ConfigSubset):
component = "registry"
requiredKeys = ("db",)
defaultConfigFile = "registry.yaml"

def getDialect(self):
"""Parses the `db` key of the config and returns the database dialect.

Returns
-------
dialect : `str`
Dialect found in the connection string.
"""
conStr = ConnectionStringFactory.fromConfig(self)
return conStr.get_backend_name()

def getRegistryClass(self):
"""Returns registry class targeted by configuration values.

The appropriate class is determined from the `cls` key, if it exists.
Otherwise the `db` key is parsed and the correct class is determined
from a list of aliases found under `clsMap` key of the registry config.

Returns
-------
registry : `type`
Class of type `Registry` targeted by the registry configuration.
"""
if self.get("cls") is not None:
registryClass = self.get("cls")
else:
dialect = self.getDialect()
if dialect not in self["clsMap"]:
raise ValueError(f"Connection string dialect has no known aliases. Received: {dialect}")
registryClass = self.get(("clsMap", dialect))

return doImport(registryClass)

@property
def connectionString(self):
"""Return the connection string to the underlying database
(`sqlalchemy.engine.url.URL`).
"""
return ConnectionStringFactory.fromConfig(self)
8 changes: 8 additions & 0 deletions python/lsst/daf/butler/core/views.py
Expand Up @@ -68,6 +68,14 @@ def compileCreateView(element, compiler, **kw):
compiler.sql_compiler.process(element.selectable).replace(";\n FROM DUAL", ""))


# Ignoring PEP8 redefinition of function, as this is the sqlalchemy
# recommended procedure for dealing with multiple dialects
@compiler.compiles(CreateView, 'postgresql') # noqa: F811
def compileCreateView(element, compiler, **kw):
return "CREATE OR REPLACE VIEW %s AS %s" % (element.name,
compiler.sql_compiler.process(element.selectable))


@compiler.compiles(DropView)
def compileDropView(element, compiler, **kw):
return "DROP VIEW %s" % (element.name)
Expand Down
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/registries/oracleRegistry.py
Expand Up @@ -25,7 +25,7 @@


from lsst.daf.butler.core.config import Config
from lsst.daf.butler.core.registry import RegistryConfig
from lsst.daf.butler.core.registryConfig import RegistryConfig

from .sqlRegistry import SqlRegistry, SqlRegistryConfig

Expand Down Expand Up @@ -69,7 +69,7 @@ def setConfigRoot(cls, root, config, full, overwrite=True):
"""
super().setConfigRoot(root, config, full, overwrite=overwrite)
Config.updateParameters(RegistryConfig, config, full,
toCopy=("cls",), overwrite=overwrite)
toCopy=("db",), overwrite=overwrite)

def __init__(self, registryConfig, schemaConfig, dimensionConfig, create=False,
butlerRoot=None):
Expand All @@ -79,4 +79,4 @@ def __init__(self, registryConfig, schemaConfig, dimensionConfig, create=False,
butlerRoot=butlerRoot)

def _createEngine(self):
return create_engine(self.config["db"], pool_size=1)
return create_engine(self.config.connectionString, pool_size=1)
81 changes: 81 additions & 0 deletions python/lsst/daf/butler/registries/postgresqlRegistry.py
@@ -0,0 +1,81 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("PostgreSqlRegistry", )

from sqlalchemy import create_engine

from lsst.daf.butler.core.config import Config
from lsst.daf.butler.core.registryConfig import RegistryConfig

from .sqlRegistry import SqlRegistry, SqlRegistryConfig


class PostgreSqlRegistry(SqlRegistry):
"""Registry backed by an PostgreSQL Amazon RDS service.

Parameters
----------
config : `SqlRegistryConfig` or `str`
Load configuration
"""

@classmethod
def setConfigRoot(cls, root, config, full, overwrite=True):
"""Set any filesystem-dependent config options for this Registry to
be appropriate for a new empty repository with the given root.

Parameters
----------
root : `str`
Filesystem path to the root of the data repository.
config : `Config`
A Butler-level config object to update (but not a
`ButlerConfig`, to avoid included expanded defaults).
full : `ButlerConfig`
A complete Butler config with all defaults expanded;
repository-specific options that should not be obtained
from defaults when Butler instances are constructed
should be copied from ``full`` to ``config``.
overwrite : `bool`, optional
If `False`, do not modify a value in ``config`` if the value
already exists. Default is always to overwrite with the provided
``root``.

Notes
-----
If a keyword is explicitly defined in the supplied ``config`` it
will not be overridden by this method if ``overwrite`` is `False`.
This allows explicit values set in external configs to be retained.
"""
super().setConfigRoot(root, config, full, overwrite=overwrite)
Config.updateParameters(RegistryConfig, config, full,
overwrite=overwrite)

def __init__(self, registryConfig, schemaConfig, dimensionConfig, create=False,
butlerRoot=None):
registryConfig = SqlRegistryConfig(registryConfig)
self.schemaConfig = schemaConfig
super().__init__(registryConfig, schemaConfig, dimensionConfig, create,
butlerRoot=butlerRoot)

def _createEngine(self):
return create_engine(self.config.connectionString, pool_size=1)