Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-38235: Remove unused code related to schema digest calculation #806

Merged
merged 2 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/changes/DM-38235.removal.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Code that calculates schema digests was removed, registry will not store digests in the database.
Previously we saved schema digests, but we did not verify them since w_2022_22.
11 changes: 3 additions & 8 deletions python/lsst/daf/butler/registry/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,9 @@
VersionTuple,
)

# This manager is supposed to have super-stable schema that never changes
# but there may be cases when we need data migration on this table so we
# keep version for it as well.
_VERSION = VersionTuple(1, 0, 0)
# Schema version 1.0.1 signifies that we do not write schema digests. Writing
# is done by the `versions` module, but table is controlled by this manager.
_VERSION = VersionTuple(1, 0, 1)


class DefaultButlerAttributeManager(ButlerAttributeManager):
Expand Down Expand Up @@ -136,7 +135,3 @@ def empty(self) -> bool:
def currentVersion(cls) -> Optional[VersionTuple]:
# Docstring inherited from VersionedExtension.
return _VERSION

def schemaDigest(self) -> Optional[str]:
# Docstring inherited from VersionedExtension.
return self._defaultSchemaDigest([self._table], self._db.dialect)
4 changes: 0 additions & 4 deletions python/lsst/daf/butler/registry/bridge/monolithic.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,3 @@ def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
def currentVersion(cls) -> Optional[VersionTuple]:
# Docstring inherited from VersionedExtension.
return _VERSION

def schemaDigest(self) -> Optional[str]:
# Docstring inherited from VersionedExtension.
return self._defaultSchemaDigest(self._tables, self._db.dialect)
4 changes: 0 additions & 4 deletions python/lsst/daf/butler/registry/collections/nameKey.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,3 @@ def _getByName(self, name: str) -> CollectionRecord | None:
def currentVersion(cls) -> VersionTuple | None:
# Docstring inherited from VersionedExtension.
return _VERSION

def schemaDigest(self) -> str | None:
# Docstring inherited from VersionedExtension.
return self._defaultSchemaDigest(self._tables, self._db.dialect)
4 changes: 0 additions & 4 deletions python/lsst/daf/butler/registry/collections/synthIntKey.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,3 @@ def _getByName(self, name: str) -> CollectionRecord | None:
def currentVersion(cls) -> VersionTuple | None:
# Docstring inherited from VersionedExtension.
return _VERSION

def schemaDigest(self) -> str | None:
# Docstring inherited from VersionedExtension.
return self._defaultSchemaDigest(self._tables, self._db.dialect)
Original file line number Diff line number Diff line change
Expand Up @@ -457,10 +457,6 @@ def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummar
# Docstring inherited from DatasetRecordStorageManager.
return self._summaries.get(collection)

def schemaDigest(self) -> str | None:
# Docstring inherited from VersionedExtension.
return self._defaultSchemaDigest(self._static, self._db.dialect)

_version: VersionTuple
"""Schema version for this class."""

Expand Down
9 changes: 0 additions & 9 deletions python/lsst/daf/butler/registry/dimensions/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,15 +276,6 @@ def currentVersion(cls) -> VersionTuple | None:
# Docstring inherited from VersionedExtension.
return _VERSION

def schemaDigest(self) -> str | None:
# Docstring inherited from VersionedExtension.
tables: list[sqlalchemy.schema.Table] = []
for recStorage in self._records.values():
tables += recStorage.digestTables()
for overlapStorage in self._overlaps.values():
tables += overlapStorage.digestTables()
return self._defaultSchemaDigest(tables, self._db.dialect)


class _DimensionGraphStorage:
"""Helper object that manages saved DimensionGraph definitions.
Expand Down
89 changes: 1 addition & 88 deletions python/lsst/daf/butler/registry/interfaces/_versioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,8 @@
"VersionedExtension",
]

import hashlib
from abc import ABC, abstractmethod
from typing import Iterable, NamedTuple, Optional, cast

import sqlalchemy
from typing import NamedTuple, Optional


class VersionTuple(NamedTuple):
Expand Down Expand Up @@ -111,87 +108,3 @@ def extensionName(cls) -> str:
Full extension name.
"""
return f"{cls.__module__}.{cls.__name__}"

@abstractmethod
def schemaDigest(self) -> Optional[str]:
"""Return digest for schema piece managed by this extension.

Returns
-------
digest : `str` or `None`
String representation of the digest of the schema, ``None`` should
be returned if schema digest is not to be saved or checked. The
length of the returned string cannot exceed the length of the
"value" column of butler attributes table, currently 65535
characters.

Notes
-----
There is no exact definition of digest format, any string should work.
The only requirement for string contents is that it has to remain
stable over time if schema does not change but it should produce
different string for any change in the schema. In many cases default
implementation in `_defaultSchemaDigest` can be used as a reasonable
choice.
"""
raise NotImplementedError()

def _defaultSchemaDigest(
self, tables: Iterable[sqlalchemy.schema.Table], dialect: sqlalchemy.engine.Dialect
) -> str:
"""Calculate digest for a schema based on list of tables schemas.

Parameters
----------
tables : iterable [`sqlalchemy.schema.Table`]
Set of tables comprising the schema.
dialect : `sqlalchemy.engine.Dialect`, optional
Dialect used to stringify types; needed to support dialect-specific
types.

Returns
-------
digest : `str`
String representation of the digest of the schema.

Notes
-----
It is not specified what kind of implementation is used to calculate
digest string. The only requirement for that is that result should be
stable over time as this digest string will be stored in the database.
It should detect (by producing different digests) sensible changes to
the schema, but it also should be stable w.r.t. changes that do
not actually change the schema (e.g. change in the order of columns or
keys.) Current implementation is likely incomplete in that it does not
detect all possible changes (e.g. some constraints may not be included
into total digest).
"""

def tableSchemaRepr(table: sqlalchemy.schema.Table) -> str:
"""Make string representation of a single table schema."""
tableSchemaRepr = [table.name]
schemaReps = []
for column in table.columns:
columnRep = f"COL,{column.name},{column.type.compile(dialect=dialect)}"
if column.primary_key:
columnRep += ",PK"
if column.nullable:
columnRep += ",NULL"
schemaReps += [columnRep]
for fkConstr in table.foreign_key_constraints:
# for foreign key we include only one side of relations into
# digest, other side could be managed by different extension
fkReps = ["FK", cast(str, fkConstr.name)] + [fk.column.name for fk in fkConstr.elements]
fkRep = ",".join(fkReps)
schemaReps += [fkRep]
# sort everything to keep it stable
schemaReps.sort()
tableSchemaRepr += schemaReps
return ";".join(tableSchemaRepr)

md5 = hashlib.md5()
tableSchemas = sorted(tableSchemaRepr(table) for table in tables)
for tableRepr in tableSchemas:
md5.update(tableRepr.encode())
digest = md5.hexdigest()
return digest
4 changes: 0 additions & 4 deletions python/lsst/daf/butler/registry/obscore/_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,6 @@ def currentVersion(cls) -> VersionTuple | None:
# Docstring inherited from base class.
return _VERSION

def schemaDigest(self) -> str | None:
# Docstring inherited from base class.
return None

def add_datasets(self, refs: Iterable[DatasetRef], context: SqlQueryContext) -> int:
# Docstring inherited from base class.

Expand Down
4 changes: 0 additions & 4 deletions python/lsst/daf/butler/registry/opaque.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,3 @@ def register(self, name: str, spec: TableSpec) -> OpaqueTableStorage:
def currentVersion(cls) -> Optional[VersionTuple]:
# Docstring inherited from VersionedExtension.
return _VERSION

def schemaDigest(self) -> Optional[str]:
# Docstring inherited from VersionedExtension.
return self._defaultSchemaDigest([self._metaTable], self._db.dialect)
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,8 +1316,8 @@ def testAbstractQuery(self):
def testAttributeManager(self):
"""Test basic functionality of attribute manager."""
# number of attributes with schema versions in a fresh database,
# 6 managers with 3 records per manager, plus config for dimensions
VERSION_COUNT = 6 * 3 + 1
# 6 managers with 2 records per manager, plus config for dimensions
VERSION_COUNT = 6 * 2 + 1

registry = self.makeRegistry()
attributes = registry._managers.attributes
Expand Down
70 changes: 7 additions & 63 deletions python/lsst/daf/butler/registry/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
"MissingVersionError",
"MissingManagerError",
"ManagerMismatchError",
"DigestMismatchError",
]

import logging
from typing import TYPE_CHECKING, Any, Mapping, MutableMapping, Optional
from collections.abc import Mapping, MutableMapping
from typing import TYPE_CHECKING, Any

from .interfaces import VersionedExtension, VersionTuple

Expand Down Expand Up @@ -72,37 +72,6 @@ class ManagerMismatchError(RuntimeError):
pass


class DigestMismatchError(RuntimeError):
"""Exception raised when schema digest is not equal to stored digest."""

pass


class VersionInfo:
"""Representation of version information as defined by configuration.

Parameters
----------
version : `VersionTuple`
Version number in parsed format.
digest : `str`, optional
Optional digest of the corresponding part of the schema definition.

Notes
-----
Schema digest is supposed to help with detecting unintentional schema
changes in the code without upgrading schema version. Digest is
constructed whom the set of table definitions and is compared to a digest
defined in configuration, if two digests differ it means schema was
changed. Intentional schema updates will need to update both configured
schema version and schema digest.
"""

def __init__(self, version: VersionTuple, digest: Optional[str] = None):
self.version = version
self.digest = digest


class ButlerVersionsManager:
"""Utility class to manage and verify schema version compatibility.

Expand All @@ -125,7 +94,7 @@ def __init__(self, attributes: ButlerAttributeManager, managers: Mapping[str, An
elif manager is not None:
# All regular managers need to support versioning mechanism.
_LOG.warning("extension %r does not implement VersionedExtension", name)
self._emptyFlag: Optional[bool] = None
self._emptyFlag: bool | None = None

@classmethod
def _managerConfigKey(cls, name: str) -> str:
Expand Down Expand Up @@ -159,22 +128,6 @@ def _managerVersionKey(cls, extension: VersionedExtension) -> str:
"""
return "version:" + extension.extensionName()

@classmethod
def _managerDigestKey(cls, extension: VersionedExtension) -> str:
"""Return key used to store manager schema digest.

Parameters
----------
extension : `VersionedExtension`
Instance of the extension.

Returns
-------
key : `str`
Name of the key in attributes table.
"""
return "schema_digest:" + extension.extensionName()

@staticmethod
def checkCompatibility(old_version: VersionTuple, new_version: VersionTuple, update: bool) -> bool:
"""Compare two versions for compatibility.
Expand Down Expand Up @@ -213,14 +166,11 @@ def storeManagersConfig(self) -> None:
self._emptyFlag = False

def storeManagersVersions(self) -> None:
"""Store current manager versions in registry arttributes.

For each extension we store two records:
"""Store current manager versions in registry attributes.

- record with the key "version:{fullExtensionName}" and version
number in its string format as a value,
- record with the key "schema_digest:{fullExtensionName}" and
schema digest as a value.
For each extension we store single record with a key
"version:{fullExtensionName}" and version number in its string format
as a value.
"""
for extension in self._managers.values():
version = extension.currentVersion()
Expand All @@ -230,12 +180,6 @@ def storeManagersVersions(self) -> None:
self._attributes.set(key, value)
_LOG.debug("saved manager version %s=%s", key, value)

digest = extension.schemaDigest()
if digest is not None:
key = self._managerDigestKey(extension)
self._attributes.set(key, digest)
_LOG.debug("saved manager schema digest %s=%s", key, digest)

self._emptyFlag = False

@property
Expand Down
8 changes: 0 additions & 8 deletions python/lsst/daf/butler/tests/_dummyRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,6 @@ def currentVersion(cls) -> VersionTuple | None:
# Docstring inherited from VersionedExtension.
return None

def schemaDigest(self) -> str | None:
# Docstring inherited from VersionedExtension.
return None


class DummyDatastoreRegistryBridgeManager(DatastoreRegistryBridgeManager):
def __init__(
Expand Down Expand Up @@ -168,10 +164,6 @@ def currentVersion(cls) -> VersionTuple | None:
# Docstring inherited from VersionedExtension.
return None

def schemaDigest(self) -> str | None:
# Docstring inherited from VersionedExtension.
return None


class DummyRegistry:
"""Dummy Registry, for Datastore test purposes."""
Expand Down