Skip to content

Commit

Permalink
Add special context manager to enable caching in registry.
Browse files Browse the repository at this point in the history
Adds special cache classes for collection and summary records and
an additiona structure that holds caches. New registry method is
a context manager that enables caches temporarily for the duration
of that context.
  • Loading branch information
andy-slac committed Nov 13, 2023
1 parent 5833c91 commit 14721a2
Show file tree
Hide file tree
Showing 14 changed files with 434 additions and 106 deletions.
6 changes: 6 additions & 0 deletions python/lsst/daf/butler/_registry_shim.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ def refresh(self) -> None:
# Docstring inherited from a base class.
self._registry.refresh()

@contextlib.contextmanager
def caching_context(self) -> Iterator[None]:
# Docstring inherited from a base class.
with self._registry.caching_context():
yield

@contextlib.contextmanager
def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
# Docstring inherited from a base class.
Expand Down
64 changes: 64 additions & 0 deletions python/lsst/daf/butler/registry/_caching_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["CachingContext"]

from ._collection_record_cache import CollectionRecordCache
from ._collection_summary_cache import CollectionSummaryCache


class CachingContext:
"""Collection of caches for various types of records retrieved from
database.
Notes
-----
Caching is usually disabled for most of the record types, but it can be
explicitly and temporarily enabled in some context (e.g. quantum graph
building) using Registry method. This class is a collection of cache
instances which will be `None` when caching is disabled. Instance of this
class is passed to the relevant managers that can use it to query or
populate caches when caching is enabled.
"""

collection_records: CollectionRecordCache | None = None
"""Cache for collection records (`CollectionRecordCache`)."""

collection_summaries: CollectionSummaryCache | None = None
"""Cache for collection summary records (`CollectionSummaryCache`)."""

def enable(self) -> None:
"""Enable caches, initializes all caches."""
self.collection_records = CollectionRecordCache()
self.collection_summaries = CollectionSummaryCache()

def disable(self) -> None:
"""Disable caches, sets all caches to `None`."""
self.collection_records = None
self.collection_summaries = None
165 changes: 165 additions & 0 deletions python/lsst/daf/butler/registry/_collection_record_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("CollectionRecordCache",)

from collections.abc import Iterable, Iterator
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from .interfaces import CollectionRecord


class CollectionRecordCache:
"""Cache for collection records.
Notes
-----
This class stores collection records and can retrieve them using either
collection name or collection key. One complication is that key type can be
either collection name or a distinct integer value. To optimize storage
when the key is the same as collection name, this class only stores key to
record mapping when key is of a non-string type.
In come contexts (e.g. ``resolve_wildcard``) a full list of collections is
needed. To signify that cache content can be used in such contexts, cache
defines special ``full`` flag that needs to be set by client.
"""

def __init__(self) -> None:
self._by_name: dict[str, CollectionRecord] = {}
# This dict is only used for records whose key type is not str.
self._by_key: dict[Any, CollectionRecord] = {}
self._full = False

@property
def full(self) -> bool:
"""`True` if cache holds all known collection records (`bool`)."""
return self._full

def add(self, record: CollectionRecord) -> None:
"""Add one record to the cache.
Parameters
----------
record : `CollectionRecord`
Collection record, replaces any existing record with the same name
or key.
"""
# In case we replace same record name with different key, find the
# existing record and drop its key.
if (old_record := self._by_name.get(record.name)) is not None:
self._by_key.pop(old_record.key)
if (old_record := self._by_key.get(record.key)) is not None:
self._by_name.pop(old_record.name)
self._by_name[record.name] = record
if not isinstance(record.key, str):
self._by_key[record.key] = record

def set(self, records: Iterable[CollectionRecord], *, full: bool = False) -> None:
"""Replace cache contents with the new set of records.
Parameters
----------
records : `~collections.abc.Iterable` [`CollectionRecord`]
Collection records.
full : `bool`
If `True` then ``records`` contain all known collection records.
"""
self.clear()
for record in records:
self._by_name[record.name] = record
if not isinstance(record.key, str):
self._by_key[record.key] = record
self._full = full

def clear(self) -> None:
"""Remove all records from the cache."""
self._by_name = {}
self._by_key = {}
self._full = False

def discard(self, record: CollectionRecord) -> None:
"""Remove single record from the cache.
Parameters
----------
record : `CollectionRecord`
Collection record to remove.
"""
self._by_name.pop(record.name, None)
if not isinstance(record.key, str):
self._by_key.pop(record.key, None)

def get_by_name(self, name: str) -> CollectionRecord | None:
"""Return collection record given its name.
Parameters
----------
name : `str`
Collection name.
Returns
-------
record : `CollectionRecord` or `None`
Collection record, `None` is returned if the name is not in the
cache.
"""
return self._by_name.get(name)

def get_by_key(self, key: Any) -> CollectionRecord | None:
"""Return collection record given its key.
Parameters
----------
key : `Any`
Collection key.
Returns
-------
record : `CollectionRecord` or `None`
Collection record, `None` is returned if the key is not in the
cache.
"""
if isinstance(key, str):
return self._by_name.get(key)
return self._by_key.get(key)

def records(self) -> Iterator[CollectionRecord]:
"""Return iterator for the set of records in the cache, can only be
used if `full` is true.
Raises
------
RuntimeError
Raised if ``self.full`` is `False`.
"""
if not self._full:
raise RuntimeError("cannot call records() if cache is not full")
return iter(self._by_name.values())
86 changes: 86 additions & 0 deletions python/lsst/daf/butler/registry/_collection_summary_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("CollectionSummaryCache",)

from collections.abc import Iterable, Mapping
from typing import Any

from ._collection_summary import CollectionSummary


class CollectionSummaryCache:
"""Cache for collection summaries.
Notes
-----
This class stores `CollectionSummary` records indexed by collection keys.
For cache to be usable the records that are given to `update` method have
to include all dataset types, i.e. the query that produces records should
not be constrained by dataset type.
"""

def __init__(self) -> None:
self._cache: dict[Any, CollectionSummary] = {}

def update(self, summaries: Mapping[Any, CollectionSummary]) -> None:
"""Add records to the cache.
Parameters
----------
summaries : `~collections.abc.Mapping` [`Any`, `CollectionSummary`]
Summary records indexed by collection key, records must include all
dataset types.
"""
self._cache.update(summaries)

def find_summaries(self, keys: Iterable[Any]) -> tuple[dict[Any, CollectionSummary], set[Any]]:
"""Return summary records given a set of keys.
Parameters
----------
keys : `~collections.abc.Iterable` [`Any`]
Sequence of collection keys.
Returns
-------
summaries : `dict` [`Any`, `CollectionSummary`]
Dictionary of summaries indexed by collection keys, includes
records found in the cache.
missing_keys : `set` [`Any`]
Collection keys that are not present in the cache.
"""
found = {}
not_found = set()
for key in keys:
if (summary := self._cache.get(key)) is not None:
found[key] = summary
else:
not_found.add(key)
return found, not_found
6 changes: 6 additions & 0 deletions python/lsst/daf/butler/registry/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ def refresh(self) -> None:
"""
raise NotImplementedError()

@contextlib.contextmanager
@abstractmethod
def caching_context(self) -> Iterator[None]:
"""Context manager that enables caching."""
raise NotImplementedError()

@contextlib.contextmanager
@abstractmethod
def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
Expand Down

0 comments on commit 14721a2

Please sign in to comment.