Skip to content

Commit

Permalink
Prefetch dimension record cache in Butler factory
Browse files Browse the repository at this point in the history
In DM-42324, the dimension record cache was changed so that it is no longer shared between Butler instances.  This cache is somewhat expensive to fetch and is used for almost every Butler operation.

LabeledButlerFactory now downloads this cache into its 'template' DirectButler instances when they are first created, so it is available for copying into clones.
  • Loading branch information
dhirving committed Jan 11, 2024
1 parent 7fde13c commit a99fc00
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 3 deletions.
9 changes: 9 additions & 0 deletions python/lsst/daf/butler/_labeled_butler_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,16 @@ def _get_config_uri(self, label: str) -> ResourcePathExpression:


def _create_direct_butler_factory(config: ButlerConfig) -> _FactoryFunction:
import lsst.daf.butler.direct_butler

# Create a 'template' Butler that will be cloned when callers request an
# instance.
butler = Butler.from_config(config)
assert isinstance(butler, lsst.daf.butler.direct_butler.DirectButler)

# Load caches so that data is available in cloned instances without
# needing to refetch it from the database for every instance.
butler._preload_cache()

def create_butler(access_token: str | None) -> Butler:
# Access token is ignored because DirectButler does not use Gafaelfawr
Expand Down
11 changes: 8 additions & 3 deletions python/lsst/daf/butler/dimensions/record_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ def load_from(self, other: DimensionRecordCache) -> None:
"""
self._records = copy.deepcopy(other._records)

def preload_cache(self) -> None:
    """Fetch the cache from the DB if it has not already been fetched.

    The fetch callback is only invoked while no records are loaded, so
    repeated calls after a successful load do nothing.

    Raises
    ------
    AssertionError
        Raised if the keys of the fetched mapping do not match the keys
        this cache was configured with.  In that case nothing is stored.
    """
    if self._records is not None:
        return
    # Validate the fetched mapping before storing it, so that a failed
    # check cannot leave an inconsistent cache behind for later lookups.
    records = self._fetch()
    assert records.keys() == set(self._keys), "Logic bug in fetch callback."
    self._records = records

def __contains__(self, key: object) -> bool:
if not isinstance(key, str):
return False
Expand All @@ -145,9 +151,8 @@ def __contains__(self, key: object) -> bool:
return False

def __getitem__(self, element: str) -> DimensionRecordSet:
    """Return the cached records stored under ``element``.

    The cache is populated via `preload_cache` before the lookup, so the
    first access may trigger a fetch.
    """
    self.preload_cache()
    records = self._records
    # preload_cache is expected to have populated the records by now; the
    # assertion also narrows the optional type for static checkers.
    assert records is not None
    return records[element]

def __iter__(self) -> Iterator[str]:
Expand Down
4 changes: 4 additions & 0 deletions python/lsst/daf/butler/direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2196,6 +2196,10 @@ def _query_dimension_records(
raise EmptyQueryResultError(list(result.explain_no_results()))
return data_ids

def _preload_cache(self) -> None:
    """Eagerly load the caches that common operations rely on.

    This delegates to the ``preload_cache`` method of this butler's
    registry.
    """
    registry = self._registry
    registry.preload_cache()

_config: ButlerConfig
"""Configuration for this Butler instance."""

Expand Down
4 changes: 4 additions & 0 deletions python/lsst/daf/butler/registry/sql_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2581,6 +2581,10 @@ def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> N
pass
self._datastore_record_classes = datastore_record_classes

def preload_cache(self) -> None:
    """Eagerly load the caches that common operations rely on.

    This delegates to the ``preload_cache`` method of the registry's
    dimension record cache.
    """
    record_cache = self.dimension_record_cache
    record_cache.preload_cache()

@property
def obsCoreTableManager(self) -> ObsCoreTableManager | None:
"""The ObsCore manager instance for this registry
Expand Down

0 comments on commit a99fc00

Please sign in to comment.