Skip to content

Commit

Permalink
Change move_to_cache to also check should_be_cached
Browse files Browse the repository at this point in the history
This simplifies the usage in file datastore.
  • Loading branch information
timj committed Mar 26, 2021
1 parent 59d2e89 commit d9fc41e
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ datastore:
# Instantiating a Butler from another Butler could propagate temporary
# location. This can be left out or null used
# to indicate a temporary directory.
root: null
root: /Volumes/MediaHD/Users/timj/work/lsstsw/build/daf_butler_test/cache
# Use a dict over list to simplify merging logic.
# Later could change it to be the timescale for caching:
# - Quantum (clear at end of quantum)
Expand Down
27 changes: 13 additions & 14 deletions python/lsst/daf/butler/core/datastoreCacheManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,12 @@ def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass])
raise NotImplementedError()

@abstractmethod
def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> ButlerURI:
def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
"""Move a file to the cache.
Move the given file into the cache, using the supplied DatasetRef
for naming.
for naming. A call is made to `should_be_cached()` and if the
DatasetRef should not be accepted `None` will be returned.
Parameters
----------
Expand All @@ -115,8 +116,9 @@ def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> ButlerURI:
Returns
-------
new : `ButlerURI`
URI to the file within the cache.
new : `ButlerURI` or `None`
URI to the file within the cache, or `None` if the dataset
was not accepted by the cache.
"""
raise NotImplementedError()

Expand Down Expand Up @@ -201,11 +203,14 @@ def _construct_cache_name(self, ref: DatasetRef, extension: str) -> ButlerURI:
"""
return self.cache_directory.join(f"{ref.id}{extension}")

def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> ButlerURI:
def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
# Docstring inherited
if ref.id is None:
raise ValueError(f"Can not cache a file associated with an unresolved reference ({ref})")

if not self.should_be_cached(ref):
return None

# Write the file using the id of the dataset ref and the file
# extension.
cached_location = self._construct_cache_name(ref, uri.getExtension())
Expand Down Expand Up @@ -250,15 +255,9 @@ def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass])
"""
return False

def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> ButlerURI:
"""Move a file to the cache.
Always raises to indicate that there has been a logic error if
this is called without first checking that it should be cached.
"""
# If this is a no-op what URI would it return?
raise RuntimeError("Caching is disabled. Please use should_be_cached() method prior to"
" trying to cache a dataset")
def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
"""Move dataset to cache but always refuse and returns `None`."""
return None

def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
"""Look for a dataset in the cache and return its location.
Expand Down
8 changes: 4 additions & 4 deletions python/lsst/daf/butler/datastores/fileDatastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,8 +1009,7 @@ def _removeFileExists(uri: ButlerURI) -> None:
uri.transfer_from(tmpLocation.uri, transfer="copy", overwrite=True)

# Cache if required
if self.cacheManager.should_be_cached(ref):
self.cacheManager.move_to_cache(tmpLocation.uri, ref)
self.cacheManager.move_to_cache(tmpLocation.uri, ref)

log.debug("Successfully wrote dataset to %s via a temporary file.", uri)

Expand Down Expand Up @@ -1096,8 +1095,9 @@ def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
location_updated = True

# Cache the downloaded file if needed.
if self.cacheManager.should_be_cached(ref):
local_uri = self.cacheManager.move_to_cache(local_uri, ref)
cached_uri = self.cacheManager.move_to_cache(local_uri, ref)
if cached_uri is not None:
local_uri = cached_uri
cache_msg = " and cached"

msg = f"(via download to local file{cache_msg})"
Expand Down

0 comments on commit d9fc41e

Please sign in to comment.