DM-43499: Add timespan to Butler.get #982

Merged · 10 commits · Mar 27, 2024
11 changes: 6 additions & 5 deletions .github/workflows/build.yaml
@@ -16,12 +16,12 @@ jobs:
        python-version: ["3.11", "3.12"]

    steps:
-     - uses: actions/checkout@v3
+     - uses: actions/checkout@v4
        with:
          # Need to clone everything for the git tags.
          fetch-depth: 0

-     - uses: conda-incubator/setup-miniconda@v2
+     - uses: conda-incubator/setup-miniconda@v3
        with:
          python-version: ${{ matrix.python-version }}
          channels: conda-forge,defaults
@@ -92,9 +92,10 @@ jobs:
        run: |
          pytest -r a -v -n 3 --open-files --cov=lsst.daf.butler --cov=tests --cov-report=xml --cov-report=term --cov-branch
      - name: Upload coverage to codecov
-       uses: codecov/codecov-action@v3
+       uses: codecov/codecov-action@v4
        with:
          files: ./coverage.xml
+         token: ${{ secrets.CODECOV_TOKEN }}

  pypi:
    runs-on: ubuntu-latest
@@ -104,13 +105,13 @@ jobs:
    if: startsWith(github.ref, 'refs/tags') && ! startsWith(github.ref, 'refs/tags/s')

    steps:
-     - uses: actions/checkout@v3
+     - uses: actions/checkout@v4
        with:
          # Need to clone everything to embed the version.
          fetch-depth: 0

      - name: Set up Python
-       uses: actions/setup-python@v4
+       uses: actions/setup-python@v5
        with:
          python-version: "3.11"
4 changes: 2 additions & 2 deletions .github/workflows/build_docs.yaml
@@ -10,13 +10,13 @@ jobs:
  build_sphinx_docs:
    runs-on: ubuntu-latest
    steps:
-     - uses: actions/checkout@v3
+     - uses: actions/checkout@v4
        with:
          # Need to clone everything for the git tags.
          fetch-depth: 0

      - name: Set up Python
-       uses: actions/setup-python@v4
+       uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
8 changes: 4 additions & 4 deletions .github/workflows/codeql-analysis.yml
@@ -42,11 +42,11 @@ jobs:

    steps:
      - name: Checkout repository
-       uses: actions/checkout@v2
+       uses: actions/checkout@v4

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
-       uses: github/codeql-action/init@v2
+       uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
@@ -57,7 +57,7 @@
      # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
-       uses: github/codeql-action/autobuild@v2
+       uses: github/codeql-action/autobuild@v3

      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 https://git.io/JvXDl
@@ -71,4 +71,4 @@
      # make release

      - name: Perform CodeQL Analysis
-       uses: github/codeql-action/analyze@v2
+       uses: github/codeql-action/analyze@v3
2 changes: 1 addition & 1 deletion .github/workflows/do_not_merge.yaml
@@ -26,7 +26,7 @@ jobs:
          'DO NOT MERGE'. Remove this commit from the branch before merging
          or change the commit summary."
-     - uses: actions/checkout@v3
+     - uses: actions/checkout@v4

      - name: Check requirements.txt for branches
        shell: bash
4 changes: 2 additions & 2 deletions .github/workflows/docker.yaml
@@ -41,7 +41,7 @@ jobs:
      packages: write

    steps:
-     - uses: actions/checkout@v3
+     - uses: actions/checkout@v4
        with:
          # Needed to fetch tags, used by Python install process to
          # figure out version number
@@ -65,7 +65,7 @@


      - name: Set up Python
-       uses: actions/setup-python@v4
+       uses: actions/setup-python@v5
        with:
          python-version: "3.11"
4 changes: 2 additions & 2 deletions .github/workflows/docstyle.yaml
@@ -14,10 +14,10 @@ jobs:
  numpydoc:
    runs-on: ubuntu-latest
    steps:
-     - uses: actions/checkout@v3
+     - uses: actions/checkout@v4

      - name: Set up Python
-       uses: actions/setup-python@v4
+       uses: actions/setup-python@v5

      - name: Install numpydoc
        run: |
2 changes: 1 addition & 1 deletion .github/workflows/lint.yaml
@@ -12,5 +12,5 @@ jobs:
  ruff:
    runs-on: ubuntu-latest
    steps:
-     - uses: actions/checkout@v3
+     - uses: actions/checkout@v4
      - uses: chartboost/ruff-action@v1
6 changes: 3 additions & 3 deletions Dockerfile
@@ -62,18 +62,18 @@ RUN python -m venv $VIRTUAL_ENV
 # Make sure we use the virtualenv
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 # Put the latest pip and setuptools in the virtualenv
-RUN pip install --upgrade --no-cache-dir pip setuptools wheel
+RUN pip install --upgrade --no-cache-dir pip setuptools wheel uv

 # Install the app's Python runtime dependencies
 COPY requirements/docker.txt ./docker-requirements.txt
-RUN pip install --no-cache-dir -r docker-requirements.txt
+RUN uv pip install --no-cache-dir -r docker-requirements.txt

 # Install dependencies only required by unit tests in a separate image for better caching
 FROM dependencies-image AS test-dependencies-image
 RUN apt-get update
 RUN apt-get install -y --no-install-recommends postgresql postgresql-pgsphere
 COPY requirements/docker-test.txt ./docker-test-requirements.txt
-RUN pip install --no-cache-dir -r docker-test-requirements.txt
+RUN uv pip install --no-cache-dir -r docker-test-requirements.txt

 # Run unit tests
 FROM test-dependencies-image AS unit-test
4 changes: 4 additions & 0 deletions doc/changes/DM-43499.feature.rst
@@ -0,0 +1,4 @@
+Added a ``timespan`` parameter to ``Butler.get()`` (for both direct and remote butlers).
+This parameter can be used to specify an explicit time for calibration selection without requiring that a temporal coordinate be included in the data ID.
+Additionally, if no timespan is specified and none can be found in the data ID, a default full-range timespan will be used for calibration selection.
+This allows a calibration to be selected if there is only one matching calibration in the collection.
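
To make the new parameter concrete, here is a minimal usage sketch. The repository path, collection, and data ID values are hypothetical, not taken from this PR; only the ``timespan`` argument itself is what the change adds.

from astropy.time import Time

from lsst.daf.butler import Butler, Timespan

# Hypothetical repo path, calibration collection, and data ID values.
butler = Butler("/repo/main", collections=["LATISS/calib"])

# Select the flat whose validity range overlaps an explicit timespan,
# without putting an exposure or visit (a temporal coordinate) in the data ID.
timespan = Timespan(
    begin=Time("2024-03-01T00:00:00", scale="tai"),
    end=Time("2024-03-01T01:00:00", scale="tai"),
)
flat = butler.get(
    "flat",
    instrument="LATISS",
    detector=0,
    physical_filter="empty",
    timespan=timespan,
)

# With no timespan available at all, an unbounded default is used; this
# succeeds only if a single matching calibration exists in the collection.
bias = butler.get("bias", instrument="LATISS", detector=0)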
44 changes: 38 additions & 6 deletions python/lsst/daf/butler/direct_butler.py
@@ -779,6 +779,7 @@ def _findDatasetRef(
        predict: bool = False,
        run: str | None = None,
        datastore_records: bool = False,
+       timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        """Shared logic for methods that start with a search for a dataset in
@@ -806,6 +807,11 @@
            datasets. Only used if ``predict`` is `True`.
        datastore_records : `bool`, optional
            If `True` add datastore records to returned `DatasetRef`.
+       timespan : `Timespan` or `None`, optional
+           A timespan that the validity range of the dataset must overlap.
+           If not provided and this is a calibration dataset type, an attempt
+           will be made to find the timespan from any temporal coordinate
+           in the data ID.
        **kwargs
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.
@@ -836,7 +842,6 @@
            if datastore_records and datasetRefOrType._datastore_records is None:
                datasetRefOrType = self._registry.get_datastore_records(datasetRefOrType)
            return datasetRefOrType
-       timespan: Timespan | None = None

        dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs)

@@ -849,9 +854,17 @@
            dataId = DataCoordinate.standardize(
                dataId, universe=self.dimensions, defaults=self._registry.defaults.dataId, **kwargs
            )
-           if dataId.dimensions.temporal:
-               dataId = self._registry.expandDataId(dataId)
-               timespan = dataId.timespan
+           if timespan is None:
+               if dataId.dimensions.temporal:
+                   dataId = self._registry.expandDataId(dataId)
+                   # Use the timespan from the data ID to constrain the
+                   # calibration lookup, but only if the caller has not
+                   # specified an explicit timespan.
+                   timespan = dataId.timespan
+               else:
+                   # Try an arbitrary timespan. Downstream will fail if this
+                   # results in more than one matching dataset.
+                   timespan = Timespan(None, None)
        else:
            # Standardize the data ID to just the dimensions of the dataset
            # type instead of letting registry.findDataset do it, so we get the
@@ -988,6 +1001,7 @@ def getDeferred(
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
+       timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
@@ -1015,6 +1029,11 @@
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.
+       timespan : `Timespan` or `None`, optional
+           A timespan that the validity range of the dataset must overlap.
+           If not provided and this is a calibration dataset type, an attempt
+           will be made to find the timespan from any temporal coordinate
+           in the data ID.
        **kwargs
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.
@@ -1045,7 +1064,9 @@
            else:
                raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
        else:
-           ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
+           ref = self._findDatasetRef(
+               datasetRefOrType, dataId, collections=collections, timespan=timespan, **kwargs
+           )
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

@@ -1057,6 +1078,7 @@ def get(
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
+       timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> Any:
        """Retrieve a stored dataset.
@@ -1085,6 +1107,11 @@
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.
+       timespan : `Timespan` or `None`, optional
+           A timespan that the validity range of the dataset must overlap.
+           If not provided and this is a calibration dataset type, an attempt
+           will be made to find the timespan from any temporal coordinate
+           in the data ID.
        **kwargs
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
@@ -1114,7 +1141,12 @@
        """
        _LOG.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters)
        ref = self._findDatasetRef(
-           datasetRefOrType, dataId, collections=collections, datastore_records=True, **kwargs
+           datasetRefOrType,
+           dataId,
+           collections=collections,
+           datastore_records=True,
+           timespan=timespan,
+           **kwargs,
        )
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

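The new lookup precedence above can be condensed into a standalone sketch. This is not the Butler code itself, just the same decision order restated: an explicit ``timespan`` argument wins, otherwise a temporal data ID supplies one, otherwise an unbounded timespan is used and the lookup fails downstream if more than one calibration matches.

from lsst.daf.butler import Timespan


def resolve_timespan(
    explicit: Timespan | None, data_id_timespan: Timespan | None
) -> Timespan:
    """Pick the timespan used to constrain a calibration lookup."""
    if explicit is not None:
        # A caller-supplied timespan always takes precedence.
        return explicit
    if data_id_timespan is not None:
        # Otherwise use the timespan implied by a temporal data ID
        # (for example one containing an exposure or visit).
        return data_id_timespan
    # Unbounded timespan: it overlaps every validity range, so the
    # lookup succeeds only when exactly one calibration matches.
    return Timespan(None, None)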
12 changes: 8 additions & 4 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
@@ -244,9 +244,10 @@ def getDeferred(
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
+       timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
-       response = self._get_file_info(datasetRefOrType, dataId, collections, kwargs)
+       response = self._get_file_info(datasetRefOrType, dataId, collections, timespan, kwargs)
        # Check that artifact information is available.
        _to_file_payload(response)
        ref = DatasetRef.from_simple(response.dataset_ref, universe=self.dimensions)
@@ -261,10 +262,11 @@ def get(
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
+       timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
-       model = self._get_file_info(datasetRefOrType, dataId, collections, kwargs)
+       model = self._get_file_info(datasetRefOrType, dataId, collections, timespan, kwargs)

        ref = DatasetRef.from_simple(model.dataset_ref, universe=self.dimensions)
        # If the caller provided a DatasetRef, they may have overridden the
@@ -287,6 +289,7 @@ def _get_file_info(
        datasetRefOrType: DatasetRef | DatasetType | str,
        dataId: DataId | None,
        collections: CollectionArgType,
+       timespan: Timespan | None,
        kwargs: dict[str, DataIdValue],
    ) -> GetFileResponseModel:
        """Send a request to the server for the file URLs and metadata
@@ -301,6 +304,7 @@
            dataset_type_name=self._normalize_dataset_type_name(datasetRefOrType),
            collections=self._normalize_collections(collections),
            data_id=self._simplify_dataId(dataId, kwargs),
+           timespan=timespan.to_simple() if timespan is not None else None,
        )
        response = self._post("get_file_by_data_id", request)
        return self._parse_model(response, GetFileResponseModel)
@@ -324,7 +328,7 @@ def getURIs(
        if predict or run:
            raise NotImplementedError("Predict mode is not supported by RemoteButler")

-       response = self._get_file_info(datasetRefOrType, dataId, collections, kwargs)
+       response = self._get_file_info(datasetRefOrType, dataId, collections, None, kwargs)
        file_info = _to_file_payload(response).file_info
        if len(file_info) == 1:
            return DatasetRefURIs(primaryURI=ResourcePath(str(file_info[0].url)))
@@ -449,7 +453,7 @@ def exists(
    ) -> DatasetExistence:
        try:
            response = self._get_file_info(
-               dataset_ref_or_type, dataId=data_id, collections=collections, kwargs=kwargs
+               dataset_ref_or_type, dataId=data_id, collections=collections, timespan=None, kwargs=kwargs
            )
        except DatasetNotFoundError:
            return DatasetExistence.UNRECOGNIZED
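Because the timespan now crosses the client/server boundary, the client serializes it with ``to_simple()`` (as in the ``_get_file_info`` hunk above) and the server handler rebuilds it with ``Timespan.from_simple()`` (next hunk). A small round-trip sketch, assuming only the behavior those two calls show in this diff:

from astropy.time import Time

from lsst.daf.butler import Timespan

ts = Timespan(
    begin=Time("2024-03-27T00:00:00", scale="tai"),
    end=Time("2024-03-28T00:00:00", scale="tai"),
)
payload = ts.to_simple()  # pydantic-serializable form sent in the request body
restored = Timespan.from_simple(payload)  # what the server handler does
assert restored == ts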
@@ -171,11 +171,13 @@ def get_file_by_data_id(
    factory: Factory = Depends(factory_dependency),
) -> GetFileResponseModel:
    butler = factory.create_butler()
+   timespan = Timespan.from_simple(request.timespan) if request.timespan is not None else None
    ref = butler._findDatasetRef(
        datasetRefOrType=request.dataset_type_name,
        dataId=request.data_id,
        collections=request.collections,
        datastore_records=True,
+       timespan=timespan,
    )
    return _get_file_by_ref(butler, ref)

1 change: 1 addition & 0 deletions python/lsst/daf/butler/remote_butler/server_models.py
@@ -73,6 +73,7 @@ class GetFileByDataIdRequestModel(pydantic.BaseModel):
    dataset_type_name: DatasetTypeName
    data_id: SerializedDataId
    collections: CollectionList
+   timespan: SerializedTimespan | None = None


class GetFileResponseModel(pydantic.BaseModel):
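Declaring the new field as optional with a ``None`` default is what keeps older clients working: a request body that omits ``timespan`` still validates, and the server-side ``None`` check skips the timespan constraint. A minimal sketch with a stand-in for ``SerializedTimespan``, whose exact shape is not shown in this diff:

import pydantic


class GetFileRequestSketch(pydantic.BaseModel):
    """Trimmed stand-in for GetFileByDataIdRequestModel."""

    dataset_type_name: str
    timespan: list[int] | None = None  # stand-in for SerializedTimespan

# A request from an older client that predates the field still parses.
old_request = GetFileRequestSketch.model_validate({"dataset_type_name": "flat"})
assert old_request.timespan is None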