DM-42187: Add RemoteButler.getURIs #924

Merged · 3 commits · Dec 19, 2023
2 changes: 1 addition & 1 deletion .github/workflows/build_docs.yaml
@@ -49,7 +49,7 @@ jobs:
run: sudo apt-get install graphviz

- name: Install documenteer
run: pip install 'documenteer[pipelines]>=0.8'
run: pip install 'documenteer[pipelines]>=0.8,<1.0'

- name: Build documentation
working-directory: ./doc
12 changes: 10 additions & 2 deletions python/lsst/daf/butler/_butler.py
@@ -746,7 +746,6 @@ def getURIs(
"""
raise NotImplementedError()

@abstractmethod
def getURI(
self,
datasetRefOrType: DatasetRef | DatasetType | str,
@@ -808,7 +807,16 @@ def getURI(
Raised if a URI is requested for a dataset that consists of
multiple artifacts.
"""
raise NotImplementedError()
primary, components = self.getURIs(
datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
)

if primary is None or components:
raise RuntimeError(
f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
"Use Butler.getURIs() instead."
)
return primary

@abstractmethod
def get_dataset_type(self, name: str) -> DatasetType:
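
Since the abstract ``getURI`` is now implemented in the base class in terms of ``getURIs``, every Butler implementation gets consistent behavior for both accessors. A short usage sketch (the repo path and construction details are hypothetical; the dataset names and data ID are the ones used in the tests below):

from lsst.daf.butler import Butler

butler = Butler.from_config("my-repo", collections=["ingest/run"])  # hypothetical repo

# Single-artifact dataset: getURI returns the lone ResourcePath.
uri = butler.getURI("test_int", dataId={"instrument": "DummyCamComp", "visit": 423})

# Disassembled composite: getURI raises RuntimeError, so call getURIs and
# inspect the per-component URIs instead (primaryURI will be None).
uris = butler.getURIs("test_metric_comp", dataId={"instrument": "DummyCamComp", "visit": 423})
for component, component_uri in uris.componentURIs.items():
    print(component, component_uri)
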
72 changes: 0 additions & 72 deletions python/lsst/daf/butler/direct_butler.py
@@ -1259,78 +1259,6 @@ def getURIs(
)
return self._datastore.getURIs(ref, predict)

def getURI(
self,
datasetRefOrType: DatasetRef | DatasetType | str,
/,
dataId: DataId | None = None,
*,
predict: bool = False,
collections: Any = None,
run: str | None = None,
**kwargs: Any,
) -> ResourcePath:
"""Return the URI to the Dataset.

Parameters
----------
datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
When `DatasetRef` the `dataId` should be `None`.
Otherwise the `DatasetType` or name thereof.
dataId : `dict` or `DataCoordinate`
A `dict` of `Dimension` link name, value pairs that label the
`DatasetRef` within a Collection. When `None`, a `DatasetRef`
should be provided as the first argument.
predict : `bool`
If `True`, allow URIs to be returned of datasets that have not
been written.
collections : Any, optional
Collections to be searched, overriding ``self.collections``.
Can be any of the types supported by the ``collections`` argument
to butler construction.
run : `str`, optional
Run to use for predictions, overriding ``self.run``.
**kwargs
Additional keyword arguments used to augment or construct a
`DataCoordinate`. See `DataCoordinate.standardize`
parameters.

Returns
-------
uri : `lsst.resources.ResourcePath`
URI pointing to the Dataset within the datastore. If the
Dataset does not exist in the datastore, and if ``predict`` is
`True`, the URI will be a prediction and will include a URI
fragment "#predicted".
If the datastore does not have entities that relate well
to the concept of a URI the returned URI string will be
descriptive. The returned URI is not guaranteed to be obtainable.

Raises
------
LookupError
A URI has been requested for a dataset that does not exist and
guessing is not allowed.
ValueError
Raised if a resolved `DatasetRef` was passed as an input, but it
differs from the one found in the registry.
TypeError
Raised if no collections were provided.
RuntimeError
Raised if a URI is requested for a dataset that consists of
multiple artifacts.
"""
primary, components = self.getURIs(
datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
)

if primary is None or components:
raise RuntimeError(
f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
"Use Butler.getURIs() instead."
)
return primary

def get_dataset_type(self, name: str) -> DatasetType:
return self._registry.getDatasetType(name)

91 changes: 55 additions & 36 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
@@ -48,6 +48,7 @@
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._storage_class import StorageClass
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataIdValue, DimensionConfig, DimensionUniverse, SerializedDataId
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
@@ -68,13 +69,16 @@
from .._limited_butler import LimitedButler
from .._query import Query
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataId, DimensionGroup, DimensionRecord
from ..registry import CollectionArgType, Registry
from ..transfers import RepoExportContext


_AnyPydanticModel = TypeVar("_AnyPydanticModel", bound=_BaseModelCompat)
"""Generic type variable that accepts any Pydantic model class."""
_InputCollectionList = str | Sequence[str] | None
"""The possible types of the ``collections`` parameter of most Butler methods.
"""


class RemoteButler(Butler):
@@ -269,26 +273,7 @@
**kwargs: Any,
) -> Any:
# Docstring inherited.
if isinstance(datasetRefOrType, DatasetRef):
dataset_id = datasetRefOrType.id
response = self._get(f"get_file/{dataset_id}")
if response.status_code == 404:
raise LookupError(f"Dataset not found: {datasetRefOrType}")
else:
request = GetFileByDataIdRequestModel(
dataset_type_name=self._normalize_dataset_type_name(datasetRefOrType),
collections=self._normalize_collections(collections),
data_id=self._simplify_dataId(dataId, kwargs),
)
response = self._post("get_file_by_data_id", request)
if response.status_code == 404:
raise LookupError(
f"Dataset not found with DataId: {dataId} DatasetType: {datasetRefOrType}"
f" collections: {collections}"
)

response.raise_for_status()
model = self._parse_model(response, GetFileResponseModel)
model = self._get_file_info(datasetRefOrType, dataId, collections, kwargs)

# If the caller provided a DatasetRef or DatasetType, they may have
# overridden the storage class on it. We need to respect this, if they
@@ -313,21 +298,38 @@
component=componentOverride,
)

def getURIs(
def _get_file_info(
self,
datasetRefOrType: DatasetRef | DatasetType | str,
/,
dataId: DataId | None = None,
*,
predict: bool = False,
collections: Any = None,
run: str | None = None,
**kwargs: Any,
) -> DatasetRefURIs:
# Docstring inherited.
raise NotImplementedError()
dataId: DataId | None,
collections: _InputCollectionList,
kwargs: dict[str, DataIdValue],
) -> GetFileResponseModel:
"""Send a request to the server for the file URLs and metadata
associated with a dataset.
"""
if isinstance(datasetRefOrType, DatasetRef):
dataset_id = datasetRefOrType.id
response = self._get(f"get_file/{dataset_id}")
if response.status_code == 404:
raise LookupError(f"Dataset not found: {datasetRefOrType}")
else:
request = GetFileByDataIdRequestModel(
dataset_type_name=self._normalize_dataset_type_name(datasetRefOrType),
collections=self._normalize_collections(collections),
data_id=self._simplify_dataId(dataId, kwargs),
)
response = self._post("get_file_by_data_id", request)
if response.status_code == 404:
raise LookupError(
f"Dataset not found with DataId: {dataId} DatasetType: {datasetRefOrType}"
f" collections: {collections}"
)

def getURI(
response.raise_for_status()
return self._parse_model(response, GetFileResponseModel)

def getURIs(
self,
datasetRefOrType: DatasetRef | DatasetType | str,
/,
Expand All @@ -337,9 +339,26 @@
collections: Any = None,
run: str | None = None,
**kwargs: Any,
) -> ResourcePath:
) -> DatasetRefURIs:
# Docstring inherited.
raise NotImplementedError()
if predict or run:
raise NotImplementedError("Predict mode is not supported by RemoteButler")
Review comment (Member): We need to discuss whether we can make this work. Obviously a signed URL to a resource that doesn't exist is not all that helpful, but people like to know what the filename might look like. We can tell them that in theory -- maybe for predict mode we return the unsigned S3 URI? Can punt this to another ticket.
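
As a strawman for that discussion, a minimal sketch of a predict-mode fallback. This is purely hypothetical: ``_predict_unsigned_uri`` is an assumed helper, not a real API, and the "#predicted" fragment mirrors the convention documented for ``getURI``:

from lsst.daf.butler.datastore import DatasetRefURIs
from lsst.resources import ResourcePath


def predicted_uris(butler, ref) -> DatasetRefURIs:
    # Sketch only: a signed URL for a file that does not exist yet is useless,
    # so return the unsigned location, tagged with the "#predicted" fragment
    # that DirectButler attaches to predicted URIs.
    unsigned = butler._predict_unsigned_uri(ref)  # assumed helper, not a real API
    return DatasetRefURIs(primaryURI=ResourcePath(f"{unsigned}#predicted"))
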


response = self._get_file_info(datasetRefOrType, dataId, collections, kwargs)
file_info = response.file_info
if len(file_info) == 1:
return DatasetRefURIs(primaryURI=ResourcePath(str(file_info[0].url)))
else:
components = {}
for f in file_info:
component = f.datastoreRecords.component
if component is None:
raise ValueError(
f"DatasetId {response.dataset_ref.id} has a component file"
" with no component name defined"
)
components[component] = ResourcePath(str(f.url))
return DatasetRefURIs(componentURIs=components)

def get_dataset_type(self, name: str) -> DatasetType:
# In future implementation this should directly access the cache
@@ -627,7 +646,7 @@
"""Deserialize a Pydantic model from the body of an HTTP response."""
return model.model_validate_json(response.content)

def _normalize_collections(self, collections: str | Sequence[str] | None) -> CollectionList:
def _normalize_collections(self, collections: _InputCollectionList) -> CollectionList:
"""Convert the ``collections`` parameter in the format used by Butler
methods to a standardized format for the REST API.
"""
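
For orientation, the wire protocol that ``_get_file_info`` and ``getURIs`` rely on, reconstructed from the code above. The endpoint paths and field accesses come straight from the diff; the class and field names below are assumptions, not the server's actual Pydantic definitions:

from dataclasses import dataclass

# Requests issued by _get_file_info (the server root comes from RemoteButler's
# configuration):
#   GET  <server>/get_file/<dataset_id>   -- when a resolved DatasetRef is given
#   POST <server>/get_file_by_data_id     -- body: GetFileByDataIdRequestModel
# A 404 on either is mapped to LookupError; success yields a GetFileResponseModel.


@dataclass
class DatastoreRecords:  # assumed name; only .component is read by getURIs
    component: str | None  # component name, or None for a single-file dataset


@dataclass
class FileInfo:  # assumed name; getURIs reads .url and .datastoreRecords
    url: str  # (possibly signed) URL for one stored artifact
    datastoreRecords: DatastoreRecords


@dataclass
class GetFileResponse:  # stand-in for GetFileResponseModel
    dataset_ref_id: str  # the real model exposes dataset_ref.id
    file_info: list[FileInfo]  # one entry per artifact of the dataset
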
63 changes: 59 additions & 4 deletions tests/test_server.py
@@ -53,8 +53,10 @@
NoDefaultCollectionError,
StorageClassFactory,
)
from lsst.daf.butler.tests import DatastoreMock
from lsst.daf.butler.datastore import DatasetRefURIs
from lsst.daf.butler.tests import DatastoreMock, addDatasetType
from lsst.daf.butler.tests.utils import MetricsExample, MetricTestRepo, makeTestTempDir, removeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import HttpResourcePath

TESTDIR = os.path.abspath(os.path.dirname(__file__))
@@ -104,9 +106,11 @@ def setUpClass(cls):
configFile=os.path.join(TESTDIR, "config/basic/butler-s3store.yaml"),
forceConfigRoot=False,
)

# Add a file with corrupted data for testing error conditions
cls.dataset_with_corrupted_data = _create_corrupted_dataset(cls.repo)
# All of the datasets that come with MetricTestRepo are disassembled
# composites. Add a simple dataset for testing the common case.
Review comment (Member): I had forgotten that MetricTestRepo defaults to storage class StructuredCompositeReadComp. Easy to change the default if we wanted to, but also nice to know that getURIs should work.
cls.simple_dataset_ref = _create_simple_dataset(cls.repo.butler)

# Override the server's Butler initialization to point at our test repo
server_butler = Butler.from_config(cls.root, writeable=True)
@@ -131,8 +135,9 @@ def create_factory_dependency():
_make_test_client(app, raise_server_exceptions=False)
)

# Populate the test server. The DatastoreMock is required because the
# datasets referenced in these imports do not point at real files
# Populate the test server.
# The DatastoreMock is required because the datasets referenced in
# these imports do not point at real files.
DatastoreMock.apply(server_butler)
server_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
server_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
@@ -323,6 +328,50 @@ def check_sc_override(converted):
)
self.assertEqual(dataset_type_component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

def test_getURIs_no_components(self):
# This dataset does not have components, and should return one URI.
def check_uri(uri: ResourcePath):
self.assertIsNotNone(uri)
self.assertEqual(uri.scheme, "https")
self.assertEqual(uri.read(), b"123")

uris = self.butler.getURIs(self.simple_dataset_ref)
self.assertEqual(len(uris.componentURIs), 0)
check_uri(uris.primaryURI)

check_uri(self.butler.getURI(self.simple_dataset_ref))

def test_getURIs_multiple_components(self):
# This dataset has multiple components, so we should get back multiple
# URIs.
dataset_type = "test_metric_comp"
data_id = {"instrument": "DummyCamComp", "visit": 423}
collections = "ingest/run"

def check_uris(uris: DatasetRefURIs):
self.assertIsNone(uris.primaryURI)
self.assertEqual(len(uris.componentURIs), 3)
path = uris.componentURIs["summary"]
self.assertEqual(path.scheme, "https")
data = path.read()
self.assertEqual(data, b"AM1: 5.2\nAM2: 30.6\n")

uris = self.butler.getURIs(dataset_type, dataId=data_id, collections=collections)
check_uris(uris)

# Calling getURI on a multi-file dataset raises an exception
with self.assertRaises(RuntimeError):
self.butler.getURI(dataset_type, dataId=data_id, collections=collections)

# getURIs does NOT respect component overrides on the DatasetRef,
# instead returning the parent's URIs. Unclear if this is "correct"
# from a conceptual point of view, but this matches DirectButler
# behavior.
Review comment (Member): There is only one URI for any of the components if not disassembled. I imagine no one has ever tried to request the URI of an individual component by passing in the ref, but it seems like it should return the one URI of the component.
ref = self.butler.find_dataset(dataset_type, data_id=data_id, collections=collections)
componentRef = ref.makeComponentRef("summary")
componentUris = self.butler.getURIs(componentRef)
check_uris(componentUris)
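
A sketch of the alternative behavior that review comment proposes; hypothetical, not what this PR implements:

# Hypothetical: for a composite stored as one file, a component ref would
# resolve to the single artifact containing all the components.
uri = self.butler.getURI(componentRef)  # today this raises RuntimeError
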


def _create_corrupted_dataset(repo: MetricTestRepo) -> DatasetRef:
run = "corrupted-run"
Expand All @@ -333,5 +382,11 @@ def _create_corrupted_dataset(repo: MetricTestRepo) -> DatasetRef:
return ref


def _create_simple_dataset(butler: Butler) -> DatasetRef:
dataset_type = addDatasetType(butler, "test_int", {"instrument", "visit"}, "int")
ref = butler.put(123, dataset_type, dataId={"instrument": "DummyCamComp", "visit": 423})
return ref


if __name__ == "__main__":
unittest.main()