DM-33278: Improve storage class conversion #632

Merged · 13 commits · Jan 19, 2022
2 changes: 2 additions & 0 deletions doc/changes/DM-33278.api.rst
@@ -0,0 +1,2 @@
Added a new `DatasetType.is_compatible_with` method.
This method determines if two dataset types are compatible with each other, taking into account whether the storage classes allow type conversion.
28 changes: 28 additions & 0 deletions doc/lsst.daf.butler/formatters.rst
@@ -88,6 +88,18 @@ It does not make sense for subsetting to be applied to the integer pixel count.

For this reason read parameters for derived components are processed prior to calculating the derived component.

Type Converters
^^^^^^^^^^^^^^^

.. warning::
The storage class conversion API is currently deemed to be experimental.
It was developed to support dataset type migration.
Do not add further converters without consultation.

It is sometimes convenient to be able to call `Butler.put` with a Python type that does not match the storage class defined for that dataset type in the registry.
Storage classes can be defined with converters that declare which Python types can be coerced into the required type, and which function or method should be used to perform that conversion.
Butler supports this on both `Butler.put` and `Butler.get`; the latter is required if the dataset type definition has been changed in the registry after a dataset was stored.
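The coercion mechanism can be sketched with a small self-contained example. This is illustrative only and does not use the real `lsst.daf.butler` classes: `ToyStorageClass` and its `coerce_type` method are hypothetical stand-ins, assuming only the behaviour described above (a converter registry keyed by Python type, consulted when the supplied object does not already match the canonical type).

```python
from typing import Any, Callable, Dict, Optional


class ToyStorageClass:
    """Toy stand-in for a storage class: a canonical python type plus a
    mapping from convertible source types to converter callables."""

    def __init__(
        self,
        pytype: type,
        converters: Optional[Dict[type, Callable[[Any], Any]]] = None,
    ):
        self.pytype = pytype
        self.converters = converters or {}

    def coerce_type(self, obj: Any) -> Any:
        # Already the canonical type (or a subclass): return unchanged.
        if isinstance(obj, self.pytype):
            return obj
        # Otherwise look for a converter registered for the object's type.
        for candidate, func in self.converters.items():
            if isinstance(obj, candidate):
                return func(obj)
        raise TypeError(f"Cannot convert {type(obj)} to {self.pytype}")


# A storage class whose canonical type is dict, accepting lists of pairs.
sc = ToyStorageClass(dict, converters={list: dict})
coerced = sc.coerce_type([("a", 1)])  # converted via dict(...)
```

A put-time coercion would then simply run every incoming object through `coerce_type` before storing it, which is the idea behind the datastore change in this PR.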

Defining a Storage Class
^^^^^^^^^^^^^^^^^^^^^^^^

@@ -139,6 +151,22 @@ You can do this using YAML anchors and references but the preferred approach is

If this approach is used the `StorageClass` Python class created by `StorageClassFactory` will inherit from the specific parent class and not the generic `StorageClass`.

Type converters are specified with a ``converters`` section.

.. code-block:: yaml

StructuredDataDict:
pytype: dict
converters:
lsst.daf.base.PropertySet: lsst.daf.base.PropertySet.toDict
TaskMetadata:
pytype: lsst.pipe.base.TaskMetadata
converters:
lsst.daf.base.PropertySet: lsst.pipe.base.TaskMetadata.from_metadata

In the first definition, the configuration says that if a ``PropertySet`` object is given then the unbound method ``lsst.daf.base.PropertySet.toDict`` can be called with the ``PropertySet`` as its only parameter, and the returned value will be a `dict`.
In the second definition a ``PropertySet`` is again specified, but this time the ``from_metadata`` class method will be called with the ``PropertySet`` as the first parameter and a ``TaskMetadata`` will be returned.
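The two call styles above can be demonstrated with toy classes. These are hypothetical stand-ins, not the real ``lsst.daf.base.PropertySet`` or ``lsst.pipe.base.TaskMetadata``; they only mirror the calling conventions described in the text.

```python
class ToyPropertySet:
    """Hypothetical stand-in for lsst.daf.base.PropertySet."""

    def __init__(self, values: dict):
        self._values = values

    def toDict(self) -> dict:
        return dict(self._values)


class ToyTaskMetadata:
    """Hypothetical stand-in for lsst.pipe.base.TaskMetadata."""

    def __init__(self, values: dict):
        self.values = values

    @classmethod
    def from_metadata(cls, ps: "ToyPropertySet") -> "ToyTaskMetadata":
        return cls(ps.toDict())


ps = ToyPropertySet({"exposure": 42})

# Unbound-method style: the converter is the class attribute and is called
# with the instance as the only argument.
as_dict = ToyPropertySet.toDict(ps)

# Classmethod style: the instance is passed as the first argument and a new
# object of the target type is returned.
meta = ToyTaskMetadata.from_metadata(ps)
```

In both cases the converter is resolved from a dotted name in the YAML and invoked with the source object as its sole positional argument, so either style works transparently.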

Storage Class Delegates
=======================

2 changes: 2 additions & 0 deletions python/lsst/daf/butler/configs/storageClasses.yaml
@@ -234,6 +234,8 @@ storageClasses:
pytype: lsst.pex.config.Config
Packages:
pytype: lsst.base.Packages
converters:
dict: lsst.base.Packages
NumpyArray:
pytype: numpy.ndarray
Thumbnail:
66 changes: 60 additions & 6 deletions python/lsst/daf/butler/core/datasets/type.py
@@ -240,25 +240,79 @@ def __repr__(self) -> str:
extra += ", isCalibration=True"
return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

def _equal_ignoring_storage_class(self, other: Any) -> bool:
"""Check everything is equal except the storage class.

Parameters
----------
other : Any
Object to check against this one.

Returns
-------
mostly : `bool`
Returns `True` if everything except the storage class is equal.
"""
if not isinstance(other, type(self)):
return False
if self._name != other._name:
return False
if self._dimensions != other._dimensions:
return False
if self._isCalibration != other._isCalibration:
return False
if self._parentStorageClass is not None and other._parentStorageClass is not None:
return self._parentStorageClass == other._parentStorageClass
else:
return self._parentStorageClassName == other._parentStorageClassName

def __eq__(self, other: Any) -> bool:
mostly_equal = self._equal_ignoring_storage_class(other)
if not mostly_equal:
return False

# Be careful not to force a storage class to import the corresponding
# python code.
if self._storageClass is not None and other._storageClass is not None:
if self._storageClass != other._storageClass:
return False
else:
if self._storageClassName != other._storageClassName:
return False
return True

def is_compatible_with(self, other: DatasetType) -> bool:
"""Determine if the given `DatasetType` is compatible with this one.

Compatibility requires a matching name and dimensions and a storage
class for this dataset type that can convert the python type associated
with the other storage class to this python type.

Parameters
----------
other : `DatasetType`
Dataset type to check.

Returns
-------
is_compatible : `bool`
Returns `True` if the other dataset type is either the same as this
or the storage class associated with the other can be converted to
this.
"""
mostly_equal = self._equal_ignoring_storage_class(other)
if not mostly_equal:
return False

# If the storage class names match then they are compatible.
if self._storageClassName == other._storageClassName:
return True

# Now required to check the full storage class.
self_sc = self.storageClass
other_sc = other.storageClass

return self_sc.can_convert(other_sc)

def __hash__(self) -> int:
"""Hash DatasetType instance.
9 changes: 8 additions & 1 deletion python/lsst/daf/butler/core/storageClass.py
@@ -434,12 +434,19 @@ def can_convert(self, other: StorageClass) -> bool:
Returns
-------
can : `bool`
`True` if this storage class has a registered converter for
the python type associated with the other storage class. That
converter will convert the other python type to the one associated
with this storage class.
"""
if other.name == self.name:
# Identical storage classes are compatible.
return True

if issubclass(other.pytype, self.pytype):
# Storage classes have different names but the same python type.
return True

for candidate_type in self.converters_by_type:
if issubclass(other.pytype, candidate_type):
return True
4 changes: 4 additions & 0 deletions python/lsst/daf/butler/datastores/inMemoryDatastore.py
@@ -373,6 +373,10 @@ def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
if ref.id is None:
raise RuntimeError(f"Can not store unresolved DatasetRef {ref}")

# May need to coerce the in memory dataset to the correct
# python type, otherwise parameters may not work.
inMemoryDataset = ref.datasetType.storageClass.coerce_type(inMemoryDataset)

self._validate_put_parameters(inMemoryDataset, ref)

self.datasets[ref.id] = inMemoryDataset
21 changes: 21 additions & 0 deletions tests/test_datasets.py
@@ -166,6 +166,10 @@ def testEquality(self):
DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent"),
)
self.assertNotEqual(
DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=True),
DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="parent", isCalibration=False),
)
self.assertNotEqual(
DatasetType(
"a",
@@ -247,6 +251,23 @@ def testEquality(self):
DatasetType("a.b", dimensionsA, "test_b", parentStorageClass="storageB"),
)

def testCompatibility(self):
storageA = StorageClass("test_a", pytype=set, converters={"list": "builtins.set"})
storageB = StorageClass("test_b", pytype=list)
storageC = StorageClass("test_c", pytype=dict)
self.assertTrue(storageA.can_convert(storageB))
dimensionsA = self.universe.extract(["instrument"])

dA = DatasetType("a", dimensionsA, storageA)
dA2 = DatasetType("a", dimensionsA, storageB)
self.assertNotEqual(dA, dA2)
self.assertTrue(dA.is_compatible_with(dA))
self.assertTrue(dA.is_compatible_with(dA2))
self.assertFalse(dA2.is_compatible_with(dA))

dA3 = DatasetType("a", dimensionsA, storageC)
self.assertFalse(dA.is_compatible_with(dA3))

def testJson(self):
storageA = StorageClass("test_a")
dimensionsA = self.universe.extract(["instrument"])
14 changes: 13 additions & 1 deletion tests/test_storageClass.py
Expand Up @@ -25,6 +25,7 @@
import unittest

from lsst.daf.butler import StorageClass, StorageClassConfig, StorageClassDelegate, StorageClassFactory
from lsst.daf.butler.tests import MetricsExample
from lsst.utils.introspection import get_full_type_name

"""Tests related to the StorageClass infrastructure.
@@ -240,7 +241,7 @@ def testConverters(self):

className = "TestConverters"
converters = {
"lsst.daf.butler.tests.MetricsExample": "lsst.daf.butler.tests.MetricsExample.exportAsDict",
# Add some entries that will fail to import.
"lsst.daf.butler.bad.type": "lsst.daf.butler.tests.MetricsExampleModel.from_metrics",
"lsst.daf.butler.tests.MetricsExampleModel": "lsst.daf.butler.bad.function",
@@ -271,6 +272,17 @@ def testConverters(self):
converted = sc.coerce_type([1, 2, 3])
self.assertEqual(converted, {"key": [1, 2, 3]})

# Convert Metrics using a named method converter.
metric = MetricsExample(summary={"a": 1}, data=[1, 2], output={"c": "e"})
converted = sc.coerce_type(metric)
self.assertEqual(converted["data"], [1, 2], converted)

# Check that matching python types are allowed.
sc4 = StorageClass("Test2", pytype=set)
self.assertTrue(sc2.can_convert(sc4))
converted = sc2.coerce_type({1, 2})
self.assertEqual(converted, {1, 2})

# Try to coerce a type that is not supported.
with self.assertRaises(TypeError):
sc.coerce_type(set([1, 2, 3]))