Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

DM-13840: Prepare Butler for composite work to begin #21

Merged
merged 26 commits into from
Mar 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1,341 changes: 1,046 additions & 295 deletions config/registry/default_schema.yaml

Large diffs are not rendered by default.

116 changes: 39 additions & 77 deletions python/lsst/daf/butler/butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,104 +69,66 @@ def __init__(self, config):
if self.run is None:
self.run = self.registry.makeRun(self.config['run'])

def getDirect(self, ref, parameters=None):
"""Load a `Dataset` or a slice thereof from a `DatasetRef`.

Unlike `Butler.get`, this method allows `Datasets` outside the Butler's `Collection` to be read as
long as the `DatasetRef` that identifies them can be obtained separately.
def put(self, obj, datasetType, dataId, producer=None):
"""Store and register a dataset.

Parameters
----------
ref : `DatasetRef`
A pointer to the `Dataset` to load.
parameters : `dict`
`StorageClass`-specific parameters that can be used to obtain a slice of the `Dataset`.
obj : `object`
The dataset.
datasetType : `DatasetType` instance or `str`
The `DatasetType`.
dataId : `dict`
An identifier with `DataUnit` names and values.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you referring to the class here because the keys are names that convert to instances of DataUnit classes?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. They are names of the tables. (e.g. {"camera" : "HSC", "visit" : 3}).

producer : `Quantum`, optional
The producer.

Returns
-------
inMemoryDataset : `InMemoryDataset`
The requested `Dataset`.
ref : `DatasetRef`
A reference to the stored dataset.
"""
parent = self.datastore.get(ref.uri, ref.datasetType.storageClass, parameters) if ref.uri else None
children = {name: self.datastore.get(childRef, parameters)
for name, childRef in ref.components.items()}
return ref.datasetType.storageClass.assemble(parent, children)
datasetType = self.registry.getDatasetType(datasetType)
ref = self.registry.addDataset(datasetType, dataId, run=self.run, producer=producer)
# self.datastore.put(obj, ref)
return ref

def get(self, ref, parameters=None):
"""Load a `Dataset` or a slice thereof from the Butler's `Collection`.
def getDirect(self, ref):
"""Retrieve a stored dataset.

Unlike `Butler.get`, this method allows datasets outside the Butler's collection to be read as
long as the `DatasetRef` that identifies them can be obtained separately.

Parameters
----------
ref : `DatasetRef`
The `Dataset` to retrieve.
parameters : `dict`
A dictionary of `StorageClass`-specific parameters that can be
used to obtain a slice of the `Dataset`.
Reference to an already stored dataset.

Returns
-------
dataset : `InMemoryDataset`
The requested `Dataset`.
obj : `object`
The dataset.
"""
ref = self.registry.find(self.run.collection, ref)
if ref:
return self.getDirect(ref, parameters)
else:
return None # No Dataset found
# Currently a direct pass-through to `Datastore.get` but this should
# change for composites.
return self.datastore.get(ref)

def put(self, ref, inMemoryDataset, producer=None):
"""Write a `Dataset`.
def get(self, datasetType, dataId):
"""Retrieve a stored dataset.

Parameters
----------
ref : `DatasetRef`
The `Dataset` being stored.
inMemoryDataset : `InMemoryDataset`
The `Dataset` to store.
producer : `Quantum`
The producer of this `Dataset`. May be ``None`` for some
`Registry` instances.
``producer.run`` must match ``self.config['run']``.
datasetType : `DatasetType` instance or `str`
The `DatasetType`.
dataId : `dict`
A `dict` of `DataUnit` name, value pairs that label the `DatasetRef`
within a Collection.

Returns
-------
datasetRef : `DatasetRef`
The registered (and stored) dataset.
"""
ref = self.registry.expand(ref)
run = self.run
assert(producer is None or run == producer.run)
storageHint = ref.makeStorageHint(run)
uri, components = self.datastore.put(inMemoryDataset, ref.datasetType.storageClass,
storageHint, ref.datasetType.name)
return self.registry.addDataset(ref, uri, components, producer=producer, run=run)

def markInputUsed(self, quantum, ref):
"""Mark a `Dataset` as having been "actually" (not just
predicted-to-be) used by a `Quantum`.

Parameters
----------
quantum : `Quantum`
The dependent `Quantum`.
ref : `DatasetRef`
The `Dataset` that is a true dependency of ``quantum``.
"""
ref = self.registry.find(self.run.collection, ref)
self.registry.markInputUsed(ref, quantum)

def unlink(self, *refs):
"""Remove dataset from collection.

Remove the `Dataset`\ s associated with the given `DatasetRef`\ s
from the `Butler`\ 's collection, and signal that they may be deleted
from storage if they are not referenced by any other collection.

Parameters
----------
refs : `list` of `DatasetRef`
List of refs for `Dataset`\ s to unlink.
obj : `object`
The dataset.
"""
refs = [self.registry.find(self.run.collection, ref) for ref in refs]
for ref in self.registry.disassociate(self.run.collection, refs, remove=True):
self.datastore.remove(ref.uri)
datasetType = self.registry.getDatasetType(datasetType)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getDatasetType only takes a str but this method is documented to provide either a str or a DatasetType.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should probably be pass-through (or perhaps fill in details) for DatasetType instances.

ref = self.registry.find(datasetType, dataId)
return self.getDirect(ref)
77 changes: 41 additions & 36 deletions python/lsst/daf/butler/core/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,51 +96,50 @@ class DatasetRef(object):
----------
datasetType : `DatasetType`
The `DatasetType` for this `Dataset`.
units : `dict`
dataId : `dict`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This documentation for a dataId is more explicit than those elsewhere.

Dictionary where the keys are `DataUnit` names and the values are
`DataUnit` instances.
`DataUnit` values.
id : `int`, optional
A unique identifier.
Normally set to `None` and assigned by `Registry`
"""

__slots__ = ("_type", "_producer", "_predictedConsumers", "_actualConsumers")
_currentId = -1
__slots__ = ("_id", "_datasetType", "_dataId", "_producer",
"_predictedConsumers", "_actualConsumers", "_components",
"_assembler")

@classmethod
def getNewId(cls):
    """Generate and return a new Dataset ID number.

    Increments a class-wide counter, so IDs are unique only within this
    process.  NOTE(review): plain class-attribute increment — not safe
    under concurrent use; confirm single-threaded callers.

    .. todo::
        This is a temporary workaround that will probably disappear in
        the future, when a solution is found to the problem of
        autoincrement compound primary keys in SQLite.

    Returns
    -------
    id : `int`
        The next unused integer ID.
    """
    cls._currentId += 1
    return cls._currentId

def __init__(self, datasetType, units):
units = datasetType.units.conform(units)
super().__init__(
datasetType.name,
**{unit.__class__.__name__: unit.value for unit in units}
)
def __init__(self, datasetType, dataId, id=None):
assert isinstance(datasetType, DatasetType)
self._id = id
self._datasetType = datasetType
self._units = units
self._dataId = dataId
self._producer = None
self._predictedConsumers = dict()
self._actualConsumers = dict()
self._components = dict()
self._assembler = None

@property
def id(self):
    """`int` primary key of the dataset, or `None` until one is assigned.

    Normally filled in by `Registry`; treat as read-only.
    """
    return self._id

@property
def datasetType(self):
"""The `DatasetType` associated with the `Dataset` the `DatasetRef`
points to.
"""
return self._type
return self._datasetType

@property
def units(self):
"""A `tuple` of `DataUnit` instances that label the `DatasetRef`
def dataId(self):
"""A `dict` of `DataUnit` name, value pairs that label the `DatasetRef`
within a Collection.
"""
return self._units
return self._dataId

@property
def producer(self):
Expand Down Expand Up @@ -173,14 +172,20 @@ def actualConsumers(self):
"""
return _safeMakeMappingProxyType(self._actualConsumers)

def makeStorageHint(self, run, template=None):
"""Construct a storage hint by filling in template with the Collection
collection and the values in the units tuple.
@property
def components(self):
    """Read-only mapping of component name to component `DatasetRef`.

    Updates go through `Registry.attachComponent()`; the returned proxy
    cannot be mutated directly.
    """
    return _safeMakeMappingProxyType(self._components)

@property
def assembler(self):
"""Fully-qualified name of an importable Assembler object that can be
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remember that assemblers are classes with assemble and disassemble methods. You have to store the assembler class name, then to assemble you create an instance and run the assemble method.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not according to https://confluence.lsstcorp.org/display/DM/Gen3+Butler+Composites+Design
But I'm perfectly happy for them to be so. This is part of the composite work to be done on a different ticket.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you asked me to combine free functions into classes the code got significantly cleaner. I'll be surprised if we gain by pulling everything apart again. I was really happy with the way assembler/disassembler turned out.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I must admit that I hadn't taken that confluence page as gospel. I thought it was guiding principles so I haven't gone into edit it with my thoughts.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code on the confluence page is absolutely intended as just pseudocode. You're both very much encouraged to actively rethink all of it (the code parts, that is; I hope the conceptual stuff will actually stick this time around).

used to construct this Dataset from its components.

Although a `Dataset` may belong to multiple Collections, only the one
corresponding to its `Run` is used.
`None` for datasets that are not virtual composites.
Read-only; update via `Registry.setAssembler()`.
"""
if template is None:
template = self.datasetType.template
units = {unit.__class__.__name__: unit.value for unit in self.units}
return template.format(DatasetType=self.datasetType.name, Run=run.collection, **units)
return self._assembler
46 changes: 16 additions & 30 deletions python/lsst/daf/butler/core/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,46 +29,32 @@ class Run(object):

Parameters
----------
runId : `int`
ID to associate with this run.
registryId : `int`
ID associated with this `Registry`.
execution : `int`
A unique identifier for this Run, which is also the associated
Execution record.
collection : `str`
Collection to use for this run.
environment : `str`
Something about the environment.
pipeline : `str`
Something about the pipeline.
A Collection name with which all Datasets in this Run are initially
associated, also used as a human-readable name for this Run.
environment : `DatasetRef`
A reference to a dataset that contains a description of
the software environment (e.g. versions) used for this Run.
pipeline : `DatasetRef`
A reference to a dataset that contains a serialization of
the SuperTask Pipeline used for this Run (if any).
"""
_currentId = 0

@classmethod
def getNewId(cls):
    """Return the next unused Run ID from a class-wide counter.

    IDs are unique only within this process.  NOTE(review): plain
    class-attribute increment — not safe under concurrent use.
    """
    cls._currentId += 1
    return cls._currentId

__slots__ = ("_runId", "_registryId", "_collection", "_environment", "_pipeline")
__slots__ = ("_execution", "_collection", "_environment", "_pipeline")
__eq__ = slotValuesAreEqual
__hash__ = slotValuesToHash

def __init__(self, runId, registryId, collection, environment, pipeline):
self._runId = runId
self._registryId = registryId
def __init__(self, execution, collection, environment, pipeline):
self._execution = execution
self._collection = collection
self._environment = environment
self._pipeline = pipeline

@property
def pkey(self):
    # Composite primary key for this Run: (run ID, registry ID).
    # NOTE(review): mixes direct attribute access (self._runId) with the
    # property (self.registryId); behavior is identical, but one style
    # should be picked for consistency.
    return (self._runId, self.registryId)

@property
def runId(self):
    """`int` ID of this Run (read-only)."""
    return self._runId

@property
def registryId(self):
return self._registryId
def execution(self):
return self._execution

@property
def collection(self):
Expand Down