Skip to content

Commit

Permalink
Add support for collection docstrings.
Browse files Browse the repository at this point in the history
  • Loading branch information
TallJimbo committed Nov 10, 2020
1 parent 2b40b8a commit 93f6cfc
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 16 deletions.
42 changes: 38 additions & 4 deletions python/lsst/daf/butler/registry/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,8 @@ def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
"""
self._opaque[tableName].delete(**where)

def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None:
def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED,
doc: Optional[str] = None) -> None:
"""Add a new collection if one with the given name does not exist.
Parameters
Expand All @@ -421,13 +422,15 @@ def registerCollection(self, name: str, type: CollectionType = CollectionType.TA
The name of the collection to create.
type : `CollectionType`
Enum value indicating the type of collection to create.
doc : `str`, optional
Documentation string for the collection.
Notes
-----
This method cannot be called within transactions, as it needs to be
able to perform its own transaction to be concurrent.
"""
self._collections.register(name, type)
self._collections.register(name, type, doc=doc)

def getCollectionType(self, name: str) -> CollectionType:
"""Return an enumeration value indicating the type of the given
Expand All @@ -450,20 +453,22 @@ def getCollectionType(self, name: str) -> CollectionType:
"""
return self._collections.find(name).type

def registerRun(self, name: str) -> None:
def registerRun(self, name: str, doc: Optional[str] = None) -> None:
"""Add a new run if one with the given name does not exist.
Parameters
----------
name : `str`
The name of the run to create.
doc : `str`, optional
Documentation string for the collection.
Notes
-----
This method cannot be called within transactions, as it needs to be
able to perform its own transaction to be concurrent.
"""
self._collections.register(name, CollectionType.RUN)
self._collections.register(name, CollectionType.RUN, doc=doc)

@transactional
def removeCollection(self, name: str) -> None:
Expand Down Expand Up @@ -554,6 +559,35 @@ def setCollectionChain(self, parent: str, children: Any) -> None:
if children != record.children:
record.update(self._collections, children)

def getCollectionDocumentation(self, collection: str) -> Optional[str]:
"""Retrieve the documentation string for a collection.
Parameters
----------
name : `str`
Name of the collection.
Returns
-------
docs : `str` or `None`
Docstring for the collection with the given name.
"""
return self._collections.getDocumentation(self._collections.find(collection).key)

def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
"""Set the documentation string for a collection.
Parameters
----------
name : `str`
Name of the collection.
docs : `str` or `None`
Docstring for the collection with the given name; will replace any
existing docstring. Passing `None` will remove any existing
docstring.
"""
self._collections.setDocumentation(self._collections.find(collection).key, doc)

def registerDatasetType(self, datasetType: DatasetType) -> bool:
"""
Add a new `DatasetType` to the Registry.
Expand Down
18 changes: 17 additions & 1 deletion python/lsst/daf/butler/registry/collections/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,15 @@ def refresh(self) -> None:
for chain in chains:
chain.refresh(self)

def register(self, name: str, type: CollectionType) -> CollectionRecord:
def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
# Docstring inherited from CollectionManager.
record = self._getByName(name)
if record is None:
row, _ = self._db.sync(
self._tables.collection,
keys={"name": name},
compared={"type": int(type)},
extra={"doc": doc},
returning=[self._collectionIdName],
)
assert row is not None
Expand Down Expand Up @@ -425,6 +426,21 @@ def __getitem__(self, key: Any) -> CollectionRecord:
def __iter__(self) -> Iterator[CollectionRecord]:
yield from self._records.values()

def getDocumentation(self, key: Any) -> Optional[str]:
# Docstring inherited from CollectionManager.
sql = sqlalchemy.sql.select(
[self._tables.collection.columns.doc]
).select_from(
self._tables.collection
).where(
self._tables.collection.columns[self._collectionIdName] == key
)
return self._db.query(sql).scalar()

def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
# Docstring inherited from CollectionManager.
self._db.update(self._tables.collection, {self._collectionIdName: "key"}, {"key": key, "doc": doc})

def _setRecordCache(self, records: Iterable[CollectionRecord]) -> None:
"""Set internal record cache to contain given records,
old cached records will be removed.
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/registry/collections/nameKey.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@


# This has to be updated on every schema change
_VERSION = VersionTuple(1, 0, 0)
_VERSION = VersionTuple(2, 0, 0)


def _makeTableSpecs(tsRepr: Type[TimespanDatabaseRepresentation]) -> CollectionTablesTuple:
Expand All @@ -58,6 +58,7 @@ def _makeTableSpecs(tsRepr: Type[TimespanDatabaseRepresentation]) -> CollectionT
fields=[
_KEY_FIELD_SPEC,
ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
ddl.FieldSpec("doc", dtype=sqlalchemy.Text, nullable=True),
],
),
run=makeRunTableSpec("name", sqlalchemy.String, tsRepr),
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/registry/collections/synthIntKey.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@


# This has to be updated on every schema change
_VERSION = VersionTuple(1, 0, 0)
_VERSION = VersionTuple(2, 0, 0)


def _makeTableSpecs(tsRepr: Type[TimespanDatabaseRepresentation]) -> CollectionTablesTuple:
Expand All @@ -61,6 +61,7 @@ def _makeTableSpecs(tsRepr: Type[TimespanDatabaseRepresentation]) -> CollectionT
_KEY_FIELD_SPEC,
ddl.FieldSpec("name", dtype=sqlalchemy.String, length=64, nullable=False),
ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
ddl.FieldSpec("doc", dtype=sqlalchemy.Text, nullable=True),
],
unique=[("name",)],
),
Expand Down
34 changes: 33 additions & 1 deletion python/lsst/daf/butler/registry/interfaces/_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def refresh(self) -> None:
raise NotImplementedError()

@abstractmethod
def register(self, name: str, type: CollectionType) -> CollectionRecord:
def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
"""Ensure that a collection of the given name and type are present
in the layer this manager is associated with.
Expand All @@ -414,6 +414,9 @@ def register(self, name: str, type: CollectionType) -> CollectionRecord:
Name of the collection.
type : `CollectionType`
Enumeration value indicating the type of collection.
doc : `str`, optional
Documentation string for the collection. Ignored if the collection
already exists.
Returns
-------
Expand Down Expand Up @@ -532,3 +535,32 @@ def __iter__(self) -> Iterator[CollectionRecord]:
The record for a managed collection.
"""
raise NotImplementedError()

@abstractmethod
def getDocumentation(self, key: Any) -> Optional[str]:
"""Retrieve the documentation string for a collection.
Parameters
----------
key
Internal primary key value for the collection.
Returns
-------
docs : `str` or `None`
Docstring for the collection with the given key.
"""
raise NotImplementedError()

@abstractmethod
def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
"""Set the documentation string for a collection.
Parameters
----------
key
Internal primary key value for the collection.
docs : `str`, optional
Docstring for the collection with the given key.
"""
raise NotImplementedError()
8 changes: 7 additions & 1 deletion python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,11 @@ def testCollections(self):
self.loadData(registry, "datasets.yaml")
run1 = "imported_g"
run2 = "imported_r"
# Test setting a collection docstring after it has been created.
registry.setCollectionDocumentation(run1, "doc for run1")
self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
registry.setCollectionDocumentation(run1, None)
self.assertIsNone(registry.getCollectionDocumentation(run1))
datasetType = "bias"
# Find some datasets via their run's collection.
dataId1 = {"instrument": "Cam1", "detector": 1}
Expand All @@ -456,7 +461,8 @@ def testCollections(self):
self.assertIsNotNone(ref2)
# Associate those into a new collection,then look for them there.
tag1 = "tag1"
registry.registerCollection(tag1, type=CollectionType.TAGGED)
registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
registry.associate(tag1, [ref1, ref2])
self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/transfers/_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ def _finish(self) -> None:
for run in datasetsByRun.keys():
self._collections[run] = self._registry._collections.find(run)
for collectionName in self._computeSortedCollections():
self._backend.saveCollection(self._collections[collectionName])
doc = self._registry.getCollectionDocumentation(collectionName)
self._backend.saveCollection(self._collections[collectionName], doc)
# Sort the dataset types and runs before exporting to ensure
# reproducible order in export file.
for datasetType in sorted(self._datasets.keys()):
Expand Down
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/transfers/_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def saveDimensionData(self, element: DimensionElement, *data: DimensionRecord) -
raise NotImplementedError()

@abstractmethod
def saveCollection(self, record: CollectionRecord) -> None:
def saveCollection(self, record: CollectionRecord, doc: Optional[str]) -> None:
"""Export a collection.
This only exports the collection's own state, not its associations with
Expand All @@ -83,6 +83,8 @@ def saveCollection(self, record: CollectionRecord) -> None:
----------
record: `CollectionRecord`
Object representing the collection to export.
doc : `str` or `None`
Documentation string for the collection.
"""
raise NotImplementedError()

Expand Down
16 changes: 11 additions & 5 deletions python/lsst/daf/butler/transfers/_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
from ._interfaces import RepoExportBackend, RepoImportBackend


EXPORT_FORMAT_VERSION = VersionTuple(1, 0, 0)
EXPORT_FORMAT_VERSION = VersionTuple(1, 0, 1)
"""Export format version.
Files with a different major version or a newer minor version cannot be read by
Expand Down Expand Up @@ -98,13 +98,15 @@ def saveDimensionData(self, element: DimensionElement, *data: DimensionRecord) -
"records": data_dicts,
})

def saveCollection(self, record: CollectionRecord) -> None:
def saveCollection(self, record: CollectionRecord, doc: Optional[str]) -> None:
# Docstring inherited from RepoExportBackend.saveCollections.
data: Dict[str, Any] = {
"type": "collection",
"collection_type": record.type.name,
"name": record.name,
}
if doc is not None:
data["doc"] = doc
if isinstance(record, RunRecord):
data["host"] = record.host
data["timespan_begin"] = record.timespan.begin
Expand Down Expand Up @@ -219,6 +221,7 @@ def __init__(self, stream: IO, registry: Registry):
self.runs: Dict[str, Tuple[Optional[str], Timespan]] = {}
self.chains: Dict[str, List[str]] = {}
self.collections: Dict[str, CollectionType] = {}
self.collectionDocs: Dict[str, str] = {}
self.datasetTypes: NamedValueSet[DatasetType] = NamedValueSet()
self.dimensions: Mapping[DimensionElement, List[DimensionRecord]] = defaultdict(list)
self.tagAssociations: Dict[str, List[int]] = defaultdict(list)
Expand Down Expand Up @@ -264,6 +267,9 @@ def __init__(self, stream: IO, registry: Registry):
self.chains[data["name"]] = children
else:
self.collections[data["name"]] = collectionType
doc = data.get("doc")
if doc is not None:
self.collectionDocs[data["name"]] = doc
elif data["type"] == "run":
# Also support old form of saving a run with no extra info.
self.runs[data["name"]] = (None, Timespan(None, None))
Expand Down Expand Up @@ -310,12 +316,12 @@ def register(self) -> None:
for datasetType in self.datasetTypes:
self.registry.registerDatasetType(datasetType)
for run in self.runs:
self.registry.registerRun(run)
self.registry.registerRun(run, doc=self.collectionDocs.get(run))
# No way to add extra run info to registry yet.
for collection, collection_type in self.collections.items():
self.registry.registerCollection(collection, collection_type)
self.registry.registerCollection(collection, collection_type, doc=self.collectionDocs.get(run))
for chain, children in self.chains.items():
self.registry.registerCollection(chain, CollectionType.CHAINED)
self.registry.registerCollection(chain, CollectionType.CHAINED, doc=self.collectionDocs.get(run))
self.registry.setCollectionChain(chain, children)

def load(self, datastore: Optional[Datastore], *,
Expand Down

0 comments on commit 93f6cfc

Please sign in to comment.