Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-27490: Add support for collection docstrings. #430

Merged
merged 1 commit into from
Nov 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 38 additions & 4 deletions python/lsst/daf/butler/registry/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,8 @@ def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
"""
self._opaque[tableName].delete(**where)

def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None:
def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED,
doc: Optional[str] = None) -> None:
"""Add a new collection if one with the given name does not exist.

Parameters
Expand All @@ -421,13 +422,15 @@ def registerCollection(self, name: str, type: CollectionType = CollectionType.TA
The name of the collection to create.
type : `CollectionType`
Enum value indicating the type of collection to create.
doc : `str`, optional
Documentation string for the collection.

Notes
-----
This method cannot be called within transactions, as it needs to be
able to perform its own transaction to be concurrent.
"""
self._collections.register(name, type)
self._collections.register(name, type, doc=doc)

def getCollectionType(self, name: str) -> CollectionType:
"""Return an enumeration value indicating the type of the given
Expand All @@ -450,20 +453,22 @@ def getCollectionType(self, name: str) -> CollectionType:
"""
return self._collections.find(name).type

def registerRun(self, name: str) -> None:
def registerRun(self, name: str, doc: Optional[str] = None) -> None:
"""Add a new run if one with the given name does not exist.

Parameters
----------
name : `str`
The name of the run to create.
doc : `str`, optional
Documentation string for the collection.

Notes
-----
This method cannot be called within transactions, as it needs to be
able to perform its own transaction to be concurrent.
"""
self._collections.register(name, CollectionType.RUN)
self._collections.register(name, CollectionType.RUN, doc=doc)

@transactional
def removeCollection(self, name: str) -> None:
Expand Down Expand Up @@ -554,6 +559,35 @@ def setCollectionChain(self, parent: str, children: Any) -> None:
if children != record.children:
record.update(self._collections, children)

def getCollectionDocumentation(self, collection: str) -> Optional[str]:
"""Retrieve the documentation string for a collection.

Parameters
----------
name : `str`
Name of the collection.

Returns
-------
docs : `str` or `None`
Docstring for the collection with the given name.
"""
return self._collections.getDocumentation(self._collections.find(collection).key)

def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
"""Set the documentation string for a collection.

Parameters
----------
name : `str`
Name of the collection.
docs : `str` or `None`
Docstring for the collection with the given name; will replace any
existing docstring. Passing `None` will remove any existing
docstring.
"""
self._collections.setDocumentation(self._collections.find(collection).key, doc)

def registerDatasetType(self, datasetType: DatasetType) -> bool:
"""
Add a new `DatasetType` to the Registry.
Expand Down
18 changes: 17 additions & 1 deletion python/lsst/daf/butler/registry/collections/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,15 @@ def refresh(self) -> None:
for chain in chains:
chain.refresh(self)

def register(self, name: str, type: CollectionType) -> CollectionRecord:
def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
# Docstring inherited from CollectionManager.
record = self._getByName(name)
if record is None:
row, _ = self._db.sync(
self._tables.collection,
keys={"name": name},
compared={"type": int(type)},
extra={"doc": doc},
returning=[self._collectionIdName],
)
assert row is not None
Expand Down Expand Up @@ -425,6 +426,21 @@ def __getitem__(self, key: Any) -> CollectionRecord:
def __iter__(self) -> Iterator[CollectionRecord]:
yield from self._records.values()

def getDocumentation(self, key: Any) -> Optional[str]:
# Docstring inherited from CollectionManager.
sql = sqlalchemy.sql.select(
[self._tables.collection.columns.doc]
).select_from(
self._tables.collection
).where(
self._tables.collection.columns[self._collectionIdName] == key
)
return self._db.query(sql).scalar()

def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
# Docstring inherited from CollectionManager.
self._db.update(self._tables.collection, {self._collectionIdName: "key"}, {"key": key, "doc": doc})

def _setRecordCache(self, records: Iterable[CollectionRecord]) -> None:
"""Set internal record cache to contain given records,
old cached records will be removed.
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/registry/collections/nameKey.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@


# This has to be updated on every schema change
_VERSION = VersionTuple(1, 0, 0)
_VERSION = VersionTuple(2, 0, 0)


def _makeTableSpecs(tsRepr: Type[TimespanDatabaseRepresentation]) -> CollectionTablesTuple:
Expand All @@ -58,6 +58,7 @@ def _makeTableSpecs(tsRepr: Type[TimespanDatabaseRepresentation]) -> CollectionT
fields=[
_KEY_FIELD_SPEC,
ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
ddl.FieldSpec("doc", dtype=sqlalchemy.Text, nullable=True),
],
),
run=makeRunTableSpec("name", sqlalchemy.String, tsRepr),
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/registry/collections/synthIntKey.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@


# This has to be updated on every schema change
_VERSION = VersionTuple(1, 0, 0)
_VERSION = VersionTuple(2, 0, 0)


def _makeTableSpecs(tsRepr: Type[TimespanDatabaseRepresentation]) -> CollectionTablesTuple:
Expand All @@ -61,6 +61,7 @@ def _makeTableSpecs(tsRepr: Type[TimespanDatabaseRepresentation]) -> CollectionT
_KEY_FIELD_SPEC,
ddl.FieldSpec("name", dtype=sqlalchemy.String, length=64, nullable=False),
ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
ddl.FieldSpec("doc", dtype=sqlalchemy.Text, nullable=True),
],
unique=[("name",)],
),
Expand Down
34 changes: 33 additions & 1 deletion python/lsst/daf/butler/registry/interfaces/_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def refresh(self) -> None:
raise NotImplementedError()

@abstractmethod
def register(self, name: str, type: CollectionType) -> CollectionRecord:
def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
"""Ensure that a collection of the given name and type are present
in the layer this manager is associated with.

Expand All @@ -414,6 +414,9 @@ def register(self, name: str, type: CollectionType) -> CollectionRecord:
Name of the collection.
type : `CollectionType`
Enumeration value indicating the type of collection.
doc : `str`, optional
Documentation string for the collection. Ignored if the collection
already exists.

Returns
-------
Expand Down Expand Up @@ -532,3 +535,32 @@ def __iter__(self) -> Iterator[CollectionRecord]:
The record for a managed collection.
"""
raise NotImplementedError()

@abstractmethod
def getDocumentation(self, key: Any) -> Optional[str]:
"""Retrieve the documentation string for a collection.

Parameters
----------
key
Internal primary key value for the collection.

Returns
-------
docs : `str` or `None`
Docstring for the collection with the given key.
"""
raise NotImplementedError()

@abstractmethod
def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
"""Set the documentation string for a collection.

Parameters
----------
key
Internal primary key value for the collection.
docs : `str`, optional
Docstring for the collection with the given key.
"""
raise NotImplementedError()
8 changes: 7 additions & 1 deletion python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,11 @@ def testCollections(self):
self.loadData(registry, "datasets.yaml")
run1 = "imported_g"
run2 = "imported_r"
# Test setting a collection docstring after it has been created.
registry.setCollectionDocumentation(run1, "doc for run1")
self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
registry.setCollectionDocumentation(run1, None)
self.assertIsNone(registry.getCollectionDocumentation(run1))
datasetType = "bias"
# Find some datasets via their run's collection.
dataId1 = {"instrument": "Cam1", "detector": 1}
Expand All @@ -456,7 +461,8 @@ def testCollections(self):
self.assertIsNotNone(ref2)
# Associate those into a new collection,then look for them there.
tag1 = "tag1"
registry.registerCollection(tag1, type=CollectionType.TAGGED)
registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
registry.associate(tag1, [ref1, ref2])
self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/transfers/_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ def _finish(self) -> None:
for run in datasetsByRun.keys():
self._collections[run] = self._registry._collections.find(run)
for collectionName in self._computeSortedCollections():
self._backend.saveCollection(self._collections[collectionName])
doc = self._registry.getCollectionDocumentation(collectionName)
self._backend.saveCollection(self._collections[collectionName], doc)
# Sort the dataset types and runs before exporting to ensure
# reproducible order in export file.
for datasetType in sorted(self._datasets.keys()):
Expand Down
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/transfers/_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def saveDimensionData(self, element: DimensionElement, *data: DimensionRecord) -
raise NotImplementedError()

@abstractmethod
def saveCollection(self, record: CollectionRecord) -> None:
def saveCollection(self, record: CollectionRecord, doc: Optional[str]) -> None:
"""Export a collection.

This only exports the collection's own state, not its associations with
Expand All @@ -83,6 +83,8 @@ def saveCollection(self, record: CollectionRecord) -> None:
----------
record: `CollectionRecord`
Object representing the collection to export.
doc : `str` or `None`
Documentation string for the collection.
"""
raise NotImplementedError()

Expand Down
16 changes: 11 additions & 5 deletions python/lsst/daf/butler/transfers/_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
from ._interfaces import RepoExportBackend, RepoImportBackend


EXPORT_FORMAT_VERSION = VersionTuple(1, 0, 0)
EXPORT_FORMAT_VERSION = VersionTuple(1, 0, 1)
"""Export format version.

Files with a different major version or a newer minor version cannot be read by
Expand Down Expand Up @@ -98,13 +98,15 @@ def saveDimensionData(self, element: DimensionElement, *data: DimensionRecord) -
"records": data_dicts,
})

def saveCollection(self, record: CollectionRecord) -> None:
def saveCollection(self, record: CollectionRecord, doc: Optional[str]) -> None:
# Docstring inherited from RepoExportBackend.saveCollections.
data: Dict[str, Any] = {
"type": "collection",
"collection_type": record.type.name,
"name": record.name,
}
if doc is not None:
data["doc"] = doc
if isinstance(record, RunRecord):
data["host"] = record.host
data["timespan_begin"] = record.timespan.begin
Expand Down Expand Up @@ -219,6 +221,7 @@ def __init__(self, stream: IO, registry: Registry):
self.runs: Dict[str, Tuple[Optional[str], Timespan]] = {}
self.chains: Dict[str, List[str]] = {}
self.collections: Dict[str, CollectionType] = {}
self.collectionDocs: Dict[str, str] = {}
self.datasetTypes: NamedValueSet[DatasetType] = NamedValueSet()
self.dimensions: Mapping[DimensionElement, List[DimensionRecord]] = defaultdict(list)
self.tagAssociations: Dict[str, List[int]] = defaultdict(list)
Expand Down Expand Up @@ -264,6 +267,9 @@ def __init__(self, stream: IO, registry: Registry):
self.chains[data["name"]] = children
else:
self.collections[data["name"]] = collectionType
doc = data.get("doc")
if doc is not None:
self.collectionDocs[data["name"]] = doc
elif data["type"] == "run":
# Also support old form of saving a run with no extra info.
self.runs[data["name"]] = (None, Timespan(None, None))
Expand Down Expand Up @@ -310,12 +316,12 @@ def register(self) -> None:
for datasetType in self.datasetTypes:
self.registry.registerDatasetType(datasetType)
for run in self.runs:
self.registry.registerRun(run)
self.registry.registerRun(run, doc=self.collectionDocs.get(run))
# No way to add extra run info to registry yet.
for collection, collection_type in self.collections.items():
self.registry.registerCollection(collection, collection_type)
self.registry.registerCollection(collection, collection_type, doc=self.collectionDocs.get(run))
for chain, children in self.chains.items():
self.registry.registerCollection(chain, CollectionType.CHAINED)
self.registry.registerCollection(chain, CollectionType.CHAINED, doc=self.collectionDocs.get(run))
self.registry.setCollectionChain(chain, children)

def load(self, datastore: Optional[Datastore], *,
Expand Down