Merge pull request #953 from lsst/tickets/DM-42636
DM-42636: add group and day_obs first-class dimensions
timj committed Feb 29, 2024
2 parents b6936e0 + 58e02cb commit 7c1f99e
Showing 35 changed files with 18,608 additions and 8,370 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/build.yaml
@@ -64,7 +64,8 @@ jobs:
- name: Install dependencies
shell: bash -l {0}
run: |
pip install -r requirements.txt
pip install uv
uv pip install -r requirements.txt
# We have two cores so we can speed up the testing with xdist
- name: Install pytest packages
@@ -82,7 +83,7 @@ jobs:
- name: Build and install
shell: bash -l {0}
run: |
pip install -v --no-deps -e .
uv pip install -v --no-deps -e .
- name: Run tests
shell: bash -l {0}
18 changes: 13 additions & 5 deletions .github/workflows/build_docs.yaml
@@ -25,10 +25,18 @@ jobs:
- name: Install sqlite
run: sudo apt-get install sqlite libyaml-dev

- name: Update pip/wheel infrastructure
- name: Set the VIRTUAL_ENV variable for uv to work
run: |
echo "VIRTUAL_ENV=${Python_ROOT_DIR}" >> $GITHUB_ENV
- name: Install uv
run: |
python -m pip install --upgrade pip
pip install wheel
pip install uv
- name: Update wheel infrastructure
run: |
uv pip install wheel
- name: Install postgresql (server)
run: |
@@ -37,16 +45,16 @@
- name: Install dependencies
run: |
pip install -r requirements.txt
uv pip install -r requirements.txt
- name: Build and install
run: pip install --no-deps -v .
run: uv pip install --no-deps -v -e .

- name: Install graphviz
run: sudo apt-get install graphviz

- name: Install documenteer
run: pip install 'documenteer[pipelines]==0.8.2'
run: uv pip install 'documenteer[pipelines]==0.8.2'

- name: Build documentation
working-directory: ./doc
19 changes: 14 additions & 5 deletions .github/workflows/docker.yaml
@@ -69,16 +69,25 @@ jobs:
with:
python-version: "3.11"

- name: Install dependencies

- name: Set the VIRTUAL_ENV variable for uv to work
run: |
echo "VIRTUAL_ENV=${Python_ROOT_DIR}" >> $GITHUB_ENV
- name: Install uv
run: |
python -m pip install --upgrade pip
pip install --upgrade setuptools wheel build
pip install -r requirements.txt
pip install testcontainers
pip install uv
- name: Install dependencies
run: |
uv pip install --upgrade setuptools wheel build
uv pip install -r requirements.txt
uv pip install testcontainers
- name: Install Butler client
run: |
pip install -v --no-deps -e .
uv pip install -v --no-deps -e .
- name: Run smoke test
run: |
1 change: 1 addition & 0 deletions doc/changes/DM-42636.api.rst
@@ -0,0 +1 @@
Added ``Timespan.from_day_obs()`` to construct a 24-hour time span from an observing day specified as a YYYYMMDD integer.
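A minimal usage sketch of the new classmethod (the values are illustrative; ``begin``/``end`` are astropy times for finite bounds):

from lsst.daf.butler import Timespan

# Full observing day 2024-02-29 with a 12-hour (43200 s) offset from TAI
# midnight, i.e. the span 2024-02-29T12:00 to 2024-03-01T12:00 TAI.
span = Timespan.from_day_obs(20240229, offset=12 * 3600)
print(span.begin.isot, span.end.isot)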
4 changes: 4 additions & 0 deletions doc/changes/DM-42636.feature.rst
@@ -0,0 +1,4 @@
* Released ``DimensionUniverse`` version 6.
* ``group`` and ``day_obs`` are now true dimensions.
* ``exposure`` now implies both ``group`` and ``day_obs``, and ``visit`` implies ``day_obs``.
* Exported YAML files using universe version 1 and newer can be imported and converted to universe version 6.
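A sketch of what the new ``implies`` relationships give a client; the repository path, instrument, and exposure ID below are hypothetical:

from lsst.daf.butler import Butler

butler = Butler("/repo/main")  # hypothetical repository
# Expanding an exposure data ID now also resolves its implied group and
# day_obs values from the registry.
data_id = butler.registry.expandDataId(instrument="LATISS", exposure=2024022900123)
print(data_id["group"], data_id["day_obs"])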
30 changes: 30 additions & 0 deletions doc/lsst.daf.butler/dimensions.rst
@@ -81,3 +81,33 @@ The same is not true of regions (especially regions on the sphere), at least not
Instead, spatial regions for dimensions are stored as opaque, ``base64``-encoded strings in the database, but we also create an overlap table for each spatial dimension element that relates it to a special "common" skypix dimension (see `DimensionUniverse.commonSkyPix`).
We can then use a regular index on the common skypix ID to make spatial joins efficient, to the extent that proximity in skypix ID corresponds to proximity on sky.
In practice, these IDs correspond to some space-filling curve, which yields good typical-case performance with a reasonable choice of pixelization level, but no guarantees on worst-case performance.
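A short sketch of how the common skypix dimension can be inspected from Python; the circular region here is an arbitrary example built with ``lsst.sphgeom``:

import lsst.sphgeom
from lsst.daf.butler import DimensionUniverse

universe = DimensionUniverse()   # default daf_butler universe
common = universe.commonSkyPix
print(common.name)               # e.g. "htm7" for the default universe

# Pixel-ID ranges of the common skypix system overlapping a small circular
# region; these are the integer IDs stored in the overlap tables and used
# for spatial joins.
region = lsst.sphgeom.Circle(lsst.sphgeom.UnitVector3d(1.0, 0.0, 0.0), lsst.sphgeom.Angle.fromDegrees(0.1))
ranges = common.pixelization.envelope(region)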

.. _lsst.daf.butler-dimensions_universe_history:

Dimension Universe Change History
---------------------------------

Each project can have its own universe definition.
The dimension universe data model evolves over time.
A universe is uniquely identified by its version number and its namespace.
The default universe has a namespace of ``daf_butler``.

The default namespace has had the following version changes:

1. Added ``healpix`` dimension.
Removed ``visit.seeing``.
Updated storage classes for many dimensions.
Defined governor dimensions (``instrument`` and ``skymap``).
Added ``day_obs`` and ``seq_num`` to ``visit`` and ``exposure``.
Renamed ``exposure.name`` to ``exposure.obs_id``.
2. Modified how visits are defined
(added the default ``visit_system`` to ``instrument``,
added the ``visit_system_membership`` dimension, and removed ``visit_system`` from the ``visit`` dimension).
Added ``has_simulated``, ``azimuth``, ``seq_start``, and ``seq_end`` to ``exposure``.
Added ``seq_num`` and ``azimuth`` to ``visit``.
3. Updated the length of the ``observation_reason`` field from 32 to 68 in ``exposure`` and ``visit``.
4. Added "populated_by" information to visit fields to allow related dimensions to be discovered automatically.
5. Changed the length of the ``instrument.name`` field from 16 to 32 characters.
6. Made ``day_obs`` and ``group`` full dimensions.

Prior to October 2020 there were no version numbers for the dimension universe.
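A sketch of how a repository's universe can be checked from Python; the repository path is hypothetical and the ``namespace``/``version`` accessors are assumed to be available on ``DimensionUniverse``:

from lsst.daf.butler import Butler

butler = Butler("/repo/main")   # hypothetical repository
universe = butler.dimensions
# For a repository using the default namespace at the current version this
# would print "daf_butler 6".
print(universe.namespace, universe.version)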
40 changes: 40 additions & 0 deletions python/lsst/daf/butler/_timespan.py
@@ -58,6 +58,9 @@
from .registry import Registry


_ONE_DAY = astropy.time.TimeDelta("1d", scale="tai")


class _SpecialTimespanBound(enum.Enum):
"""Enumeration to provide a singleton value for empty timespan bounds.
@@ -243,6 +246,43 @@ def fromInstant(cls, time: astropy.time.Time) -> Timespan:
)
return Timespan(None, None, _nsec=(nsec, nsec + 1))

@classmethod
def from_day_obs(cls, day_obs: int, offset: int = 0) -> Timespan:
    """Construct a timespan for a 24-hour period based on the day of
    observation.

    Parameters
    ----------
    day_obs : `int`
        The day of observation as an integer of the form YYYYMMDD.
        The year must be at least 1970 since these are converted to TAI.
    offset : `int`, optional
        Offset in seconds from TAI midnight to be applied.

    Returns
    -------
    day_span : `Timespan`
        A timespan corresponding to a full day of observing.

    Notes
    -----
    If the observing day is 20240229 and the offset is 12 hours, the
    resulting time span will be 2024-02-29T12:00 to 2024-03-01T12:00.
    """
    if day_obs < 1970_00_00 or day_obs > 1_0000_00_00:
        raise ValueError(f"day_obs must be in form YYYYMMDD and be newer than 1970, not {day_obs}.")

    ymd = str(day_obs)
    t1 = astropy.time.Time(f"{ymd[0:4]}-{ymd[4:6]}-{ymd[6:8]}T00:00:00", format="isot", scale="tai")

    if offset != 0:
        t_delta = astropy.time.TimeDelta(offset, format="sec", scale="tai")
        t1 += t_delta

    t2 = t1 + _ONE_DAY

    return Timespan(t1, t2)

@property
@cached_getter
def begin(self) -> TimespanBound:
53 changes: 28 additions & 25 deletions python/lsst/daf/butler/configs/dimensions.yaml
@@ -1,4 +1,4 @@
version: 5
version: 6
namespace: daf_butler
skypix:
# 'common' is the skypix system and level used to relate all other spatial
@@ -134,6 +134,30 @@ elements:
nested:
cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage

group:
doc: >
A group of observations defined at the time they were observed by the
acquisition system.
keys:
- name: name
type: string
length: 64
requires: [instrument]
storage:
cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage

day_obs:
doc: >
A day and night of observations that rolls over during daylight hours.
The identifier is the date encoded as a decimal integer, i.e. YYYYMMDD,
with the exact rollover time observatory-dependent.
keys:
- name: id
type: int
requires: [instrument]
storage:
cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage

visit:
doc: >
A sequence of observations processed together, comprised of one or
@@ -148,14 +172,8 @@ elements:
type: string
length: 64
requires: [instrument]
implies: [physical_filter]
implies: [physical_filter, day_obs]
metadata:
- name: day_obs
type: int
doc: >
Day of observation as defined by the observatory (YYYYMMDD format).
If a visit crosses multiple days this entry will be the earliest
day of any of the exposures that make up the visit.
- name: seq_num
type: int
doc: >
@@ -219,7 +237,7 @@ elements:
type: string
length: 64
requires: [instrument]
implies: [physical_filter]
implies: [physical_filter, group, day_obs]
metadata:
- name: exposure_time
type: float
@@ -237,10 +255,6 @@
doc: >
The reason this observation was taken. (e.g. science,
filter scan, unknown).
- name: day_obs
type: int
doc: >
Day of observation as defined by the observatory (YYYYMMDD format).
- name: seq_num
type: int
doc: >
@@ -255,17 +269,6 @@
type: int
doc: >
Oldest sequence number that might be related to this exposure.
- name: group_name
type: string
length: 64
doc: >
String group identifier associated with this exposure by the
acquisition system.
- name: group_id
type: int
doc: >
Integer group identifier associated with this exposure by the
acquisition system.
- name: target_name
type: string
length: 64
@@ -431,7 +434,7 @@ topology:
skymap_regions: [patch, tract]

temporal:
observation_timespans: [exposure, visit]
observation_timespans: [exposure, visit, day_obs]

packers:
visit_detector:
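With ``day_obs`` now a temporal dimension in its own right, its records can be queried like any other; a sketch, with a hypothetical repository path and instrument:

from lsst.daf.butler import Butler

butler = Butler("/repo/main")   # hypothetical repository
records = butler.registry.queryDimensionRecords("day_obs", where="instrument = 'LATISS'")
for rec in records:
    # Each record carries the YYYYMMDD identifier and its timespan.
    print(rec.id, rec.timespan)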
7 changes: 5 additions & 2 deletions python/lsst/daf/butler/dimensions/_records.py
@@ -112,9 +112,12 @@ def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSe
if definition.spatial:
members["region"] = (str, ...)

# mypy does not seem to like create_model
# For the new derived class name we need to convert to camel case,
# so "day_obs" -> "DayObs".
derived_name = "".join([part.capitalize() for part in definition.name.split("_")])

model = create_model(
f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
f"SpecificSerializedDimensionRecord{derived_name}",
__base__=SpecificSerializedDimensionRecord,
**members, # type: ignore
)
8 changes: 4 additions & 4 deletions python/lsst/daf/butler/registry/queries/_structs.py
@@ -225,8 +225,8 @@ def parse_element(cls, order_by: Iterable[str], element: DimensionElement) -> Or
if name[0] == "-":
ascending = False
name = name[1:]
column = categorizeElementOrderByName(element, name)
term = cls._make_term(element, column, ascending)
found_element, column = categorizeElementOrderByName(element, name)
term = cls._make_term(found_element, column, ascending)
terms.append(term)
return cls(terms)

@@ -303,9 +303,9 @@ def __init__(self, order_by: Iterable[str], element: DimensionElement):
if name[0] == "-":
ascending = False
name = name[1:]
column = categorizeElementOrderByName(element, name)
found_element, column = categorizeElementOrderByName(element, name)
self.order_by_columns.append(
OrderByClauseColumn(element=element, column=column, ordering=ascending)
OrderByClauseColumn(element=found_element, column=column, ordering=ascending)
)

order_by_columns: Iterable[OrderByClauseColumn]
20 changes: 15 additions & 5 deletions python/lsst/daf/butler/registry/queries/expressions/categorize.py
@@ -241,6 +241,10 @@ def categorizeOrderByName(dimensions: DimensionGroup, name: str) -> tuple[Dimens
elif isinstance(element, Dimension) and field_name == element.primaryKey.name:
# Primary key is optional
field_name = None
elif field_name in element.dimensions.names:
# Something like visit.physical_filter, which we want to remap
# to just "physical_filter".
return dimensions.universe[field_name], None
else:
if not (
field_name in element.metadata.names
@@ -251,7 +255,7 @@ def categorizeOrderByName(dimensions: DimensionGroup, name: str) -> tuple[Dimens
return element, field_name


def categorizeElementOrderByName(element: DimensionElement, name: str) -> str | None:
def categorizeElementOrderByName(element: DimensionElement, name: str) -> tuple[DimensionElement, str | None]:
"""Categorize an identifier in an ORDER BY clause for a single element.
Parameters
@@ -263,6 +267,8 @@ def categorizeElementOrderByName(element: DimensionElement, name: str) -> str |
Returns
-------
element : `DimensionElement`
The `DimensionElement` the identifier refers to.
column : `str` or `None`
The name of a column in the table for ``element``, or `None` if
``element`` is a `Dimension` and the requested column is its primary
@@ -300,17 +306,21 @@ def categorizeElementOrderByName(element: DimensionElement, name: str) -> str |
# Must be a dimension element
if not isinstance(element, Dimension):
raise ValueError(f"Element '{element}' is not a dimension.")
elif name in element.dimensions.names and name != element.name:
# Something like visit.physical_filter, which we want to remap
# to just "physical_filter".
return element.universe[name], None
else:
# Can be a metadata name or any of the keys, but primary key needs
# to be treated the same as a reference to the dimension name
# itself.
if isinstance(element, Dimension):
if name == element.primaryKey.name:
return None
return element, None
elif name in element.uniqueKeys.names:
return name
return element, name
if name in element.metadata.names:
return name
return element, name
raise ValueError(f"Field '{name}' does not exist in '{element}'.")
else:
# qualified name, must be a dimension element and a field
@@ -334,4 +344,4 @@
):
raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")

return field_name
return element, field_name
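A sketch of the ordering behaviour this enables: an order-by identifier that names an implied dimension (here ``day_obs`` on ``exposure``) is now resolved to that dimension's own element rather than being treated as a metadata column. The repository path and instrument are hypothetical:

from lsst.daf.butler import Butler

butler = Butler("/repo/main")   # hypothetical repository
records = butler.registry.queryDimensionRecords("exposure", where="instrument = 'LATISS'")
# Order by the implied day_obs dimension, then by seq_num within each day.
for rec in records.order_by("day_obs", "seq_num"):
    print(rec.day_obs, rec.seq_num, rec.obs_id)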
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/registry/tests/_database.py
@@ -1257,7 +1257,7 @@ def test_aggregate(self) -> None:
pixelization = Mq3cPixelization(10)
start = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
offset = astropy.time.TimeDelta(60, format="sec")
timespans = [Timespan(begin=start + offset * n, end=start + offset * n + 1) for n in range(3)]
timespans = [Timespan(begin=start + offset * n, end=start + offset * (n + 1)) for n in range(3)]
ts_cls = db.getTimespanRepresentation()
ts_col = ts_cls.from_columns(t.columns)
db.insert(