Skip to content

Commit

Permalink
Rework how expression constants (null, ingest_date) are handled.
Browse files Browse the repository at this point in the history
  • Loading branch information
TallJimbo committed Dec 16, 2020
1 parent 15afa4a commit 82ccbb4
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 25 deletions.
8 changes: 1 addition & 7 deletions python/lsst/daf/butler/registry/queries/_structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,6 @@
from .expressions import Node, NormalForm, NormalFormExpression, ParserYacc # type: ignore


BIND_CONSTANTS = {"None": None, "NULL": None}


@immutable
class QueryWhereExpression:
"""A struct representing a parsed user-provided WHERE expression.
Expand All @@ -82,10 +79,7 @@ def __init__(self, expression: Optional[str] = None, bind: Optional[Mapping[str,
else:
self._tree = None
if bind is None:
bind = BIND_CONSTANTS
else:
bind = dict(bind)
bind.update(BIND_CONSTANTS)
bind = {}
self._bind = bind

def attach(
Expand Down
28 changes: 19 additions & 9 deletions python/lsst/daf/butler/registry/queries/expressions/categorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

__all__ = () # all symbols intentionally private; for internal package use.

import enum
from typing import (
Optional,
Tuple,
Expand All @@ -34,23 +35,32 @@
)


def categorizeIngestDateId(name: str) -> bool:
"""Categorize an identifier in a parsed expression as an ingest_date
attribute of a dataset table.
class ExpressionConstant(enum.Enum):
"""Enumeration for constants recognized in all expressions.
"""
NULL = "null"
INGEST_DATE = "ingest_date"


def categorizeConstant(name: str) -> Optional[ExpressionConstant]:
"""Categorize an identifier in a parsed expression as one of a few global
constants.
Parameters
----------
name : `str`
Identifier to categorize.
Identifier to categorize. Case-insensitive.
Returns
-------
isIngestDate : `bool`
True is returned if identifier name is ``ingest_date``.
categorized : `ExpressionConstant` or `None`
Enumeration value if the string represents a constant, `None`
otherwise.
"""
# TODO: this is hardcoded for now, may be better to extract it from schema
# but I do not know how to do it yet.
return name == "ingest_date"
try:
return ExpressionConstant(name.lower())
except ValueError:
return None


def categorizeElementId(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
Expand Down
11 changes: 6 additions & 5 deletions python/lsst/daf/butler/registry/queries/expressions/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
from ...wildcards import EllipsisType, Ellipsis
from .parser import Node, TreeVisitor
from .normalForm import NormalForm, NormalFormVisitor
from .categorize import categorizeElementId, categorizeIngestDateId
from .categorize import categorizeElementId, categorizeConstant, ExpressionConstant

if TYPE_CHECKING:
import astropy.time
Expand Down Expand Up @@ -194,10 +194,11 @@ def visitIdentifier(self, name: str, node: Node) -> TreeSummary:
# Docstring inherited from TreeVisitor.visitIdentifier
if name in self.bindKeys:
return TreeSummary()
if categorizeIngestDateId(name):
return TreeSummary(
hasIngestDate=True,
)
constant = categorizeConstant(name)
if constant is ExpressionConstant.INGEST_DATE:
return TreeSummary(hasIngestDate=True)
elif constant is not None:
return TreeSummary()
element, column = categorizeElementId(self.universe, name)
if column is None:
assert isinstance(element, Dimension)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
)
from ....core.utils import iterable
from .parser import Node, TreeVisitor
from .categorize import categorizeElementId, categorizeIngestDateId
from .categorize import categorizeElementId, categorizeConstant, ExpressionConstant

if TYPE_CHECKING:
from .._structs import QueryColumns
Expand Down Expand Up @@ -873,13 +873,17 @@ def visitIdentifier(self, name: str, node: Node) -> WhereClauseConverter:
if isinstance(value, Timespan):
return TimespanWhereClauseConverter(self._TimespanReprClass.fromLiteral(value))
return ScalarWhereClauseConverter.fromLiteral(value)
if categorizeIngestDateId(name):
constant = categorizeConstant(name)
if constant is ExpressionConstant.INGEST_DATE:
assert self.columns.datasets is not None
assert self.columns.datasets.ingestDate is not None, "dataset.ingest_date is not in the query"
return ScalarWhereClauseConverter.fromExpression(
_TimestampColumnElement(self.columns.datasets.ingestDate),
Time,
)
elif constant is ExpressionConstant.NULL:
return ScalarWhereClauseConverter.fromLiteral(None)
assert constant is None, "Check for enum values should be exhaustive."
element, column = categorizeElementId(self.universe, name)
if column is not None:
if column == TimespanDatabaseRepresentation.NAME:
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1722,7 +1722,7 @@ def query(where):
# the expression.

# t1 is before the start of i1, so this should not include i1.
self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (None, t1)"))
self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
# t2 is exactly at the start of i2, but ends are exclusive, so these
# should not include i2.
self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
Expand All @@ -1734,7 +1734,7 @@ def query(where):
# t4 is exactly at the end of i4, so this should include i4.
self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
# i4's upper bound of t4 is exclusive so this should not include t4.
self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, None)"))
self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

# Now some timespan vs. time scalar queries.
self.assertEqual(ids[:i2], query("visit.timespan < t2"))
Expand Down

0 comments on commit 82ccbb4

Please sign in to comment.