diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c889db04..2cd3b7345 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Added nanosecond-precision datetime filtering, so that filtering by datetime supports nanosecond precision. This is configured via the `USE_DATETIME_NANOS` environment variable and remains compatible with microsecond-precision datetimes. [#529](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/529) + ### Changed ### Fixed diff --git a/README.md b/README.md index fc5525887..f2a7f498e 100644 --- a/README.md +++ b/README.md @@ -366,6 +366,7 @@ You can customize additional settings in your `.env` file: | `STAC_DEFAULT_ITEM_LIMIT` | Configures the default number of STAC items returned when no limit parameter is specified in the request. | `10` | Optional | | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional | +| `USE_DATETIME_NANOS` | Enables nanosecond precision handling for `datetime` field searches, as per the `date_nanos` type. When `false`, datetimes are handled with millisecond precision (3 fractional digits), as per the `date` type. | `true` | Optional | | `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. 
Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional | diff --git a/stac_fastapi/core/stac_fastapi/core/datetime_utils.py b/stac_fastapi/core/stac_fastapi/core/datetime_utils.py index d5f992de8..587f2d0b4 100644 --- a/stac_fastapi/core/stac_fastapi/core/datetime_utils.py +++ b/stac_fastapi/core/stac_fastapi/core/datetime_utils.py @@ -2,6 +2,7 @@ from datetime import datetime, timezone +from stac_fastapi.core.utilities import get_bool_env from stac_fastapi.types.rfc3339 import rfc3339_str_to_datetime @@ -15,27 +16,71 @@ def format_datetime_range(date_str: str) -> str: Returns: str: A string formatted as 'YYYY-MM-DDTHH:MM:SSZ/YYYY-MM-DDTHH:MM:SSZ', with '..' used if any element is None. """ - - def normalize(dt): - """Normalize datetime string and preserve millisecond precision.""" - dt = dt.strip() - if not dt or dt == "..": - return ".." - dt_obj = rfc3339_str_to_datetime(dt) - dt_utc = dt_obj.astimezone(timezone.utc) - return dt_utc.isoformat(timespec="milliseconds").replace("+00:00", "Z") - - if not isinstance(date_str, str): - return "../.." - - if "/" not in date_str: - return f"{normalize(date_str)}/{normalize(date_str)}" - - try: - start, end = date_str.split("/", 1) - except Exception: - return "../.." - return f"{normalize(start)}/{normalize(end)}" + use_datetime_nanos = get_bool_env("USE_DATETIME_NANOS", default=True) + + if use_datetime_nanos: + MIN_DATE_NANOS = datetime(1970, 1, 1, tzinfo=timezone.utc) + MAX_DATE_NANOS = datetime(2262, 4, 11, 23, 47, 16, 854775, tzinfo=timezone.utc) + + def normalize(dt): + """Normalize datetime string and preserve nano second precision.""" + dt = dt.strip() + if not dt or dt == "..": + return ".." 
+ dt_utc = rfc3339_str_to_datetime(dt).astimezone(timezone.utc) + if dt_utc < MIN_DATE_NANOS: + dt_utc = MIN_DATE_NANOS + if dt_utc > MAX_DATE_NANOS: + dt_utc = MAX_DATE_NANOS + return dt_utc.isoformat(timespec="auto").replace("+00:00", "Z") + + if not isinstance(date_str, str): + return f"{MIN_DATE_NANOS.isoformat(timespec='auto').replace('+00:00','Z')}/{MAX_DATE_NANOS.isoformat(timespec='auto').replace('+00:00','Z')}" + + if "/" not in date_str: + return f"{normalize(date_str)}/{normalize(date_str)}" + + try: + start, end = date_str.split("/", 1) + except Exception: + return f"{MIN_DATE_NANOS.isoformat(timespec='auto').replace('+00:00','Z')}/{MAX_DATE_NANOS.isoformat(timespec='auto').replace('+00:00','Z')}" + + normalized_start = normalize(start) + normalized_end = normalize(end) + + if normalized_start == "..": + normalized_start = MIN_DATE_NANOS.isoformat(timespec="auto").replace( + "+00:00", "Z" + ) + if normalized_end == "..": + normalized_end = MAX_DATE_NANOS.isoformat(timespec="auto").replace( + "+00:00", "Z" + ) + + return f"{normalized_start}/{normalized_end}" + + else: + + def normalize(dt): + """Normalize datetime string and preserve millisecond precision.""" + dt = dt.strip() + if not dt or dt == "..": + return ".." + dt_obj = rfc3339_str_to_datetime(dt) + dt_utc = dt_obj.astimezone(timezone.utc) + return dt_utc.isoformat(timespec="milliseconds").replace("+00:00", "Z") + + if not isinstance(date_str, str): + return "../.." + + if "/" not in date_str: + return f"{normalize(date_str)}/{normalize(date_str)}" + + try: + start, end = date_str.split("/", 1) + except Exception: + return "../.." 
+ return f"{normalize(start)}/{normalize(end)}" # Borrowed from pystac - https://github.com/stac-utils/pystac/blob/f5e4cf4a29b62e9ef675d4a4dac7977b09f53c8f/pystac/utils.py#L370-L394 diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py index d6b68e858..efae12080 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py @@ -8,8 +8,10 @@ import re from datetime import date from datetime import datetime as datetime_type +from datetime import timezone from typing import Dict, Optional, Union +from stac_fastapi.core.utilities import get_bool_env from stac_fastapi.types.rfc3339 import DateTimeType logger = logging.getLogger(__name__) @@ -37,38 +39,103 @@ def return_date( always containing 'gte' and 'lte' keys. """ result: Dict[str, Optional[str]] = {"gte": None, "lte": None} - + use_datetime_nanos = get_bool_env("USE_DATETIME_NANOS", default=True) if interval is None: return result - if isinstance(interval, str): - if "/" in interval: - parts = interval.split("/") - result["gte"] = ( - parts[0] if parts[0] != ".." else datetime_type.min.isoformat() + "Z" - ) - result["lte"] = ( - parts[1] - if len(parts) > 1 and parts[1] != ".." - else datetime_type.max.isoformat() + "Z" + if use_datetime_nanos: + MIN_DATE_NANOS = datetime_type(1970, 1, 1, tzinfo=timezone.utc) + MAX_DATE_NANOS = datetime_type( + 2262, 4, 11, 23, 47, 16, 854775, tzinfo=timezone.utc + ) + + if isinstance(interval, str): + if "/" in interval: + parts = interval.split("/") + result["gte"] = ( + parts[0] if parts[0] != ".." else MIN_DATE_NANOS.isoformat() + "Z" + ) + result["lte"] = ( + parts[1] + if len(parts) > 1 and parts[1] != ".." + else MAX_DATE_NANOS.isoformat() + "Z" + ) + else: + converted_time = interval if interval != ".." 
else None + result["gte"] = result["lte"] = converted_time + return result + + if isinstance(interval, datetime_type): + dt_utc = ( + interval.astimezone(timezone.utc) + if interval.tzinfo + else interval.replace(tzinfo=timezone.utc) ) - else: - converted_time = interval if interval != ".." else None - result["gte"] = result["lte"] = converted_time + if dt_utc < MIN_DATE_NANOS: + dt_utc = MIN_DATE_NANOS + elif dt_utc > MAX_DATE_NANOS: + dt_utc = MAX_DATE_NANOS + datetime_iso = dt_utc.isoformat() + result["gte"] = result["lte"] = datetime_iso + elif isinstance(interval, tuple): + start, end = interval + # Ensure datetimes are converted to UTC and formatted with 'Z' + if start: + start_utc = ( + start.astimezone(timezone.utc) + if start.tzinfo + else start.replace(tzinfo=timezone.utc) + ) + if start_utc < MIN_DATE_NANOS: + start_utc = MIN_DATE_NANOS + elif start_utc > MAX_DATE_NANOS: + start_utc = MAX_DATE_NANOS + result["gte"] = start_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + if end: + end_utc = ( + end.astimezone(timezone.utc) + if end.tzinfo + else end.replace(tzinfo=timezone.utc) + ) + if end_utc < MIN_DATE_NANOS: + end_utc = MIN_DATE_NANOS + elif end_utc > MAX_DATE_NANOS: + end_utc = MAX_DATE_NANOS + result["lte"] = end_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + return result - if isinstance(interval, datetime_type): - datetime_iso = interval.isoformat() - result["gte"] = result["lte"] = datetime_iso - elif isinstance(interval, tuple): - start, end = interval - # Ensure datetimes are converted to UTC and formatted with 'Z' - if start: - result["gte"] = start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" - if end: - result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" - - return result + else: + if isinstance(interval, str): + if "/" in interval: + parts = interval.split("/") + result["gte"] = ( + parts[0] + if parts[0] != ".." 
+ else datetime_type.min.isoformat() + "Z" + ) + result["lte"] = ( + parts[1] + if len(parts) > 1 and parts[1] != ".." + else datetime_type.max.isoformat() + "Z" + ) + else: + converted_time = interval if interval != ".." else None + result["gte"] = result["lte"] = converted_time + return result + + if isinstance(interval, datetime_type): + datetime_iso = interval.isoformat() + result["gte"] = result["lte"] = datetime_iso + elif isinstance(interval, tuple): + start, end = interval + # Ensure datetimes are converted to UTC and formatted with 'Z' + if start: + result["gte"] = start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + if end: + result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + + return result def extract_date(date_str: str) -> date: diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index cb0c8f2d5..129194da6 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -142,7 +142,7 @@ class Geometry(Protocol): # noqa "type": "object", "properties": { # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - "datetime": {"type": "date"}, + "datetime": {"type": "date_nanos"}, "start_datetime": {"type": "date"}, "end_datetime": {"type": "date"}, "created": {"type": "date"}, diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index 04f3a1521..38d7e5978 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -608,10 +608,10 @@ async def test_datetime_bad_interval(app_client, txn_client, ctx): await create_item(txn_client, third_item) dt_formats = [ - "1920-02-04T12:30:22+00:00/1920-02-06T12:30:22+00:00", - "1920-02-04T12:30:22.00Z/1920-02-06T12:30:22.00Z", - "1920-02-04T12:30:22Z/1920-02-06T12:30:22Z", - 
"1920-02-04T12:30:22.00+00:00/1920-02-06T12:30:22.00+00:00", + "1970-02-04T12:30:22+00:00/1970-02-06T12:30:22+00:00", + "1970-02-04T12:30:22.00Z/1970-02-06T12:30:22.00Z", + "1970-02-04T12:30:22Z/1970-02-06T12:30:22Z", + "1970-02-04T12:30:22.00+00:00/1970-02-06T12:30:22.00+00:00", ] for dt in dt_formats: