Postgis search fix #1475

Merged: 4 commits, Jul 21, 2023

25 changes: 15 additions & 10 deletions datacube/drivers/postgis/_api.py
@@ -182,34 +182,38 @@ def get_dataset_fields(metadata_type_definition):
     return fields
 
 
+def non_native_fields(mdt_metadata):
+    return {
+        name: field
+        for name, field in get_dataset_fields(mdt_metadata).items()
+        if not isinstance(field, NativeField)
+    }
+
+
 def extract_dataset_search_fields(ds_metadata, mdt_metadata):
     """
     :param ds_metdata: A Dataset metadata document
     :param mdt_metadata: The corresponding metadata-type definition document
 
     :return: A dictionary mapping search field names to (type_name, value) tuples.
     """
-    fields = {
-        name: field
-        for name, field in get_dataset_fields(mdt_metadata).items()
-        if not isinstance(field, NativeField)
-    }
-    return extract_dataset_fields(ds_metadata, fields)
+    return extract_dataset_fields(ds_metadata, non_native_fields(mdt_metadata))
 
 
 def extract_dataset_fields(ds_metadata, fields):
     """
     :param ds_metdata: A Dataset metadata document
-    :param mdt_metadata: The corresponding metadata-type definition document
+    :param fields: A dictionary of field names to Field objects
 
     :return: A dictionary mapping search field names to (type_name, value) tuples.
     """
     result = {}
     for field_name, field in fields.items():
         try:
             fld_type = field.type_name
-            fld_val = field.search_value_to_alchemy(field.extract(ds_metadata))
-            result[field_name] = (fld_type, fld_val)
+            raw_val = field.extract(ds_metadata)
+            sqla_val = field.search_value_to_alchemy(raw_val)
+            result[field_name] = (fld_type, sqla_val)
         except UnindexableValue:
             continue
     return result
@@ -640,7 +644,8 @@ def search_datasets(self, expressions,
         select_query = self.search_datasets_query(expressions, source_exprs,
                                                   select_fields, with_source_ids,
                                                   limit, geom=geom)
-        _LOG.debug("search_datasets SQL: %s", str(select_query))
+        str_qry = str(select_query)
+        _LOG.debug("search_datasets SQL: %s", str_qry)
         return self._connection.execute(select_query)
 
     def bulk_simple_dataset_search(self, products=None, batch_size=0):
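The net effect of this refactor is to split "which custom search fields exist" (a property of the metadata type) from "what values this dataset carries" (a property of the individual dataset). A minimal sketch of how the two helpers compose, mirroring the refactored extract_dataset_search_fields above; the wrapper name is invented for illustration:

```python
# Sketch only: shows how non_native_fields() and extract_dataset_fields()
# from this hunk are meant to be combined. The wrapper name is hypothetical.
from datacube.drivers.postgis._api import non_native_fields, extract_dataset_fields

def search_values_for(ds_metadata, mdt_metadata):
    # Which custom (non-native) search fields does this metadata type define?
    fields = non_native_fields(mdt_metadata)
    # Pull the matching values out of this particular dataset document.
    # Result shape, per the docstrings above: {field_name: (type_name, value)}.
    return extract_dataset_fields(ds_metadata, fields)
```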
4 changes: 3 additions & 1 deletion datacube/drivers/postgis/_fields.py
@@ -266,7 +266,9 @@ def search_value_to_alchemy(self, value):
         )
 
     def between(self, low, high):
-        return ValueBetweenExpression(self, low, high)
+        # Numeric fields actually stored as ranges in current schema.
+        # return ValueBetweenExpression(self, low, high)
+        return RangeBetweenExpression(self, low, high, _range_class=NumericRange)
 
     def parse_value(self, value):
         return Decimal(value)
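This is the core query-side fix: per the new comment, numeric custom fields are stored as PostgreSQL ranges in the current postgis schema, so a scalar between(low, high) has to be expressed as a range comparison rather than a plain value comparison. The snippet below is not the datacube RangeBetweenExpression implementation, only a hedged illustration of the underlying SQLAlchemy/psycopg2 idea, with an invented table and helper:

```python
# Illustration only (invented table and helper, not datacube code): a scalar
# "between low and high" search against a range-typed column becomes a
# range-overlap test: the stored range must intersect [low, high].
from psycopg2.extras import NumericRange
from sqlalchemy import Column, MetaData, Table
from sqlalchemy.dialects.postgresql import NUMRANGE

metadata = MetaData()
example = Table("example", metadata, Column("cloud_cover", NUMRANGE))

def between_as_range_overlap(column, low, high):
    # "&&" is the PostgreSQL range-overlap operator.
    return column.op("&&")(NumericRange(low, high, bounds="[]"))

expr = between_as_range_overlap(example.c.cloud_cover, 20.0, 55.0)
# str(expr) renders roughly: example.cloud_cover && :cloud_cover_1
```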
9 changes: 5 additions & 4 deletions datacube/index/postgis/_datasets.py
@@ -17,7 +17,7 @@
 
 from datacube.drivers.postgis._fields import SimpleDocField, DateDocField
 from datacube.drivers.postgis._schema import Dataset as SQLDataset, search_field_map
-from datacube.drivers.postgis._api import extract_dataset_search_fields
+from datacube.drivers.postgis._api import non_native_fields, extract_dataset_fields
 from datacube.utils.uris import split_uri
 from datacube.drivers.postgis._spatial import generate_dataset_spatial_values, extract_geometry_from_eo3_projection
 
@@ -226,10 +226,11 @@ def _add_batch(self, batch_ds: Iterable[DatasetTuple], cache: Mapping[str, Any])
                 if values is not None:
                     batch["spatial_indexes"][crs].append(values)
             if prod.metadata_type.name in cache:
-                search_field_vals = cache[prod.metadata_type.name]
+                search_fields = cache[prod.metadata_type.name]
             else:
-                search_field_vals = extract_dataset_search_fields(metadata_doc, prod.metadata_type.definition)
-                cache[prod.metadata_type.name] = search_field_vals
+                search_fields = non_native_fields(prod.metadata_type.definition)
+                cache[prod.metadata_type.name] = search_fields
+            search_field_vals = extract_dataset_fields(metadata_doc, search_fields)
             for fname, finfo in search_field_vals.items():
                 ftype, fval = finfo
                 if isinstance(fval, Range):
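This hunk is the indexing-side half of the fix. As the removed lines show, the extracted search-field values were previously cached under the metadata-type name, so later datasets of the same metadata type in a batch reused the values extracted from the first one. The new code caches only the field definitions (which genuinely depend on the metadata type alone) and extracts values per dataset. A simplified sketch of that pattern, with invented names but the real helpers from this PR:

```python
# Simplified sketch of the corrected caching pattern (not the actual
# _add_batch code): cache field *definitions* per metadata type, but extract
# field *values* separately for every dataset document.
from datacube.drivers.postgis._api import non_native_fields, extract_dataset_fields

def extract_for_batch(batch, cache):
    # batch: iterable of (metadata_type_name, metadata_type_definition, dataset_doc)
    for mdt_name, mdt_def, ds_doc in batch:
        fields = cache.get(mdt_name)
        if fields is None:
            fields = non_native_fields(mdt_def)   # safe to cache: type-level
            cache[mdt_name] = fields
        yield extract_dataset_fields(ds_doc, fields)  # per-dataset values
```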
1 change: 1 addition & 0 deletions docs/about/whats_new.rst
@@ -18,6 +18,7 @@ v1.9.next
 - Rename `gbox` to `geobox` in parameter names (:pull:`1441`)
 - Remove executor API (:pull:`1462`)
 - Remove ingestion methods, `GridWorkflow` and `Tile` classes (:pull:`1465`)
+- Fix postgis queries for numeric custom search fields (:pull:`1475`)
 
 
 v1.8.next
2 changes: 1 addition & 1 deletion integration_tests/data/eo3/ls8_dataset3.yaml
@@ -37,7 +37,7 @@ properties:
   dea:dataset_maturity: final
   dtr:end_datetime: '2013-04-04T00:59:02.900824Z'
   dtr:start_datetime: '2013-04-04T00:58:34.682275Z'
-  eo:cloud_cover: 2.513757420052948e+01
+  eo:cloud_cover: 25.13757420052948
   eo:gsd: 1.5e+01 # Ground sample distance (m)
   eo:instrument: OLI_TIRS
   eo:platform: landsat-8
2 changes: 1 addition & 1 deletion integration_tests/data/eo3/ls8_dataset4.yaml
@@ -36,7 +36,7 @@ grids:
 properties:
   datetime: '2013-07-21T00:57:26.432563Z'
   dea:dataset_maturity: final
-  eo:cloud_cover: 1.828773330949106e+01
+  eo:cloud_cover: 18.28773330949106
   eo:gsd: 1.5e+01 # Ground sample distance (m)
   eo:instrument: OLI_TIRS
   eo:platform: landsat-8
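These two fixture edits change only the notation of the cloud-cover values, not the numbers themselves; a quick check:

```python
# The rewritten literals are numerically identical to the scientific-notation
# forms they replace in the two YAML fixtures above.
assert 25.13757420052948 == 2.513757420052948e+01
assert 18.28773330949106 == 1.828773330949106e+01
```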
37 changes: 37 additions & 0 deletions integration_tests/index/test_search_eo3.py
@@ -58,6 +58,43 @@ def test_search_dataset_equals_eo3(index: Index, ls8_eo3_dataset: Dataset):
     )
 
 
+def test_search_dataset_range_eo3(index: Index,
+                                  ls8_eo3_dataset: Dataset,
+                                  ls8_eo3_dataset2: Dataset,
+                                  ls8_eo3_dataset3: Dataset,
+                                  ls8_eo3_dataset4: Dataset,
+                                  ):
+    # Less Than
+    datasets = index.datasets.search_eager(
+        product=ls8_eo3_dataset.product.name,
+        cloud_cover=Range(None, 50.0)
+    )
+    assert len(datasets) == 2
+    ids = [ds.id for ds in datasets]
+    assert ls8_eo3_dataset3.id in ids
+    assert ls8_eo3_dataset4.id in ids
+
+    # Greater than
+    datasets = index.datasets.search_eager(
+        product=ls8_eo3_dataset.product.name,
+        cloud_cover=Range(50.0, None)
+    )
+    assert len(datasets) == 2
+    ids = [ds.id for ds in datasets]
+    assert ls8_eo3_dataset.id in ids
+    assert ls8_eo3_dataset2.id in ids
+
+    # Full Range comparison
+    datasets = index.datasets.search_eager(
+        product=ls8_eo3_dataset.product.name,
+        cloud_cover=Range(20.0, 55.0)
+    )
+    assert len(datasets) == 2
+    ids = [ds.id for ds in datasets]
+    assert ls8_eo3_dataset2.id in ids
+    assert ls8_eo3_dataset3.id in ids
+
+
 def test_search_dataset_by_metadata_eo3(index: Index, ls8_eo3_dataset: Dataset) -> None:
     datasets = index.datasets.search_by_metadata(
         {"properties": {"eo:platform": "landsat-8", "eo:instrument": "OLI_TIRS"}}
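For readers unfamiliar with the query API exercised by the new test: as far as I can tell, Range here is the plain (begin, end) namedtuple from datacube.model, and passing None for a bound leaves it open, which is how the less-than and greater-than cases above are written. A hedged sketch:

```python
# Hedged sketch; assumes datacube.model.Range is a (begin, end) namedtuple
# with None meaning an open bound, as used in the test above.
from datacube.model import Range

less_than_50 = Range(None, 50.0)     # cloud_cover below 50
greater_than_50 = Range(50.0, None)  # cloud_cover above 50
bounded = Range(20.0, 55.0)          # cloud_cover between 20 and 55

assert less_than_50.begin is None and less_than_50.end == 50.0
```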