Merge pull request #1491 from opendatacube/better_archiving_error
Better archiving error
Ariana-B committed Sep 13, 2023
2 parents 24b046e + ee8551f commit 9924bac
Showing 4 changed files with 44 additions and 2 deletions.
23 changes: 22 additions & 1 deletion datacube/index/abstract.py
@@ -22,7 +22,7 @@
from datacube.index.exceptions import TransactionException
from datacube.index.fields import Field
from datacube.model import Dataset, MetadataType, Range
-from datacube.model import DatasetType as Product
+from datacube.model import Product
from datacube.utils import cached_property, jsonify_document, read_documents, InvalidDocException
from datacube.utils.changes import AllowPolicy, Change, Offset, DocumentMismatchError, check_doc_unchanged
from datacube.utils.generic import thread_local_cache
@@ -922,28 +922,49 @@ def find_less_mature(self, ds: Dataset, delta: int = 500) -> Iterable[Dataset]:
"""
less_mature = []
assert delta >= 0

def check_maturity_information(dataset, props):
# check that the dataset metadata includes all maturity-related properties
# passing in the required props to enable greater extensibility should it be needed
for prop in props:
if hasattr(dataset.metadata, prop) and (getattr(dataset.metadata, prop) is not None):
return
raise ValueError(
f"Dataset {dataset.id} is missing property {prop} required for maturity check"
)

check_maturity_information(ds, ["region_code", "time", "dataset_maturity"])

# 'expand' the date range by `delta` milliseconds to give a bit more leniency in datetime comparison
expanded_time_range = Range(ds.metadata.time.begin - timedelta(milliseconds=delta),
ds.metadata.time.end + timedelta(milliseconds=delta))
dupes = self.search(product=ds.product.name,
region_code=ds.metadata.region_code,
time=expanded_time_range)

for dupe in dupes:
if dupe.id == ds.id:
continue

# only need to check that dupe has dataset maturity, missing/null region_code and time
# would already have been filtered out during the search query
check_maturity_information(dupe, ["dataset_maturity"])

if dupe.metadata.dataset_maturity == ds.metadata.dataset_maturity:
# Duplicate has the same maturity, which one should be archived is unclear
raise ValueError(
f"A dataset with the same maturity as dataset {ds.id} already exists, "
f"with id: {dupe.id}"
)

if dupe.metadata.dataset_maturity < ds.metadata.dataset_maturity:
# Duplicate is more mature than dataset
# Note that "final" < "nrt"
raise ValueError(
f"A more mature version of dataset {ds.id} already exists, with id: "
f"{dupe.id} and maturity: {dupe.metadata.dataset_maturity}"
)

less_mature.append(dupe)
return less_mature

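For context, a minimal usage sketch of the code path this change hardens — not part of the commit, and assuming a configured Datacube environment; the dataset id is hypothetical:

    from datacube import Datacube

    dc = Datacube()
    # hypothetical id; any indexed dataset with region_code, time and
    # dataset_maturity populated would do
    ds = dc.index.datasets.get("11111111-2222-3333-4444-555555555555")

    try:
        # searches for duplicates (same product and region_code, time within
        # +/- delta milliseconds) and returns those strictly less mature
        dupes = dc.index.datasets.find_less_mature(ds, delta=500)
    except ValueError as err:
        # raised if ds lacks region_code/time/dataset_maturity, or if an
        # equally or more mature duplicate already exists
        print(f"Maturity check failed: {err}")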
1 change: 1 addition & 0 deletions docs/about/whats_new.rst
@@ -11,6 +11,7 @@ v1.8.next
- Mark ``--confirm-ignore-lineage``, ``--auto-add-lineage``, and ``--verify-lineage`` as deprecated or to be deprecated (:pull:`1472`)
- Default delta values in ``archive_less_mature`` and ``find_less_mature`` (:pull:`1472`)
- Fix SQLAlchemy calls and pin jsonschema version to suppress deprecation warnings (:pull:`1476`)
- Throw a better error if a dataset is not compatible with ``archive_less_mature`` logic (:pull:`1491`)

v1.8.15 (11th July 2023)
========================
10 changes: 10 additions & 0 deletions integration_tests/conftest.py
@@ -363,6 +363,16 @@ def final_dataset(index, extended_eo3_metadata_type, ls8_eo3_product, final_dataset_doc):
        *final_dataset_doc)


@pytest.fixture
def ds_no_region(index, extended_eo3_metadata_type, ls8_eo3_product, final_dataset_doc):
    doc_no_region = deepcopy(final_dataset_doc)
    doc_no_region[0]["properties"]["odc:region_code"] = None
    return doc_to_ds_no_add(
        index,
        ls8_eo3_product.name,
        *doc_no_region)
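
A quick sketch of the document shape this fixture mutates; the value below is hypothetical, but the access path matches the fixture above (final_dataset_doc behaves as a (document, ...) tuple whose first element is the eo3 document):

    # final_dataset_doc[0], abridged:
    doc = {
        "properties": {
            "odc:region_code": "090086",  # hypothetical value; the fixture nulls this out
            # ... other eo3 properties (datetime, maturity, etc.)
        },
    }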


@pytest.fixture
def ga_s2am_ard3_final(index, eo3_sentinel_metadata_type, ga_s2am_ard_3_product, ga_s2am_ard_3_final_doc):
    return doc_to_ds_no_add(
12 changes: 11 additions & 1 deletion integration_tests/index/test_index_data.py
@@ -94,7 +94,7 @@ def test_archive_datasets(index, local_config, ls8_eo3_dataset):
    assert not indexed_dataset.is_archived


-def test_archive_less_mature(index, final_dataset, nrt_dataset):
+def test_archive_less_mature(index, final_dataset, nrt_dataset, ds_no_region):
    # case 1: add nrt then final; nrt should get archived
    index.datasets.add(nrt_dataset, with_lineage=False, archive_less_mature=0)
    assert index.datasets.get(nrt_dataset.id).is_active
@@ -110,6 +110,16 @@ def test_archive_less_mature(index, final_dataset, nrt_dataset):
        index.datasets.add(nrt_dataset, with_lineage=False, archive_less_mature=0)


def test_cannot_search_for_less_mature(index, nrt_dataset, ds_no_region):
    # if a dataset is missing a property required for finding less mature datasets,
    # it should error
    index.datasets.add(nrt_dataset, with_lineage=False, archive_less_mature=0)
    assert index.datasets.get(nrt_dataset.id).is_active
    assert ds_no_region.metadata.region_code is None
    with pytest.raises(ValueError, match="region_code"):
        index.datasets.add(ds_no_region, with_lineage=False, archive_less_mature=0)
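
Given the error raised in datacube/index/abstract.py above, the match="region_code" assertion corresponds to a message of the form: "Dataset <id> is missing property region_code required for maturity check".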


def test_archive_less_mature_approx_timestamp(index, ga_s2am_ard3_final, ga_s2am_ard3_interim):
    # test archive_less_mature where there's a slight difference in timestamps
    index.datasets.add(ga_s2am_ard3_interim, with_lineage=False)
