class ComplicatedNamingConventionsDerivatives(ComplicatedNamingConventions):
    """
    Naming conventions for DEA Collection 3 (C3) products other than ARD.

    Inherits from ComplicatedNamingConventions and overrides only the pieces
    that differ for C3 derivative processing: the list of required fields,
    the collection number, the product group, the destination folder layout,
    the dataset label and the platform abbreviation.
    """

    @classmethod
    def for_c3_derivatives(cls, dataset: EoFields, uri=DEA_URI_PREFIX):
        """
        Build the naming conventions used for C3 derivative datasets.

        The properties that must be present before a dataset can be named
        are controlled here.

        :param dataset: the dataset whose properties drive the names.
        :param uri: base product URI, defaulting to DEA_URI_PREFIX.
        """
        return cls(
            dataset=dataset,
            base_product_uri=uri,
            required_fields=(
                "eo:platform",
                "odc:dataset_version",
                "odc:collection_number",
                "odc:processing_datetime",
                "odc:producer",
                "odc:product_family",
                "odc:region_code",
                "dea:dataset_maturity",
            ),
        )

    @property
    def _org_collection_number(self) -> Optional[int]:
        """
        The collection number as an integer, read from the dataset.

        Deliberately fails (``int()`` raises) when the dataset's collection
        number cannot be converted to an integer.
        """
        return int(self.dataset.collection_number)

    def _product_group(self, subname=None) -> str:
        """
        Compute the product group, e.g. ``"ga_ls_wo"``.

        Joins the abbreviated producer, the abbreviated platform and the
        product family with underscores. ``subname`` is accepted for
        signature compatibility but is not used.

        Deliberately fails (``str.join`` raises TypeError) if any of the
        three parts is missing.
        """
        parts = [
            self.producer_abbreviated,
            self.platform_abbreviated,
            self.dataset.product_family,
        ]
        return "_".join(parts)

    def destination_folder(self, base: Path) -> Path:
        """
        Folder, underneath ``base``, that the dataset should be written to.

        Layout: ``<product_name>/<dataset version with dots as dashes>/
        <region code subfolders>/<YYYY>/<MM>/<DD>``, for example
        ``ga_ls_wo_3/1-6-0/090/081/1998/07/30``.
        """
        self._check_enough_properties_to_name()
        parts = [self.product_name, self.dataset.dataset_version.replace(".", "-")]
        parts.extend(utils.subfolderise(self.dataset.region_code))
        # One folder level each for year, month and day.
        parts.extend(f"{self.dataset.datetime:%Y/%m/%d}".split("/"))
        return base.joinpath(*parts)

    def _dataset_label(self, sub_name: Optional[str] = None):
        """
        Produce the label: product name, region code, date and maturity
        joined by underscores, e.g. ``'ga_ls_wo_3_090081_1998-07-30_interim'``.

        ``sub_name`` is not used here; it is kept because the parent class
        and other callers pass it.
        """
        parts = [
            self.product_name,
            self._displayable_region_code,
            f"{self.dataset.datetime:%Y-%m-%d}",
            self.dataset.maturity,
        ]
        return "_".join(parts)

    @property
    def platform_abbreviated(self) -> Optional[str]:
        """
        Abbreviated platform name, which is always ``"ls"`` for C3 derivatives.

        For now, platforms coming from Alchemist are always Landsat for C3
        processing, so anything else is rejected.

        :raises ValueError: if the platform is unset or is not a Landsat
            platform (neither an "ls" code nor a "landsat" name).
        """
        # NOTE(review): assumes dataset.platform is a str or None - confirm.
        platform = self.dataset.platform
        normalised = platform.lower() if platform else ""
        # "landsat" does not contain the substring "ls", so full platform
        # names (e.g. "landsat-8") must be checked for explicitly.
        if "ls" not in normalised and "landsat" not in normalised:
            raise ValueError(
                "ComplicatedNamingConventionsDerivatives assumes landsat as platform"
            )
        return "ls"
def test_dea_c3_naming_conventions(tmp_path: Path):
    """
    Check the "dea_c3" naming conventions on a sample Alchemist C3 scene.

    Finishing the dataset must be refused while required properties are
    missing; once they are supplied, the metadata document must land at the
    expected C3-style relative path.
    """
    expected_offset = (
        "ga_ls_wo_3/1-6-0/090/081/1998/07/30/ga_ls_wo_3_090081_1998-07-30_interim.odc-metadata.yaml"
    )

    assembler = DatasetAssembler(tmp_path, naming_conventions="dea_c3")
    assembler.platform = "ga_ls5t"
    assembler.datetime = datetime(1998, 7, 30)
    assembler.product_family = "wo"
    assembler.processed = "1998-07-30T12:23:23"
    assembler.maturity = "interim"
    assembler.producer = "ga.gov.au"
    assembler.region_code = "090081"

    # The dataset version and collection number are still unset, so
    # finishing the dataset has to be rejected.
    with pytest.raises(
        ValueError, match="Need more properties to fulfill naming conventions."
    ):
        assembler.done()

    # Supply the fields that were left out above.
    assembler.dataset_version = "1.6.0"
    assembler.collection_number = "3"

    # With every required field present, the dataset can be completed.
    _dataset_id, metadata_path = assembler.done()
    actual_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert actual_offset == expected_offset