Various minor improvements #1472

Merged · 3 commits · Jul 18, 2023
4 changes: 2 additions & 2 deletions datacube/index/abstract.py
@@ -899,7 +899,7 @@ def archive(self, ids: Iterable[DSID]) -> None:
:param Iterable[Union[str,UUID]] ids: list of dataset ids to archive
"""

-def archive_less_mature(self, ds: Dataset, delta: int) -> None:
+def archive_less_mature(self, ds: Dataset, delta: int = 500) -> None:
"""
Archive less mature versions of a dataset

@@ -912,7 +912,7 @@ def archive_less_mature(self, ds: Dataset, delta: int) -> None:
for lm_ds in less_mature_ids:
_LOG.info(f"Archived less mature dataset: {lm_ds}")

-def find_less_mature(self, ds: Dataset, delta: int) -> Iterable[Dataset]:
+def find_less_mature(self, ds: Dataset, delta: int = 500) -> Iterable[Dataset]:
"""
Find less mature versions of a dataset

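
With the default in place, callers can now omit the delta argument. A minimal sketch of the call site, assuming delta is the timestamp-comparison leniency in milliseconds as described in the docstring; the dataset id is hypothetical:

    from datacube import Datacube

    dc = Datacube()
    ds = dc.index.datasets.get("f7018d80-8807-433d-8abd-8d46eb7c54e3")  # hypothetical id

    # Uses the new default of 500 (assumed ms of allowed timestamp skew)...
    dc.index.datasets.archive_less_mature(ds)
    # ...which is equivalent to the previously mandatory explicit form:
    dc.index.datasets.archive_less_mature(ds, delta=500)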
3 changes: 1 addition & 2 deletions datacube/index/hl.py
@@ -63,11 +63,10 @@ def match(doc: Mapping[str, Any]) -> bool:
if matches(doc, rule):
return rule.product

-relevant_doc = {k: v for k, v in doc.items() if k in rule.signature}
raise BadMatch('Dataset metadata did not match product signature.'
'\nDataset definition:\n %s\n'
'\nProduct signature:\n %s\n'
-% (json.dumps(relevant_doc, indent=4),
+% (json.dumps(doc, indent=4),
json.dumps(rule.signature, indent=4)))

return match
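
The removed relevant_doc filtered the dataset document down to the keys present in the product signature, which could hide the fields that actually failed to match; the message now dumps the full document. A sketch of the resulting message construction, with invented document and signature values:

    import json

    doc = {"product": {"name": "ls8_example"}, "platform": {"code": "landsat-9"}}  # invented
    signature = {"platform": {"code": "landsat-8"}}  # invented

    # The BadMatch text now embeds the complete dataset document:
    msg = ('Dataset metadata did not match product signature.'
           '\nDataset definition:\n %s\n'
           '\nProduct signature:\n %s\n'
           % (json.dumps(doc, indent=4), json.dumps(signature, indent=4)))
    print(msg)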
23 changes: 7 additions & 16 deletions datacube/scripts/dataset.py
@@ -53,8 +53,6 @@ def _resolve_uri(uri, doc):
if isinstance(loc, (list, tuple)):
if len(loc) > 0:
return loc[0]
-else:
-    return uri

return uri

@@ -144,19 +142,22 @@ def mk_dataset(ds, uri):
'you can supply several by repeating this option with a new product name'),
multiple=True)
@click.option('--auto-add-lineage/--no-auto-add-lineage', is_flag=True, default=True,
-help=('Default behaviour is to automatically add lineage datasets if they are missing from the database, '
+help=('WARNING: will be deprecated in datacube v1.9.\n'
+'Default behaviour is to automatically add lineage datasets if they are missing from the database, '
'but this can be disabled if lineage is expected to be present in the DB, '
'in this case add will abort when encountering missing lineage dataset'))
@click.option('--verify-lineage/--no-verify-lineage', is_flag=True, default=True,
-help=('Lineage referenced in the metadata document should be the same as in DB, '
+help=('WARNING: will be deprecated in datacube v1.9.\n'
+'Lineage referenced in the metadata document should be the same as in DB, '
'default behaviour is to skip those top-level datasets that have lineage data '
'different from the version in the DB. This option allows omitting verification step.'))
@click.option('--dry-run', help='Check if everything is ok', is_flag=True, default=False)
@click.option('--ignore-lineage',
help="Pretend that there is no lineage data in the datasets being indexed",
is_flag=True, default=False)
@click.option('--confirm-ignore-lineage',
help="Pretend that there is no lineage data in the datasets being indexed, without confirmation",
help=('WARNING: this flag has been deprecated and will be removed in datacube v1.9.\n'
'Pretend that there is no lineage data in the datasets being indexed, without confirmation'),
is_flag=True, default=False)
@click.option('--archive-less-mature', is_flag=False, flag_value=500, default=None,
help=('Find and archive less mature versions of the dataset, will fail if more mature versions '
@@ -179,17 +180,7 @@ def index_cmd(index, product_names,
print_help_msg(index_cmd)
sys.exit(1)

-    if confirm_ignore_lineage is False and ignore_lineage is True:
-        if sys.stdin.isatty():
-            confirmed = click.confirm("Requested to skip lineage information, Are you sure?", default=False)
-            if not confirmed:
-                click.echo('OK aborting', err=True)
-                sys.exit(1)
-        else:
-            click.echo("Use --confirm-ignore-lineage from non-interactive scripts. Aborting.")
-            sys.exit(1)
-
-        confirm_ignore_lineage = True
+    confirm_ignore_lineage = ignore_lineage

try:
ds_resolve = Doc2Dataset(index,
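
Net effect on the command line: --ignore-lineage now implies confirmation instead of prompting, so the separate --confirm-ignore-lineage flag is no longer needed (and is deprecated ahead of removal in v1.9). A usage sketch with a hypothetical metadata path:

    # No interactive confirmation step any more:
    datacube dataset add --ignore-lineage /data/ls8/scene_1/agdc-metadata.yaml

    # Deprecated spelling, still accepted until datacube v1.9:
    datacube dataset add --confirm-ignore-lineage /data/ls8/scene_1/agdc-metadata.yaml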
52 changes: 21 additions & 31 deletions datacube/ui/common.py
@@ -89,34 +89,24 @@ def ui_path_doc_stream(paths, logger=None, uri=True, raw=False):

"""

-    def on_error1(p, e):
-        if logger is not None:
-            logger.error(str(e))
-
-    def on_error2(p, e):
-        if logger is not None:
-            logger.error('Failed reading documents from %s', str(p))
-
-    yield from _path_doc_stream(_resolve_doc_files(paths, on_error=on_error1),
-                                on_error=on_error2, uri=uri, raw=raw)
-
-
-def _resolve_doc_files(paths, on_error):
-    for p in paths:
-        try:
-            yield get_metadata_path(p)
-        except ValueError as e:
-            on_error(p, e)
-
-
-def _path_doc_stream(files, on_error, uri=True, raw=False):
-    """See :func:`ui_path_doc_stream` for documentation"""
-    maybe_wrap = identity if raw else SimpleDocNav
-
-    for fname in files:
-        try:
-            for p, doc in read_documents(fname, uri=uri):
-                yield p, maybe_wrap(doc)
-
-        except InvalidDocException as e:
-            on_error(fname, e)
+    def _resolve_doc_files(paths):
+        for p in paths:
+            try:
+                yield get_metadata_path(p)
+            except ValueError as e:
+                if logger is not None:
+                    logger.error(str(e))
+
+    def _path_doc_stream(files, uri=True, raw=False):
+        maybe_wrap = identity if raw else SimpleDocNav
+
+        for fname in files:
+            try:
+                for p, doc in read_documents(fname, uri=uri):
+                    yield p, maybe_wrap(doc)
+
+            except InvalidDocException as e:
+                if logger is not None:
+                    logger.error('Failed reading documents from %s', str(fname))
+
+    yield from _path_doc_stream(_resolve_doc_files(paths), uri=uri, raw=raw)
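
The refactor replaces the two on_error callbacks and the module-level helpers with closures inside ui_path_doc_stream, which can use the captured logger directly; behaviour is otherwise unchanged. A hedged usage sketch, with hypothetical paths:

    import logging
    from datacube.ui.common import ui_path_doc_stream

    log = logging.getLogger("indexing")
    # Unresolvable paths and invalid documents are logged and skipped;
    # the stream keeps going rather than raising.
    for uri, doc in ui_path_doc_stream(["/data/scene_1/", "/data/scene_2.yaml"], logger=log):
        print(uri, doc.id)  # doc is a SimpleDocNav wrapper unless raw=True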
3 changes: 3 additions & 0 deletions docs/about/whats_new.rst
@@ -7,6 +7,9 @@ What's New

v1.8.next
=========
+- Improve error message for mismatch between dataset metadata and product signature (:pull:`1472`)
+- Mark ``--confirm-ignore-lineage``, ``--auto-add-lineage``, and ``--verify-lineage`` as deprecated or to be deprecated (:pull:`1472`)
+- Default delta values in ``archive_less_mature`` and ``find_less_mature`` (:pull:`1472`)

v1.8.15 (11th July 2023)
========================
6 changes: 3 additions & 3 deletions integration_tests/test_3d.py
@@ -265,7 +265,7 @@ def test_indexing(clirunner, index, product_def):
"-v",
"dataset",
"add",
"--confirm-ignore-lineage",
"--ignore-lineage",
str(index_yaml),
]
)
@@ -315,7 +315,7 @@ def test_indexing_with_spectral_map(clirunner, index, dataset_types):
clirunner(["-v", "product", "add", str(dataset_types)])

# Index the Dataset
clirunner(["-v", "dataset", "add", '--confirm-ignore-lineage', str(index_yaml)])
clirunner(["-v", "dataset", "add", '--ignore-lineage', str(index_yaml)])
dc = Datacube(index=index)
check_open_with_dc_simple(dc, product_def, [product_id], measurement)

@@ -335,7 +335,7 @@ def test_end_to_end_multitime(clirunner, index, product_def, original_data):
measurement=measurement,
)
# Index the Datasets
clirunner(["-v", "dataset", "add", '--confirm-ignore-lineage', str(index_yaml)])
clirunner(["-v", "dataset", "add", '--ignore-lineage', str(index_yaml)])

if idx == 0: # Full check for the first measurement only
# Check data for all product IDs
6 changes: 3 additions & 3 deletions integration_tests/test_cli_output.py
@@ -81,7 +81,7 @@ def test_cli_dataset_subcommand(index, clirunner,

# Insert datasets
for path in eo3_dataset_paths:
result = clirunner(['dataset', 'add', "--confirm-ignore-lineage", path])
result = clirunner(['dataset', 'add', "--ignore-lineage", path])

runner = clirunner(['dataset', 'archive'], verbose_flag=False, expect_success=False)
assert "Completed dataset archival." not in runner.output
@@ -146,8 +146,8 @@ def test_readd_and_update_metadata_product_dataset_command(index, clirunner,
assert "No such dataset in the database" in update.output
assert "Failure while processing" in update.output

-clirunner(['dataset', 'add', '--confirm-ignore-lineage', ds_path])
-rerun_add = clirunner(['dataset', 'add', '--confirm-ignore-lineage', ds_path])
+clirunner(['dataset', 'add', '--ignore-lineage', ds_path])
+rerun_add = clirunner(['dataset', 'add', '--ignore-lineage', ds_path])
assert "WARNING Dataset" in rerun_add.output
assert "is already in the database" in rerun_add.output

11 changes: 2 additions & 9 deletions integration_tests/test_dataset_add.py
@@ -21,7 +21,7 @@ def check_skip_lineage_test(clirunner, index):

prefix = write_files({'agdc-metadata.yml': yaml.safe_dump(ds.doc)})

-clirunner(['dataset', 'add', '--confirm-ignore-lineage', '--product', 'A', str(prefix)])
+clirunner(['dataset', 'add', '--ignore-lineage', '--product', 'A', str(prefix)])

ds_ = index.datasets.get(ds.id, include_sources=True)
assert ds_ is not None
@@ -58,7 +58,7 @@ def check_no_product_match(clirunner, index):
# Ignore lineage but fail to match main dataset
r = clirunner(['dataset', 'add',
'--product', 'B',
-'--confirm-ignore-lineage',
+'--ignore-lineage',
str(prefix)])

assert 'ERROR' in r.output
@@ -184,12 +184,6 @@ def check_missing_metadata_doc(clirunner):
assert "ERROR No supported metadata docs found for dataset" in r.output


-def check_no_confirm(clirunner, path):
-    r = clirunner(['dataset', 'add', '--ignore-lineage', str(path)], expect_success=False)
-    assert r.exit_code != 0
-    assert 'Use --confirm-ignore-lineage from non-interactive scripts' in r.output


def check_bad_yaml(clirunner, index):
prefix = write_files({'broken.yml': '"'})
r = clirunner(['dataset', 'add', str(prefix / 'broken.yml')])
@@ -266,7 +260,6 @@ def test_dataset_add(dataset_add_configs, index_empty, clirunner):
check_inconsistent_lineage(clirunner, index)
check_missing_metadata_doc(clirunner)
check_missing_lineage(clirunner, index)
-check_no_confirm(clirunner, p.datasets)
check_bad_yaml(clirunner, index)

# check --product=nosuchproduct
2 changes: 1 addition & 1 deletion integration_tests/test_end_to_end.py
@@ -76,7 +76,7 @@ def test_end_to_end(clirunner, index, testdata_dir, ingest_configs, datacube_env

# - this will be no-op but with ignore lineage
clirunner(['-v', 'dataset', 'add',
-'--confirm-ignore-lineage',
+'--ignore-lineage',
str(lbg_nbar), str(lbg_pq)])

# Test no-op update