Skip to content

Commit

Permalink
Add Doc2Dataset helper class
Browse files Browse the repository at this point in the history
High level helper class for converting metadata documents into `Dataset`
objects.
  • Loading branch information
Kirill888 committed Jul 20, 2018
1 parent 3c1519e commit 417a58a
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 18 deletions.
68 changes: 68 additions & 0 deletions datacube/index/hl.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,71 @@ def resolve_ds(ds, sources, cache=None):
return None, e

return resolve_no_lineage if skip_lineage else resolve


class Doc2Dataset(object):
    """Turn metadata documents into `Dataset` objects.

    Construct with an index, an optional restriction on which products to
    match against, and options controlling lineage handling; then call the
    instance with a metadata document and a uri to resolve a `Dataset`.
    """
    def __init__(self,
                 index,
                 products=None,
                 exclude_products=None,
                 fail_on_missing_lineage=False,
                 verify_lineage=True,
                 skip_lineage=False):
        """
        :param index: Database
        :param products: List of product names to match datasets against
            (lineage datasets included); all products are considered when
            not supplied.
        :param exclude_products: List of product names to exclude from matching
        :param fail_on_missing_lineage: If True, resolution fails when any
            lineage dataset is absent from the DB
        :param verify_lineage: If True, check that lineage datasets in the
            supplied document are identical to the DB versions
        :param skip_lineage: If True, ignore the lineage sub-tree in the
            supplied document and build the dataset without lineage datasets

        :raises ValueError: when the matching rules cannot be loaded
        """
        matching_rules, error_message = load_rules_from_types(
            index,
            product_names=products,
            excluding=exclude_products)

        if matching_rules is None:
            raise ValueError(error_message)

        # Pre-build the resolver once; __call__ reuses it per document.
        self._ds_resolve = dataset_resolver(
            index,
            matching_rules,
            fail_on_missing_lineage=fail_on_missing_lineage,
            verify_lineage=verify_lineage,
            skip_lineage=skip_lineage)

    def __call__(self, doc, uri):
        """Attempt to construct a dataset from a metadata document and a uri.

        :param doc: Dictionary or SimpleDocNav object
        :param uri: String "location" property of the Dataset
        :return: (dataset, None) if successful,
        :return: (None, ErrorMessage) on failure
        """
        # Accept plain dicts by wrapping them in the navigation helper.
        nav = doc if isinstance(doc, SimpleDocNav) else SimpleDocNav(doc)

        dataset, err = self._ds_resolve(nav, uri)
        if dataset is None:
            return None, err

        ok, reason = check_dataset_consistent(dataset)
        return (dataset, None) if ok else (None, reason)
28 changes: 10 additions & 18 deletions datacube/scripts/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from datacube.index.index import Index
from datacube.index.exceptions import MissingRecordError
from datacube.model import Dataset
from datacube.index.hl import load_rules_from_types, dataset_resolver, check_dataset_consistent
from datacube.index.hl import Doc2Dataset, check_dataset_consistent
from datacube.ui import click as ui
from datacube.ui.click import cli
from datacube.ui.common import get_metadata_path
Expand Down Expand Up @@ -82,11 +82,6 @@ def dataset_stream(doc_stream, ds_resolve):
_LOG.error('%s', str(err))
continue

is_consistent, reason = check_dataset_consistent(dataset)
if not is_consistent:
_LOG.error("Dataset %s inconsistency: %s", dataset.id, reason)
continue

yield dataset


Expand Down Expand Up @@ -187,20 +182,17 @@ def index_cmd(index, product_names,
if auto_match is True:
_LOG.warning("--auto-match option is deprecated, update your scripts, behaviour is the same without it")

rules, error_msg = load_rules_from_types(index,
product_names,
excluding=exclude_product_names)
if rules is None:
_LOG.error(error_msg)
try:
ds_resolve = Doc2Dataset(index,
product_names,
exclude_products=exclude_product_names,
skip_lineage=confirm_ignore_lineage,
fail_on_missing_lineage=not auto_add_lineage,
verify_lineage=verify_lineage)
except ValueError as e:
_LOG.error(e)
sys.exit(2)

assert len(rules) > 0

ds_resolve = dataset_resolver(index, rules,
skip_lineage=confirm_ignore_lineage,
fail_on_missing_lineage=not auto_add_lineage,
verify_lineage=verify_lineage)

def run_it(dataset_paths):
doc_stream = ui_doc_path_stream(dataset_paths)
dss = dataset_stream(doc_stream, ds_resolve)
Expand Down

0 comments on commit 417a58a

Please sign in to comment.