# Index HLTC Datasets
Index the High/Low Tide Composite (HLTC) product, producing dataset documents that match the product definition.

In [13]:
# Path prefix under which the HLTC NetCDF files are searched for.
base_path = '/g/data/fk4/datacube/002/HLTC/HLTC_2_0/'

# Product name -> filename prefix used to glob that product's files.
product_dict = dict([
    ('high_tide_comp_20p', 'COMPOSITE_HIGH'),
    ('high_tide_comp_count', 'COUNT_HIGH'),
    ('low_tide_comp_20p', 'COMPOSITE_LOW'),
    ('low_tide_comp_count', 'COUNT_LOW'),
])

In [14]:
import glob

In [4]:
import netCDF4 as nc
import xarray as xr

In [5]:
import yaml
try:
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader

In [6]:
from datacube.index._api import Index
from datacube.index.postgres import PostgresDb
from datacube.model import Dataset, DatasetType as Product, MetadataType
from datacube.scripts.dataset import read_documents, create_dataset, parse_match_rules_options, find_matching_product
from datacube.utils import changes, validate_document
import json

In [7]:
class SimpleConfig:
    """Minimal attribute-style holder for database connection settings.

    Keyword arguments given to the constructor override the built-in
    defaults. Reading a setting that was never supplied yields ``None``
    instead of raising ``AttributeError``.
    """

    def __init__(self, **config):
        """Store the default settings, overridden by any ``config`` values."""
        try:
            import os
            import pwd
            default_username = pwd.getpwuid(os.geteuid()).pw_name
        except (ImportError, KeyError):
            # ImportError: no ``pwd`` module (no default on Windows).
            # KeyError: the effective uid has no passwd entry.
            default_username = None

        self._config = {
            'db_hostname': '',
            'db_database': 'datacube',
            'db_connection_timeout': 60,
            'db_username': default_username,
        }
        self._config.update(config)

    def __getattr__(self, name):
        """Return the setting called ``name``, or ``None`` when it is not set.

        Python only calls this after normal attribute lookup has failed.

        Raises:
            AttributeError: for ``_config`` itself and for dunder names.
        """
        # ``_config`` is absent while copy/pickle rebuild an instance; reading
        # it here would re-enter this method until the recursion limit is hit.
        # Dunder probes (``__getstate__``, ``__setstate__``, ...) must fail the
        # normal way too, otherwise the prober receives ``None`` as a "method".
        if name == '_config' or (name.startswith('__') and name.endswith('__')):
            raise AttributeError(name)
        return self._config.get(name, None)

    def __dir__(self):
        """Expose the stored setting names to ``dir()``."""
        return self._config.keys()

    def __str__(self):
        """Render a ``SimpleConfig:`` header followed by one setting per line."""
        return "SimpleConfig:{}".format(''.join('\n\t{}: {}'.format(*item) for item in self._config.items()))

    def __repr__(self):
        """Use the same text as ``__str__``."""
        return self.__str__()

## Set up dev and prod indexes

In [9]:
# Development index: settings for the 'andrewcube' database on the host below.
# The database handle is created with the application name 'reindex'.
dev_config = SimpleConfig(db_hostname='130.56.244.110', db_database='andrewcube')
dev_index = Index(PostgresDb.from_config(dev_config, application_name='reindex'))

In [10]:
# Production index: settings for the 'datacube' database on the host below,
# with an explicit port (given as a string). Same application name as dev.
prod_config = SimpleConfig(db_hostname='130.56.244.105', db_database='datacube', db_port='6432')
prod_index = Index(PostgresDb.from_config(prod_config, application_name='reindex'))

* Add products to index
* Read dataset docs
* Correct Dataset doc
* Create dataset object
* Add to index

## Add products to index

In [11]:
def add_products(index, products_def_doc):
    """Register every product definition read from ``products_def_doc``.

    The name of each definition is printed, then the definition is added via
    ``index.products``. When adding raises ``DocumentMismatchError`` the
    definition is sent as an update instead, with unsafe updates allowed.
    """
    for _, definition in read_documents(products_def_doc):
        print(definition['name'])
        try:
            index.products.add_document(definition)
        except changes.DocumentMismatchError:
            # Fall back to an update with unsafe updates allowed.
            index.products.update_document(definition, allow_unsafe_updates=True)

In [None]:
# Product definitions file to register in the dev index.
# NOTE(review): absolute path under a user home directory - adjust before re-running elsewhere.
products_def_doc = '/home/547/adh547/Projects/digitalearthau/digitalearthau/config/products/hltc.yaml'
add_products(dev_index, products_def_doc)

In [12]:
def make_rules(index):
    """Return the match rules built from the names of all products in ``index``.

    The names are handed to ``parse_match_rules_options`` together with the
    index itself.
    """
    names = []
    for product in index.products.get_all():
        names.append(product.name)
    return parse_match_rules_options(index, None, names, True)

In [17]:
# Rules derived from the products currently present in the dev index.
rules = make_rules(dev_index)

## Loop over every product, and every file

In [18]:
from tqdm import tqdm_notebook
import logging
# Raise the root logger's threshold so only CRITICAL records are emitted
# (presumably to keep the progress output of the indexing loop readable).
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

In [22]:
import pandas as pd
import datetime
def to_isodatetime(dt_string):
    """Parse ``dt_string`` with pandas and return its ISO-format string."""
    timestamp = pd.to_datetime(dt_string)
    return timestamp.isoformat()

In [24]:
def index_datasets(index, product_dict, base_path, rules):
    """Index every ``*.nc`` file of every product listed in ``product_dict``.

    For each product the files matching ``base_path + prefix + '*.nc'`` are
    processed in sorted order. The first document read from each file is
    patched (fixed time extent, band layout, product metadata), turned into
    a dataset and added to ``index``. If adding raises
    ``DocumentMismatchError`` the error is printed and the dataset is
    updated instead, with any change allowed.

    Args:
        index: index object providing ``products`` and ``datasets``.
        product_dict: mapping of product name -> filename prefix.
        base_path: string prepended to each prefix to form the glob pattern.
        rules: passed through unchanged to ``create_dataset``.
    """
    # The same fixed time extent is written to every dataset document, so the
    # ISO strings are built once rather than once per file.
    extent_from = datetime.datetime(2000, 1, 1, 0, 0).isoformat()
    extent_to = datetime.datetime(2016, 10, 31, 0, 0).isoformat()

    for product_name, product_prefix in product_dict.items():
        # glob returns matches in arbitrary order; sort for a reproducible run.
        file_list = sorted(glob.glob(base_path + product_prefix + '*.nc'))
        product_obj = index.products.get_by_name(product_name)

        print(product_name)
        for file_name in tqdm_notebook(file_list):
            # Only the first document found in the file is used.
            pth, doc = next(read_documents(file_name))
            uri = pth.absolute().as_uri()

            # The file is deliberately not opened with xarray here: nothing
            # was ever read from that handle and it was never closed.
            doc['extent']['from_dt'] = extent_from
            doc['extent']['to_dt'] = extent_to
            doc['extent']['key_dt'] = extent_to

            # One band entry per product measurement; the layer name equals
            # the measurement name and the path is left empty.
            doc['image']['bands'] = {key: {'layer': key, 'path': ''} for key in product_obj.measurements.keys()}

            # Overlay the product's metadata document on the dataset document
            # (presumably so the dataset matches the product definition).
            doc.update(product_obj.metadata_doc)

            dataset = create_dataset(doc, uri, rules)

            try:
                index.datasets.add(dataset, sources_policy='ensure')
            except changes.DocumentMismatchError as e:
                # The dataset document conflicts with this one: report it and
                # update with any change allowed.
                print(e)
                index.datasets.update(dataset, {tuple(): changes.allow_any})
                

# Index all configured products into the dev index.
index_datasets(dev_index, product_dict, base_path, rules)

high_tide_comp_20p


A Jupyter Widget



high_tide_comp_count


A Jupyter Widget


low_tide_comp_20p


A Jupyter Widget


low_tide_comp_count


A Jupyter Widget




## Query and plot data

In [None]:
%matplotlib inline

In [None]:
import datacube
# Datacube API object backed by the dev index created earlier in this notebook.
dc = datacube.Datacube(index=dev_index)

In [None]:
# Keyword arguments shared by the dc.load calls below: latitude and longitude
# bounds plus a requested resolution.
query = dict(lat=(-20.25, -20.35), lon=(118.525, 118.575), resolution=(25,25))

In [None]:
# Load the high tide composite for the query area and plot its `red` variable.
data_high = dc.load(product='high_tide_comp_20p', **query)
data_high.red.plot()

In [None]:
# Load the low tide composite for the same query area and plot its `red` variable.
data_low = dc.load(product='low_tide_comp_20p', **query)
data_low.red.plot()