## Metadata generation for AVIRIS


In [None]:
import json

import numpy as np
import xarray as xr
from IPython.display import JSON, Markdown
from s3fs import S3FileSystem

from odc.geo.xr import ODCExtension
from datacube import Datacube
from datacube.index.abstract import DatasetTuple
from datacube.index.hl import prep_eo3
from datacube.utils.serialise import jsonify_document

from odc.av3 import av3_mk_dataset

# Root S3 location that is listed for AV3 zarr stores.
prefix = "s3://adias-prod-dc-data-projects/odc-hs/av3/"

s3 = S3FileSystem()
_ = s3.connect()

# Keep only the listing entries ending in ".zarr". The scheme is re-attached by
# hand, presumably because s3.ls() reports paths without it -- TODO confirm.
urls = ["s3://" + path for path in s3.ls(prefix) if path.endswith(".zarr")]
print(f"Found {len(urls)} in {prefix}")

### Grab consolidated metadata for each zarr

In [None]:
# One parsed ".zmetadata" JSON document per store, in the same order as `urls`.
zzmd = [json.loads(s3.cat(store + "/.zmetadata")) for store in urls]

### Open with xarray in lazy mode

In [None]:
# Open every store with xarray, passing decode_coords="all" to each call.
xxs = [xr.open_zarr(store, decode_coords="all") for store in urls]

# The first opened store is the sample inspected below.
xx = xxs[0]
assert isinstance(xx.odc, ODCExtension)
display(
    xx.odc.geobox,
    xx.odc.spatial_dims,
    xx.odc.transform,
    xx,
)

## Build Dataset docs

In [None]:
# Build one dataset document per store by pairing, position by position, each
# opened store with its consolidated metadata.
docs = list(map(av3_mk_dataset, xxs, zzmd))

# Show the first document twice: as generated, then after prep_eo3() and
# jsonify_document().
display(
    Markdown("### EO3 Dataset (raw)"),
    JSON(docs[0]),
    Markdown("### EO3 Dataset (expanded)"),
    JSON(jsonify_document(prep_eo3(docs[0]))),
)

## Add or update datasets in DB

In [None]:
def update_existing_dss(dss):
    """Purge the given datasets from the index, then bulk-add them again.

    Uses the notebook-level ``dc`` handle, which must be defined before this
    function is called.

    :param dss: iterable of records exposing ``.metadata["id"]``. Any iterable
        is accepted, including one-shot iterators and generators.
    :return: whatever ``dc.index.datasets.bulk_add`` returns.
    """
    # The records are traversed twice (id collection, then bulk_add). Without
    # this copy a generator would be exhausted by the first pass, and the
    # datasets would be purged but never re-added.
    dss = list(dss)
    uuids = [ds.metadata["id"] for ds in dss]
    # allow_delete_active=True is passed so the purge is not restricted to
    # archived datasets (per the datacube index API -- confirm for the
    # installed version).
    dc.index.datasets.purge(uuids, allow_delete_active=True)
    return dc.index.datasets.bulk_add(dss)


# Connect using the "emit" datacube environment and look up the target product.
dc = Datacube(env="emit")
product = dc.index.products.get_by_name("av3_l2a")

# One (product, prepared document, [location]) record per generated document.
dss = [
    DatasetTuple(product, prep_eo3(entry), [entry["location"]])
    for entry in docs
]

# Manual switch: change False to True to add the datasets to the index.
if False:
    dc.index.datasets.bulk_add(dss)
else:
    print("not adding to db by default")

# Manual switch: change False to True to purge and re-add the datasets.
if False:
    update_existing_dss(dss)

------------