In [None]:
import itertools
from pathlib import Path

import odc.stac
from odc.stac import parse_item
from pystac.item import Item as StacItem
from tqdm.auto import tqdm
from utils.tar import tar_doc_stream
from utils.txt import from_njson, to_njson

from odc.emit import cmr_to_stac, emit_load, open_zict, open_zict_json, prep_s3_fs
from odc.emit._md import emit_id


def emit_md_stream(fname):
    """
    Stream paired metadata documents out of a zict store.

    Opens ``fname`` read-only via ``open_zict`` and, for every key ending in
    ``.cmr``, yields that entry together with the ``.dmrpp`` entry stored
    under the same id.

    :param fname: location of the store, passed unchanged to ``open_zict``.
    :return: generator of ``(kid, (cmr_doc, dmr_doc))`` tuples, where ``kid``
             is the store key with the ``.cmr`` suffix removed, ``cmr_doc`` is
             read through the ``open_zict_json`` view of the store and
             ``dmr_doc`` is read from the underlying store directly.

    The store is closed once the generator is exhausted, closed explicitly,
    or garbage-collected, so the handle does not leak.
    """
    cmr_suffix = ".cmr"
    store = open_zict(fname, "r")
    try:
        as_json = open_zict_json(store)
        for key in store:
            if not key.endswith(cmr_suffix):
                continue
            kid = key[: -len(cmr_suffix)]
            # NOTE(review): a `.cmr` key without a matching `.dmrpp` entry
            # presumably raises from the store lookup -- confirm.
            cmr_doc = as_json[kid + ".cmr"]
            dmr_doc = store[kid + ".dmrpp"]
            yield kid, (cmr_doc, dmr_doc)
    finally:
        # Runs on exhaustion, on `.close()` of the generator and on errors.
        store.close()


# Input tarballs, keyed by the kind of document each one carries.
srcs = dict(
    cmr=Path("Data/emit-jsons.tar.gz"),
    dmrpp=Path("Data/emit-dmrpp.tar.gz"),
)

# Output file that receives the generated STAC documents.
stac_njson = Path("Data/emit-stac.njson.gz")

In [None]:
# Pack the documents from both source tarballs into a single store, keyed by
# whatever `emit_id` returns for each archive member path and suffix.
emit_src_md_zip = Path("Data/emit-src-md.zip")
if not emit_src_md_zip.exists():
    store = open_zict(emit_src_md_zip, "w")
    try:
        cmrs = ((emit_id(p, ".cmr"), doc) for p, doc in tar_doc_stream(srcs["cmr"]))
        dmrpp = (
            (emit_id(p, ".dmrpp"), doc) for p, doc in tar_doc_stream(srcs["dmrpp"])
        )
        store.update(tqdm(itertools.chain(cmrs, dmrpp)))
    except BaseException:
        # A half-written store would satisfy the `exists()` guard above on the
        # next run and be silently reused, so release the handle and remove
        # the partial output. BaseException also covers a kernel interrupt.
        try:
            store.close()
        finally:
            emit_src_md_zip.unlink(missing_ok=True)
        raise
    else:
        store.close()
else:
    print(f"Skipping generation of {emit_src_md_zip}, exists")

In [None]:
if not stac_njson.exists():
    # Lazily convert every (cmr, dmrpp) pair from the source store to a STAC
    # document and hand the stream to `to_njson`, with a progress bar.
    mdd = (
        cmr_to_stac(cmr_doc, dmr_doc)
        for _kid, (cmr_doc, dmr_doc) in emit_md_stream(emit_src_md_zip)
    )
    to_njson(tqdm(mdd), stac_njson)
else:
    print(f"Skipping generation of `{stac_njson}`, exists")

## Review STAC doc

In [None]:
# Scratch store holding the STAC documents indexed by their "id" field.
stacs_path = Path("/tmp/emit.zip")

if not stacs_path.exists():
    print(f"... {stac_njson} => {stacs_path}")
    stacs = open_zict_json(stacs_path, "w")
    try:
        stacs.update((doc["id"], doc) for doc in tqdm(from_njson(stac_njson)))
    except BaseException:
        # Do not leave a partial store behind: the `exists()` guard would
        # treat it as complete on the next run. BaseException also covers a
        # kernel interrupt.
        try:
            stacs.close()
        finally:
            stacs_path.unlink(missing_ok=True)
        raise
    else:
        stacs.close()

# Re-open read-only for the lookups in the following cells.
stacs = open_zict_json(stacs_path, "r")

In [None]:
# Pick one STAC document by id from the read-only store; the cells below
# inspect and load this single item.
stac_doc = stacs["EMIT_L2A_RFL_001_20230316T045133_2307503_005"]

In [None]:
# Build a pystac Item from the raw STAC dict, then pass it through odc.stac's
# `parse_item`; both objects are displayed for inspection.
sit = StacItem.from_dict(stac_doc)
pit = parse_item(sit)
display(sit, pit)

In [None]:
# Single-element unpacking: this raises ValueError unless `stac2ds` produces
# exactly one result for the one item passed in.
(ds,) = odc.stac.stac2ds([sit])
# Last expression of the cell, so the metadata document is shown as output.
ds.metadata_doc

## Load back

In [None]:
fs = prep_s3_fs()

# Load the selected item, requesting chunks of 32 along "y".
# NOTE(review): assumes `emit_load` returns an xarray Dataset -- confirm.
xx0 = emit_load(stac_doc, fs, chunks={"y": 32})

# Split the result in two: `glt` keeps only the glt_x/glt_y variables, `xx`
# keeps everything except the glt_* and ortho_* variables.
# `drop_vars` replaces the deprecated variable-dropping form of `.drop`.
glt = xx0[["glt_x", "glt_y"]].drop_vars("spatial_ref")
xx = xx0.drop_vars(["glt_x", "glt_y", "ortho_x", "ortho_y", "ortho_spatial_ref"])

display(xx.odc.geobox, glt.odc.geobox, xx, glt)

-----------------------------------

In [None]:
# Evaluate `wavelengths` eagerly via `.compute()`.
# NOTE(review): presumably lazily loaded because `emit_load` was called with
# `chunks=` -- confirm.
ww = xx.wavelengths.compute()
ww

In [None]:
#",".join([f"{w:.15e}" for w in ww.data.tolist()])

!du -h Data/emit-src-md.zip
!du -h Data/emit-stac.njson.gz
!du -h /tmp/emit.zip
#!du -h /tmp/*gz
#!unzip -lv Data/emit-src-md.zip | head -20

------------------------------------------------