In [16]:
import sys
import argparse
import json
import fsspec
import xarray as xr
from pathlib import Path

import xstac
import pystac
import shapely.geometry

In [17]:
BBOX = {
    "north": [31.1, -180, 89.84, 180],
    "south": [-89.84, -180, -39.36, 180],
}
    

In [18]:
DESC = {
    "daily":"",
    "monthly":"",
    "aggregate":"",
}

In [1]:
# talk to Ann about these
CITATION_URLS = {
    "daily": "https://doi.org/10.7265/efmz-2t65",
    "monthly": "https://doi.org/10.7265/efmz-2t65",
    "annual": "https://doi.org/10.7265/efmz-2t65",
}
# DOI_NAMES = {
#     "daily": "10.3334/ORNLDAAC/1840",
#     "monthly": "10.3334/ORNLDAAC/1855",
#     "annual": "10.3334/ORNLDAAC/1852",
# }
CITATIONS = {("Meier, W. N., F. Fetterer, A. K. Windnagel, and S. Stewart. 2021. NOAA/NSIDC Climate Data Record of Passive Microwave Sea Ice Concentration, Version 4. [Indicate subset used]. Boulder, Colorado, USA.
                NSIDC: National Snow and Ice Data Center https://doi.org/10.7265/efmz-2t65."),  # noqa: E501}

In [2]:
FREQUENCIES = ["daily", "monthly", "annual"]

In [3]:
NETCDF_MEDIA_TYPE = pystac.MediaType.HDF

NameError: name 'pystac' is not defined

In [None]:
def parse_args(args=None):
    parser = argparse.ArgumentParser(usage=__doc__)
    parser.add_argument(
        "--region", type=str, choices=["both", "north", "south"], default="both"
    )
    parser.add_argument(
        "--frequency",
        type=str,
        choices=["all", "daily", "monthly", "aggregate"],
        default="all",
    )
    return parser.parse_args(args)

In [4]:

def generate(frequency, region):

    collection_template = {
        "id": f"seaiceClimateDataRecordV4-{frequency}-{region}",
        "stac_extensions": [
            "https://stac-extensions.github.io/scientific/v1.0.0/schema.json"
        ],
        "description": "{{ collection.description }}",
        "type": "Collection",
        "title": f"NOAA/NSIDC passive microwave sea ice concentration climate data record {frequency.title()} {FULL_REGIONS[region]}",
        "license": "proprietary",
        "keywords": [
            "EARTH SCIENCE",
            "CRYOSPHERE",
            "SEA ICE",
            "SEA ICE CONCENTRATION",
            "Polar",
            # maybe add more from netcdf
        ],
        "stac_version": "1.0.0",
        "links": [
            {
                "rel": "license",
                "title": "EOSDIS Data Use Policy",
                "href": "https://science.nasa.gov/earth-science/earth-science-data/data-information-policy",
            },
            {"rel": "cite-as", "href": CITATION_URLS[frequency]},
        ],
        "extent": {
            "spatial": {"bbox": [BBOX[region]]},
            "temporal": {"interval": [[None, None]]},
        },
        "providers": [
            {
                # Ask Ann
                # NCEI funders
                "name": "NSIDC",
                "roles": ["host", "processor"],
                "url": "https://nsidc.org/data",
            },
            {
                "name": "ORNL DAAC",
                "roles": ["producer"],
                "url": CITATION_URLS[frequency],
            },
        ],
        "assets": {
            "netcdf-https": {
                "href": f"https://noaadata.apps.nsidc.org/NOAA/G02202_V4/{region}/{frequency}.nc",
                "type": "application/vnd+zarr",
                "title": f"{frequency.title()} {FULL_REGIONS[region]} Daymet HTTPS Zarr root",
                "description": f"HTTPS URI of the {frequency} {FULL_REGIONS[region]} Daymet Zarr Group on Azure Blob Storage.",  # noqa: E501
                "roles": ["data", "zarr", "https"],
                "xarray:open_kwargs": {"consolidated": True},
            },
            "zarr-abfs": {
                "href": f"abfs://daymet-zarr/{frequency}/{region}.zarr",
                "type": "application/vnd+zarr",
                "title": f"{frequency.title()} {FULL_REGIONS[region]} Daymet Azure Blob File System Zarr root",
                "description": f"Azure Blob File System of the {frequency} {FULL_REGIONS[region]} Daymet Zarr Group on Azure Blob Storage for use with adlfs.",  # noqa: E501
                "roles": ["data", "zarr", "abfs"],
                "xarray:storage_options": {"account_name": "daymeteuwest"},
                "xarray:open_kwargs": {"consolidated": True},
            },
            "thumbnail": {
                # "href": f"https://noaadata.apps.nsidc.org/NOAA/G02202_V4/{region}/{frequency}/{year)/seaice_conc_{frequency}_{region_fileindicator}_{date}.png",  # noqa: E501
                "type": "image/png",
                "title": f"Daymet {frequency} {FULL_REGIONS[region]} map thumbnail",
                "roles": ["thumbnail"],
            },
        },
        "msft:short_description": f"{frequency.title()} {short_desc_snippet} on a 1-km grid for {FULL_REGIONS[region]}",
        "msft:storage_account": "daymeteuwest",
        "msft:container": "daymet-zarr",
        "msft:group_id": "daymet",
        "msft:group_keys": [frequency, FULL_REGIONS[region].lower()],
        "sci:doi": DOI_NAMES[frequency],
        "sci:citation": CITATIONS[frequency],
    }

    store = fsspec.get_mapper(
        f"az://daymet-zarr/{frequency}/{region}.zarr", account_name="daymeteuwest"
    )
    ds = xr.open_zarr(store, consolidated=True)
    if "yearday" in ds:
        ds.yearday.attrs["long_name"] = ds.yearday.attrs["long_name"].replace(
            "Januaray", "January"
        )

    collection = xstac.xarray_to_stac(
        ds,
        collection_template,
        temporal_dimension="time",
        x_dimension="x",
        y_dimension="y",
    )

    collection.remove_links(pystac.RelType.SELF)
    collection.remove_links(pystac.RelType.ROOT)

    collection_result = collection.to_dict(include_self_link=False)

    # additional dimensions not implemented in xstac
    collection_result["cube:dimensions"]["nv"] = {
        "type": "count",
        "description": "Size of the 'time_bnds' variable.",
        "values": [0, 1],
    }

    item_template = {
        "id": f"daymet-{frequency}-{region}",
        "type": "Feature",
        "links": [],
        "bbox": BBOX[region],
        "geometry": shapely.geometry.mapping(shapely.geometry.box(*BBOX[region])),
        "stac_version": "1.0.0",
        "properties": {"start_datetime": None, "end_datetime": None},
        "assets": {
            "zarr-https": {
                "href": f"https://daymeteuwest.blob.core.windows.net/daymet-zarr/{frequency}/{region}.zarr",
                "type": "application/vnd+zarr",
                "title": f"{frequency.title()} {FULL_REGIONS[region]} Daymet HTTPS Zarr root",
                "description": f"HTTPS URI of the {frequency} {FULL_REGIONS[region]} Daymet Zarr Group on Azure Blob Storage.",  # noqa: E501
                "roles": ["data", "zarr", "https"],
                "xarray:open_kwargs": {"consolidated": True},
            },
            "zarr-abfs": {
                "href": f"abfs://daymet-zarr/{frequency}/{region}.zarr",
                "type": "application/vnd+zarr",
                "title": f"{frequency.title()} {FULL_REGIONS[region]} Daymet Azure Blob File System Zarr root",
                "description": f"Azure Blob File System of the {frequency} {FULL_REGIONS[region]} Daymet Zarr Group on Azure Blob Storage for use with adlfs.",  # noqa: E501
                "roles": ["data", "zarr", "abfs"],
                "xarray:storage_options": {"account_name": "daymeteuwest"},
                "xarray:open_kwargs": {"consolidated": True},
            },
            "thumbnail": {
                "href": f"https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/daymet-{frequency}-{region}.png",  # noqa: E501
                "type": "image/png",
                "title": f"Daymet {frequency} {FULL_REGIONS[region]} map thumbnail",
            },
        },
    }

    item = xstac.xarray_to_stac(
        ds, item_template, temporal_dimension="time", x_dimension="x", y_dimension="y"
    )

    item_result = item.to_dict(include_self_link=False)

    # additional dimensions not implemented in xstac
    item_result["properties"]["cube:dimensions"]["nv"] = {
        "type": "count",
        "description": "Size of the 'time_bnds' variable.",
        "values": [0, 1],
    }

    for link in item_result["links"]:
        if link["rel"] == "root":
            link["href"] = "../catalog.json"
            link["rel"] = str(link["rel"].value)
            link["type"] = str(link["type"].value)

    return collection_result, item_result

SyntaxError: f-string: unmatched ')' (2122860394.py, line 66)