Generate STAC Collections for NOAA Climate Data Record

In [1]:
import sys
from datetime import datetime
import argparse
import json
import fsspec
import xarray as xr
from pathlib import Path

import xstac
import pystac
import shapely.geometry

In [2]:
# Spatial extent of each hemisphere, in the order STAC requires for
# `extent.spatial.bbox`: [west, south, east, north], i.e.
# [min longitude, min latitude, max longitude, max latitude] in degrees.
BBOX = {
    "north": [-180, 31.1, 180, 89.84],
    "south": [-180, -89.84, 180, -39.36],
}
    

In [3]:
# Collection descriptions keyed by temporal frequency. Every entry is still an
# empty placeholder.
DESC = dict.fromkeys(("daily", "monthly", "aggregate"), "")

In [4]:
# DOI landing page for each product frequency (all share one dataset DOI).
# NOTE(review): other cells name the third frequency "aggregate" rather than
# "annual" -- confirm which key is intended.
CITATION_URLS = {
    "daily": "https://doi.org/10.7265/efmz-2t65",
    "monthly": "https://doi.org/10.7265/efmz-2t65",
    "annual": "https://doi.org/10.7265/efmz-2t65",
}

# Recommended citation text as a single string. Plain parentheses are used so
# the adjacent literals concatenate into one str; wrapping them in braces would
# build a one-element set instead.
CITATION = (
    "Meier, W. N., F. Fetterer, A. K. Windnagel, and S. Stewart. 2021. "
    "NOAA/NSIDC Climate Data Record of Passive Microwave Sea Ice Concentration, Version 4. "
    "[Indicate subset used]. Boulder, Colorado, USA. "
    "NSIDC: National Snow and Ice Data Center https://doi.org/10.7265/efmz-2t65."
)

In [5]:
# Temporal frequencies to generate collections for.
# TODO: add "aggregate" later.
FREQUENCIES = [
    "daily",
    "monthly",
]

In [14]:
# Hemispheres to generate collections for; the same names key the BBOX dict.
REGIONS = [
    "north",
    "south",
]

In [6]:
# Media type advertised for the NetCDF assets. NetCDF-4 files are stored as
# HDF5, so use the HDF5 type ("application/x-hdf5"); pystac.MediaType.HDF
# ("application/x-hdf") denotes HDF version 4 and earlier.
# NOTE(review): assumes the source files are NetCDF-4 (the asset roles in this
# notebook tag them "netcdf4") rather than classic NetCDF -- confirm.
NETCDF_MEDIA_TYPE = pystac.MediaType.HDF5

In [7]:
# Every calendar year from 1978 through the current year, inclusive.
start_year = datetime(1978, 1, 1).year
current_year = datetime.now().year
years = [*range(start_year, current_year + 1)]
print(years)

[1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]


In [8]:
def parse_args(args=None):
    """Parse the options that select which collections to generate.

    Args:
        args: Sequence of argument strings to parse. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``. Inside a notebook kernel
            ``sys.argv`` holds the kernel's own launch arguments, so pass an
            explicit list (for example ``[]``) there.

    Returns:
        argparse.Namespace with two attributes:
        ``region`` ("both", "north" or "south"; default "both") and
        ``frequency`` ("all", "daily", "monthly" or "aggregate"; default "all").
    """
    # Use the module docstring as the description: passing it as `usage`
    # would replace argparse's auto-generated usage line in --help and in
    # error messages.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--region",
        type=str,
        choices=["both", "north", "south"],
        default="both",
    )
    parser.add_argument(
        "--frequency",
        type=str,
        choices=["all", "daily", "monthly", "aggregate"],
        default="all",
    )
    return parser.parse_args(args)

In [20]:

def generate(frequency, region, year, month=None, doy=None):
    """Build a STAC Collection from one sea ice CDR source file.

    The source file URL is derived from the arguments, the file is opened with
    xarray, and xstac merges the dataset's metadata into a collection template.

    Args:
        frequency: Product frequency. ``"daily"`` selects the daily file
            layout; any other value selects the monthly layout. Must also be a
            key of ``CITATION_URLS``.
        region: ``"north"`` or ``"south"``.
        year: Year of the source file; inserted verbatim into the URL.
        month: Month number of the source file, zero-padded to two digits.
            Required unless ``frequency`` is ``"daily"``.
        doy: Date token placed directly after the year in daily file names,
            inserted verbatim. Required when ``frequency`` is ``"daily"``.

    Returns:
        The object produced by ``xstac.xarray_to_stac`` for the template.

    Raises:
        KeyError: If ``region`` or ``frequency`` is not a known key.
        ValueError: If the date component required by ``frequency`` is missing.
    """
    # Short hemisphere code used in file names, and the label used in the title.
    hemispheres = {
        "north": ("nh", "Northern Hemisphere"),
        "south": ("sh", "Southern Hemisphere"),
    }
    region_sn, region_label = hemispheres[region]

    # Resolve the source file first so bad arguments fail before any I/O.
    # NOTE(review): the "n07" platform token is hard-coded -- confirm it is
    # valid for every year requested.
    base_url = f"https://noaadata.apps.nsidc.org/NOAA/G02202_V4/{region}/{frequency}"
    if frequency == "daily":
        if doy is None:
            raise ValueError("doy is required when frequency is 'daily'")
        # NOTE(review): confirm whether daily file names carry a day-of-year
        # or a month+day token after the year.
        filepath = f"{base_url}/{year}/seaice_conc_daily_{region_sn}_{year}{doy}_n07_v04r00.nc"
    else:
        if month is None:
            raise ValueError(f"month is required when frequency is {frequency!r}")
        filepath = f"{base_url}/seaice_conc_monthly_{region_sn}_{year}{int(month):02d}_n07_v04r00.nc"

    citation_url = CITATION_URLS[frequency]
    # The scientific extension expects the bare DOI name, not the resolver URL.
    doi = citation_url.split("doi.org/", 1)[-1]
    # Accept the citation either as one string or as a collection of strings.
    citation_text = CITATION if isinstance(CITATION, str) else " ".join(CITATION)

    # NOTE(review): only the spatial extent is set here; STAC Collections also
    # require `extent.temporal` -- confirm it is supplied before validation.
    collection_template = {
        "id": f"seaiceClimateDataRecordV4-{frequency}-{region}",
        "stac_extensions": [
            "https://stac-extensions.github.io/scientific/v1.0.0/schema.json"
        ],
        "description": "{{ collection.description }}",
        "type": "Collection",
        "title": f"NOAA/NSIDC passive microwave sea ice concentration climate data record {frequency.title()} {region_label}",
        "license": "proprietary",
        "keywords": [
            "EARTH SCIENCE",
            "CRYOSPHERE",
            "SEA ICE",
            "SEA ICE CONCENTRATION",
            "Polar",
            # maybe add more from netcdf
        ],
        "stac_version": "1.0.0",
        "links": [
            {
                "rel": "license",
                "title": "EOSDIS Data Use Policy",
                "href": "https://science.nasa.gov/earth-science/earth-science-data/data-information-policy",
            },
            # A link href must be a URL, so point at the DOI landing page; the
            # citation text itself goes in "sci:citation".
            {"rel": "cite-as", "href": citation_url},
        ],
        "extent": {
            "spatial": {"bbox": [BBOX[region]]},
        },
        "providers": [
            {
                "name": "NOAA at the National Snow and Ice Data Center",
                "roles": ["licensor", "host", "processor"],
                "url": "https://nsidc.org/data",
            },
        ],
        "assets": {
            "netcdf-https": {
                "href": f"https://noaadata.apps.nsidc.org/NOAA/G02202_V4/{region}/{frequency}.nc",
                "type": NETCDF_MEDIA_TYPE,
                "roles": ["data", "netcdf4", "https"],
            },
        },
        "sci:doi": doi,
        "sci:citation": citation_text,
    }

    # NOTE(review): the sample monthly file printed in this notebook has
    # dimensions (tdim, y, x) with `time` attached to `tdim` and no `x`/`y`
    # coordinate variables -- confirm these dimension names suit xstac.
    # The context manager closes the file once the metadata has been read.
    with xr.open_dataset(filepath) as ds:
        return xstac.xarray_to_stac(
            ds,
            collection_template,
            temporal_dimension="time",
            x_dimension="x",
            y_dimension="y",
        )



In [29]:
# Scratch check: wrap the monthly northern-hemisphere URL in an fsspec mapper
# and show the resulting object.
store = fsspec.get_mapper(
    "https://noaadata.apps.nsidc.org/NOAA/G02202_V4/north/monthly/.nc"
)
print(store)

<fsspec.mapping.FSMap object at 0x16588ba50>


In [25]:
# Remote location of a sample monthly southern-hemisphere file (February 1979).
# It is recorded here but not opened by this cell.
url = "https://noaadata.apps.nsidc.org/NOAA/G02202_V4/south/monthly/seaice_conc_monthly_sh_197902_n07_v04r00.nc"

# Local download (November 1978) that is actually opened and inspected.
file = "~/Downloads/seaice_conc_monthly_sh_197811_n07_v04r00.nc"
ds = xr.open_dataset(file)
print(ds)

<xarray.Dataset> Size: 2MB
Dimensions:                           (tdim: 1, y: 332, x: 316)
Coordinates:
    time                              (tdim) datetime64[ns] 8B ...
    xgrid                             (x) float32 1kB ...
    ygrid                             (y) float32 1kB ...
Dimensions without coordinates: tdim, y, x
Data variables:
    cdr_seaice_conc_monthly           (tdim, y, x) float32 420kB ...
    nsidc_bt_seaice_conc_monthly      (tdim, y, x) float32 420kB ...
    nsidc_nt_seaice_conc_monthly      (tdim, y, x) float32 420kB ...
    projection                        |S1 1B ...
    qa_of_cdr_seaice_conc_monthly     (tdim, y, x) float32 420kB ...
    stdev_of_cdr_seaice_conc_monthly  (tdim, y, x) float32 420kB ...
Attributes: (12/42)
    Conventions:               CF-1.6, ACDD-1.3
    title:                     NOAA/NSIDC Climate Data Record of Passive Micr...
    comment:                   none
    program:                   NOAA Climate Data Record Program
    cdr_var