In [10]:
%load_ext jupyter_black

In [11]:
import re
import os
from pathlib import Path
import xarray as xr
# After every "/", captures a run of letters that may contain a single "-" or
# "_" (e.g. "/tmp/foo-bar" -> ["tmp", "foo-bar"]). Used to recover a variable
# name from a file path.
FILE_NAME_PATTERN = re.compile(r"/([A-Za-z]+(?:-|_)?[A-Za-z]+)+")
# The data directory sits next to the current working directory.
# `Path.cwd().parent` points at the same location the previous
# `os.path.abspath(__name__)` construction produced, without using the module
# name as a fake path component (and without failing when cwd is the root).
data = Path.cwd().parent / "data"
data

PosixPath('/workspaces/mmmpy/data')

In [14]:
# Materialise the glob into a sorted list: a bare generator is exhausted after
# a single pass (re-running a later cell would then see no files), and sorting
# makes the concatenation order deterministic rather than filesystem-dependent.
files = sorted((data / "MRMS_MergedReflectivity").glob("*grib2"))

In [15]:
def dims(ds: xr.Dataset) -> xr.Dataset:
    """Drop ``valid_time`` and remove duplicated index values.

    When ``valid_time`` holds more than one value, a new ``validTime``
    dimension labelled with those values is added first, and it is
    de-duplicated together with ``heightAboveSea``.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset exposing ``valid_time`` and indexed by ``heightAboveSea``.

    Returns
    -------
    xr.Dataset
        The dataset without ``valid_time`` and with duplicate entries along
        ``heightAboveSea`` (and ``validTime`` when it was added) dropped.
    """
    dedupe_dims = ["heightAboveSea"]
    # if more than one file was passed, valid_time should hold more than one value
    if ds.valid_time.size > 1:
        # in which case a new validTime dimension is added
        ds = ds.expand_dims({"validTime": ds["valid_time"].to_numpy()})
        dedupe_dims.append("validTime")

    # `Dataset.drop` is a legacy alias; `drop_vars` is the explicit equivalent.
    return ds.drop_vars("valid_time").drop_duplicates(dedupe_dims)


def name(ds: xr.Dataset) -> xr.Dataset:
    """Rename the dataset's single data variable to a meaningful name.

    If the variable already has a name other than ``"unknown"`` it is kept.
    Otherwise the name is inferred from the last ``FILE_NAME_PATTERN`` match
    found in the dataset's ``history`` attribute.

    Note that the ``history`` attribute is removed from ``ds.attrs`` as a side
    effect; it is only used here to infer the name.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset expected to contain exactly one data variable.

    Returns
    -------
    xr.Dataset
        The dataset with its data variable renamed.

    Raises
    ------
    ValueError
        If the dataset does not contain exactly one data variable, or if the
        variable is ``"unknown"`` and no name can be inferred from ``history``.
    """
    if len(ds.data_vars) != 1:
        # mrms grib2 data should only have one variable
        raise ValueError(
            f"expected exactly one data variable, found {len(ds.data_vars)}"
        )
    (ds_name,) = ds.data_vars
    # the history is not stored; it is only used to infer a name
    hist = ds.attrs.pop("history", None)
    # use the known name; if it is unknown, infer one from the file name
    if ds_name != "unknown":
        new_name = ds_name
    else:
        # a missing/empty history yields no candidates instead of a TypeError
        candidates = FILE_NAME_PATTERN.findall(hist) if hist else []
        if not candidates:
            raise ValueError(
                "data variable is 'unknown' and no name could be inferred "
                "from the 'history' attribute"
            )
        # previously this branch returned the bare string instead of the
        # renamed dataset, breaking the declared return type
        new_name = candidates[-1]

    return ds.rename({ds_name: new_name})


# Open every GRIB2 file through cfgrib as one dataset concatenated along
# ``heightAboveSea``, then normalise its dimensions and variable name.
ds = (
    xr.open_mfdataset(
        files,
        engine="cfgrib",
        chunks={},
        combine="nested",
        concat_dim=["heightAboveSea"],
        data_vars="minimal",
        backend_kwargs={
            # decoding options
            "mask_and_scale": True,
            "decode_times": True,
            "concat_characters": True,
            "decode_coords": True,
            "decode_timedelta": None,
            # cfgrib reader options
            "lock": None,
            "indexpath": "{path}.{short_hash}.idx",
            "filter_by_keys": {},
            "read_keys": [],
            "encode_cf": ("parameter", "time", "geography", "vertical"),
            "squeeze": True,
            "time_dims": {"valid_time"},
        },
    )
    .pipe(dims)
    .pipe(name)
)
ds

KeyboardInterrupt: 

In [36]:
from pathlib import Path
# Scratch check: list entries directly under the data directory matching "[!mrms]".
# NOTE(review): "[!mrms]" is a character class, so it matches only
# single-character names that are not "m", "r" or "s"; it does not exclude
# names starting with "mrms". Confirm the intended filter.
# NOTE(review): this hard-coded path is spelled differently from the `data`
# directory displayed earlier (/workspaces/mmmpy/data) -- verify which is right.
tuple(Path("/workspaces/MMM-Py/data/").glob("[!mrms]"))
# Path(Path("/tmp/mmmpy/"))

()