In [None]:
import xarray as xr
import cf_xarray
import dask

# Switch off dask's "array.slicing.split_large_chunks" option for this session.
dask.config.set({"array.slicing.split_large_chunks": False})

In [None]:
def open_example_dataset():
    """Open the example grid and model output and return them as one dataset.

    The grid file is read from a local path and the model output from a
    remote URL (opened with ``chunks={"time": 1}``); the two are merged with
    ``compat="override"``.

    To illustrate the curvilinear-grid case, where axes and coordinates are
    different variables, the ``xh``/``xq``/``yh``/``yq`` variables are dropped
    and then re-assigned as coordinates from the stripped dataset, and every
    variable whose name starts with ``geo`` is promoted to a coordinate.

    Returns
    -------
    xarray.Dataset
        The merged dataset after the axis and ``geo*`` coordinate adjustments.
    """
    # Read both inputs and combine them
    static_grid = xr.open_dataset("data/ocean_grid_sym_OM4_05.nc")
    model_output = xr.open_dataset(
        "http://35.188.34.63:8080/thredds/dodsC/OM4p5/ocean_monthly_z.200301-200712.nc4",
        chunks={"time": 1},
    )
    merged = xr.merge([static_grid, model_output], compat="override")

    # Replace the original axis variables. The lookups below must run on the
    # dataset *after* the drop, so the order of these statements matters.
    # NOTE(review): indexing a dropped axis name presumably yields xarray's
    # default positional index for that dimension -- confirm.
    axis_names = ["xh", "xq", "yh", "yq"]
    stripped = merged.drop_vars(axis_names)
    axis_coords = {}
    for axis_name in axis_names:
        axis_coords[axis_name] = stripped[axis_name]
    with_axes = stripped.assign_coords(axis_coords)

    # Promote every "geo"-prefixed variable to a coordinate
    geo_names = [name for name in with_axes.variables if name.startswith("geo")]
    return with_axes.set_coords(geo_names)


def assign_coordinates_and_cell_measures(ds):
    """Set ``coordinates`` and ``cell_measures`` attributes on every data variable.

    Adds missing attributes (to implement in cf_xarray). The dataset is
    modified in place -- only the ``attrs`` of its data variables are touched --
    and nothing is returned.

    For each data variable:

    * ``coordinates`` becomes the space-separated names listed in
      ``ds.cf.coordinates`` whose dimensions are a subset of the variable's
      dimensions. The attribute is removed when no name qualifies.
    * ``cell_measures`` becomes space-separated ``"<key>: <name>"`` pairs built
      from the variables registered under the standard names
      ``cell_thickness``, ``cell_area`` and ``ocean_volume`` (keys
      ``thickness``, ``area`` and ``volume``). A measure qualifies when its
      dimensions are a subset of the variable's dimensions and it is not the
      variable itself; when several qualify for one key, the last one listed
      wins. The attribute is removed when no measure qualifies.

    A standard name that is absent from ``ds.cf.standard_names`` is skipped
    instead of raising ``KeyError``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset exposing the cf_xarray ``.cf`` accessor.
    """
    # The accessor lookups are resolved once, before the loop: the loop only
    # edits the "coordinates"/"cell_measures" attrs of data variables, so
    # re-querying them for every variable is repeated work.
    coord_dims = [
        (name, set(ds[name].dims))
        for names in ds.cf.coordinates.values()
        for name in names
    ]

    standard_names = ds.cf.standard_names
    measure_dims = [
        # The cell_measures key is the last word of the standard name.
        (stdname.split("_")[-1], name, set(ds[name].dims))
        for stdname in ("cell_thickness", "cell_area", "ocean_volume")
        # .get(): a dataset without one of these standard names is valid input.
        for name in standard_names.get(stdname, [])
    ]

    for varname, variable in ds.data_vars.items():
        var_dims = set(variable.dims)

        coordinates = [name for name, dims in coord_dims if dims <= var_dims]
        if coordinates:
            variable.attrs["coordinates"] = " ".join(coordinates)
        else:
            variable.attrs.pop("coordinates", None)

        # A later candidate for the same key overwrites an earlier one, and a
        # variable is never listed as its own measure.
        cell_measures = {
            key: name
            for key, name, dims in measure_dims
            if dims <= var_dims and name != varname
        }
        if cell_measures:
            variable.attrs["cell_measures"] = " ".join(
                [f"{k}: {v}" for k, v in cell_measures.items()]
            )
        else:
            variable.attrs.pop("cell_measures", None)

In [None]:
# Load the example dataset and display it as the cell output
ds = open_example_dataset()
ds

In [None]:
# Show cf-xarray guessing function (verbose=True asks it to report what it does),
# then add the `coordinates` and `cell_measures` attributes to the data variables in place
ds = ds.cf.guess_coord_axis(verbose=True)
assign_coordinates_and_cell_measures(ds)

In [None]:
# Display the cf-xarray accessor for the dataset; this cell only shows it and assigns nothing
# NOTE(review): presumably the repr lists the coordinates and cell measures that cf-xarray detects -- confirm
ds.cf