In [10]:
import logging

import dask.bag
import dask.diagnostics
import holoviews.operation.datashader as hd
import hvplot.xarray
import geoviews as gv
import numpy as np
import xarray as xr

from cartopy import crs as ccrs
from kerchunk.combine import MultiZarrToZarr
from kerchunk.hdf import SingleHdf5ToZarr

In [2]:
# Emit INFO-level log records so library messages (such as the fsspec.reference
# "Read reference from URL ..." line shown when the dataset is opened) appear in the notebook.
logging.basicConfig(level=logging.INFO)

In [3]:
# Number of seconds in one day (24 h x 60 min x 60 s); used to rescale `pr`.
SECONDS_PER_DAY = 24 * 60 * 60


def shift_dataset(ds):
    """Relabel the ``grid_xt`` coordinate symmetrically about zero and sort by it.

    The first half of the existing ``grid_xt`` labels is kept as-is; the second
    half is replaced by the first half negated and reversed. The dataset is then
    sorted along ``grid_xt`` so the labels increase monotonically.

    Presumably this turns a 0..360 longitude axis into -180..180; that only
    holds if the original labels are symmetric about the midpoint -- TODO confirm
    for the input grid.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a ``grid_xt`` dimension and coordinate.

    Returns
    -------
    xarray.Dataset
        A dataset with the new ``grid_xt`` labels, sorted by ``grid_xt``.

    Notes
    -----
    The rebuilt coordinate has ``2 * (n // 2)`` entries, so it matches the
    ``grid_xt`` dimension length only when ``n`` is even.
    """
    midpoint = ds.sizes["grid_xt"] // 2
    first_half = ds.grid_xt.isel(grid_xt=slice(None, midpoint))
    # Mirror image of the first half: reversed order, opposite sign.
    mirrored_half = -first_half.isel(grid_xt=slice(None, None, -1))
    relabeled_grid_xt = xr.concat([first_half, mirrored_half], dim="grid_xt")
    return ds.assign_coords(grid_xt=relabeled_grid_xt).sortby("grid_xt")

In [5]:
# Combined kerchunk reference file; passed as the `fo` storage option when the dataset is opened.
# NOTE(review): no visible cell creates this file (execution count 4 is missing), so it
# presumably has to exist already -- confirm how it is generated.
combined_target = "combined-kerchunk.json"

In [6]:
# Open the combined kerchunk references as a single dask-backed dataset.
# The "reference://" URL sends the read through fsspec's reference filesystem,
# which is pointed at the JSON file named by `combined_target` via `fo`.
ds = xr.open_dataset(
    "reference://",
    engine="zarr",
    backend_kwargs=dict(
        storage_options=dict(
            fo=combined_target,
            # Where the referenced chunks live; presumably a public Google Cloud
            # Storage bucket read without credentials -- confirm.
            remote_protocol="gs",
            remote_options=dict(anon=True),
        ),
        consolidated=False,
    ),
    # Empty dict: load variables as dask arrays rather than eagerly into memory.
    chunks={},
)

INFO:fsspec.reference:Read reference from URL combined-kerchunk.json


In [7]:
# Relabel/sort grid_xt, then scale `pr` by the number of seconds in a day
# (presumably converting a per-second rate to a per-day amount -- confirm the source units).
shifted = shift_dataset(ds).assign(
    pr=lambda relabeled: SECONDS_PER_DAY * relabeled.pr
)

In [8]:
# Display the dataset repr to inspect shapes, dtypes and dask chunking.
shifted

Unnamed: 0,Array,Chunk
Bytes,319.92 MiB,90.00 kiB
Shape,"(3640, 11520, 2)","(1, 11520, 2)"
Dask graph,3640 chunks in 3 graph layers,3640 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 319.92 MiB 90.00 kiB Shape (3640, 11520, 2) (1, 11520, 2) Dask graph 3640 chunks in 3 graph layers Data type float32 numpy.ndarray",2  11520  3640,

Unnamed: 0,Array,Chunk
Bytes,319.92 MiB,90.00 kiB
Shape,"(3640, 11520, 2)","(1, 11520, 2)"
Dask graph,3640 chunks in 3 graph layers,3640 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,159.96 MiB,45.00 kiB
Shape,"(3640, 5760, 2)","(1, 5760, 2)"
Dask graph,3640 chunks in 2 graph layers,3640 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 159.96 MiB 45.00 kiB Shape (3640, 5760, 2) (1, 5760, 2) Dask graph 3640 chunks in 2 graph layers Data type float32 numpy.ndarray",2  5760  3640,

Unnamed: 0,Array,Chunk
Bytes,159.96 MiB,45.00 kiB
Shape,"(3640, 5760, 2)","(1, 5760, 2)"
Dask graph,3640 chunks in 2 graph layers,3640 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.76 TiB,7.91 MiB
Shape,"(3640, 5760, 11520)","(1, 720, 1440)"
Dask graph,232960 chunks in 4 graph layers,232960 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.76 TiB 7.91 MiB Shape (3640, 5760, 11520) (1, 720, 1440) Dask graph 232960 chunks in 4 graph layers Data type float64 numpy.ndarray",11520  5760  3640,

Unnamed: 0,Array,Chunk
Bytes,1.76 TiB,7.91 MiB
Shape,"(3640, 5760, 11520)","(1, 720, 1440)"
Dask graph,232960 chunks in 4 graph layers,232960 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [11]:
# Wrap the dataset for GeoViews: grid_xt / grid_yt / time are the key dimensions,
# `pr` is the value dimension, and coordinates are declared as PlateCarree.
gv_ds = gv.Dataset(shifted, ["grid_xt", "grid_yt", "time"], "pr", crs=ccrs.PlateCarree())

# One Image per time step, indexed by (grid_xt, grid_yt). dynamic=True returns a
# DynamicMap that slices out a time step only when it is requested, instead of
# eagerly building a HoloMap containing an Image for every time step up front.
images = gv_ds.to(gv.Image, ["grid_xt", "grid_yt"], dynamic=True)

# Resample each image with datashader before display, so the full-resolution
# grid is not sent to the plot.
regridded = hd.regrid(images)

# Robinson-projected map with a logarithmic colour scale (clim lower bound is
# kept positive because of logz), overlaid with coastlines.
regridded.opts(
    height=500,
    width=1000,
    colorbar=True,
    projection=ccrs.Robinson(),
    logz=True,
    cmap="Blues",
    clim=(0.1, 20000)
) * gv.feature.coastline