# Rechunk the kerchunked dataset

In [1]:
import fsspec
import fsspec.implementations.reference
import zarr
import xarray as xr
from pathlib import Path

from rechunker import rechunk

In [2]:
# Record the rechunker version used for this run (shown as the cell output).
import rechunker
rechunker.__version__

'0.5.0'

In [3]:
# Record the zarr version used for this run (shown as the cell output).
# zarr is already imported in the first cell; the re-import here is harmless.
import zarr
zarr.__version__

'2.13.3'

#### Start a Dask Gateway cluster
Use the custom helper function `ebd.start_dask_cluster` to set options on this cluster. The helper is optional; it just cuts down on the lines of code needed in notebooks.

In [4]:
import sys
import os

# Directory that holds the shared `ebdpy` helper module.
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
ebd_lib_dir = '/shared/users/rsignell/lib'
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if ebd_lib_dir not in sys.path:
    sys.path.append(ebd_lib_dir)
import ebdpy as ebd

os.environ['AWS_PROFILE'] = 'esip-qhub'  # use env vars for AWS credentials to write

# Attach to an existing Dask Gateway cluster (use_existing_cluster=True) with a
# fixed pool of 30 workers (adaptive_scaling=False), and forward this process's
# environment variables to the workers (propagate_env=True).
client, cluster, gateway = ebd.start_dask_cluster(
    profile=os.environ['AWS_PROFILE'],
    worker_max=30,
    region='us-west-2',
    worker_profile='Medium Worker',
    use_existing_cluster=True,
    adaptive_scaling=False,
    wait_for_cluster=False,
    propagate_env=True)

Region: us-west-2
Existing Dask clusters:
Cluster Index c_idx: 0 / Name: dev.86df4e246414441789173f5a323ec67b ClusterStatus.RUNNING
Using existing cluster [0].
Setting Fixed Scaling workers=30
Reconnect client to clear cache
client.dashboard_link (for new browser tab/window or dashboard searchbar in Jupyterhub):
https://nebari.esipfed.org/gateway/clusters/dev.86df4e246414441789173f5a323ec67b/status
Propagating environment variables to workers
Using environment: users/users-pangeo50


coiled.analytics.computation.interval is set to '10m'. Ignoring this old default value, using '15s' instead. To override, use any value other than '10m'.


#### Open Kerchunked Dataset JSON for one year

#### Open Entire Kerchunked Dataset, lazy loaded from parquet

In [5]:
# S3 location of the lazily loaded reference set for the whole dataset.
s3_lazy_refs = 's3://esip-qhub-public/nwm/LDAS-1k/lazyrefs'

In [6]:
# Build a lazy reference filesystem over the reference set; anonymous-access
# options are passed for both the reference target and the remote data.
fs = fsspec.implementations.reference.DFReferenceFileSystem(
    s3_lazy_refs,
    lazy=True,
    target_options=dict(anon=True),
    remote_protocol="s3",
    remote_options=dict(anon=True),
)
m = fs.get_mapper("")

# Open through the zarr engine with one dask chunk per time step
# (full 3840 x 4608 grid per chunk); metadata is not consolidated.
ds = xr.open_dataset(
    m,
    engine="zarr",
    chunks=dict(time=1, y=3840, x=4608),
    backend_kwargs={"consolidated": False},
)

In [7]:
ds

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.03 TiB,135.00 MiB
Shape,"(116631, 3840, 2, 4608)","(1, 3840, 1, 4608)"
Dask graph,233262 chunks in 2 graph layers,233262 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.03 TiB 135.00 MiB Shape (116631, 3840, 2, 4608) (1, 3840, 1, 4608) Dask graph 233262 chunks in 2 graph layers Data type float64 numpy.ndarray",116631  1  4608  2  3840,

Unnamed: 0,Array,Chunk
Bytes,30.03 TiB,135.00 MiB
Shape,"(116631, 3840, 2, 4608)","(1, 3840, 1, 4608)"
Dask graph,233262 chunks in 2 graph layers,233262 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.03 TiB,135.00 MiB
Shape,"(116631, 3840, 2, 4608)","(1, 3840, 1, 4608)"
Dask graph,233262 chunks in 2 graph layers,233262 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.03 TiB 135.00 MiB Shape (116631, 3840, 2, 4608) (1, 3840, 1, 4608) Dask graph 233262 chunks in 2 graph layers Data type float64 numpy.ndarray",116631  1  4608  2  3840,

Unnamed: 0,Array,Chunk
Bytes,30.03 TiB,135.00 MiB
Shape,"(116631, 3840, 2, 4608)","(1, 3840, 1, 4608)"
Dask graph,233262 chunks in 2 graph layers,233262 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,60.06 TiB,135.00 MiB
Shape,"(116631, 3840, 4, 4608)","(1, 3840, 1, 4608)"
Dask graph,466524 chunks in 2 graph layers,466524 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 60.06 TiB 135.00 MiB Shape (116631, 3840, 4, 4608) (1, 3840, 1, 4608) Dask graph 466524 chunks in 2 graph layers Data type float64 numpy.ndarray",116631  1  4608  4  3840,

Unnamed: 0,Array,Chunk
Bytes,60.06 TiB,135.00 MiB
Shape,"(116631, 3840, 4, 4608)","(1, 3840, 1, 4608)"
Dask graph,466524 chunks in 2 graph layers,466524 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,60.06 TiB,135.00 MiB
Shape,"(116631, 3840, 4, 4608)","(1, 3840, 1, 4608)"
Dask graph,466524 chunks in 2 graph layers,466524 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 60.06 TiB 135.00 MiB Shape (116631, 3840, 4, 4608) (1, 3840, 1, 4608) Dask graph 466524 chunks in 2 graph layers Data type float64 numpy.ndarray",116631  1  4608  4  3840,

Unnamed: 0,Array,Chunk
Bytes,60.06 TiB,135.00 MiB
Shape,"(116631, 3840, 4, 4608)","(1, 3840, 1, 4608)"
Dask graph,466524 chunks in 2 graph layers,466524 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


#### Select high-priority vars only

In [8]:
# Keep only the high-priority variables plus 'crs'; every other variable is dropped.
# Note that this rebinds `ds`, so the full dataset must be reopened to get the other variables back.
ds = ds[['ACCET', 'SNEQV', 'FSNO', 'crs']]

In [9]:
ds

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.02 TiB 135.00 MiB Shape (116631, 3840, 4608) (1, 3840, 4608) Dask graph 116631 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  116631,

Unnamed: 0,Array,Chunk
Bytes,15.02 TiB,135.00 MiB
Shape,"(116631, 3840, 4608)","(1, 3840, 4608)"
Dask graph,116631 chunks in 2 graph layers,116631 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [10]:
ds['ACCET'].isel(time=slice(0,144))

Unnamed: 0,Array,Chunk
Bytes,18.98 GiB,135.00 MiB
Shape,"(144, 3840, 4608)","(1, 3840, 4608)"
Dask graph,144 chunks in 3 graph layers,144 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 18.98 GiB 135.00 MiB Shape (144, 3840, 4608) (1, 3840, 4608) Dask graph 144 chunks in 3 graph layers Data type float64 numpy.ndarray",4608  3840  144,

Unnamed: 0,Array,Chunk
Bytes,18.98 GiB,135.00 MiB
Shape,"(144, 3840, 4608)","(1, 3840, 4608)"
Dask graph,144 chunks in 3 graph layers,144 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [11]:
ds.attrs

{'Conventions': 'CF-1.6',
 'GDAL_DataType': 'Generic',
 'TITLE': 'OUTPUT FROM WRF-Hydro v5.2.0-beta2',
 'code_version': 'v5.2.0-beta2',
 'model_configuration': 'retrospective',
 'model_initialization_time': '1979-02-01_00:00:00',
 'model_output_type': 'land',
 'model_output_valid_time': '1979-02-01_03:00:00',
 'model_total_valid_times': 472,
 'proj4': '+proj=lcc +units=m +a=6370000.0 +b=6370000.0 +lat_1=30.0 +lat_2=60.0 +lat_0=40.0 +lon_0=-97.0 +x_0=0 +y_0=0 +k_0=1.0 +nadgrids=@null +wktext  +no_defs'}

#### Set up temporary and final Zarr stores on S3

In [12]:
# Authenticated (anon=False) S3 filesystem used for writing the output stores.
# skip_instance_cache=True asks fsspec for a fresh instance instead of a cached one.
fs_write = fsspec.filesystem('s3', anon=False, skip_instance_cache=True)

In [13]:
# Bucket-relative paths (no 's3://' prefix) of the stores; they are resolved
# through `fs_write` in the cells below.
temp_name = 'esip-qhub/testing/usgs/nwm1km.tmp'    # intermediate store used by rechunker
target_name = 'esip-qhub/testing/usgs/nwm1km.zarr'  # final rechunked store

In [None]:
# Clear out any leftover temporary store from a previous run.
# Check for existence first: removing a missing path raises, which would
# break a fresh run where nothing has been written yet.
if fs_write.exists(temp_name):
    fs_write.rm(temp_name, recursive=True)

In [None]:
# Clear out any previous output store so the rechunk starts from a clean target.
# Check for existence first: removing a missing path raises, which would
# break a fresh run where nothing has been written yet.
if fs_write.exists(target_name):
    fs_write.rm(target_name, recursive=True)

In [18]:
# Key-value mappers over the S3 paths; these are passed to rechunker as the
# temporary and target stores.
temp_store = fs_write.get_mapper(temp_name)
target_store = fs_write.get_mapper(target_name)

In [None]:
# Alternative to the mapper-based stores: zarr FSStore objects.
# Fixed typo: the class is `FSStore` (`FFStore` does not exist and raises AttributeError).
# NOTE(review): these names carry no 's3://' prefix, so FSStore may resolve them
# as local paths -- confirm before using this cell instead of the mapper cell.
temp_store = zarr.storage.FSStore(temp_name)
target_store = zarr.storage.FSStore(target_name)

In [19]:
# Remove the 'crs' variable before rechunking.
# `drop_vars` replaces the deprecated `Dataset.drop`; errors='ignore' keeps the
# cell re-runnable once 'crs' has already been removed.
ds = ds.drop_vars('crs', errors='ignore')

In [20]:
# Number of chunks along time if the full time axis is split into chunks of
# 144/2 = 72 steps (the time chunk size used in the rechunk call below).
a = len(ds.time)/(144/2)

In [21]:
# Number of spatial chunks per time step: grid cells divided by the cells in
# one (96*2) x (132*2) tile (the y/x chunk sizes used in the rechunk call below).
b = (len(ds.x) * len(ds.y))/((96*2)*(132*2))

In [22]:
# Ratio of the time-chunk count to the spatial-chunk count for the target chunking.
a/b

4.640266927083334

In [23]:
#client.close()

In [24]:
#from dask.distributed import Client

In [25]:
#client = Client(threads_per_worker=1)

In [26]:
#client.amm.start()

In [None]:
# Override zarr's module-level default compressor with Zstd at level 9.
# NOTE(review): this assignment runs in the notebook process; confirm it also
# takes effect wherever the output arrays are actually created.
import zarr.storage
from numcodecs import Zstd
zarr.storage.default_compressor = Zstd(level=9)

#### Rechunk!

In [27]:
# Build the rechunking plan for a trial subset: only the first 144 time steps.
# Chunk sizes must be integers, so use floor division for the time chunk
# (144/2 would produce the float 72.0).
target_chunks = {'y': 96*2, 'x': 132*2, 'time': 144//2}
rechunked = rechunk(
    ds.isel(time=slice(0, 144)),
    target_chunks=target_chunks,
    target_store=target_store,
    temp_store=temp_store,
    max_mem='2.8GiB',
)

In [28]:
%%time
rechunked.execute(retries=10)

CPU times: user 5.61 s, sys: 380 ms, total: 5.99 s
Wall time: 2min 23s


<zarr.hierarchy.Group '/'>

  self.scheduler_comm.close_rpc()


In [None]:
# Write consolidated metadata for the rechunked target store.
zarr.consolidate_metadata(target_store)

#### Explore the rechunked dataset

In [None]:
# Reopen the rechunked output from the target store; chunks={} requests
# dask-backed arrays that follow the store's own chunking.
ds2 = xr.open_dataset(target_store, engine='zarr', chunks={})

In [None]:
ds2

In [None]:
ds2.ACCET

In [None]:
# Imported for its side effect: provides the `.hvplot` accessor used in the next cell.
import hvplot.xarray


In [None]:
# Time series of ACCET at the single grid cell y=2000, x=2000 (index positions).
ds2['ACCET'].isel(y=2000, x=2000).hvplot(x='time')