In [46]:
import xarray as xr
import numpy as np
from numpy import pi, sin, cos, arccos, clip, deg2rad
import numpy.ma as ma
from datetime import datetime
import dask
import time

@dask.delayed
def load(dsmapper, plevel, chunks=None, consolidated=True):
    """
    Load the non-NaN dynamic-height samples of one level from a zarr store.

    The function is wrapped in ``dask.delayed``, so calling it only builds a
    task; the work described here happens when that task is computed.

    Parameters
    ----------
    dsmapper
        Store object handed to ``xr.open_zarr`` (in this notebook the result
        of ``gcs.get_mapper``).
    plevel
        Label selected along the ``level`` coordinate of the dataset.
    chunks : optional
        Forwarded to ``xr.open_zarr``. Defaults to ``None``, the value that
        was previously hard-coded; pass e.g. ``'auto'`` to compare settings
        without editing this function.
    consolidated : bool, optional
        Forwarded to ``xr.open_zarr``. Defaults to ``True``, the value that
        was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Placeholder result: the element-wise sum of ``DH``, ``LAT``, ``LON``
        and ``DATENUM``, restricted to the entries where ``DH`` is not NaN.
    """

    ds = xr.open_zarr(dsmapper, consolidated=consolidated, chunks=chunks).sel(level=plevel)

    # `.values` already materialises the variable as a NumPy array.
    data = ds.DH.values
    # Mask of valid samples; plain NumPy is sufficient because `data` is an ndarray.
    valid = ~np.isnan(data)

    # Named `datenum` rather than `time` so the imported `time` module is not shadowed.
    datenum = ds.DATENUM.values[valid]
    data, lat, lon = data[valid], ds.LAT.values[valid], ds.LON.values[valid]

    ### Calculation using data-, lat-, and lon- arrays
    ###
    ### Returns one array

    return data + lat + lon + datenum

### Connect to a Dask cluster and set cluster options

In [2]:
import json
import gcsfs

# Parse the JSON credentials file from the working directory and pass the
# resulting dict to gcsfs as the access token.
# NOTE(review): presumably a Google service-account key — keep this file out
# of version control.
with open('pangeo-181919-e7bc5bdaf4d5.json') as f:
    token = json.load(f)
gcs = gcsfs.GCSFileSystem(token=token)

from dask_gateway import GatewayCluster, Gateway
from distributed import Client

# Create the gateway handle (no explicit address is given) and list the
# clusters that already exist; the recorded output below is an empty list.
g = Gateway()
g.list_clusters()

[]

In [3]:
# Start from the gateway's option set and size each worker before the
# cluster is requested.
options = g.cluster_options()
options.worker_cores = 2
options.worker_memory = 4  # NOTE(review): unit is not shown here (presumably GiB) — confirm
# Launch a new cluster configured with the options above.
cluster = g.new_cluster(options)

In [4]:
# List the gateway's clusters again; the recorded output now shows one RUNNING cluster.
g.list_clusters()

[ClusterReport<name=prod.fb8ffa5a4d1c4c4a9d847760eb6840c6, status=RUNNING>]

In [5]:
# Attach a distributed client to the gateway cluster; the bare `client`
# expression on the last line displays its summary.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: /services/dask-gateway/clusters/prod.fb8ffa5a4d1c4c4a9d847760eb6840c6/status,


In [6]:
# Request a single worker for the cluster.
cluster.scale(1)

### Loading Zarr files from cloud storage

Loading dataset only:

In [38]:
# Label that `load` passes to `.sel(level=...)`.
plevel = 4
# Path of the zarr store, and a mapper onto it created through the `gcs`
# filesystem object; the mapper is what gets handed to xarray.
dspath='pangeo-argo-eke/data/DynamicHeightData/global_dynamic_height_chunked.zarr'
dsmapper = gcs.get_mapper(dspath)

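chunks=None (each variant below is timed with the identical call; the setting itself appears to have been changed by editing `load` and re-running its cell):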

In [39]:
%%time 
# Build the delayed `load` task and execute it. `dask.compute` returns a
# tuple, so the loaded array is `results[0]`.
results = dask.compute( load(dsmapper, plevel) )

CPU times: user 28.7 ms, sys: 5.68 ms, total: 34.4 ms
Wall time: 1.79 s


chunks='auto':

In [43]:
%%time 
# Same call as the previous timing cell.
# NOTE(review): presumably `load` was re-defined with chunks='auto' before
# this run, since the call itself does not select the setting — confirm.
results = dask.compute( load(dsmapper, plevel) )

CPU times: user 21.1 ms, sys: 18.3 ms, total: 39.4 ms
Wall time: 1.92 s


consolidated=False:

In [45]:
%%time
# Same call as the previous timing cells.
# NOTE(review): presumably `load` was re-defined with consolidated=False
# before this run, since the call itself does not select the setting — confirm.
results = dask.compute( load(dsmapper,plevel) )

CPU times: user 29.4 ms, sys: 10.8 ms, total: 40.2 ms
Wall time: 4.76 s


Dataset looks like this:

In [40]:
# Alternative store path without the "_chunked" suffix, kept for reference:
#dspath='pangeo-argo-eke/data/DynamicHeightData/global_dynamic_height.zarr'
# Re-creates `dspath` and `dsmapper` with the same values as the earlier
# loading cell.
dspath='pangeo-argo-eke/data/DynamicHeightData/global_dynamic_height_chunked.zarr'
dsmapper = gcs.get_mapper(dspath)

In [41]:
# Open the store with xarray's default chunking and display the dataset;
# the recorded output lists its variables as dask-backed arrays.
xr.open_zarr(dsmapper, consolidated=True)

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,465.81 MiB,16.06 MiB
Shape,"(29, 2105319)","(1, 2105319)"
Dask graph,29 chunks in 2 graph layers,29 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 465.81 MiB 16.06 MiB Shape (29, 2105319) (1, 2105319) Dask graph 29 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  29,

Unnamed: 0,Array,Chunk
Bytes,465.81 MiB,16.06 MiB
Shape,"(29, 2105319)","(1, 2105319)"
Dask graph,29 chunks in 2 graph layers,29 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,465.81 MiB,16.06 MiB
Shape,"(29, 2105319)","(1, 2105319)"
Dask graph,29 chunks in 2 graph layers,29 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 465.81 MiB 16.06 MiB Shape (29, 2105319) (1, 2105319) Dask graph 29 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  29,

Unnamed: 0,Array,Chunk
Bytes,465.81 MiB,16.06 MiB
Shape,"(29, 2105319)","(1, 2105319)"
Dask graph,29 chunks in 2 graph layers,29 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


#### Scaling down and closing cluster

In [47]:
# Scale the cluster down to zero workers before closing it.
cluster.scale(0)

In [48]:
# Close this cluster object.
cluster.close()

In [49]:
# Explicitly shut the cluster down.
# NOTE(review): this may be redundant after `cluster.close()` — confirm
# against the dask-gateway documentation.
cluster.shutdown()