In [40]:
import xarray as xr
import numpy as np
from numpy import pi, sin, cos, arccos, clip, deg2rad
import numpy.ma as ma
from datetime import datetime
import dask
import time

@dask.delayed
def load(dsmapper):
    """
    Load the anomaly values and their coordinates from a zarr store.

    Parameters
    ----------
    dsmapper
        Store handed to ``xr.open_zarr`` (opened with consolidated metadata).

    Returns
    -------
    None
        Placeholder. The calculation on the loaded arrays is not implemented
        yet, so only the cost of opening and loading the data is exercised.
    """
    ds = xr.open_zarr(dsmapper, consolidated=True, chunks='auto')

    # First column along the second axis of ``Anomalies`` as a plain ndarray.
    anomalies = ds.Anomalies.load().values[:, 0]

    # Boolean mask of the non-NaN entries. ``anomalies`` is already a NumPy
    # array, so ``np.isnan`` can be applied directly.
    valid = ~np.isnan(anomalies)

    # Apply the same mask to the values and to every coordinate.
    # The time array is named ``obs_time`` so it does not shadow the imported
    # ``time`` module inside this function.
    data = anomalies[valid]
    lat = ds.latitude.load().values[valid]
    lon = ds.longitude.load().values[valid]
    obs_time = ds.time.load().values[valid]

    ### Calculation using data-, lat-, lon-, and time-arrays
    ###
    ### Returns one array
    return None
    # return data, lat, lon, obs_time

### Connect to a Dask cluster and set the cluster options

In [3]:
import json
import gcsfs

# Parse the JSON key file from the working directory and pass its contents
# to gcsfs as the access token (presumably a service-account key — confirm).
# NOTE(review): the key-file name is hardcoded; make sure the file itself is
# never committed or shared together with the notebook.
with open('pangeo-181919-e7bc5bdaf4d5.json') as f:
    token = json.load(f)
gcs = gcsfs.GCSFileSystem(token=token)

from dask_gateway import GatewayCluster, Gateway
from distributed import Client

# Handle to the dask-gateway server. No address is passed, so the location
# presumably comes from the dask-gateway configuration — confirm.
g = Gateway()
#g.list_clusters()

In [4]:
# Fetch the options object offered by the gateway and size each worker.
options = g.cluster_options()
options.worker_cores = 2
options.worker_memory = 4
# Start a new cluster configured with these options.
cluster = g.new_cluster(options)

In [5]:
# Show the clusters known to the gateway.
g.list_clusters()

[ClusterReport<name=prod.fbded122d1854cd1a0243d9421dc55e1, status=RUNNING>]

In [6]:
# Attach a distributed client to the gateway cluster.
client = Client(cluster)
# Bare last expression: display the client summary.
client

0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: /services/dask-gateway/clusters/prod.fbded122d1854cd1a0243d9421dc55e1/status,


In [7]:
# Request a single worker for the timing runs.
cluster.scale(1)

### Loading the data, lat, lon, and time arrays from cloud storage

Loading dataset only:

In [8]:
# Level index embedded in the store name (presumably a pressure level — confirm).
plevel = 4
# Bucket path of the zarr store for that level.
dspath = (
    'pangeo-argo-eke/global/mean_and_anomalies/'
    'with_pressure_coordinate/readable_ws_and_NHarm/'
    f'mean_and_anomalies_global_ws500_plevel{plevel}.zarr'
)
# Key-value view of the store, used when opening it with xarray.
dsmapper = gcs.get_mapper(dspath)

#### Alternatives:

In [26]:
%%time 
# Evaluate the delayed load. dask.compute returns a tuple, so `results`
# is a 1-tuple holding load's return value.
results = dask.compute( load(dsmapper) )

CPU times: user 9.37 ms, sys: 792 µs, total: 10.2 ms
Wall time: 1.58 s


In [39]:
%%time 
# Repeat of the same timed call; `results` is overwritten with a new 1-tuple.
results = dask.compute( load(dsmapper) )

CPU times: user 9.96 ms, sys: 0 ns, total: 9.96 ms
Wall time: 1.54 s


In [45]:
%%time 
# Repeat of the same timed call; `results` is overwritten with a new 1-tuple.
results = dask.compute( load(dsmapper) )

CPU times: user 8.83 ms, sys: 58 µs, total: 8.89 ms
Wall time: 1.54 s


Dataset looks like this:

In [46]:
# Open the store without calling .load() so the cell displays the dataset's repr.
xr.open_zarr(dsmapper, consolidated=True)

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319,) (2105319,) Dask graph 1 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",2105319  1,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319,)","(2105319,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319, 1)","(2105319, 1)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319, 1) (2105319, 1) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1  2105319,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319, 1)","(2105319, 1)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319, 1)","(2105319, 1)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.06 MiB 16.06 MiB Shape (2105319, 1) (2105319, 1) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1  2105319,

Unnamed: 0,Array,Chunk
Bytes,16.06 MiB,16.06 MiB
Shape,"(2105319, 1)","(2105319, 1)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


#### Scaling down and closing cluster

In [47]:
# Scale the cluster down to zero workers.
cluster.scale(0)

In [None]:
# Close the cluster object held by this notebook.
cluster.close()

In [None]:
# NOTE(review): likely redundant when run after cluster.close() — confirm
# against the dask-gateway GatewayCluster documentation.
cluster.shutdown()