In [24]:
import xarray as xr
import numpy as np
from numpy import pi, sin, cos, arccos, clip, deg2rad
import numpy.ma as ma
from datetime import datetime
import dask
import time

@dask.delayed
def load(dsmapper, plevel):
    """
    Load the non-NaN samples of one level from a zarr store.

    Because of the ``dask.delayed`` decorator, calling this function only
    builds a task; the body runs when the task is passed to ``dask.compute``.

    Parameters
    ----------
    dsmapper
        Store handed to ``xr.open_zarr`` (opened with ``consolidated=True``).
        In this notebook it is the object returned by ``gcs.get_mapper``.
    plevel
        Label selected along the dataset's ``level`` coordinate.

    Returns
    -------
    numpy.ndarray
        Placeholder result: the element-wise sum of DH, LAT, LON and DATENUM
        at the positions where DH is not NaN.
    """

    # chunks=None opens the store without dask-backed arrays.
    ds = xr.open_zarr(dsmapper, consolidated=True, chunks=None).sel(level=plevel)
    data = ds.DH.load().values

    # `data` is already a plain NumPy array, so np.isnan can be applied directly.
    valid = ~np.isnan(data)

    # Named `datenum` (not `time`) so the imported `time` module is not shadowed.
    # NOTE(review): assumes DATENUM, LAT and LON can be indexed by DH's mask — confirm.
    datenum = ds.DATENUM.load().values[valid]
    data, lat, lon = data[valid], ds.LAT.values[valid], ds.LON.values[valid]

    # Placeholder for the real calculation on the data, lat and lon arrays;
    # it is expected to return a single array.
    return data + lat + lon + datenum


def delayed_computations(plevel):
    """
    Build the list of delayed ``load`` tasks for the given level.

    Opens the 1x1 parameter mask from cloud storage, builds masked 2-D
    longitude/latitude grids from it, and creates one delayed ``load`` call
    per unmasked grid point. The ``[:1]`` slices keep only the first unmasked
    point, so the returned list holds at most one task.

    Uses the notebook-level ``gcs`` filesystem object, which must exist by the
    time this function is called.
    """
    # Mappers for the dynamic-height store and for the grid mask.
    dsmapper = gcs.get_mapper('pangeo-argo-eke/data/DynamicHeightData/global_dynamic_height.zarr')
    maskmapper = gcs.get_mapper('pangeo-argo-eke/data/MaskData/global_1x1_parametermask.zarr')

    maskdata = xr.open_dataset(maskmapper, engine="zarr", consolidated=True)
    mask = maskdata.mask.values
    grid_lon = maskdata.longitude.values
    grid_lat = maskdata.latitude.values

    # Expand the 1-D axes to 2-D grids; cells where `1 - mask` is non-zero are masked out.
    lon2d, lat2d = np.meshgrid(grid_lon, grid_lat)
    lon_masked = ma.MaskedArray(lon2d, 1 - mask)
    lat_masked = ma.MaskedArray(lat2d, 1 - mask)

    # Only the first unmasked grid point is used for now.
    first_lats = lat_masked.compressed()[:1]
    first_lons = lon_masked.compressed()[:1]

    # One delayed load per selected grid point; the coordinates themselves
    # are not passed on to `load` yet.
    return [load(dsmapper, plevel) for _ in zip(first_lats, first_lons)]

### Connect to a Dask cluster and set cluster options

In [16]:
import json
import gcsfs

# Parse the JSON credentials file and hand its contents to gcsfs as the token.
# `gcs` is the filesystem object that delayed_computations() uses to build mappers.
with open('pangeo-181919-e7bc5bdaf4d5.json') as f:
    token = json.load(f)
gcs = gcsfs.GCSFileSystem(token=token)

from dask_gateway import GatewayCluster, Gateway
from distributed import Client

# Connect to the Dask Gateway with its default configuration and
# display the clusters it currently reports.
g = Gateway()
g.list_clusters()

[ClusterReport<name=prod.79858275509c4d049c64a929ce4fa6c6, status=RUNNING>]

In [10]:
# Start from the options the gateway exposes and set the per-worker resources.
options = g.cluster_options()
options.worker_cores = 2
options.worker_memory = 4
# Launch a new cluster configured with these options
cluster = g.new_cluster(options)

In [12]:
# Display the clusters the gateway currently reports.
g.list_clusters()

[ClusterReport<name=prod.79858275509c4d049c64a929ce4fa6c6, status=RUNNING>]

In [13]:
# Attach a distributed client to the gateway cluster; the bare name on the
# last line displays the client's summary as the cell output.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: /services/dask-gateway/clusters/prod.79858275509c4d049c64a929ce4fa6c6/status,


In [14]:
# Request one worker for the cluster.
cluster.scale(1)

### Loading Zarr stores from cloud storage

Loading dataset only:

In [None]:
# Value passed to delayed_computations(); load() selects it along the
# dataset's `level` coordinate.
plevel = 4

In [25]:
%%time 
# dask.compute returns a tuple with one entry per argument; the single
# argument here is the list of delayed tasks, so unpack its computed list.
(list_of_results,) = dask.compute(delayed_computations(plevel))

CPU times: user 47.8 ms, sys: 14.6 ms, total: 62.4 ms
Wall time: 4.64 s


#### Scale down and close the cluster

In [None]:
# Scale the cluster down to zero workers.
cluster.scale(0)

In [None]:
# Close this notebook's handle on the cluster.
cluster.close()

In [None]:
# Ask the gateway to shut the cluster down.
# NOTE(review): close() may already have done this, depending on how the cluster was created — confirm.
cluster.shutdown()