In [1]:
import xarray as xr
import numpy as np
from numpy import pi, sin, cos, arccos, clip, deg2rad
import numpy.ma as ma
from datetime import datetime
import dask
import time
import gcsfs
import os


@dask.delayed
def load(dsmapper, plevel):
    """
    Load the ``u`` variable at one level from a consolidated zarr store.

    The function is wrapped in ``dask.delayed``: calling it only builds a
    task, and the body runs when that task is computed.

    Parameters
    ----------
    dsmapper
        Store object handed to ``xr.open_zarr``.
    plevel
        Value to select along the ``level`` coordinate.

    Returns
    -------
    numpy.ndarray
        The non-NaN values of ``u`` at ``plevel``. Boolean-mask indexing
        returns them as a flat 1-D array.
    """
    # Open the store in a context manager so the dataset is closed once the
    # needed arrays have been pulled into memory.
    with xr.open_zarr(dsmapper, consolidated=True, chunks=None) as ds:
        data = ds.u.sel(level=plevel).load().values
        lat, lon = ds.latitude.values, ds.longitude.values

    # `data` is a plain ndarray at this point, so np.isnan applies directly;
    # no xr.apply_ufunc wrapper is needed.
    valid = ~np.isnan(data)
    data = data[valid]

    ### Calculation using data-, lat-, and lon- arrays
    ###
    ### Returns one array

    return data


# Upload dataset to pangeo bucket
ds = xr.tutorial.open_dataset("eraint_uvz")  # load example data
# Re-chunk before writing: one level per chunk, two months per chunk.
ds_chunked = ds.chunk({"level":1, "longitude":480, "latitude":241, "month":2})
# Scratch location comes from the environment (KeyError if it is not set).
BUCKET = os.environ['PANGEO_SCRATCH']
gcs = gcsfs.GCSFileSystem()
mapper = gcs.get_mapper(f'{BUCKET}/dataset.zarr')
# mode="w" overwrites a store left behind by an earlier (e.g. interrupted)
# run; the default mode refuses to write when the store already exists,
# which breaks Restart & Run All.
ds_chunked.to_zarr(mapper, consolidated=True, mode="w")  # write data

<xarray.backends.zarr.ZarrStore at 0x7f3a442a5e70>

### Connect to a Dask cluster and set cluster options

In [2]:
from dask_gateway import GatewayCluster, Gateway
from distributed import Client
# Gateway() is called without arguments; the gateway address/auth are not
# given here (presumably taken from the dask-gateway configuration — confirm).
g = Gateway()
# Last expression of the cell: displays the clusters the gateway reports.
g.list_clusters()

[ClusterReport<name=prod.b31750c8507544bc80653bf34f472ace, status=RUNNING>]

In [3]:
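# Optional alternative: start a new cluster with custom options instead of
# connecting to an existing one (uncomment to use).
#options = g.cluster_options()
#options.worker_cores = 2; options.worker_memory = 4
# Create a cluster with those options
#cluster = g.new_cluster(options)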

In [4]:
# Connect to the first cluster the gateway reports. When none is listed,
# fail with an actionable message instead of a bare IndexError.
active_clusters = g.list_clusters()
if not active_clusters:
    raise RuntimeError(
        "No active dask-gateway cluster found; "
        "create one first, e.g. with g.new_cluster()."
    )
cluster = g.connect(active_clusters[0].name)

In [6]:
# Attach a distributed Client to the gateway cluster.
client = Client(cluster)
# Last expression of the cell: renders the client summary.
client

0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: /services/dask-gateway/clusters/prod.b31750c8507544bc80653bf34f472ace/status,


In [7]:
#cluster.scale(2)

### Loading Zarr stores from cloud storage

Loading dataset only:

In [9]:
# Value selected along the dataset's `level` coordinate inside load().
plevel = 500

In [10]:
%%time 
# load() is a dask.delayed function, so load(mapper, plevel) only builds a
# task; dask.compute executes it. compute returns a tuple with one entry per
# argument, hence the [0] to unwrap the single result.
list_of_results = dask.compute(load(mapper, plevel))[0]

CPU times: user 11 ms, sys: 4.59 ms, total: 15.6 ms
Wall time: 419 ms


#### Scaling down and closing cluster

In [11]:
# Request zero workers before tearing the cluster down.
cluster.scale(0)

In [12]:
# Close this cluster handle.
# NOTE(review): whether this also stops the remote cluster depends on
# dask-gateway's shutdown-on-close setting — confirm.
cluster.close()

In [13]:
# Ask the gateway to shut the cluster down.
# NOTE(review): this runs after cluster.close() in the previous cell; verify
# that shutdown() still takes effect on an already-closed cluster object,
# otherwise call shutdown() instead of close().
cluster.shutdown()

In [15]:
# Remove only the store this notebook wrote. Target its path explicitly
# rather than the first entry of the bucket listing, which may be an
# unrelated object in the scratch bucket.
gcs.rm(f'{BUCKET}/dataset.zarr', recursive=True)