In [1]:
import dask
from dask_kubernetes import KubeCluster
import numpy as np

In [2]:
# Specify a remote deployment using a load balancer
dask.config.set({"kubernetes.scheduler-service-type": "LoadBalancer"})

<dask.config.set at 0x7fb6c02efa90>

In [3]:
# Start the cluster from the spec in worker-spec.yaml, in the 'dask' namespace, with
# deploy_mode='remote' (the log below reports a scheduler pod being created on the cluster).
cluster = KubeCluster.from_yaml('worker-spec.yaml', namespace='dask', deploy_mode='remote')

Creating scheduler pod on cluster. This may take some time.


In [4]:
# Turn on adaptive scaling, bounded by the given minimum (1) and maximum (10).
cluster.adapt(minimum=1, maximum=10)

distributed.deploy.adaptive - INFO - Adaptive scaling started: minimum=1 maximum=10


<distributed.deploy.adaptive.Adaptive at 0x7fb644bd5f10>

In [9]:
# Example usage
from dask.distributed import Client
import dask.array as da

# Connect Dask to the cluster
client = Client(cluster)


In [10]:
# Show the handshake info of the client's scheduler connection
# (compression, Python version and pickle protocol, as seen in the output below).
client.scheduler_comm.comm.handshake_info()

{'compression': 'lz4', 'python': (3, 8, 0), 'pickle-protocol': 5}

In [11]:
# Create a large array of ones and calculate its mean
array = da.ones((1000, 1000, 1000))
print(array.mean().compute())  # Should print 1.0

1.0


So now we know the cluster is doing ok :)

In [16]:
class Counter:
    """Stateful tally that can only grow by explicit calls.

    It is submitted with ``actor=True`` further down in this notebook, so a
    single instance holds the running total.
    """

    # Class-level default; ``__init__`` gives every instance its own ``n``.
    n = 0

    def __init__(self):
        self.n = 0

    def increment(self):
        """Raise the total by one and return the new total."""
        return self.add(1)

    def add(self, x):
        """Raise the total by ``x`` and return the new total."""
        self.n += x
        return self.n

    def value(self):
        """Return the current total without modifying it."""
        return self.n


future = client.submit(Counter, actor=True)  # Create a Counter on a worker
counter = future.result()  # Wait for creation; the result is the Actor handle displayed below

In [17]:
counter

<Actor: Counter, key=Counter-2bf3a1c4-6459-40c4-b7db-0dc78c79c45c>

In [22]:
# The call returns an ActorFuture (see output); use .result() on it to read the returned count.
counter.increment()

<ActorFuture>

In [23]:
counter.value().result()

3

In [57]:
import dask.bag as db
# Bag holding the integers 1..99, split across 10 partitions
b = db.from_sequence(range(1,100), npartitions=10)
import time

In [64]:
def inc(x):
    """Sleep for ``x`` seconds, add ``x`` to the shared ``counter`` actor, return ``x``.

    Relies on the notebook-level names ``time`` and ``counter`` (the Counter
    actor handle created earlier).

    Args:
        x: Number of seconds to sleep and the amount added to the counter.

    Returns:
        ``x`` unchanged. The ActorFuture produced by ``counter.add`` is
        deliberately not returned (see the note in the body).
    """
    time.sleep(x)
    # Note: the counter (above) is serializable, however the future we get back from it is not.
    # This is likely because the future contains a network connection to the actor, so we don't
    # keep or return that future. If we wanted to, we could block on it here with
    # counter.add(x).result().
    counter.add(x)
    return x
# Build a bag that applies inc to each element of b (the repr below shows it keeps 10 partitions).
j = b.map(inc)
j

dask.bag<inc, npartitions=10>

In [59]:
# Turn the bag into a list of Delayed objects (the output below shows 10, matching npartitions=10).
f = j.to_delayed()
f

[Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 0)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 1)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 2)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 3)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 4)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 5)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 6)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 7)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 8)),
 Delayed(('inc-b6916ff9c25b55f9de47814775298e39', 9))]

In [62]:
c = client
# Client.submit expects a callable followed by its arguments, so unpacking the
# list of Delayed objects into it does not execute the bag's partitions.
# Client.compute accepts the Delayed objects themselves and, for a list input,
# returns a list of Futures. Keep a reference so the work is not released.
futures = c.compute(f)
futures

In [63]:
counter.value().result()

KeyboardInterrupt: 

In [None]:
counter.value().result()

In [None]:
counter.value().result()

In [None]:
f[0]

In [None]:
# Create a large array of ones and calculate its mean
array = da.ones((1000, 1000, 1000))
print(array.mean().compute())  # Should print 1.0