In [1]:
from time import sleep

from dask.distributed import Client
from dask import delayed

In [2]:
client = Client(n_workers=8)

In [None]:
# Example 1: build a small task graph with dask.delayed

In [3]:
def inc(x):
    """Return ``x + 1`` after a one-second pause that simulates slow work."""
    sleep(1)  # artificial latency so the benefit of parallelism is visible
    incremented = x + 1
    return incremented

def add(x, y):
    """Return ``x + y`` after a one-second pause that simulates slow work."""
    sleep(1)  # artificial latency so the benefit of parallelism is visible
    combined = x + y
    return combined

In [4]:
%%time
# This runs immediately: it only builds the task graph, nothing is executed yet.

x = delayed(inc)(1)     # lazy inc(1)
y = delayed(inc)(2)     # lazy inc(2), independent of x
z = delayed(add)(x, y)  # lazy add() that depends on both x and y

CPU times: user 396 µs, sys: 92 µs, total: 488 µs
Wall time: 471 µs


In [8]:
%%time
# This actually runs our computation. Because a distributed Client was created
# above, the graph executes on that local cluster's workers rather than on
# dask's default local thread pool.

z.compute()

CPU times: user 288 ms, sys: 38.3 ms, total: 327 ms
Wall time: 2.03 s


5

In [None]:
# Example 2: parallelise a for-loop with dask.delayed

In [9]:
data = list(range(1, 9))  # the integers 1 through 8

In [12]:
%%time

# Build one lazy inc() task per input value. A comprehension keeps the loop
# variable local to the expression, so the Delayed objects `x` and `y` created
# in Example 1 are not silently overwritten by this cell.
results = [delayed(inc)(value) for value in data]

# sum() is wrapped in delayed() too, so the reduction is part of the graph.
total = delayed(sum)(results)
print("Before computing:", total)  # Let's see what type of thing total is
result = total.compute()
print("After computing :", result)  # After it's computed

Before computing: Delayed('sum-a84c8c6b-871b-4b77-8e2d-e4770e3ee3ba')
After computing : 44
CPU times: user 159 ms, sys: 21.5 ms, total: 180 ms
Wall time: 1.03 s


In [16]:
# Close this client before the next example creates its own.
client.close()

In [None]:
# Example 3: the futures API (submit, compute, gather)

In [17]:
from dask.distributed import Client
# Start a fresh local cluster with four workers and connect a client to it.
c = Client(n_workers=4)
# Display the cluster object (rendered as a widget in the notebook).
c.cluster

  arg_val = next(arg_vals)
Task was destroyed but it is pending!
task: <Task pending name='Task-229722' coro=<_needs_document_lock.<locals>._needs_document_lock_wrapper() running at /Users/martin/.local/share/virtualenvs/private_playground-5JPsKkiQ/lib/python3.9/site-packages/bokeh/server/session.py:51> cb=[multi_future.<locals>.callback() at /Users/martin/.local/share/virtualenvs/private_playground-5JPsKkiQ/lib/python3.9/site-packages/tornado/gen.py:520]>


VBox(children=(HTML(value='<h2>LocalCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    …

In [18]:
def inc(x):
    """Return ``x`` incremented by one (no artificial delay in this version)."""
    incremented = x + 1
    return incremented

# submit() schedules inc(1) on the cluster and hands back a Future.
fut = c.submit(inc, 1)
# Display the future.
fut

In [19]:
# Display the future again to check on it.
fut

In [20]:
# Fetch the result back into the notebook; this blocks if fut is not ready yet.
c.gather(fut)
# For a single future, the equivalent call is:
# fut.result()

2

In [43]:
# Some trivial work that takes time
# repeated from the Distributed chapter.

from dask import delayed
import time

def inc(x):
    """Return ``x + 1`` after a five-second pause that simulates slow work."""
    time.sleep(5)  # stand-in for an expensive computation
    incremented = x + 1
    return incremented

def dec(x):
    """Return ``x - 1`` after a three-second pause that simulates slow work."""
    time.sleep(3)  # stand-in for an expensive computation
    decremented = x - 1
    return decremented

def add(x, y):
    """Return ``x + y`` after a seven-second pause that simulates slow work."""
    time.sleep(7)  # stand-in for an expensive computation
    combined = x + y
    return combined

# Build the graph lazily: inc(1) and dec(2) do not depend on each other,
# while add() needs both of their results.
x = delayed(inc)(1)
y = delayed(dec)(2)
total = delayed(add)(x, y)

In [44]:
# Notice the difference from total.compute(): c.compute() hands the graph to
# the cluster and returns a Future, so this cell completes immediately.
fut = c.compute(total)
fut

In [45]:
c.gather(fut) # blocks until the result is ready, then returns it



3

In [47]:
# Close the Example 3 client.
c.close()

In [1]:
# Example 4: External workers

In [1]:
import os
import time

import numpy as np
import pandas as pd
from dask import delayed
from dask.distributed import Client, wait, progress

In [2]:
# Address of the externally started scheduler. The default is the address this
# notebook was written against; set the DASK_SCHEDULER_ADDRESS environment
# variable to run the example against a different cluster without editing code.
SCHEDULER_ADDRESS = os.environ.get("DASK_SCHEDULER_ADDRESS", "192.168.0.10:8786")
c = Client(address=SCHEDULER_ADDRESS)

In [3]:
# Display the client summary (scheduler address, dashboard link, cluster resources).
c

0,1
Client  Scheduler: tcp://192.168.0.10:8786  Dashboard: http://192.168.0.10:8787/status,Cluster  Workers: 16  Cores: 16  Memory: 15.64 GiB


In [7]:
def inc(x):
    """Return ``x`` incremented by one (no artificial delay in this version)."""
    incremented = x + 1
    return incremented



In [8]:
# Submit inc(1) to the cluster behind client `c`; a Future is returned.
fut = c.submit(inc, 1)

In [9]:
# Show a progress widget for the submitted future.
progress(fut)

VBox()

In [28]:
def inc(x):
    """Return ``x + 1`` after a five-second pause that simulates slow work."""
    time.sleep(5)  # stand-in for an expensive computation
    incremented = x + 1
    return incremented

def dec(x):
    """Return ``x - 1`` after a three-second pause that simulates slow work."""
    time.sleep(3)  # stand-in for an expensive computation
    decremented = x - 1
    return decremented

def add(x, y):
    """Return ``x + y`` after a seven-second pause that simulates slow work."""
    time.sleep(7)  # stand-in for an expensive computation
    combined = x + y
    return combined

# Build four independent add(inc(a), dec(b)) sub-graphs and chain their results
# with `+`. Driving this from a list of input pairs replaces the hand-numbered
# x1/y1/total_1 ... variables, so adding or removing a pair is a one-line change.
pairs = [(1, 2), (11, 21), (12, 22), (13, 23)]

total_3 = None
for a, b in pairs:
    term = delayed(add)(delayed(inc)(a), delayed(dec)(b))
    # Accumulate left to right, the same shape as writing
    # ((term0 + term1) + term2) + term3 by hand.
    total_3 = term if total_3 is None else total_3 + term

In [29]:
# Hand the whole graph to the cluster; a Future for the final sum is returned.
fut = c.compute(total_3)
fut

In [30]:
# Wait for the graph to finish and fetch the final sum.
c.gather(fut)

105

In [6]:
def counter(x):
    """Count from zero up to ``x`` one step at a time and return the count.

    The explicit Python loop is deliberate: it is used as a CPU-bound workload.
    For ``x <= 0`` the loop body never runs and 0 is returned.
    """
    ticks = 0
    for _ in range(x):
        ticks = ticks + 1
    return ticks

In [44]:
# Six CPU-bound counter() tasks whose results are added together on the cluster.
# The task count and loop size are named once instead of being copy-pasted.
n_tasks = 6
count_to = 200000000

# Each delayed(counter)(...) call creates its own task even though the
# arguments are identical (delayed calls are not treated as pure by default).
parts = [delayed(counter)(count_to) for _ in range(n_tasks)]

# Chain the parts left to right: ((p0 + p1) + p2) + ...
combined = parts[0]
for part in parts[1:]:
    combined = combined + part

fut = c.compute(combined)
c.gather(fut)

1200000000

In [7]:
%%time

mul = 48  # number of independent counter() tasks to launch
# One lazy counter() task per slot; the loop index itself is not needed.
tasks = [delayed(counter)(200000000) for _ in range(mul)]
# Passing a list to c.compute() yields one future per task.
fut = c.compute(tasks)
c.gather(fut)

CPU times: user 75.4 ms, sys: 9.79 ms, total: 85.2 ms
Wall time: 58.1 s


[200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000,
 200000000]

In [36]:
%%time

mul = 16
# Note: submit() assumes pure functions by default, so calls with identical
# arguments would share a single key; the `+ i` keeps every task distinct.
tasks = [c.submit(counter, int(2e8) + i) for i in range(mul)]
wait(tasks)  # block until every future has finished

# Fetch all results with one gather() call instead of one result() per future.
mysum = sum(c.gather(tasks))
mysum

CPU times: user 42.1 ms, sys: 7.35 ms, total: 49.5 ms
Wall time: 14.3 s


3200000120

In [63]:
def bench(size):
    """Return the mean of ``size`` random integers drawn from ``[0, 100)``.

    The values come from ``np.random.randint`` (not seeded here, so the result
    varies between calls) and are averaged via a one-column DataFrame.
    """
    samples = np.random.randint(0, 100, size=(size, 1))
    frame = pd.DataFrame(samples, columns=["A"])
    return frame["A"].mean()

In [64]:
%%time

mul = 64
# Note: submit() assumes pure functions by default, so calls with identical
# arguments would share a single key; the `+ i` keeps every task distinct.
tasks = [c.submit(bench, (int(1e7) + i)) for i in range(mul)]
wait(tasks)  # block until every future has finished

# Fetch all results with one gather() call instead of one result() per future.
mysum = sum(c.gather(tasks))
mysum

CPU times: user 100 ms, sys: 15.8 ms, total: 116 ms
Wall time: 2.41 s


3168.0866766753143

In [65]:
# Close the client connection to the external cluster.
c.close()

In [None]:
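# Start the scheduler via the CLI (it listens on port 8786 by default):
# dask scheduler

# Start workers via the CLI, pointing each one at the scheduler's address:
# dask worker tcp://192.168.0.10:8786

# TODO: wrap these commands in a shell script?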