# Playing with Dask
Nov 2025

In [1]:
# Note: install the distributed scheduler first: pip install "dask[distributed]"
from dask.distributed import Client

## 1) Setting up a client

In [2]:
# Start a Dask client.
# When no scheduler address is given, Client creates a ``LocalCluster``
# and forwards any extra keyword arguments to it.
client = Client(
    n_workers=4,
    threads_per_worker=1,
)

In [3]:
# Bare expression: the notebook renders the client's summary (cluster type, dashboard URL, workers).
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 4,Total memory: 7.57 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:38417,Workers: 0
Dashboard: http://127.0.0.1:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:41535,Total threads: 1
Dashboard: http://127.0.0.1:35239/status,Memory: 1.89 GiB
Nanny: tcp://127.0.0.1:37869,
Local directory: /tmp/dask-scratch-space/worker-e8p16bkw,Local directory: /tmp/dask-scratch-space/worker-e8p16bkw

0,1
Comm: tcp://127.0.0.1:39729,Total threads: 1
Dashboard: http://127.0.0.1:46811/status,Memory: 1.89 GiB
Nanny: tcp://127.0.0.1:42533,
Local directory: /tmp/dask-scratch-space/worker-et3l01q5,Local directory: /tmp/dask-scratch-space/worker-et3l01q5

0,1
Comm: tcp://127.0.0.1:44207,Total threads: 1
Dashboard: http://127.0.0.1:39065/status,Memory: 1.89 GiB
Nanny: tcp://127.0.0.1:35145,
Local directory: /tmp/dask-scratch-space/worker-m1tr2obd,Local directory: /tmp/dask-scratch-space/worker-m1tr2obd

0,1
Comm: tcp://127.0.0.1:35559,Total threads: 1
Dashboard: http://127.0.0.1:41225/status,Memory: 1.89 GiB
Nanny: tcp://127.0.0.1:41547,
Local directory: /tmp/dask-scratch-space/worker-6be9atg7,Local directory: /tmp/dask-scratch-space/worker-6be9atg7


## 2) Using delayed: 

`delayed` doesn't execute the function until we call `compute()`. Instead, it builds a **task graph** to work out how to compute the result efficiently.

In [4]:
import time

In [5]:
def add(x, y, delay=2):
    """Return ``x + y`` after sleeping to simulate a slow task.

    Args:
        x: Left operand.
        y: Right operand.
        delay: Seconds to sleep before returning. Defaults to 2, the
            previously hard-coded value; pass 0 to skip the wait.

    Returns:
        The value of ``x + y``.
    """
    time.sleep(delay)
    return x + y

def multiply(x, y, delay=2):
    """Return ``x * y`` after sleeping to simulate a slow task.

    Args:
        x: Left operand.
        y: Right operand.
        delay: Seconds to sleep before returning. Defaults to 2, the
            previously hard-coded value; pass 0 to skip the wait.

    Returns:
        The value of ``x * y``.
    """
    time.sleep(delay)
    return x * y

def final_sum(a, b, c, delay=2):
    """Return ``a + b + c`` after sleeping to simulate a slow task.

    Args:
        a: First term.
        b: Second term.
        c: Third term.
        delay: Seconds to sleep before returning. Defaults to 2, the
            previously hard-coded value; pass 0 to skip the wait.

    Returns:
        The value of ``a + b + c``.
    """
    time.sleep(delay)
    return a + b + c

In [6]:
# Baseline: monitor execution time without Dask.
# Plain Python calls run one after another, so with about 2 seconds per
# function the three calls take about 6 seconds in total.

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock time and can jump if the clock is adjusted.
start = time.perf_counter()
x = add(2, 3)
y = multiply(4, 5)
z = final_sum(x, y, 10)
end = time.perf_counter()

print(f"Result: {z}")
print(f"Execution time: {end-start} seconds")

Result: 35
Execution time: 6.001715898513794 seconds


In [7]:
# Let's parallelize it with dask delayed
from dask import delayed

In [8]:
# This time add() and multiply() should run in parallel, since they do not depend on each other.

# perf_counter() is a monotonic clock intended for measuring elapsed time
start = time.perf_counter()
a = delayed(add)(2, 3)  # lazy: only records the call, nothing runs yet
b = delayed(multiply)(4, 5)
c = delayed(final_sum)(a, b, 10)

# Don't forget compute(): until it is called, only the task graph exists.
# Calling it on c is enough -- c depends on a and b, so Dask works out that they must run first.
result = c.compute()

end = time.perf_counter()

# This time we expect about 4 seconds: add/multiply overlap, then final_sum runs.
# Print `result` (the value Dask computed), not `z` left over from the sequential cell.
print(f"Result: {result}")
print(f"Execution time: {end-start} seconds")

Result: 35
Execution time: 4.0573742389678955 seconds


## 3) Futures: dynamic, parallel real-time execution

Tasks are submitted dynamically to a **scheduler**, which launches them asynchronously in the background.

In [15]:
# Here no task graph is built up front: the two tasks are submitted to the scheduler, which runs them in parallel.
# Note: if you relaunch the computation without changing the input params, Dask will reuse
# the results it has already computed, so a re-run can return almost instantly.

# perf_counter() is a monotonic clock intended for measuring elapsed time
start = time.perf_counter()

future1 = client.submit(add, 4, 3)       # runs in the background
future2 = client.submit(multiply, 5, 9)  # also runs in the background, in parallel

result1 = future1.result()  # blocks until the result is ready
intermediate_time = time.perf_counter()
result2 = future2.result()  # blocks until the result is ready

end = time.perf_counter()

print(f"Add Result: {result1}")
print(f"Mult Result: {result2}")
print(f"Intermediate time: {intermediate_time-start} seconds")
print(f"Execution time: {end-start} seconds")

Add Result: 7
Mult Result: 45
Intermediate time: 0.010719537734985352 seconds
Execution time: 2.0255799293518066 seconds


In [16]:
# Creating dependencies between futures: pass the Future objects themselves
# (not their already-fetched results) so the scheduler tracks the dependency
# and supplies their values to final_sum once both upstream tasks are done.
future3 = client.submit(final_sum, future1, future2, 10)
print(f"Final sum: {future3.result()}")

Final sum: 62
