In [1]:
import pandas as pd
import dask
import dask.dataframe as dd
import dask.array as da
import numpy as np
import os
import glob
import time
import dask
import dask.array as da
import sys
import time

# Dask Delayed

### With decorators

In [2]:
@dask.delayed
def slow_increment(x):
    """Return ``x + 1`` after pausing for one second.

    The decorator wraps the function with ``dask.delayed``.
    """
    time.sleep(1)  # stand-in for an expensive computation
    incremented = x + 1
    return incremented

@dask.delayed
def add_nums(x, y):
    """Return ``x + y`` after pausing for one second.

    The decorator wraps the function with ``dask.delayed``.
    """
    time.sleep(1)  # stand-in for an expensive computation
    total = x + y
    return total

In [3]:
# Four increments feed two pairwise sums, which feed one final sum.
z = add_nums(
    add_nums(slow_increment(1), slow_increment(2)),
    add_nums(slow_increment(3), slow_increment(4)),
)

In [4]:
# Draw the task graph for z (shown below as a Cytoscape widget).
z.visualize()

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

In [5]:
%%time
# Run the graph. It holds seven tasks that each sleep for 1 s, yet the
# recorded wall time below is about 3.5 s.
z.compute()

CPU times: user 245 ms, sys: 45.3 ms, total: 290 ms
Wall time: 3.56 s


14

### Without decorators

In [6]:
def slow_increment(x):
    """Return ``x + 1`` after pausing for one second (plain, undecorated version)."""
    time.sleep(1)  # stand-in for an expensive computation
    incremented = x + 1
    return incremented

def add_nums(x, y):
    """Return ``x + y`` after pausing for one second (plain, undecorated version)."""
    time.sleep(1)  # stand-in for an expensive computation
    total = x + y
    return total

In [7]:
ds = dask.delayed(slow_increment)
da = dask.delayed(add_nums)

In [8]:
z = da(da(ds(1), ds(2)), da(ds(3), ds(4)))

In [9]:
# Draw the task graph for z (shown below as a Cytoscape widget).
z.visualize()

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

In [10]:
# Evaluate the graph; the result (14) matches the decorator version.
z.compute()

14

### Without decorators and without wrapping all functions

In [11]:
def slow_increment(x):
    """Sleep for one second, then return ``x + 1`` (plain function, no dask wrapper)."""
    time.sleep(1)  # stand-in for an expensive computation
    incremented = x + 1
    return incremented

def add_nums(x, y):
    """Sleep for one second, then return ``x + y`` (plain function, no dask wrapper)."""
    time.sleep(1)  # stand-in for an expensive computation
    total = x + y
    return total

In [12]:
# Only the outermost add_nums is wrapped with dask.delayed. The inner
# add_nums / slow_increment calls are plain function calls, so they run
# right here and the wrapper receives their finished results.
z = dask.delayed(add_nums)(
    add_nums(slow_increment(1), slow_increment(2)),
    add_nums(slow_increment(3), slow_increment(4)),
)

In [13]:
# Draw the graph for z; only the outer add_nums call was wrapped in dask.delayed.
z.visualize()

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

# Simple case (processing a list in parallel)

In [14]:
# 200 slices of 1000 x 10000 float64 values: 2e9 elements, about 16 GB of RAM.
# Shrink IMG_SHAPE to try the example on a machine with less memory.
IMG_SHAPE = (200, 1000, 10000)
SEED = 0
rng = np.random.default_rng(SEED)  # seeded so every re-run sees the same data
img = rng.random(size=IMG_SHAPE)

### Without dask

In [15]:
def calculate_garbage(a):
    """Throwaway numeric workload: ``std(a) * mean(a) / sqrt(a)``.

    The standard deviation and mean are taken over all of ``a``; the division
    by ``np.sqrt(a)`` is element-wise, so an array input yields an array of
    the same shape.
    """
    scale = np.std(a) * np.mean(a)
    return scale / np.sqrt(a)

In [None]:
%%time
# Sequential baseline: process one slice of img after another.
means = list(map(calculate_garbage, img))

### With dask

In [None]:
@dask.delayed
def calculate_garbage_delayed(a):
    """Same arithmetic as ``std(a) * mean(a) / sqrt(a)``, wrapped with ``dask.delayed``.

    The standard deviation and mean are taken over all of ``a``; the division
    by ``np.sqrt(a)`` is element-wise.
    """
    scale = np.std(a) * np.mean(a)
    return scale / np.sqrt(a)

In [None]:
%%time
# dask.compute returns a tuple with one entry per positional argument.
# Unpack it so `means` is the list of per-slice results rather than a
# 1-tuple wrapping that list.
(means,) = dask.compute([calculate_garbage_delayed(a) for a in img])