## This notebook is meant for you to play around with!
## Try investigating `.dask` and other aspects of the task graphs below!

In [None]:
import dask
from dask.threaded import get
from dask.local import get_sync
from dask.optimization import cull, inline, inline_functions, fuse

def print_and_return(string):
    """Print *string* to standard output and hand the same object back.

    Used as the final task of each branch in the example graph so that
    executing the graph produces visible output while still yielding a
    result for the requested key.
    """
    print(string)
    return string

def format_str(count, val, nwords):
    """Build the human-readable summary line for one counted word.

    Args:
        count: Number of occurrences found for ``val``.
        val: The word that was counted.
        nwords: Total number of words in the list.

    Returns:
        The formatted summary sentence as a single string.
    """
    summary = (
        f'word list has {count} occurrences of '
        f'{val}, out of {nwords} words'
    )
    return summary

# Hand-written task graph: a plain dict mapping key -> literal value or
# task tuple ``(callable, *args)``.  String arguments that name another
# key refer to that key's computed value (see the dask graph spec).
dsk = {
    # Input text and its total word count.
    'words': 'apple orange apple pear orange pear pear',
    'nwords': (len, (str.split, 'words')),
    # The three words to look for.
    'val1': 'orange',
    'val2': 'apple',
    'val3': 'pear',
    # Occurrences of each word in the input text.
    'count1': (str.count, 'words', 'val1'),
    'count2': (str.count, 'words', 'val2'),
    'count3': (str.count, 'words', 'val3'),
    # One summary sentence per word.
    'format1': (format_str, 'count1', 'val1', 'nwords'),
    'format2': (format_str, 'count2', 'val2', 'nwords'),
    'format3': (format_str, 'count3', 'val3', 'nwords'),
    # Terminal tasks: print each sentence and return it.
    'print1': (print_and_return, 'format1'),
    'print2': (print_and_return, 'format2'),
    'print3': (print_and_return, 'format3'),
}

In [None]:
# Render the full, unoptimized graph.
# NOTE(review): verbose=True presumably labels data nodes with their full
# key names -- confirm against the dask visualization docs.
dask.base.visualize_dsk(dsk, verbose=True)

In [None]:
# Only two of the three print tasks are requested, so 'print3' and the
# keys that feed only it ('format3', 'count3', 'val3') are not needed.
outputs = ['print1', 'print2']
dsk1, dependencies = cull(dsk, outputs)  # remove unnecessary tasks from the graph

# Execute the culled graph for the requested keys.
results = get_sync(dsk1, outputs)

# Render the culled graph for comparison with the full one.
dask.base.visualize_dsk(dsk1, verbose=True)

In [None]:
# Inline pass over the culled graph.  `dependencies` is the map bound by
# the `cull` cell above (assuming the cells are run top to bottom).
# NOTE(review): with no explicit keys, `inline` presumably inlines only
# constant values such as 'val1' -- confirm against the dask docs.
dsk2 = inline(dsk1, dependencies=dependencies)
results = get_sync(dsk2, outputs)

# Render the graph after inlining.
dask.base.visualize_dsk(dsk2, verbose=True)

In [None]:
# Ask dask to inline calls to `len` and `str.split` (passed as the list of
# functions to inline) while keeping the requested `outputs` keys.
dsk3 = inline_functions(dsk2, outputs, [len, str.split], dependencies=dependencies)
results = get_sync(dsk3, outputs)

# Render the graph after function inlining.
dask.base.visualize_dsk(dsk3, verbose=True)

In [None]:
# Fuse pass.  Note that this rebinds `dependencies` to the map returned by
# `fuse`, so re-running earlier cells afterwards would see the new value.
dsk4, dependencies = fuse(dsk3)
results = get_sync(dsk4, outputs)

# Render the fused graph.
dask.base.visualize_dsk(dsk4, verbose=True)

In [None]:
def optimize(dsk, keys):
    """Run the cull -> inline -> inline_functions -> fuse pipeline on *dsk*.

    Args:
        dsk: Task graph (dict) to optimize.
        keys: Output keys that must remain computable.

    Returns:
        A ``(graph, dependencies)`` pair as produced by the final ``fuse``
        step.
    """
    culled, deps = cull(dsk, keys)
    inlined = inline(culled, dependencies=deps)
    expanded = inline_functions(
        inlined, keys, [len, str.split], dependencies=deps
    )
    fused, fused_deps = fuse(expanded)
    return fused, fused_deps

def optimize_and_get(dsk, keys):
    """Optimize *dsk* for *keys* and compute them with the threaded scheduler.

    The graph is first run through ``optimize`` (cull, inline,
    inline_functions, fuse) so that the whole pipeline is applied; calling
    ``fuse`` alone on the raw graph would skip the earlier passes.

    Args:
        dsk: Task graph (dict) to optimize and execute.
        keys: Output keys to compute.

    Returns:
        Whatever ``dask.threaded.get`` returns for *keys* on the optimized
        graph.
    """
    # The dependency map returned alongside the graph is not needed by `get`.
    optimized, _ = optimize(dsk, keys)
    return get(optimized, keys)

# Compute the requested outputs starting from the full graph via the
# helper defined above.
optimize_and_get(dsk, outputs)

In [None]:
# Render the original graph again, this time with color="order".
# NOTE(review): presumably colors nodes by dask's computed execution
# order -- confirm against the dask visualization docs.
dask.base.visualize_dsk(dsk, verbose=True, color="order")

In [None]:
# Optimize the full graph for `outputs`; the returned dependency map is
# discarded.
dsk5, _ = optimize(dsk, outputs)

# Same color="order" rendering, now of the optimized graph, for
# comparison with the unoptimized rendering.
dask.base.visualize_dsk(dsk5, verbose=True, color="order")