<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Data" data-toc-modified-id="Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Data</a></span></li><li><span><a href="#Diagnostic-local" data-toc-modified-id="Diagnostic-local-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Diagnostic local</a></span><ul class="toc-item"><li><span><a href="#diagnostic-local-progressbar" data-toc-modified-id="diagnostic-local-progressbar-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>diagnostic local progressbar</a></span></li><li><span><a href="#diagnostic-local-profiler" data-toc-modified-id="diagnostic-local-profiler-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>diagnostic local profiler</a></span></li></ul></li></ul></div>

# Imports

In [1]:
import numpy as np
import pandas as pd

In [2]:
import dask
import dask.dataframe as dd
import dask.array as da
import dask_ml

# Report the (name, version) pair of each dask-related package in use.
print(list(map(lambda module: (module.__name__, module.__version__), (dask, dask_ml))))

[('dask', '2.13.0'), ('dask_ml', '1.5.0')]


# Data

In [3]:
# Number of rows in the toy frame.
N = 1000

# Two integer columns, 'a' and 'b', each built from range(N).
df = pd.DataFrame({column: range(N) for column in ('a', 'b')})
# Wrap the pandas frame as a dask DataFrame with a single partition.
ddf = dd.from_pandas(df, npartitions=1)
ddf.head(n=2)

Unnamed: 0,a,b
0,0,0
1,1,1


# Diagnostic local

- Reference: [Dask local diagnostics documentation](https://docs.dask.org/en/latest/diagnostics-local.html)

## diagnostic local progressbar

In [4]:
from dask.diagnostics import ProgressBar

In [5]:
# 2000x2000 dask array of normal samples, split into 1000x1000 chunks.
a = da.random.normal(size=(2000, 2000), chunks=(1000, 1000))
# Mean along axis 0 of a.dot(a.T); the work happens when compute() is called below.
res = (
    a.dot(a.transpose())
     .mean(axis=0)
)
# Run the computation inside a ProgressBar context so progress is reported.
with ProgressBar():
    out = res.compute()

[########################################] | 100% Completed |  0.4s


In [6]:
# Alternative to the `with` statement: register a ProgressBar, compute, then unregister it.
# The example is wrapped in a string literal so it is not executed; the trailing `;`
# suppresses the cell's output.
"""
pbar = ProgressBar()
pbar.register()
out = res.compute()

pbar.unregister()
""";

## diagnostic local profiler

In [7]:
from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler
from cachey import nbytes
from dask.diagnostics import visualize

import dask.array as da

import bokeh.io
from bokeh.resources import INLINE
# Configure Bokeh to display plots in notebook output cells, using the INLINE resources
# imported above (presumably embedding BokehJS rather than loading it from a CDN — see Bokeh docs).
bokeh.io.output_notebook(INLINE)

In [8]:
# Standalone profiler instances: a ResourceProfiler with dt=0.5 and a CacheProfiler
# using cachey's `nbytes` as its metric.
# NOTE(review): the profiling `with` statement in the next cell binds `rprof` and `cprof`
# to newly created instances (dt=0.25, default cache metric), so the two objects created
# here are never used for profiling — confirm which configuration is intended.
rprof = ResourceProfiler(dt=0.5)
cprof = CacheProfiler(metric=nbytes)

In [9]:
# Workload to profile: QR-factorize a random 10000x1000 array (1000x1000 chunks)
# and multiply the factors back together.
a = da.random.random(size=(10000, 1000), chunks=(1000, 1000))
q, r = da.linalg.qr(a)
a2 = q.dot(r)

# Run the computation with all three profilers active; each one stays bound to its
# name (prof / rprof / cprof) so its results can be inspected afterwards.
with Profiler() as prof:
    with ResourceProfiler(dt=0.25) as rprof:
        with CacheProfiler() as cprof:
            out = a2.compute()

In [13]:
# Visualize the results collected by the task Profiler `prof` during the compute above.
prof.visualize()

In [14]:
# Pass all three profilers (task, resource, cache) to dask.diagnostics.visualize in a single call.
visualize([prof, rprof, cprof])