<img src="http://dask.readthedocs.io/en/latest/_images/dask_horizontal.svg"
     align="right"
     width="30%"
     alt="Dask logo">



## Dask reuses existing Python APIs

In [None]:
from dask.distributed import Client
# Create a Dask distributed client with default arguments and keep it in
# `client`; later cells submit work through it.
client = Client()
# Bare expression on the last line of the cell so the notebook displays it.
client

### NumPy/Pandas

In [None]:
import dask.array as da

# Build a 10000 x 10000 Dask array of random values, split into
# 1000 x 1000 chunks, then persist it.
shape = (10000, 10000)
chunk_shape = (1000, 1000)
x = da.random.random(shape, chunks=chunk_shape)
x = x.persist()
x

In [None]:
# Same expression as plain NumPy would use, written one step at a time:
# add the transpose, subtract the global mean, keep every other row and
# column, then take the standard deviation along axis 0.
centered = x + x.T - x.mean()
every_other = centered[::2, ::2]
every_other.std(axis=0).compute()

### SKLearn APIs

In [None]:
# Code source: Gaël Varoquaux
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt

from sklearn import linear_model, decomposition, datasets
from sklearn.pipeline import Pipeline
# from sklearn.model_selection import GridSearchCV
from dask_searchcv import GridSearchCV  # <<---- We add this line

# Load the digits dataset and pull out its feature matrix and labels.
digits = datasets.load_digits()
X_digits, y_digits = digits.data, digits.target

# Two-step pipeline: PCA followed by logistic regression.
pca = decomposition.PCA()
logistic = linear_model.LogisticRegression()
pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])

In [None]:
%%time
# Candidate values for each hyper-parameter in the search.
n_components = [20, 40, 64]
Cs = np.logspace(-4, 4, 3)

# Pipeline parameters are addressed as '<step name>__<parameter name>'.
param_grid = {
    'pca__n_components': n_components,
    'logistic__C': Cs,
}
estimator = GridSearchCV(pipe, param_grid)
estimator.fit(X_digits, y_digits)

### Concurrent.futures  (PEP 3148)

In [None]:
import time, random

def inc(x):
    """Return ``x + 1`` after pausing for a random delay below one second."""
    delay = random.random()
    time.sleep(delay)
    return x + 1


def dec(x):
    """Return ``x - 1`` after pausing for a random delay below one second."""
    delay = random.random()
    time.sleep(delay)
    return x - 1


def add(x, y):
    """Return ``x + y`` after pausing for a random delay below one second."""
    delay = random.random()
    time.sleep(delay)
    return x + y

In [None]:
data = range(100)

# One future per input: even values are sent to `inc`, odd values to `dec`.
futures = []
for x in data:
    task = inc if x % 2 == 0 else dec
    futures.append(client.submit(task, x))

In [None]:
from dask.distributed import as_completed

# Iterator over `futures`; the loop below also adds new futures to it.
done = as_completed(futures)

# Pairwise reduction: take two futures from `done`, submit `add` on them, and
# feed the resulting future back into `done`.
while True:
    try:
        a = next(done)
        b = next(done)
    except StopIteration:
        # `done` ran out of futures, so there is nothing left to pair up.
        break
    
    future = client.submit(add, a, b)
    done.add(future)

### Async/Await

In [None]:
async def f():
    """Sum ``inc(i)`` for ``i`` in ``range(100)`` on the cluster and print it.

    Opens a client that is private to this coroutine, maps ``inc`` over
    ``range(100)``, and adds up the results as the futures are yielded by
    ``as_completed``. The total is printed, not returned.
    """
    total = 0
    # `asynchronous=True` is the documented way to use a Client from inside a
    # coroutine; it replaces the older `start=False` spelling.
    # `set_as_default=False` keeps this short-lived client from being
    # registered as the default one.
    # NOTE(review): assumes a scheduler is listening on localhost:8786 - confirm.
    async with Client('localhost:8786', asynchronous=True,
                      set_as_default=False) as client:
        futures = client.map(inc, range(100))
        async for future in as_completed(futures):
            total += await future
    print(total)
            
from tornado.ioloop import IOLoop
# Schedule the coroutine function `f` as a callback on the current Tornado
# IOLoop.
IOLoop.current().add_callback(f)

### Conclusions

By reusing existing Python APIs and protocols, Dask enables the parallelization of existing codebases with minimal rewriting and retraining.