# Using Dask with climpred

This notebook demonstrates how `climpred` works together with `dask`.

In [None]:
import warnings

%matplotlib inline
import numpy as np
import xarray as xr
import dask
import climpred

# Ignore all warnings from here on (presumably to keep the demo output
# readable). NOTE(review): this also hides potentially useful warnings.
warnings.filterwarnings("ignore")

### Load large data

In [None]:
# Load the two tutorial datasets used throughout this notebook; they are
# passed below as the `ds` and `control` arguments of the climpred calls.
ds = climpred.tutorial.load_dataset('MPI-PM-DP-3D')
control = climpred.tutorial.load_dataset('MPI-control-3D')

In [None]:
# Keyword arguments shared by every climpred call in this notebook.
kw = dict(comparison='m2m', metric='pearson_r')

In [None]:
# Reference run: time `compute_perfect_model` before `ds` is explicitly chunked.
%time s = climpred.prediction.compute_perfect_model(ds, control, **kw)

In order to use `dask` efficiently, we need to chunk the data appropriately. Processing chunks of data lazily with `dask` creates a tiny overhead per task, so chunking mostly makes sense when it is applied to large data.

In [None]:
# Chunk along a single dimension; each chunk spans a quarter of that
# dimension's length (integer division).
chunked_dim = 'x'
chunks = {chunked_dim: ds[chunked_dim].size // 4}
# Re-chunk and persist in one step. Persisting is optional: only do it
# if memory allows.
ds = ds.chunk(chunks).persist()

In [None]:
%%time
# Same call as the reference run, now on the chunked `ds`.
s_chunked = climpred.prediction.compute_perfect_model(ds, control, **kw)
# Guard: at this point the result must still be a dask collection.
assert dask.is_dask_collection(s_chunked)
# Call .compute() inside the timed cell so the reported time includes it.
s_chunked = s_chunked.compute()

In [None]:
# Check that the chunked computation reproduces the unchunked result.
try:
    xr.testing.assert_allclose(s, s_chunked)
except AssertionError:
    # The results differ beyond the default tolerance. Rather than failing
    # the notebook, plot the difference for every variable, faceted by lead.
    # The difference does not depend on the loop variable, so compute it once.
    diff = s - s_chunked
    for v in s.data_vars:
        diff[v].plot(robust=True, col='lead')

In [None]:
# Bootstrapping arguments: the shared keyword arguments plus a `bootstrap`
# entry (presumably the number of bootstrap iterations — confirm against
# the climpred documentation). `kw` itself is left untouched.
kwp = {**kw, 'bootstrap': 4}

In [None]:
# Reload both tutorial datasets (replacing the chunked `ds` from above) and
# keep only the single variable named by `v`.
v='tos'
ds = climpred.tutorial.load_dataset('MPI-PM-DP-3D')[v]
control = climpred.tutorial.load_dataset('MPI-control-3D')[v]

In [None]:
# Reference run: time `bootstrap_perfect_model` on the freshly reloaded data.
%time s_p = climpred.bootstrap.bootstrap_perfect_model(ds, control, **kwp)

In [None]:
# Chunk the reloaded data along a single dimension; each chunk spans a
# quarter of that dimension's length (integer division).
chunked_dim = 'x'
chunks = {chunked_dim: ds[chunked_dim].size // 4}
# Re-chunk and persist in one step. Persisting is optional: only do it
# if memory allows.
ds = ds.chunk(chunks).persist()

In [None]:
# Repeat the bootstrapped call on the chunked, persisted `ds` so its timing
# can be compared with the reference run.
%time s_p_chunked = climpred.bootstrap.bootstrap_perfect_model(ds, control, **kwp)