Skip to content

Commit

Permalink
Merge branch 'master' of github.com:dask/dask into pandas-warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
mrocklin committed Mar 16, 2019
2 parents b44620a + 87f3a48 commit de12ca6
Show file tree
Hide file tree
Showing 24 changed files with 549 additions and 172 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Expand Up @@ -11,6 +11,7 @@ _base_envs:
- &no_optimize XTRATESTARGS=
- &imports TEST_IMPORTS='true'
- &no_imports TEST_IMPORTS='false'
- &array_function NUMPY_EXPERIMENTAL_ARRAY_FUNCTION='1'

jobs:
fast_finish: true
Expand Down Expand Up @@ -44,7 +45,7 @@ jobs:

- env: &py37_env
- PYTHON=3.7
- NUMPY=1.15.0
- NUMPY=1.16.2
- PANDAS>=0.24.1
- *test_and_lint
- *no_coverage
Expand Down
4 changes: 4 additions & 0 deletions continuous_integration/travis/run_tests.sh
Expand Up @@ -13,3 +13,7 @@ else
echo "py.test dask --runslow $XTRATESTARGS"
py.test dask --runslow $XTRATESTARGS
fi

# This needs to be enabled to test __array_function__ protocol with
# NumPy v1.16.x, enabled by default starting in v1.17
export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1
14 changes: 14 additions & 0 deletions dask/array/core.py
Expand Up @@ -1002,6 +1002,20 @@ def __array__(self, dtype=None, **kwargs):
x = np.array(x)
return x

def __array_function__(self, func, types, args, kwargs):
import dask.array as module
for submodule in func.__module__.split('.')[1:]:
try:
module = getattr(module, submodule)
except AttributeError:
return NotImplemented
if not hasattr(module, func.__name__):
return NotImplemented
da_func = getattr(module, func.__name__)
if da_func is func:
return NotImplemented
return da_func(*args, **kwargs)

@property
def _elemwise(self):
return elemwise
Expand Down
95 changes: 95 additions & 0 deletions dask/array/tests/test_array_function.py
@@ -0,0 +1,95 @@
import pytest
np = pytest.importorskip('numpy', minversion='1.16')

import os

import dask.array as da
from dask.array.utils import assert_eq


env_name = "NUMPY_EXPERIMENTAL_ARRAY_FUNCTION"
missing_arrfunc_cond = env_name not in os.environ or os.environ[env_name] != "1"
missing_arrfunc_reason = env_name + " undefined or disabled"


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.parametrize('func', [
lambda x: np.concatenate([x, x, x]),
lambda x: np.cov(x, x),
lambda x: np.dot(x, x),
lambda x: np.dstack(x),
lambda x: np.flip(x, axis=0),
lambda x: np.hstack(x),
lambda x: np.matmul(x, x),
lambda x: np.mean(x),
lambda x: np.stack([x, x]),
lambda x: np.sum(x),
lambda x: np.var(x),
lambda x: np.vstack(x),
lambda x: np.fft.fft(x.rechunk(x.shape) if isinstance(x, da.Array) else x),
lambda x: np.fft.fft2(x.rechunk(x.shape) if isinstance(x, da.Array) else x),
lambda x: np.linalg.norm(x)])
def test_array_function_dask(func):
x = np.random.random((100, 100))
y = da.from_array(x, chunks=(50, 50))
res_x = func(x)
res_y = func(y)

assert isinstance(res_y, da.Array)
assert_eq(res_y, res_x)


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.parametrize('func', [
lambda x: np.min_scalar_type(x),
lambda x: np.linalg.det(x),
lambda x: np.linalg.eigvals(x)])
def test_array_notimpl_function_dask(func):
x = np.random.random((100, 100))
y = da.from_array(x, chunks=(50, 50))

with pytest.raises(TypeError):
func(y)


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
def test_array_function_sparse_transpose():
sparse = pytest.importorskip('sparse')
x = da.random.random((500, 500), chunks=(100, 100))
x[x < 0.9] = 0

y = x.map_blocks(sparse.COO)

assert_eq(np.transpose(x), np.transpose(y))


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.xfail(reason="requires sparse support for __array_function__",
strict=False)
def test_array_function_sparse_tensordot():
sparse = pytest.importorskip('sparse')
x = np.random.random((2, 3, 4))
x[x < 0.9] = 0
y = np.random.random((4, 3, 2))
y[y < 0.9] = 0

xx = sparse.COO(x)
yy = sparse.COO(y)

assert_eq(np.tensordot(x, y, axes=(2, 0)),
np.tensordot(xx, yy, axes=(2, 0)).todense())


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
def test_array_function_cupy_svd():
cupy = pytest.importorskip('cupy')
x = cupy.random.random((500, 100))

y = da.from_array(x, chunks=(100, 100), asarray=False)

u_base, s_base, v_base = da.linalg.svd(y)
u, s, v = np.linalg.svd(y)

assert_eq(u, u_base)
assert_eq(s, s_base)
assert_eq(v, v_base)
30 changes: 17 additions & 13 deletions dask/array/tests/test_reductions.py
Expand Up @@ -468,35 +468,38 @@ def test_topk_argtopk1(npfunc, daskfunc, split_every):
k = 5
# Test at least 3 levels of aggregation when split_every=2
# to stress the different chunk, combine, aggregate kernels
a = da.random.random(800, chunks=((120, 80, 100, 200, 300), ))
b = da.random.random((10, 20, 30), chunks=(4, 8, 8))
npa = np.random.random(800)
npb = np.random.random((10, 20, 30))

a = da.from_array(npa, chunks=((120, 80, 100, 200, 300), ))
b = da.from_array(npb, chunks=(4, 8, 8))

# 1-dimensional arrays
# top 5 elements, sorted descending
assert_eq(npfunc(a)[-k:][::-1],
assert_eq(npfunc(npa)[-k:][::-1],
daskfunc(a, k, split_every=split_every))
# bottom 5 elements, sorted ascending
assert_eq(npfunc(a)[:k],
assert_eq(npfunc(npa)[:k],
daskfunc(a, -k, split_every=split_every))

# n-dimensional arrays
# also testing when k > chunk
# top 5 elements, sorted descending
assert_eq(npfunc(b, axis=0)[-k:, :, :][::-1, :, :],
assert_eq(npfunc(npb, axis=0)[-k:, :, :][::-1, :, :],
daskfunc(b, k, axis=0, split_every=split_every))
assert_eq(npfunc(b, axis=1)[:, -k:, :][:, ::-1, :],
assert_eq(npfunc(npb, axis=1)[:, -k:, :][:, ::-1, :],
daskfunc(b, k, axis=1, split_every=split_every))
assert_eq(npfunc(b, axis=-1)[:, :, -k:][:, :, ::-1],
assert_eq(npfunc(npb, axis=-1)[:, :, -k:][:, :, ::-1],
daskfunc(b, k, axis=-1, split_every=split_every))
with pytest.raises(ValueError):
daskfunc(b, k, axis=3, split_every=split_every)

# bottom 5 elements, sorted ascending
assert_eq(npfunc(b, axis=0)[:k, :, :],
assert_eq(npfunc(npb, axis=0)[:k, :, :],
daskfunc(b, -k, axis=0, split_every=split_every))
assert_eq(npfunc(b, axis=1)[:, :k, :],
assert_eq(npfunc(npb, axis=1)[:, :k, :],
daskfunc(b, -k, axis=1, split_every=split_every))
assert_eq(npfunc(b, axis=-1)[:, :, :k],
assert_eq(npfunc(npb, axis=-1)[:, :, :k],
daskfunc(b, -k, axis=-1, split_every=split_every))
with pytest.raises(ValueError):
daskfunc(b, -k, axis=3, split_every=split_every)
Expand All @@ -510,14 +513,15 @@ def test_topk_argtopk1(npfunc, daskfunc, split_every):
@pytest.mark.parametrize('chunksize', [1, 2, 3, 4, 5, 10])
def test_topk_argtopk2(npfunc, daskfunc, split_every, chunksize):
"""Fine test use cases when k is larger than chunk size"""
a = da.random.random((10, ), chunks=chunksize)
npa = np.random.random((10, ))
a = da.from_array(npa, chunks=chunksize)
k = 5

# top 5 elements, sorted descending
assert_eq(npfunc(a)[-k:][::-1],
assert_eq(npfunc(npa)[-k:][::-1],
daskfunc(a, k, split_every=split_every))
# bottom 5 elements, sorted ascending
assert_eq(npfunc(a)[:k],
assert_eq(npfunc(npa)[:k],
daskfunc(a, -k, split_every=split_every))


Expand Down
2 changes: 1 addition & 1 deletion dask/array/tests/test_routines.py
Expand Up @@ -502,7 +502,7 @@ def test_bincount_with_weights():

dweights = da.from_array(weights, chunks=2)
e = da.bincount(d, weights=dweights, minlength=6)
assert_eq(e, np.bincount(x, weights=dweights, minlength=6))
assert_eq(e, np.bincount(x, weights=dweights.compute(), minlength=6))
assert same_keys(da.bincount(d, weights=dweights, minlength=6), e)


Expand Down
17 changes: 14 additions & 3 deletions dask/array/tests/test_ufunc.py
Expand Up @@ -56,10 +56,10 @@ def test_ufunc():
unary_ufuncs = ['absolute', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan',
'arctanh', 'bitwise_not', 'cbrt', 'ceil', 'conj', 'cos',
'cosh', 'deg2rad', 'degrees', 'exp', 'exp2', 'expm1', 'fabs',
'fix', 'floor', 'i0', 'invert','isfinite', 'isinf', 'isnan', 'log',
'log10', 'log1p', 'log2', 'logical_not', 'nan_to_num',
'fix', 'floor', 'invert','isfinite', 'isinf', 'isnan', 'log',
'log10', 'log1p', 'log2', 'logical_not',
'negative', 'rad2deg', 'radians', 'reciprocal', 'rint', 'sign',
'signbit', 'sin', 'sinc', 'sinh', 'spacing', 'sqrt', 'square',
'signbit', 'sin', 'sinh', 'spacing', 'sqrt', 'square',
'tan', 'tanh', 'trunc']


Expand Down Expand Up @@ -276,6 +276,17 @@ def test_issignedinf():
assert_eq(np.isposinf(arr), da.isposinf(darr))


@pytest.mark.parametrize('func', ['i0', 'sinc', 'nan_to_num'])
def test_non_ufunc_others(func):
arr = np.random.randint(1, 100, size=(20, 20))
darr = da.from_array(arr, 3)

dafunc = getattr(da, func)
npfunc = getattr(np, func)

assert_eq(dafunc(darr), npfunc(arr), equal_nan=True)


def test_frompyfunc():
myadd = da.frompyfunc(add, 2, 1)
np_myadd = np.frompyfunc(add, 2, 1)
Expand Down
5 changes: 4 additions & 1 deletion dask/bytes/local.py
Expand Up @@ -33,7 +33,10 @@ def _normalize_path(self, path):

def glob(self, path):
"""For a template path, return matching files"""
return sorted(glob(self._normalize_path(path)))
try:
return sorted(glob(self._normalize_path(path), recursive=True))
except TypeError: # recursive kwarg is new in Python 3.5
return sorted(glob(self._normalize_path(path)))

def mkdirs(self, path):
"""Make any intermediate directories to make path writable"""
Expand Down
13 changes: 12 additions & 1 deletion dask/bytes/tests/test_local.py
Expand Up @@ -31,7 +31,9 @@
csv_files = {'.test.fakedata.1.csv': (b'a,b\n'
b'1,2\n'),
'.test.fakedata.2.csv': (b'a,b\n'
b'3,4\n')}
b'3,4\n'),
'subdir/.test.fakedata.2.csv': (b'a,b\n'
b'5,6\n')}


try:
Expand Down Expand Up @@ -101,6 +103,15 @@ def test_urlpath_expand_read():
assert len(paths) == 2


@pytest.mark.skipif(sys.version_info < (3, 5),
reason="Recursive glob is new in Python 3.5")
def test_recursive_glob_expand():
"""Make sure * is expanded in file paths when reading."""
with filetexts(csv_files, mode='b'):
_, _, paths = get_fs_token_paths('**/.*.csv')
assert len(paths) == 3


def test_urlpath_expand_write():
"""Make sure * is expanded in file paths when writing."""
_, _, paths = get_fs_token_paths('prefix-*.csv', mode='wb', num=2)
Expand Down

0 comments on commit de12ca6

Please sign in to comment.