Skip to content

Commit

Permalink
respond to comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mrocklin committed May 24, 2018
1 parent b26272c commit d4b639e
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 5 deletions.
18 changes: 17 additions & 1 deletion dask/array/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2013,6 +2013,16 @@ def normalize_chunks(chunks, shape=None, limit=None, dtype=None,
def auto_chunks(chunks, shape, limit, dtype, previous_chunks=None):
""" Determine automatic chunks
Parameters
----------
chunks: Tuple
A tuple of either dimensions or tuples of explicit chunk dimensions
Some entries should be "auto"
shape: Tuple[int]
limit: int
The maximum allowable size of a chunk in bytes
previous_chunks: Tuple[Tuple[int]]
See also
--------
normalize_chunks: for full docstring and parameters
Expand All @@ -2036,6 +2046,12 @@ def auto_chunks(chunks, shape, limit, dtype, previous_chunks=None):
"Can not use auto rechunking with object dtype. "
"We are unable to estimate the size in bytes of object data")

for x in tuple(chunks) + tuple(shape):
if (isinstance(x, Number) and np.isnan(x) or
isinstance(x, tuple) and np.isnan(x).any()):
raise ValueError("Can not perform automatic rechunking with unknown "
"(nan) chunk sizes")

limit = max(1, limit // dtype.itemsize)

largest_block = np.prod([cs if isinstance(cs, Number) else max(cs)
Expand Down Expand Up @@ -2088,7 +2104,7 @@ def round_to(c, s):
"""
try:
return max(f for f in factors(s) if c / 2 <= f <= c)
except ValueError:
except ValueError: # no matching factors within factor of two
return max(1, int(c))


Expand Down
1 change: 1 addition & 0 deletions dask/array/rechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def rechunk(x, chunks, threshold=None,
intermediate step.
block_size_limit: int
The maximum block size (in bytes) we want to produce
Defaults to the configuration value ``array.chunk-size``
Examples
--------
Expand Down
15 changes: 15 additions & 0 deletions dask/array/tests/test_array_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3313,3 +3313,18 @@ def test_normalize_chunks_object_dtype(dtype):
x = np.array(['a', 'abc'], dtype=object)
with pytest.raises(NotImplementedError):
da.from_array(x, chunks='auto')


def test_normalize_chunks_tuples_of_tuples():
    """An explicit tuple of chunk sizes is kept as-is next to an 'auto' axis."""
    explicit = (2, 3, 5)
    chunks = normalize_chunks(
        (explicit, 'auto'), (10, 10), limit=10, dtype=np.uint8
    )
    # First axis must come back untouched; the 'auto' axis is expected to be
    # split into five chunks of two.
    assert chunks == (explicit, (2,) * 5)


def test_normalize_chunks_nan():
    """'auto' chunking must raise ValueError when shape or chunks contain nan."""
    cases = [
        # nan in the shape
        ('auto', (np.nan,)),
        # nan inside an explicit tuple of chunk sizes
        (((np.nan, np.nan), 'auto'), (10, 10)),
    ]
    for chunks, shape in cases:
        with pytest.raises(ValueError) as info:
            normalize_chunks(chunks, shape, limit=10, dtype=np.uint8)
        # The error message is expected to mention automatic rechunking.
        assert "auto" in str(info.value)
2 changes: 0 additions & 2 deletions dask/array/tests/test_rechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,9 +603,7 @@ def test_rechunk_zero_dim():
(100, 1, 10, (10,) * 10),
(100, 50, 10, (10,) * 10),
(100, 100, 10, (10,) * 10),
# (20, 7, 10, (7, 7, 6)),
(20, 7, 10, (10, 10)),
# (20, (1, 1, 1, 1, 10, 2, 1, 7), 5, (4, 5, 5, 3, 4, 3)),
(20, (1, 1, 1, 1, 6, 2, 1, 7), 5, (5, 5, 5, 5)),
])
def test_rechunk_auto_1d(shape, chunks, bs, expected):
Expand Down
12 changes: 10 additions & 2 deletions dask/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,8 @@ def parse_bytes(s):
1000000000
>>> parse_bytes('MB')
1000000
>>> parse_bytes('5 foos') # doctest: +SKIP
ValueError: Could not interpret 'foos' as a byte unit
"""
s = s.replace(' ', '')
if not s[0].isdigit():
Expand All @@ -969,9 +971,15 @@ def parse_bytes(s):
prefix = s[:index]
suffix = s[index:]

n = float(prefix)
try:
n = float(prefix)
except ValueError:
raise ValueError("Could not interpret '%s' as a number" % prefix)

multiplier = byte_sizes[suffix.lower()]
try:
multiplier = byte_sizes[suffix.lower()]
except KeyError:
raise ValueError("Could not interpret '%s' as a byte unit" % suffix)

result = n * multiplier
return int(result)
Expand Down

0 comments on commit d4b639e

Please sign in to comment.