Skip to content

Commit

Permalink
Merge pull request #6 from nens/casper-reclassify
Browse files Browse the repository at this point in the history
Fix multiple issues in reclassify
  • Loading branch information
arjanverkerk committed Oct 4, 2019
2 parents bfa121f + f5b0384 commit a8c9862
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 90 deletions.
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ Changelog of dask-geomodeling

- Added documentation.

- Fixed MemorySource incase of a request outside of the data boundary.

- Fixed multiple bugs in Reclassify and added some tests. The 'from' dtype can
now be boolean or integer, and the 'to' dtype integer or float. The returned
dtype is now decided by numpy (int64 or float64).


2.0.2 (2019-09-04)
------------------
Expand Down
137 changes: 70 additions & 67 deletions dask_geomodeling/raster/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,103 +283,106 @@ class Reclassify(BaseSingle):
:param store: rasterdata to reclassify
:param data: list of (from, to) values defining the reclassification
the from values can be of bool or int datatype; the to values can be
of int or float datatype
:param select: leave only reclassified values, set others to 'no data'.
Default False.
:type store: RasterBlock
:type data: list
:type select: bool
Specify the mapping in the form of a list of (from, to)-tuples. If there
is any float value in the 'to' data, then all 'to' values are treated as
float values.
The maximum of the store's fillvalue and the 'from' values determine the
size of the lookup array. For float target values, the dtype is 'f4' and
the fillvalue is the maximum possible value for that dtype. for integer
target values, the fillvalue of the raster is the maximum of:
- the target values plus one,
- the source values plus one and
- the fillvalue of the store
the raster's dtype is the unsigned integer dtype with the smallest
itemsize that still accomodates the raster's fillvalue.
These properties make it possible to keep the lookup array for the
conversion small, but the drawback is that stores with fillvalues below
data values will not work.
"""

def __init__(self, store, data, select=False):
if not np.issubdtype(store.dtype, np.integer):
raise TypeError("The store must be of integer datatype")
dtype = store.dtype
if dtype != np.bool and not np.issubdtype(dtype, np.integer):
raise TypeError("The store must be of boolean or integer datatype")

# validate "data"
if not hasattr(data, "__iter__"):
raise TypeError("'{}' object is not allowed".format(type(data)))
try:
source, target = map(np.asarray, zip(*data))
except ValueError:
raise ValueError("Please supply a list of [from, to] values")
# "from" can have bool or int dtype, "to" can also be float
if source.dtype != np.bool and not np.issubdtype(source.dtype, np.integer):
raise TypeError(
"Cannot reclassify from value with type '{}'".format(source.dtype)
)
if len(np.unique(source)) != len(source):
raise ValueError("There are duplicates in the reclassify values")
if not np.issubdtype(target.dtype, np.number):
raise TypeError(
"Cannot reclassify to value with type '{}'".format(target.dtype)
)
# put 'data' into a list with consistent dtypes
data = [list(x) for x in zip(source.tolist(), target.tolist())]

if select is not True and select is not False:
raise TypeError("'{}' object is not allowed".format(type(select)))
super(Reclassify, self).__init__(store, data, select)
super().__init__(store, data, select)

@property
def data(self):
return self.args[1]

@property
def dtype(self):
source, target = map(np.asarray, zip(*self.data))
def select(self):
return self.args[2]

if np.issubdtype(target.dtype, np.floating):
dtype = np.dtype("f4")
else:
# integer mapping
fillvalue = max(source.max() + 1, target.max() + 1, self.store.fillvalue)
for dtype in np.uint8, np.uint16, np.uint32, np.uint64:
if fillvalue <= dtype(-1):
break
return dtype
@property
def dtype(self):
_, target = map(np.asarray, zip(*self.data))
return target.dtype

@property
def fillvalue(self):
source, target = map(np.asarray, zip(*self.data))
return get_dtype_max(self.dtype)

if np.issubdtype(target.dtype, np.floating):
fillvalue = np.finfo("f4").max
else:
fillvalue = max(source.max() + 1, target.max() + 1, self.store.fillvalue)
return fillvalue
def get_sources_and_requests(self, **request):
process_kwargs = {
"dtype": self.dtype.str,
"fillvalue": self.fillvalue,
"data": self.data,
"select": self.select,
}
return [(self.store, request), (process_kwargs, None)]

@staticmethod
def process(store_data, mapping_data, select):
def process(store_data, process_kwargs):
if store_data is None or "values" not in store_data:
return store_data

no_data_value = store_data["no_data_value"]
source, target = map(np.asarray, zip(*mapping_data))

if np.issubdtype(target.dtype, np.floating):
dtype = np.dtype("f4")
fillvalue = np.finfo("f4").max
else:
# integer mapping
fillvalue = max(source.max() + 1, target.max() + 1, no_data_value)
for dtype in np.uint8, np.uint16, np.uint32, np.uint64:
if fillvalue <= dtype(-1):
break

# create mapping
size = max(source.max(), no_data_value) + 1
if select:
# map everything, including old fillvalue, to fillvalue
mapping = np.full(size, fillvalue, dtype=dtype)
else:
# map every value onto itself
mapping = np.arange(size, dtype=dtype)

# modify mapping using provided data
mapping[source] = target

values = mapping[store_data["values"]]
return {"values": values, "no_data_value": fillvalue}
values = store_data["values"]
source, target = map(np.asarray, zip(*process_kwargs["data"]))
dtype = np.dtype(process_kwargs["dtype"])
fillvalue = process_kwargs["fillvalue"]

# add the nodata value to the source array and map it to the target
# nodata
if no_data_value is not None and no_data_value not in source:
source = np.append(source, no_data_value)
target = np.append(target, fillvalue)

# sort the source and target values
inds = np.argsort(source)
source = source[inds]
target = target[inds]

# create the result array
if process_kwargs["select"]: # select = True: initialize with nodata
result = np.full(values.shape, fillvalue, dtype=dtype)
else: # select = True: initialize with existing data
result = values.astype(dtype) # makes a copy

# find all values in the source data that are to be mapped
mask = np.in1d(values.ravel(), source)
mask.shape = values.shape
# place the target values (this also maps nodata values)
result[mask] = target[np.searchsorted(source, values[mask])]
return {"values": result, "no_data_value": fillvalue}


class Rasterize(RasterBlock):
Expand Down
21 changes: 0 additions & 21 deletions dask_geomodeling/tests/test_raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -1606,27 +1606,6 @@ def test_step(self):
self.assertEqual(view.get_data(**self.meta_request)["meta"], self.expected_meta)
self.assertEqual(view.get_data(**self.time_request)["time"], self.expected_time)

def test_reclassify_meta_time(self):
view = raster.Reclassify(store=self.raster, data=[[7, 1000]])
self.assertEqual(view.get_data(**self.meta_request)["meta"], self.expected_meta)
self.assertEqual(view.get_data(**self.time_request)["time"], self.expected_time)

def test_reclassify_with_integers(self):
for select in True, False:
view = raster.Reclassify(store=self.raster, data=[[7, 1000]], select=select)

data = view.get_data(**self.vals_request)
self.assertEqual(view.dtype, np.uint16)
assert_equal(data["values"], 1000)

def test_reclassify_with_floats(self):
for select in True, False:
view = raster.Reclassify(store=self.raster, data=[[7, 8.0]], select=select)
data = view.get_data(**self.vals_request)
self.assertEqual(view.dtype, np.float32)
assert_equal(data["values"], 8.0)
self.assertEqual(data["no_data_value"], view.fillvalue)

def test_classify_meta_time(self):
view = raster.Classify(store=self.raster, bins=[1, 2, 3])
self.assertEqual(view.get_data(**self.meta_request)["meta"], self.expected_meta)
Expand Down
129 changes: 129 additions & 0 deletions dask_geomodeling/tests/test_raster_misc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from datetime import datetime, timedelta

import numpy as np
import pytest
from numpy.testing import assert_equal

from dask_geomodeling import raster
from dask_geomodeling.raster.sources import MemorySource


@pytest.fixture
def source():
bands = 3
time_first = datetime(2000, 1, 1)
time_delta = timedelta(hours=1)
yield MemorySource(
data=[
np.full((10, 10), 1, dtype=np.uint8),
np.full((10, 10), 7, dtype=np.uint8),
np.full((10, 10), 255, dtype=np.uint8),
],
no_data_value=255,
projection="EPSG:28992",
pixel_size=0.5,
pixel_origin=(135000, 456000),
time_first=time_first,
time_delta=time_delta,
metadata=["Testmeta for band {}".format(i) for i in range(bands)],
)


@pytest.fixture
def vals_request():
bands = 3
time_first = datetime(2000, 1, 1)
time_delta = timedelta(hours=1)
yield {
"mode": "vals",
"start": time_first,
"stop": time_first + bands * time_delta,
"width": 4,
"height": 6,
"bbox": (135000, 456000 - 3, 135000 + 2, 456000),
"projection": "EPSG:28992",
}


@pytest.fixture
def expected_meta():
bands = 3
return ["Testmeta for band {}".format(i) for i in range(bands)]


@pytest.fixture
def expected_time():
bands = 3
time_first = datetime(2000, 1, 1)
time_delta = timedelta(hours=1)
return [time_first + i * time_delta for i in range(bands)]


def test_reclassify(source, vals_request):
view = raster.Reclassify(store=source, data=[[7, 1000]])
data = view.get_data(**vals_request)

assert_equal(data["values"][:, 0, 0], [1, 1000, data["no_data_value"]])


def test_reclassify_select(source, vals_request):
view = raster.Reclassify(store=source, data=[[7, 1000]], select=True)
data = view.get_data(**vals_request)

expected = [data["no_data_value"], 1000, data["no_data_value"]]
assert_equal(data["values"][:, 0, 0], expected)


def test_reclassify_to_float(source, vals_request):
view = raster.Reclassify(store=source, data=[[7, 8.2]])
data = view.get_data(**vals_request)

assert_equal(data["values"][:, 0, 0], [1.0, 8.2, data["no_data_value"]])


def test_reclassify_bool(source, vals_request):
source_bool = source == 7
view = raster.Reclassify(store=source_bool, data=[[True, 1000]])
data = view.get_data(**vals_request)

assert_equal(data["values"][:, 0, 0], [0, 1000, 0])


def test_reclassify_int32(source, vals_request):
# this will have a high fillvalue that may lead to a MemoryError
source_int32 = source * 1
assert source_int32.dtype == np.int32

view = raster.Reclassify(store=source_int32, data=[[7, 1000]])
data = view.get_data(**vals_request)

assert_equal(data["values"][:, 0, 0], [1, 1000, data["no_data_value"]])


def test_reclassify_float_raster(source):
source_float = source / 2
assert source_float.dtype == np.float32
with pytest.raises(TypeError):
raster.Reclassify(store=source_float, data=[[7.0, 1000]])


def test_reclassify_float_data(source):
with pytest.raises(TypeError):
raster.Reclassify(store=source, data=[[7.4, 1000]])


def test_reclassify_wrong_mapping_shape(source):
with pytest.raises(ValueError):
raster.Reclassify(store=source, data=[[[7, 1000]], [1, 100]])


def test_reclassify_meta_request(source, vals_request, expected_meta):
view = raster.Reclassify(store=source, data=[[7, 1000]])
vals_request["mode"] = "meta"
assert view.get_data(**vals_request)["meta"] == expected_meta


def test_reclassify_time_request(source, vals_request, expected_time):
view = raster.Reclassify(store=source, data=[[7, 1000]])
vals_request["mode"] = "time"
assert view.get_data(**vals_request)["time"] == expected_time
6 changes: 4 additions & 2 deletions dask_geomodeling/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,13 +296,15 @@ def get_array_ranges(self, bbox, shape):
if j1 == j2:
j2 += 1

# clip the indices to the array bounds and get the data
# clip the indices to the array bounds for getting the data
_i1, _i2 = np.clip([i1, i2], 0, shape[1])
_j1, _j2 = np.clip([j1, j2], 0, shape[2])
ranges = (_i1, _i2), (_j1, _j2)

# pad the data to the shape given by the index
padding = (_i1 - i1, i2 - _i2), (_j1 - j1, j2 - _j2)
padding_i = (i2 - i1, 0) if _i1 == _i2 else (_i1 - i1, i2 - _i2)
padding_j = (j2 - j1, 0) if _j1 == _j2 else (_j1 - j1, j2 - _j2)
padding = padding_i, padding_j
if np.all(np.array(padding) <= 0):
padding = None
return ranges, padding
Expand Down

0 comments on commit a8c9862

Please sign in to comment.