Skip to content

Commit

Permalink
Fix AggregateRaster: it now returns NaN for no data pixels (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
caspervdw committed Feb 28, 2020
1 parent 1ef8053 commit 31f6398
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 23 deletions.
36 changes: 20 additions & 16 deletions dask_geomodeling/geometry/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,23 +397,21 @@ def process(geom_data, raster_data, process_kwargs):
# process in groups of disjoint subsets of the features
agg = np.full((depth, len(features)), np.nan, dtype="f4")
for select in bucketize(features.bounds.values):
agg_geometries_bucket = agg_geometries.iloc[select]
index = features.index[select]

rasterize_result = utils.rasterize_geoseries(
agg_geometries_bucket,
agg_geometries.iloc[select],
process_kwargs["agg_bbox"],
agg_srs,
height,
width,
values=index,
values=np.asarray(select, dtype=np.int32), # GDAL needs int32
)
labels = rasterize_result["values"][0]

# if there is a threshold, generate a raster with thresholds
if threshold_name:
thresholds = features[threshold_name].reindex(labels.ravel())\
.values.reshape(labels.shape)
threshold_values = features[threshold_name].iloc[select]
threshold_values = np.append(threshold_values.values, np.nan)
thresholds = np.take(threshold_values, labels, mode="clip")
else:
thresholds = None

Expand All @@ -430,15 +428,21 @@ def process(geom_data, raster_data, process_kwargs):
if not active.any():
continue

with warnings.catch_warnings():
# we may get divide by 0 if any geometry does not contain
# any 'active' values
warnings.simplefilter("ignore")
agg[frame_no][select] = agg_func(
1 if statistic == "count" else frame[active],
labels=labels[active],
index=index,
)
# select features that actually have data
# (min, max, median, and percentile cannot handle it otherwise)
active_labels = labels[active]
select_and_active = list(
set(np.unique(active_labels)) & set(select)
)

if not select_and_active:
continue

agg[frame_no][select_and_active] = agg_func(
1 if statistic == "count" else frame[active],
labels=active_labels,
index=select_and_active,
)

if extensive: # sum and count
agg[~np.isfinite(agg)] = 0
Expand Down
38 changes: 31 additions & 7 deletions dask_geomodeling/tests/test_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
MockRaster,
)

from dask_geomodeling.raster import MemorySource
from dask_geomodeling.geometry import aggregate
from dask_geomodeling.geometry import set_operations
from dask_geomodeling.geometry import field_operations
Expand Down Expand Up @@ -1013,6 +1014,28 @@ def test_overlapping_geometries(self):
result = view.get_data(**self.request)
self.assertEqual(result["features"]["agg"].values.tolist(), [36.0, 18.0])

def test_aggregate_percentile_one_empty(self):
# if there are only nodata pixels in the geometries, we expect the
# statistic of mean, min, max, median and percentile to be NaN.
for agg in ["mean", "min", "max", "median", "p90.0"]:
data = np.ones((1, 10, 10), dtype=np.uint8)
data[:, :5, :] = 255
raster = MemorySource(
data, 255, "EPSG:3857", pixel_size=1, pixel_origin=(0, 10)
)
source = MockGeometry(
polygons=[
((2.0, 2.0), (4.0, 2.0), (4.0, 4.0), (2.0, 4.0)),
((6.0, 6.0), (8.0, 6.0), (8.0, 8.0), (6.0, 8.0)),
],
properties=[{"id": 1}, {"id": 2}],
)
view = geometry.AggregateRaster(
source=source, raster=raster, statistic=agg
)
result = view.get_data(**self.request)
assert np.isnan(result["features"]["agg"].values[1])

def test_empty_dataset(self):
source = MockGeometry(polygons=[], properties=[])
view = geometry.AggregateRaster(
Expand Down Expand Up @@ -1079,17 +1102,18 @@ def test_bucketize(self):
class TestSetGetSeries(unittest.TestCase):
def setUp(self):
self.N = 10
self.properties = [{"id": i, "col_1": i * 2} for i in range(self.N)]
properties = [{"id": i, "col_1": i * 2} for i in range(self.N)]
polygons = [((2.0, 2.0), (8.0, 2.0), (8.0, 8.0), (2.0, 8.0))] * self.N
self.source1 = MockGeometry(
polygons=[((2.0, 2.0), (8.0, 2.0), (8.0, 8.0), (2.0, 8.0))] * self.N,
properties=self.properties,
polygons=polygons,
properties=properties,
)
self.properties = [
{"id": i, "col_2": i * 3, "col_3": i * 4} for i in range(self.N)
properties = [
{"id": i, "col_2": i * 3, "col_3": i * 4, "col_4": i if i % 2 else np.nan} for i in range(self.N)
]
self.source2 = MockGeometry(
polygons=[((2.0, 2.0), (8.0, 2.0), (8.0, 8.0), (2.0, 8.0))] * self.N,
properties=self.properties,
polygons=polygons,
properties=properties,
)
self.request = dict(
mode="intersects", projection="EPSG:3857", geometry=box(0, 0, 10, 10)
Expand Down

0 comments on commit 31f6398

Please sign in to comment.