Skip to content

Commit

Permalink
Merge pull request #140 from jpolchlo/fix/reclassify-nodata
Browse files Browse the repository at this point in the history
Added flag to reclassify to allow NODATA to be remapped
  • Loading branch information
Jacob Bouffard committed Apr 28, 2017
2 parents 7bfda90 + 9643b3c commit 8b40067
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,13 @@ abstract class TileRDD[K: ClassTag] {

def reclassify(
intMap: java.util.Map[Int, Int],
boundaryType: String
boundaryType: String,
replaceNoDataWith: Int
): TileRDD[_] = {
val scalaMap = intMap.asScala.toMap

val boundary = getBoundary(boundaryType)
val mapStrategy = new MapStrategy(boundary, NODATA, NODATA, false)
val mapStrategy = new MapStrategy(boundary, replaceNoDataWith, NODATA, false)
val breakMap = new BreakMap(scalaMap, mapStrategy, { i: Int => isNoData(i) })

val reclassifiedRDD =
Expand All @@ -89,12 +90,13 @@ abstract class TileRDD[K: ClassTag] {

def reclassifyDouble(
doubleMap: java.util.Map[Double, Double],
boundaryType: String
boundaryType: String,
replaceNoDataWith: Double
): TileRDD[_] = {
val scalaMap = doubleMap.asScala.toMap

val boundary = getBoundary(boundaryType)
val mapStrategy = new MapStrategy(boundary, doubleNODATA, doubleNODATA, false)
val mapStrategy = new MapStrategy(boundary, replaceNoDataWith, doubleNODATA, false)
val breakMap = new BreakMap(scalaMap, mapStrategy, { d: Double => isNoData(d) })

val reclassifiedRDD =
Expand Down
45 changes: 31 additions & 14 deletions geopyspark/geotrellis/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
FLOAT,
TILE,
SPATIAL,
LESSTHANOREQUALTO
LESSTHANOREQUALTO,
NODATAINT
)

def _reclassify(srdd, value_map, data_type, boundary_strategy):
def _reclassify(srdd, value_map, data_type, boundary_strategy, replace_nodata_with):
new_dict = {}

for key, value in value_map.items():
Expand All @@ -30,9 +31,15 @@ def _reclassify(srdd, value_map, data_type, boundary_strategy):
new_dict[key] = value

if data_type is int:
return srdd.reclassify(new_dict, boundary_strategy)
if not replace_nodata_with:
return srdd.reclassify(new_dict, boundary_strategy, NODATAINT)
else:
return srdd.reclassify(new_dict, boundary_strategy, replace_nodata_with)
else:
return srdd.reclassifyDouble(new_dict, boundary_strategy)
if not replace_nodata_with:
return srdd.reclassifyDouble(new_dict, boundary_strategy, float('nan'))
else:
return srdd.reclassifyDouble(new_dict, boundary_strategy, replace_nodata_with)


class RasterRDD(object):
Expand Down Expand Up @@ -199,7 +206,7 @@ def tile_to_layout(self, layer_metadata, resample_method=NEARESTNEIGHBOR):
srdd = self.srdd.tileToLayout(json.dumps(layer_metadata), resample_method)
return TiledRasterRDD(self.geopysc, self.rdd_type, srdd)

def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO):
def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO, replace_nodata_with=None):
"""Changes the cell values of a raster based on how the data is broken up.
Args:
Expand All @@ -209,18 +216,23 @@ def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO):
``float``.
boundary_strategy (str, optional): How the cells should be classified along the breaks.
If unspecified, then ``LESSTHANOREQUALTO`` will be used.
replace_nodata_with (data_type, optional): When remapping values, nodata values must be
treated separately. If nodata values are intended to be replaced during the
reclassify, this variable should be set to the intended value. If unspecified,
nodata values will be preserved.
NOTE:
Symbolizing a NoData value differs depending on if the ``data_type`` is an ``int`` or a
``float``. For an ``int``, the constant ``NODATAINT`` can be used which represents the
NoData value for ``int`` in GeoTrellis. If ``float``, then ``float('nan')`` is used to
NoData symbolizes a different value depending on if ``data_type`` is ``int`` or
``float``. For ``int``, the constant ``NODATAINT`` can be used which represents the
NoData value for ``int`` in GeoTrellis. For ``float``, ``float('nan')`` is used to
represent NoData.
Returns:
:class:`~geopyspark.geotrellis.rdd.RasterRDD`
"""

srdd = _reclassify(self.srdd, value_map, data_type, boundary_strategy)
srdd = _reclassify(self.srdd, value_map, data_type, boundary_strategy, replace_nodata_with)

return RasterRDD(self.geopysc, self.rdd_type, srdd)


Expand Down Expand Up @@ -517,7 +529,7 @@ def cost_distance(self, geometries, max_distance):

return TiledRasterRDD(self.geopysc, self.rdd_type, srdd)

def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO):
def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO, replace_nodata_with=None):
"""Changes the cell values of a raster based on how the data is broken up.
Args:
Expand All @@ -527,18 +539,23 @@ def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO):
``float``.
boundary_strategy (str, optional): How the cells should be classified along the breaks.
If unspecified, then ``LESSTHANOREQUALTO`` will be used.
replace_nodata_with (data_type, optional): When remapping values, nodata values must be
treated separately. If nodata values are intended to be replaced during the
reclassify, this variable should be set to the intended value. If unspecified,
nodata values will be preserved.
NOTE:
Symbolizing a NoData value differs depending on if the ``data_type`` is an ``int`` or a
``float``. For an ``int``, the constant ``NODATAINT`` can be used which represents the
NoData value for ``int`` in GeoTrellis. If ``float``, then ``float('nan')`` is used to
NoData symbolizes a different value depending on if ``data_type`` is ``int`` or
``float``. For ``int``, the constant ``NODATAINT`` can be used which represents the
NoData value for ``int`` in GeoTrellis. For ``float``, ``float('nan')`` is used to
represent NoData.
Returns:
:class:`~geopyspark.geotrellis.rdd.TiledRasterRDD`
"""

srdd = _reclassify(self.srdd, value_map, data_type, boundary_strategy)
srdd = _reclassify(self.srdd, value_map, data_type, boundary_strategy, replace_nodata_with)

return TiledRasterRDD(self.geopysc, self.rdd_type, srdd)

def _process_operation(self, value, operation):
Expand Down
32 changes: 32 additions & 0 deletions geopyspark/tests/reclassify_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import sys
import math
import numpy as np
Expand Down Expand Up @@ -148,6 +149,37 @@ def test_no_data_floats(self):
for x in list(result.flatten()):
self.assertTrue(math.isnan(x))

# Integer case for ``replace_nodata_with``: a 16x16 tile of ones with NODATAINT on the
# diagonal is reclassified with {1: 0} and NoData replaced by 1, so the expected output
# is the identity matrix (1 on the diagonal, 0 elsewhere).
@pytest.mark.skipif('TRAVIS' in os.environ,
reason="Encoding using methods in Main causes issues on Travis")
def test_ignore_no_data_ints(self):
# Build the input tile: all ones, NoData along the diagonal of the single band.
arr = np.ones((1, 16, 16), int)
np.fill_diagonal(arr[0], NODATAINT)
tile = {'data': arr, 'no_data_value': NODATAINT}

rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent, tile)])
raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)

# Every data cell (value 1) is mapped to 0.
value_map = {1: 0}

# Take the cell data of the first (only) tile after reclassification.
result = raster_rdd.reclassify(value_map, int, replace_nodata_with=1).to_numpy_rdd().first()[1]['data']

# Element-wise comparison against the 16x16 identity matrix.
self.assertTrue((result == np.identity(16, int)).all())

# Float case for ``replace_nodata_with``: a 4x4 tile of ones with NaN on the diagonal
# is reclassified with {1.0: 0.0} and NoData replaced by 1.0, so the expected output
# is the identity matrix (1.0 on the diagonal, 0.0 elsewhere).
@pytest.mark.skipif('TRAVIS' in os.environ,
reason="Encoding using methods in Main causes issues on Travis")
def test_ignore_no_data_floats(self):
# Build the input tile: all ones, NaN (float NoData) along the diagonal of the single band.
arr = np.ones((1, 4, 4))
np.fill_diagonal(arr[0], float('nan'))
tile = {'data': arr, 'no_data_value': float('nan')}

rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent, tile)])
raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)

# Every data cell (value 1.0) is mapped to 0.0.
value_map = {1.0: 0.0}

# Take the cell data of the first (only) tile after reclassification.
result = raster_rdd.reclassify(value_map, float, replace_nodata_with=1.0).to_numpy_rdd().first()[1]['data']

# Element-wise comparison against the 4x4 identity matrix.
self.assertTrue((result == np.identity(4)).all())

if __name__ == "__main__":
unittest.main()

0 comments on commit 8b40067

Please sign in to comment.