Merge pull request #140 from jpolchlo/fix/reclassify-nodata

Added flag to reclassify to allow NODATA to be remapped
locationtech-labs · Apr 28, 2017 · 8b40067 · 8b40067
2 parents 7bfda90 + 9643b3c
commit 8b40067
Show file tree

Hide file tree

Showing 3 changed files with 69 additions and 18 deletions.
diff --git a/geopyspark-backend/geotrellis/src/main/scala/geopyspark/geotrellis/RasterRDD.scala b/geopyspark-backend/geotrellis/src/main/scala/geopyspark/geotrellis/RasterRDD.scala
@@ -64,12 +64,13 @@ abstract class TileRDD[K: ClassTag] {
 
   def reclassify(
     intMap: java.util.Map[Int, Int],
-    boundaryType: String
+    boundaryType: String,
+    replaceNoDataWith: Int
   ): TileRDD[_] = {
     val scalaMap = intMap.asScala.toMap
 
     val boundary = getBoundary(boundaryType)
-    val mapStrategy = new MapStrategy(boundary, NODATA, NODATA, false)
+    val mapStrategy = new MapStrategy(boundary, replaceNoDataWith, NODATA, false)
     val breakMap = new BreakMap(scalaMap, mapStrategy, { i: Int => isNoData(i) })
 
     val reclassifiedRDD =
@@ -89,12 +90,13 @@ abstract class TileRDD[K: ClassTag] {
 
   def reclassifyDouble(
     doubleMap: java.util.Map[Double, Double],
-    boundaryType: String
+    boundaryType: String,
+    replaceNoDataWith: Double
   ): TileRDD[_] = {
     val scalaMap = doubleMap.asScala.toMap
 
     val boundary = getBoundary(boundaryType)
-    val mapStrategy = new MapStrategy(boundary, doubleNODATA, doubleNODATA, false)
+    val mapStrategy = new MapStrategy(boundary, replaceNoDataWith, doubleNODATA, false)
     val breakMap = new BreakMap(scalaMap, mapStrategy, { d: Double => isNoData(d) })
 
     val reclassifiedRDD =

diff --git a/geopyspark/geotrellis/rdd.py b/geopyspark/geotrellis/rdd.py
@@ -15,10 +15,11 @@
                                              FLOAT,
                                              TILE,
                                              SPATIAL,
-                                             LESSTHANOREQUALTO
+                                             LESSTHANOREQUALTO,
+                                             NODATAINT
                                             )
 
-def _reclassify(srdd, value_map, data_type, boundary_strategy):
+def _reclassify(srdd, value_map, data_type, boundary_strategy, replace_nodata_with):
     new_dict = {}
 
     for key, value in value_map.items():
@@ -30,9 +31,15 @@ def _reclassify(srdd, value_map, data_type, boundary_strategy):
             new_dict[key] = value
 
     if data_type is int:
-        return srdd.reclassify(new_dict, boundary_strategy)
+        if not replace_nodata_with:
+            return srdd.reclassify(new_dict, boundary_strategy, NODATAINT)
+        else:
+            return srdd.reclassify(new_dict, boundary_strategy, replace_nodata_with)
     else:
-        return srdd.reclassifyDouble(new_dict, boundary_strategy)
+        if not replace_nodata_with:
+            return srdd.reclassifyDouble(new_dict, boundary_strategy, float('nan'))
+        else:
+            return srdd.reclassifyDouble(new_dict, boundary_strategy, replace_nodata_with)
 
 
 class RasterRDD(object):
@@ -199,7 +206,7 @@ def tile_to_layout(self, layer_metadata, resample_method=NEARESTNEIGHBOR):
         srdd = self.srdd.tileToLayout(json.dumps(layer_metadata), resample_method)
         return TiledRasterRDD(self.geopysc, self.rdd_type, srdd)
 
-    def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO):
+    def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO, replace_nodata_with=None):
         """Changes the cell values of a raster based on how the data is broken up.
 
         Args:
@@ -209,18 +216,23 @@ def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO):
                 ``float``.
             boundary_strategy (str, optional): How the cells should be classified along the breaks.
                 If unspecified, then ``LESSTHANOREQUALTO`` will be used.
+            replace_nodata_with (data_type, optional): The value NoData cells are changed to
+                during the reclassify; NoData cells are handled separately from ``value_map``.
+                If unspecified, NoData cells are preserved. A falsy value such as ``0`` is
+                currently treated as unspecified.
 
         NOTE:
-            Simbolizing a NoData value differs depending on if the ``data_type`` is an ``int`` or a
-            ``float``. For an ``int``, the constant ``NODATAINT`` can be used which represents the
-            NoData value for ``int`` in GeoTrellis. If ``float``, then ``float('nan')`` is used to
+            The NoData value depends on whether ``data_type`` is ``int`` or ``float``. For
+            ``int``, the constant ``NODATAINT`` represents the NoData value used by
+            GeoTrellis. For ``float``, ``float('nan')`` is used to
             represent NoData.
 
         Returns:
             :class:`~geopyspark.geotrellis.rdd.RasterRDD`
         """
 
-        srdd = _reclassify(self.srdd, value_map, data_type, boundary_strategy)
+        srdd = _reclassify(self.srdd, value_map, data_type, boundary_strategy, replace_nodata_with)
+
         return RasterRDD(self.geopysc, self.rdd_type, srdd)
 
 
@@ -517,7 +529,7 @@ def cost_distance(self, geometries, max_distance):
 
         return TiledRasterRDD(self.geopysc, self.rdd_type, srdd)
 
-    def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO):
+    def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO, replace_nodata_with=None):
         """Changes the cell values of a raster based on how the data is broken up.
 
         Args:
@@ -527,18 +539,23 @@ def reclassify(self, value_map, data_type, boundary_strategy=LESSTHANOREQUALTO):
                 ``float``.
             boundary_strategy (str, optional): How the cells should be classified along the breaks.
                 If unspecified, then ``LESSTHANOREQUALTO`` will be used.
+            replace_nodata_with (data_type, optional): The value NoData cells are changed to
+                during the reclassify; NoData cells are handled separately from ``value_map``.
+                If unspecified, NoData cells are preserved. A falsy value such as ``0`` is
+                currently treated as unspecified.
 
         NOTE:
-            Simbolizing a NoData value differs depending on if the ``data_type`` is an ``int`` or a
-            ``float``. For an ``int``, the constant ``NODATAINT`` can be used which represents the
-            NoData value for ``int`` in GeoTrellis. If ``float``, then ``float('nan')`` is used to
+            The NoData value depends on whether ``data_type`` is ``int`` or ``float``. For
+            ``int``, the constant ``NODATAINT`` represents the NoData value used by
+            GeoTrellis. For ``float``, ``float('nan')`` is used to
             represent NoData.
 
         Returns:
             :class:`~geopyspark.geotrellis.rdd.TiledRasterRDD`
         """
 
-        srdd = _reclassify(self.srdd, value_map, data_type, boundary_strategy)
+        srdd = _reclassify(self.srdd, value_map, data_type, boundary_strategy, replace_nodata_with)
+
         return TiledRasterRDD(self.geopysc, self.rdd_type, srdd)
 
     def _process_operation(self, value, operation):

diff --git a/geopyspark/tests/reclassify_test.py b/geopyspark/tests/reclassify_test.py
@@ -1,3 +1,4 @@
+import os
 import sys
 import math
 import numpy as np
@@ -148,6 +149,37 @@ def test_no_data_floats(self):
         for x in list(result.flatten()):
             self.assertTrue(math.isnan(x))
 
+    @pytest.mark.skipif('TRAVIS' in os.environ,
+                         reason="Encoding using methods in Main causes issues on Travis")
+    def test_ignore_no_data_ints(self):
+        arr = np.ones((1, 16, 16), int)
+        np.fill_diagonal(arr[0], NODATAINT)
+        tile = {'data': arr, 'no_data_value': NODATAINT}
+
+        rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent, tile)])
+        raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)
+
+        value_map = {1: 0}
+
+        result = raster_rdd.reclassify(value_map, int, replace_nodata_with=1).to_numpy_rdd().first()[1]['data']
+
+        self.assertTrue((result == np.identity(16, int)).all())
+
+    @pytest.mark.skipif('TRAVIS' in os.environ,
+                         reason="Encoding using methods in Main causes issues on Travis")
+    def test_ignore_no_data_floats(self):
+        arr = np.ones((1, 4, 4))
+        np.fill_diagonal(arr[0], float('nan'))
+        tile = {'data': arr, 'no_data_value': float('nan')}
+
+        rdd = BaseTestClass.geopysc.pysc.parallelize([(self.projected_extent, tile)])
+        raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)
+
+        value_map = {1.0: 0.0}
+
+        result = raster_rdd.reclassify(value_map, float, replace_nodata_with=1.0).to_numpy_rdd().first()[1]['data']
+
+        self.assertTrue((result == np.identity(4)).all())
 
 if __name__ == "__main__":
     unittest.main()