Skip to content

Commit

Permalink
Merge pull request #166 from jbouffard/type-conversion
Browse files Browse the repository at this point in the history
Convert Raster Type
  • Loading branch information
lossyrob committed May 15, 2017
2 parents cc012b3 + 75390fd commit 2370c60
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ abstract class TileRDD[K: ClassTag] {
protected def reclassifyDouble(reclassifiedRDD: RDD[(K, MultibandTile)]): TileRDD[_]
}


/**
* RDD of Rasters, untiled and unsorted
*/
Expand Down Expand Up @@ -156,13 +157,14 @@ abstract class RasterRDD[K: AvroRecordCodec: ClassTag] extends TileRDD[K] {
collectMetadata(layoutScheme, TileRDD.getCRS(crs))
}

protected def collectMetadata(layout: Either[LayoutScheme, LayoutDefinition], crs: Option[CRS]): String
/**
 * Converts the cells of every tile in this RDD to the requested cell type.
 *
 * The name is resolved through `CellType.fromName` once, before the
 * transformation is defined, rather than once per record inside the closure.
 * Any failure to resolve the name therefore surfaces when this method is
 * called instead of when the resulting RDD is eventually evaluated.
 *
 * @param newType name of the target cell type, in the form accepted by `CellType.fromName`
 * @return the RDD produced by `withRDD` from the converted records
 */
def convertDataType(newType: String): RasterRDD[_] = {
  val targetCellType = CellType.fromName(newType)
  withRDD(rdd.map { case (key, tile) => (key, tile.convert(targetCellType)) })
}

protected def collectMetadata(layout: Either[LayoutScheme, LayoutDefinition], crs: Option[CRS]): String
protected def cutTiles(layerMetadata: String, resampleMethod: String): TiledRasterRDD[_]

protected def tileToLayout(tileLayerMetadata: String, resampleMethod: String): TiledRasterRDD[_]

protected def reproject(target_crs: String, resampleMethod: String): RasterRDD[_]
protected def withRDD(result: RDD[(K, MultibandTile)]): RasterRDD[_]
}

class ProjectedRasterRDD(val rdd: RDD[(ProjectedExtent, MultibandTile)]) extends RasterRDD[ProjectedExtent] {
Expand Down Expand Up @@ -203,6 +205,9 @@ class ProjectedRasterRDD(val rdd: RDD[(ProjectedExtent, MultibandTile)]) extends

/**
 * Wraps already-reclassified records in a new ProjectedRasterRDD.
 *
 * @param reclassifiedRDD the records to wrap
 * @return a ProjectedRasterRDD over `reclassifiedRDD`
 */
def reclassifyDouble(reclassifiedRDD: RDD[(ProjectedExtent, MultibandTile)]): RasterRDD[ProjectedExtent] = {
  val wrapped = ProjectedRasterRDD(reclassifiedRDD)
  wrapped
}

/**
 * Wraps the given records in a new ProjectedRasterRDD.
 *
 * @param result the records to wrap
 * @return a ProjectedRasterRDD over `result`
 */
def withRDD(result: RDD[(ProjectedExtent, MultibandTile)]): RasterRDD[ProjectedExtent] = {
  val wrapped = ProjectedRasterRDD(result)
  wrapped
}
}


Expand Down Expand Up @@ -245,6 +250,9 @@ class TemporalRasterRDD(val rdd: RDD[(TemporalProjectedExtent, MultibandTile)])

/**
 * Wraps already-reclassified records in a new TemporalRasterRDD.
 *
 * @param reclassifiedRDD the records to wrap
 * @return a TemporalRasterRDD over `reclassifiedRDD`
 */
def reclassifyDouble(reclassifiedRDD: RDD[(TemporalProjectedExtent, MultibandTile)]): RasterRDD[TemporalProjectedExtent] = {
  val wrapped = TemporalRasterRDD(reclassifiedRDD)
  wrapped
}

/**
 * Wraps the given records in a new TemporalRasterRDD.
 *
 * @param result the records to wrap
 * @return a TemporalRasterRDD over `result`
 */
def withRDD(result: RDD[(TemporalProjectedExtent, MultibandTile)]): RasterRDD[TemporalProjectedExtent] = {
  val wrapped = TemporalRasterRDD(result)
  wrapped
}
}

object ProjectedRasterRDD {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ abstract class TiledRasterRDD[K: SpatialComponent: AvroRecordCodec: JsonFormat:
}
})

/**
 * Converts this tiled layer to the requested cell type.
 *
 * @param newType name of the target cell type, in the form accepted by `CellType.fromName`
 * @return the RDD produced by `withRDD` from the converted layer
 */
def convertDataType(newType: String): TiledRasterRDD[_] = {
  val targetCellType = CellType.fromName(newType)
  val convertedLayer = rdd.convert(targetCellType)
  withRDD(convertedLayer)
}

protected def withRDD(result: RDD[(K, MultibandTile)]): TiledRasterRDD[_]
}

Expand Down Expand Up @@ -358,6 +361,12 @@ class SpatialTiledRasterRDD(

/**
 * Builds a new SpatialTiledRasterRDD from the given records, reusing this
 * layer's zoom level and metadata.
 *
 * @param result the records for the new layer
 * @return a SpatialTiledRasterRDD over `result`
 */
def withRDD(result: RDD[(SpatialKey, MultibandTile)]): TiledRasterRDD[SpatialKey] = {
  val layer = MultibandTileLayerRDD(result, rdd.metadata)
  SpatialTiledRasterRDD(zoomLevel, layer)
}

/**
 * Re-wraps already-converted records as a SpatialTiledRasterRDD.
 *
 * No cell conversion happens here; the records are only paired with this
 * layer's zoom level and metadata.
 *
 * @param converted the records for the new layer
 * @return a SpatialTiledRasterRDD over `converted`
 */
def toInt(converted: RDD[(SpatialKey, MultibandTile)]): TiledRasterRDD[SpatialKey] = {
  val layer = MultibandTileLayerRDD(converted, rdd.metadata)
  SpatialTiledRasterRDD(zoomLevel, layer)
}

/**
 * Re-wraps already-converted records as a SpatialTiledRasterRDD.
 *
 * No cell conversion happens here; the records are only paired with this
 * layer's zoom level and metadata.
 *
 * @param converted the records for the new layer
 * @return a SpatialTiledRasterRDD over `converted`
 */
def toDouble(converted: RDD[(SpatialKey, MultibandTile)]): TiledRasterRDD[SpatialKey] = {
  val layer = MultibandTileLayerRDD(converted, rdd.metadata)
  SpatialTiledRasterRDD(zoomLevel, layer)
}
}


Expand Down Expand Up @@ -500,6 +509,12 @@ class TemporalTiledRasterRDD(

/**
 * Builds a new TemporalTiledRasterRDD from the given records, reusing this
 * layer's zoom level and metadata.
 *
 * @param result the records for the new layer
 * @return a TemporalTiledRasterRDD over `result`
 */
def withRDD(result: RDD[(SpaceTimeKey, MultibandTile)]): TiledRasterRDD[SpaceTimeKey] = {
  val layer = MultibandTileLayerRDD(result, rdd.metadata)
  TemporalTiledRasterRDD(zoomLevel, layer)
}

/**
 * Re-wraps already-converted records as a TemporalTiledRasterRDD.
 *
 * No cell conversion happens here; the records are only paired with this
 * layer's zoom level and metadata.
 *
 * @param converted the records for the new layer
 * @return a TemporalTiledRasterRDD over `converted`
 */
def toInt(converted: RDD[(SpaceTimeKey, MultibandTile)]): TiledRasterRDD[SpaceTimeKey] = {
  val layer = MultibandTileLayerRDD(converted, rdd.metadata)
  TemporalTiledRasterRDD(zoomLevel, layer)
}

/**
 * Re-wraps already-converted records as a TemporalTiledRasterRDD.
 *
 * No cell conversion happens here; the records are only paired with this
 * layer's zoom level and metadata.
 *
 * @param converted the records for the new layer
 * @return a TemporalTiledRasterRDD over `converted`
 */
def toDouble(converted: RDD[(SpaceTimeKey, MultibandTile)]): TiledRasterRDD[SpaceTimeKey] = {
  val layer = MultibandTileLayerRDD(converted, rdd.metadata)
  TemporalTiledRasterRDD(zoomLevel, layer)
}
}


Expand Down
12 changes: 9 additions & 3 deletions geopyspark/avroregistry.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Contains the various encoding/decoding methods to bring values to/from python from scala."""
import array
from bitstring import BitArray
from functools import partial
import numpy as np

Expand All @@ -13,11 +14,16 @@ class AvroRegistry(object):
def _tile_decoder(schema_dict):
cells = schema_dict['cells']

if isinstance(cells, bytes):
# cols and rows are opposite for GeoTrellis ArrayTiles and Numpy Arrays
cols = schema_dict['rows']
rows = schema_dict['cols']

if isinstance(cells, bytes) and cols * rows == len(cells):
cells = bytearray(cells)
elif isinstance(cells, bytes) and cols * rows != len(cells):
cells = bytearray(BitArray(cells))

# cols and rows are opposite for GeoTrellis ArrayTiles and Numpy Arrays
arr = np.array(cells).reshape(schema_dict['rows'], schema_dict['cols'])
arr = np.array(cells).reshape(cols, rows)

return arr

Expand Down
97 changes: 97 additions & 0 deletions geopyspark/geotrellis/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,100 @@

"""A classification strategy."""
EXACT = "Exact"


"""Represents Bit Cells."""
BOOLRAW = "boolraw"

"""Represents Byte Cells."""
INT8RAW = "int8raw"

"""Represents UByte Cells."""
UINT8RAW = "uint8raw"

"""Represents Short Cells."""
INT16RAW = "int16raw"

"""Represents UShort Cells."""
UINT16RAW = "uint16raw"

"""Represents Int Cells."""
INT32RAW = "int32raw"

"""Represents Float Cells."""
FLOAT32RAW = "float32raw"

"""Represents Double Cells."""
FLOAT64RAW = "float64raw"

"""Represents Bit Cells."""
BOOL = "bool"

"""Represents Byte Cells with constant NoData values."""
INT8 = "int8"

"""Represents UByte Cells with constant NoData values."""
UINT8 = "uint8"

"""Represents Short Cells with constant NoData values."""
INT16 = "int16"

"""Represents UShort Cells with constant NoData values."""
UINT16 = "uint16"

"""Represents Int Cells with constant NoData values."""
INT32 = "int32"

"""Represents Float Cells with constant NoData values."""
FLOAT32 = "float32"

"""Represents Double Cells with constant NoData values."""
FLOAT64 = "float64"

"""Represents Byte Cells with user defined NoData values."""
INT8UD = "int8ud"

"""Represents UByte Cells with user defined NoData values."""
UINT8UD = "uint8ud"

"""Represents Short Cells with user defined NoData values."""
INT16UD = "int16ud"

"""Represents UShort Cells with user defined NoData values."""
UINT16UD = "uint16ud"

"""Represents Int Cells with user defined NoData values."""
INT32UD = "int32ud"

"""Represents Float Cells with user defined NoData values."""
FLOAT32UD = "float32ud"

"""Represents Double Cells with user defined NoData values."""
FLOAT64UD = "float64ud"


# Every cell type name defined above, in declaration order.
CELL_TYPES = [
    BOOLRAW,
    INT8RAW,
    UINT8RAW,
    INT16RAW,
    UINT16RAW,
    INT32RAW,
    FLOAT32RAW,
    FLOAT64RAW,
    BOOL,
    INT8,
    UINT8,
    INT16,
    UINT16,
    INT32,
    FLOAT32,
    FLOAT64,
    INT8UD,
    UINT8UD,
    INT16UD,
    UINT16UD,
    INT32UD,
    FLOAT32UD,
    FLOAT64UD
]
33 changes: 32 additions & 1 deletion geopyspark/geotrellis/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
TILE,
SPATIAL,
LESSTHANOREQUALTO,
NODATAINT
NODATAINT,
CELL_TYPES
)
from geopyspark.geotrellis.neighborhoods import Neighborhood

Expand Down Expand Up @@ -151,6 +152,25 @@ def to_tiled_layer(self, extent=None, layout=None, crs=None, tile_size=256,
return self.tile_to_layout(self.collect_metadata(extent, layout, crs, tile_size),
resample_method)

def convert_data_type(self, new_type):
    """Converts the underlying, raster values to a new ``CellType``.

    Args:
        new_type (str): The string representation of the ``CellType`` to convert to. It is
            represented by a constant such as ``INT16``, ``FLOAT64UD``, etc.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.RasterRDD`

    Raises:
        ValueError: When an unsupported cell type is entered.
    """

    # NOTE(review): this is an exact-membership check, so any name that is not literally
    # listed in CELL_TYPES is rejected before it reaches the JVM -- confirm that is the
    # intended behaviour for user defined NoData types.
    if new_type not in CELL_TYPES:
        raise ValueError("{!r} is not a known Cell Type".format(new_type))

    return RasterRDD(self.geopysc, self.rdd_type, self.srdd.convertDataType(new_type))

def collect_metadata(self, extent=None, layout=None, crs=None, tile_size=256):
"""Iterate over RDD records and generates layer metadata desribing the contained rasters.
Expand Down Expand Up @@ -427,6 +447,17 @@ def to_numpy_rdd(self):
ser = self.geopysc.create_tuple_serializer(result._2(), key_type="Projected", value_type=TILE)
return self.geopysc.create_python_rdd(result._1(), ser)

def convert_data_type(self, new_type):
    """Converts the underlying, raster values to a new ``CellType``.

    Args:
        new_type (str): The string representation of the ``CellType`` to convert to. It is
            represented by a constant such as ``INT16``, ``FLOAT64UD``, etc.

    Returns:
        :class:`~geopyspark.geotrellis.rdd.TiledRasterRDD`
    """
    # The converted layer is still tiled, so it has to be wrapped in TiledRasterRDD
    # (not RasterRDD) for the tiled-layer methods to stay available on the result.
    return TiledRasterRDD(self.geopysc, self.rdd_type, self.srdd.convertDataType(new_type))

def reproject(self, target_crs, extent=None, layout=None, scheme=FLOAT, tile_size=256,
resolution_threshold=0.1, resample_method=NEARESTNEIGHBOR):
"""Reproject RDD as tiled raster layer, samples surrounding tiles.
Expand Down
27 changes: 26 additions & 1 deletion geopyspark/tests/geotiff_raster_rdd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
from os import walk, path
import rasterio
import pytest
import numpy as np

from geopyspark.geotrellis.constants import SPATIAL
from geopyspark.geotrellis.constants import SPATIAL, INT32, BOOLRAW
from geopyspark.tests.python_test_utils import geotiff_test_path
from geopyspark.geotrellis.geotiff_rdd import get
from geopyspark.geotrellis.rdd import RasterRDD
from geopyspark.tests.base_test_class import BaseTestClass


Expand Down Expand Up @@ -82,6 +84,29 @@ def test_to_tiled_raster(self):

self.assertDictEqual(tiled.layer_metadata, converted.layer_metadata)

def test_to_int(self):
    """Converting a float raster to ``INT32`` should yield integer numpy arrays."""
    arr = np.array([[0.4324323432124, 0.0, 0.0],
                    [1.0, 1.0, 1.0]], dtype=float)

    epsg_code = 3857
    extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 10.0, 'ymax': 10.0}
    projected_extent = {'extent': extent, 'epsg': epsg_code}

    tile = {'data': arr, 'no_data_value': float('nan')}
    # Use the extent built above; the test previously read ``self.projected_extent``,
    # which left these locals unused and relied on an attribute this test never sets.
    rdd = BaseTestClass.geopysc.pysc.parallelize([(projected_extent, tile)])
    raster_rdd = RasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd)

    converted = raster_rdd.convert_data_type(INT32)
    arr = converted.to_numpy_rdd().first()[1]['data']

    self.assertEqual(arr.dtype, np.int64)

def test_to_boolraw(self):
    """Converting the fixture RDD to ``BOOLRAW`` should decode to ``uint8`` numpy arrays."""
    bool_rdd = self.result.convert_data_type(BOOLRAW)
    first_record = bool_rdd.to_numpy_rdd().first()
    decoded = first_record[1]['data']

    self.assertEqual(decoded.dtype, np.uint8)


if __name__ == "__main__":
unittest.main()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ fastavro>=0.13.0
shapely>=1.6b3
rasterio>=1.0a7
setuptools
bitstring>=3.1.5
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
install_requires=[
'fastavro>=0.13.0',
'numpy>=1.8',
'shapely>=1.6b3'
'shapely>=1.6b3',
'bitstring>=3.1.5'
],
packages=[
'geopyspark',
Expand Down

0 comments on commit 2370c60

Please sign in to comment.