Skip to content

Commit

Permalink
Merge pull request #89 from jamesmcclain/feature/cost-distance
Browse files Browse the repository at this point in the history
Add Support For Cost-Distance and Stitch
  • Loading branch information
jamesmcclain committed Apr 6, 2017
2 parents 5dc51d4 + 984256c commit bc4d5ec
Show file tree
Hide file tree
Showing 7 changed files with 463 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package geopyspark.geotrellis.spark.costdistance

import geopyspark.geotrellis._

import geotrellis.raster._
import geotrellis.spark._
import geotrellis.spark.io._
import geotrellis.spark.io.avro._
import geotrellis.vector._
import geotrellis.vector.io.wkt.WKT
import geotrellis.spark.costdistance.IterativeCostDistance

import spray.json._

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.SparkContext

import scala.collection.JavaConverters._
import scala.reflect.ClassTag


object CostDistanceWrapper {

implicit def convertion(k: SpaceTimeKey): SpatialKey =
k.spatialKey

private def costdistance[K: (? => SpatialKey): ClassTag: AvroRecordCodec: JsonFormat](
javaRdd: JavaRDD[Array[Byte]],
schema: String,
metadata: TileLayerMetadata[K],
geometries: Seq[Geometry],
maxDistance: Double,
_sc: SparkContext
): (JavaRDD[Array[Byte]], String) = {
implicit val sc = _sc
val _rdd = PythonTranslator.fromPython[(K, MultibandTile)](javaRdd, Some(schema))
val rdd = ContextRDD(
_rdd.map({ case (k, v) => (k, v.band(0)) }),
metadata
)

PythonTranslator.toPython(IterativeCostDistance(rdd, geometries, maxDistance))
}

def costdistance(
keyType: String,
javaRdd: JavaRDD[Array[Byte]],
schema: String,
metadataStr: String,
wkts: java.util.ArrayList[String],
maxDistance: java.lang.Double,
sc: SparkContext
): (JavaRDD[Array[Byte]], String) = {
val geometries = wkts.asScala.map({ wkt => WKT.read(wkt) })

keyType match {
case "SpatialKey" => {
val metadataAST = metadataStr.parseJson
val metadata = metadataAST.convertTo[TileLayerMetadata[SpatialKey]]
costdistance[SpatialKey](javaRdd, schema, metadata, geometries, maxDistance, sc)
}
case "SpaceTimeKey" => {
val metadataAST = metadataStr.parseJson
val metadata = metadataAST.convertTo[TileLayerMetadata[SpaceTimeKey]]
costdistance[SpaceTimeKey](javaRdd, schema, metadata, geometries, maxDistance, sc)
}
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package geopyspark.geotrellis.spark.stitch

import geopyspark.geotrellis._

import geotrellis.raster._
import geotrellis.spark._
import geotrellis.spark.io._
import geotrellis.spark.io.avro._
import geotrellis.spark.stitch.TileLayoutStitcher

import spray.json._

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.SparkContext

import scala.collection.JavaConverters._
import scala.reflect.ClassTag


object StitchWrapper {

def stitch(
javaRdd: JavaRDD[Array[Byte]],
schema: String,
metadataStr: String
): (Array[Byte], String) = {
val metadataAST = metadataStr.parseJson
val metadata = metadataAST.convertTo[TileLayerMetadata[SpatialKey]]
val rdd = ContextRDD(
PythonTranslator
.fromPython[(SpatialKey, MultibandTile)](javaRdd, Some(schema))
.map({ case (k, v) => (k, v.band(0)) }),
metadata
)
val tile: Tile = rdd.stitch.tile

PythonTranslator.toPython[Tile](tile)
}

}
8 changes: 8 additions & 0 deletions geopyspark/geopycontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ def _rdd_reprojector(self):
def rdd_focal(self):
return self._jvm.geopyspark.geotrellis.spark.focal.FocalWrapper

@property
def rdd_stitch(self):
return self._jvm.geopyspark.geotrellis.spark.stitch.StitchWrapper

@property
def rdd_costdistance(self):
return self._jvm.geopyspark.geotrellis.spark.costdistance.CostDistanceWrapper

@staticmethod
def map_key_input(key_type, is_boundable):
if is_boundable:
Expand Down
1 change: 1 addition & 0 deletions geopyspark/geopyspark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def setup_environment():
os.environ["PYSPARK_DRIVER_PYTHON"] = "python3"
os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars {} \
--conf spark.ui.enabled=false \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--driver-memory 8G \
--executor-memory 8G \
pyspark-shell".format(jar_string)
222 changes: 221 additions & 1 deletion geopyspark/geotrellis/tile_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
area information of the layer.
"""
import json
import shapely.wkt

from geopyspark.avroserializer import AvroSerializer
from geopyspark.geotrellis.constants import NEARESTNEIGHBOR, TILE, SPATIAL
Expand Down Expand Up @@ -978,7 +979,7 @@ def focal(geopysc,
neighborhood,
param1=0.0, param2=0.0, param3=0.0):

"""Reprojects the tiles within a RDD to a new projection.
"""Performs the given focal operation on the layer contained in the RDD.
The returned RDD is the result of applying the given focal
operation to the input RDD.
Expand Down Expand Up @@ -1109,3 +1110,222 @@ def focal(geopysc,
returned_rdd = geopysc.create_python_rdd(rdd, ser)

return returned_rdd

def costdistance(geopysc,
rdd_type,
keyed_rdd,
metadata,
geometries,
max_distance):

"""Perform cost distance with the given (friction) layer and the given input geometries.
The returned RDD contains the cost-distance layer RDD.
Args:
geopysc (GeoPyContext): The GeoPyContext being used this session.
rdd_type (str): What the spatial type of the geotiffs are. This is
represented by the constants: SPATIAL and SPACETIME. Note: All of the
GeoTiffs must have the same spatial type.
keyed_rdd (RDD): A RDD that contains tuples of dictionaries, (key, tile).
key (dict): The index of the tile within the layer. There are two different types
of keys, SpatialKeys and SpaceTimeKeys. SpatialKeys deal with data that have just
a spatial component, whereas SpaceTimeKeys are for data with both a spatial and
time component.
Both SpatialKeys and SpaceTimeKeys share these fields:
col (int): The column number of the grid, runs east to west.
row (int): The row number of the grid, runs north to south.
SpaceTimeKeys also have an additional field:
instant (int): The time stamp of the tile.
tile (dict): The data of the tile.
The fields to represent the tile:
data (np.ndarray): The tile data itself is represented as a 3D, numpy array.
Note, even if the data was originally singleband, it will be reformatted as
a multiband tile and read and saved as such.
no_data_value (optional): The no data value of the tile. Can be a range of
types including None.
metadata (dict): The metadata for this tile layer. This provides
the information needed to resample the old tiles and create new ones.
The fields that are used to represent the metadata:
cellType (str): The value type of every cell within the rasters.
layoutDefinition (dict): Defines the raster layout of the rasters.
The fields that are used to represent the layoutDefinition:
extent (dict): The area covered by the layout tiles.
tileLayout (dict): The tile layout of the rasters.
extent (dict): The extent that covers the tiles.
crs (str): The CRS that the rasters are projected in.
bounds (dict): Represents the positions of the tile layer tiles within a gird.
The fields that are used to represent the bounds:
minKey (dict): Represents where the tile layer begins in the gird.
maxKey (dict): Represents where the tile layer ends in the gird.
The fields that are used to represent the minKey and maxKey:
col (int): The column number of the grid, runs east to west.
row (int): The row number of the grid, runs north to south.
geometries (list): A list of shapely geometries to use as starting points (must be in the same CRS as the friction layer).
max_distance (float): The maximum cost that a path may reach before pruning.
Returns:
tuple: A tuple containing (rdd, metadata).
rdd (RDD): A layer containing the result of the focal operation.
key (dict): The index of the tile within the layer. There are two different
types of keys, SpatialKeys and SpaceTimeKeys. SpatialKeys deal with data that
have just a spatial component, whereas SpaceTimeKeys are for data with both a
spatial and time component.
Both SpatialKeys and SpaceTimeKeys share these fields:
col (int): The column number of the grid, runs east to west.
row (int): The row number of the grid, runs north to south.
SpaceTimeKeys also have an additional field:
instant (int): The time stamp of the tile.
tile (dict): The data of the tile.
The fields to represent the tile:
data (np.ndarray): The tile data itself is represented as a 3D, numpy
array. Note, even if the data was originally singleband, it will
be reformatted as a multiband tile and read and saved as such.
no_data_value (optional): The no data value of the tile. Can be a range of
types including None.
metadata (dict): The metadata for the RDD.
dict: The dictionary representation of the RDD's metadata.
The fields that are used to represent the metadata:
cellType (str): The value type of every cell within the rasters.
layoutDefinition (dict): Defines the raster layout of the rasters.
The fields that are used to represent the layoutDefinition:
extent (dict): The area covered by the layout tiles.
tileLayout (dict): The tile layout of the rasters.
extent (dict): The extent that covers the tiles.
crs (str): The CRS that the rasters are projected in.
bounds (dict): Represents the positions of the tile layer's tiles within
a gird. These positions are represented by keys. There are two
different types of keys, SpatialKeys and SpaceTimeKeys. SpatialKeys are
for data that only have a spatial component while SpaceTimeKeys are for
data with both spatial and temporal components.
Both SpatialKeys and SpaceTimeKeys share these fields:
The fields that are used to represent the bounds:
minKey (dict): Represents where the tile layer begins in the
gird.
maxKey (dict): Represents where the tile layer ends in the gird.
The fields that are used to represent the minKey and maxKey:
col (int): The column number of the grid, runs east to west.
row (int): The row number of the grid, runs north to south.
SpaceTimeKeys also have an additional field:
instant (int): The time stamp of the tile.
"""

costdistance_wrapper = geopysc.rdd_costdistance
key_type = geopysc.map_key_input(rdd_type, True)

(java_rdd, schema1) = _convert_to_java_rdd(geopysc, key_type, keyed_rdd)

wkts = [shapely.wkt.dumps(g) for g in geometries]

result = costdistance_wrapper.costdistance(key_type,
java_rdd,
schema1,
json.dumps(metadata),
wkts, max_distance,
geopysc.pysc._jsc.sc())

rdd = result._1()
schema2 = result._2()
ser = geopysc.create_tuple_serializer(schema2, value_type=TILE)
returned_rdd = geopysc.create_python_rdd(rdd, ser)

return returned_rdd

def stitch(geopysc,
rdd_type,
keyed_rdd,
metadata):

""" Stitch the tiles contained in the RDD into one tile.
Returns a tile.
Args:
geopysc (GeoPyContext): The GeoPyContext being used this session.
rdd_type (str): What the spatial type of the geotiffs are. This is
represented by the constants: SPATIAL and SPACETIME. Note: All of the
GeoTiffs must have the same spatial type.
keyed_rdd (RDD): A RDD that contains tuples of dictionaries, (key, tile).
key (dict): The index of the tile within the layer. There are two different types
of keys, SpatialKeys and SpaceTimeKeys. SpatialKeys deal with data that have just
a spatial component, whereas SpaceTimeKeys are for data with both a spatial and
time component.
Both SpatialKeys and SpaceTimeKeys share these fields:
col (int): The column number of the grid, runs east to west.
row (int): The row number of the grid, runs north to south.
SpaceTimeKeys also have an additional field:
instant (int): The time stamp of the tile.
tile (dict): The data of the tile.
The fields to represent the tile:
data (np.ndarray): The tile data itself is represented as a 3D, numpy array.
Note, even if the data was originally singleband, it will be reformatted as
a multiband tile and read and saved as such.
no_data_value (optional): The no data value of the tile. Can be a range of
types including None.
metadata (dict): The metadata for this tile layer. This provides
the information needed to resample the old tiles and create new ones.
The fields that are used to represent the metadata:
cellType (str): The value type of every cell within the rasters.
layoutDefinition (dict): Defines the raster layout of the rasters.
The fields that are used to represent the layoutDefinition:
extent (dict): The area covered by the layout tiles.
tileLayout (dict): The tile layout of the rasters.
extent (dict): The extent that covers the tiles.
crs (str): The CRS that the rasters are projected in.
bounds (dict): Represents the positions of the tile layer tiles within a gird.
The fields that are used to represent the bounds:
minKey (dict): Represents where the tile layer begins in the gird.
maxKey (dict): Represents where the tile layer ends in the gird.
The fields that are used to represent the minKey and maxKey:
col (int): The column number of the grid, runs east to west.
row (int): The row number of the grid, runs north to south.
Returns:
tile (dict): The data of the tile.
The fields to represent the tile:
data (np.ndarray): The tile data itself is represented as a 3D, numpy
array. Note, even if the data was originally singleband, it will
be reformatted as a multiband tile and read and saved as such.
no_data_value (optional): The no data value of the tile. Can be a range of
types including None.
"""

stitch_wrapper = geopysc.rdd_stitch
key_type = geopysc.map_key_input(rdd_type, True)
assert(key_type == 'SpatialKey')

(java_rdd, schema1) = _convert_to_java_rdd(geopysc, key_type, keyed_rdd)

result = stitch_wrapper.stitch(java_rdd,
schema1,
json.dumps(metadata))

tile = result._1()
schema2 = result._2()
ser = geopysc.create_value_serializer(schema2, TILE)

return ser.loads(tile)

0 comments on commit bc4d5ec

Please sign in to comment.