Skip to content

Commit

Permalink
Merge pull request #144 from jamesmcclain/feature/lookup
Browse files Browse the repository at this point in the history
Tile Lookup
  • Loading branch information
jamesmcclain committed Apr 28, 2017
2 parents 8b40067 + e80acdf commit d29ecb7
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ object PythonTranslator {
(data, schema)
}

def toPython[T : AvroRecordCodec](tiles: Seq[T]): (java.util.ArrayList[Array[Byte]], String) = {
val array_list: java.util.ArrayList[Array[Byte]] = new java.util.ArrayList()
val schema = implicitly[AvroRecordCodec[T]].schema.toString

tiles
.map({ v => AvroEncoder.toBinary(v, deflate = false) })
.foreach({ ar => array_list.add(ar) })

(array_list, schema)
}

def fromPython[T: AvroRecordCodec: ClassTag]
(rdd: RDD[Array[Byte]], schemaJson: Option[String] = None): RDD[T] = {
val schema = schemaJson.map { json => (new Schema.Parser).parse(json) }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ import spray.json._
import spray.json.DefaultJsonProtocol._

import org.apache.spark._
import org.apache.spark.rdd._
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.rdd._
import org.apache.spark.SparkContext._

import scala.reflect._
import scala.collection.JavaConverters._
Expand Down Expand Up @@ -199,6 +200,14 @@ class SpatialTiledRasterRDD(
val rdd: RDD[(SpatialKey, MultibandTile)] with Metadata[TileLayerMetadata[SpatialKey]]
) extends TiledRasterRDD[SpatialKey] {

def lookup(
col: Int,
row: Int
): (java.util.ArrayList[Array[Byte]], String) = {
val tiles = rdd.lookup(SpatialKey(col, row))
PythonTranslator.toPython(tiles)
}

def reproject(
layout: Either[LayoutScheme, LayoutDefinition],
crs: CRS,
Expand Down
29 changes: 29 additions & 0 deletions geopyspark/geotrellis/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,35 @@ def reproject(self, target_crs, extent=None, layout=None, scheme=FLOAT, tile_siz

return TiledRasterRDD(self.geopysc, self.rdd_type, srdd)

def lookup(self, col, row):
"""Return the value(s) in the image of a particular SpatialKey (given by col and row)
Args:
col (int): The SpatialKey column
row (int): The SpatialKey row
Returns: An array of numpy arrays (the tiles)
"""
if self.rdd_type != SPATIAL:
raise ValueError("Only TiledRasterRDDs with a rdd_type of Spatial can use lookup()")
bounds = self.layer_metadata['bounds']
min_col = bounds['minKey']['col']
min_row = bounds['minKey']['row']
max_col = bounds['maxKey']['col']
max_row = bounds['maxKey']['row']

if col < min_col or col > max_col:
raise IndexError("column out of bounds")
if row < min_row or row > max_row:
raise IndexError("row out of bounds")

tup = self.srdd.lookup(col, row)
array_of_tiles = tup._1()
schema = tup._2()
ser = self.geopysc.create_value_serializer(schema, TILE)

return [ser.loads(tile)[0] for tile in array_of_tiles]

def tile_to_layout(self, layout, resample_method=NEARESTNEIGHBOR):
"""Cut tiles to layout and merge overlapping tiles. This will produce unique keys.
Expand Down
2 changes: 1 addition & 1 deletion geopyspark/tests/base_test_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

class BaseTestClass(unittest.TestCase):
if 'TRAVIS' in os.environ:
master_str = "local[1]"
master_str = "local[2]"
else:
master_str = "local[*]"
geopysc = GeoPyContext(master=master_str, appName="test")
Expand Down
71 changes: 71 additions & 0 deletions geopyspark/tests/lookup_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
import unittest
import numpy as np

import pytest

from geopyspark.geotrellis.constants import ZOOM
from geopyspark.tests.base_test_class import BaseTestClass
from geopyspark.geotrellis.rdd import TiledRasterRDD
from geopyspark.geotrellis.constants import SPATIAL


class LookupTest(BaseTestClass):
data = np.array([[
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0, 0.0]]])

layer = [({'row': 0, 'col': 0}, {'no_data_value': -1.0, 'data': data + 0}),
({'row': 1, 'col': 0}, {'no_data_value': -1.0, 'data': data + 1}),
({'row': 0, 'col': 1}, {'no_data_value': -1.0, 'data': data + 2}),
({'row': 1, 'col': 1}, {'no_data_value': -1.0, 'data': data + 3})]
rdd = BaseTestClass.geopysc.pysc.parallelize(layer)

extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 33.0, 'ymax': 33.0}
layout = {'layoutCols': 2, 'layoutRows': 2, 'tileCols': 5, 'tileRows': 5}
metadata = {'cellType': 'float32ud-1.0',
'extent': extent,
'crs': '+proj=longlat +datum=WGS84 +no_defs ',
'bounds': {
'minKey': {'col': 0, 'row': 0},
'maxKey': {'col': 1, 'row': 1}},
'layoutDefinition': {
'extent': extent,
'tileLayout': {'tileCols': 5, 'tileRows': 5, 'layoutCols': 2, 'layoutRows': 2}}}

raster_rdd = TiledRasterRDD.from_numpy_rdd(BaseTestClass.geopysc, SPATIAL, rdd, metadata)

@pytest.fixture(autouse=True)
def tearDown(self):
yield
BaseTestClass.geopysc.pysc._gateway.close()

def test_lookup_1(self):
result = self.raster_rdd.lookup(0, 0)[0]
n = np.sum(result['data'])
self.assertEqual(n, 24 + 0*25)

def test_lookup_2(self):
result = self.raster_rdd.lookup(0, 1)[0]
n = np.sum(result['data'])
self.assertEqual(n, 24 + 1*25)

def test_lookup_3(self):
result = self.raster_rdd.lookup(1, 0)[0]
n = np.sum(result['data'])
self.assertEqual(n, 24 + 2*25)

def test_lookup_4(self):
result = self.raster_rdd.lookup(1, 1)[0]
n = np.sum(result['data'])
self.assertEqual(n, 24 + 3*25)

def test_lookup_5(self):
with pytest.raises(IndexError):
result = self.raster_rdd.lookup(13, 33)

if __name__ == "__main__":
unittest.main()

0 comments on commit d29ecb7

Please sign in to comment.