diff --git a/docs/tutorials/kernel-density.md b/docs/tutorials/kernel-density.md index b4fcf48994..e3d41750d1 100644 --- a/docs/tutorials/kernel-density.md +++ b/docs/tutorials/kernel-density.md @@ -24,22 +24,22 @@ Features, in general, combine a geometry with an attribute value in a type-safe way. ```scala - import geotrellis.vector._ - import scala.util._ - - val extent = Extent(-109, 37, -102, 41) // Extent of Colorado - - def randomPointFeature(extent: Extent): PointFeature[Double] = { - def randInRange (low: Double, high: Double): Double = { - val x = Random.nextDouble - low * (1-x) + high * x - } - Feature(Point(randInRange(extent.xmin, extent.xmax), // the geometry - randInRange(extent.ymin, extent.ymax)), - Random.nextInt % 16 + 16) // the weight (attribute) +import geotrellis.vector._ +import scala.util._ + +val extent = Extent(-109, 37, -102, 41) // Extent of Colorado + +def randomPointFeature(extent: Extent): PointFeature[Double] = { + def randInRange (low: Double, high: Double): Double = { + val x = Random.nextDouble + low * (1-x) + high * x } + Feature(Point(randInRange(extent.xmin, extent.xmax), // the geometry + randInRange(extent.ymin, extent.ymax)), + Random.nextInt % 16 + 16) // the weight (attribute) +} - val pts = (for (i <- 1 to 1000) yield randomPointFeature(extent)).toList +val pts = (for (i <- 1 to 1000) yield randomPointFeature(extent)).toList ``` The choice of extent is largely arbitrary in this example, but note that the @@ -55,14 +55,14 @@ will generate the desired CRS. Next, we will create a tile containing the kernel density estimate: ```scala - import geotrellis.raster.mapalgebra.focal.Kernel +import geotrellis.raster.mapalgebra.focal.Kernel - val kernelWidth: Int = 9 +val kernelWidth: Int = 9 - /* Gaussian kernel with std. deviation 1.5, amplitude 25 */ - val kern: Kernel = Kernel.gaussian(kernelWidth, 1.5, 25) +/* Gaussian kernel with std. deviation 1.5, amplitude 25 */ +val kern: Kernel = Kernel.gaussian(kernelWidth, 1.5, 25) - val kde: Tile = pts.kernelDensity(kern, RasterExtent(extent, 700, 400)) +val kde: Tile = pts.kernelDensity(kern, RasterExtent(extent, 700, 400)) ``` This populates a 700x400 tile with the desired kernel density estimate. In @@ -72,14 +72,14 @@ to PNG or TIFF. In the following snippet, a PNG is created in the directory are mapped to colors that smoothly interpolate from blue to yellow to red. ```scala - import geotrellis.raster.render._ +import geotrellis.raster.render._ - val colorMap = ColorMap( - (0 to kde.findMinMax._2 by 4).toArray, - ColorRamps.HeatmapBlueToYellowToRedSpectrum - ) +val colorMap = ColorMap( + (0 to kde.findMinMax._2 by 4).toArray, + ColorRamps.HeatmapBlueToYellowToRedSpectrum +) - kde.renderPng(colorMap).write("test.png") +kde.renderPng(colorMap).write("test.png") ``` The advantage of using a TIFF output is that it will be tagged with the @@ -88,9 +88,9 @@ viewing program such as QGIS. That output is generated by the following statements. ```scala - import geotrellis.raster.io.geotiff._ +import geotrellis.raster.io.geotiff._ - GeoTiff(kde, extent, LatLng).write("test.tif") +GeoTiff(kde, extent, LatLng).write("test.tif") ``` Subdividing Tiles @@ -109,7 +109,7 @@ in space, but we must now specify how that extent is broken up into a grid of `Tile`s. This requires a statement of the form: ```scala - val tl = TileLayout(7, 4, 100, 100) +val tl = TileLayout(7, 4, 100, 100) ``` Here, we have specified a 7x4 grid of Tiles, each of which has 100x100 @@ -118,7 +118,7 @@ Tile (`kde`) into 28 uniformly-sized subtiles. The TileLayout is then combined with the extent in a `LayoutDefinition` object: ```scala - val ld = LayoutDefinition(extent, tl) +val ld = LayoutDefinition(extent, tl) ``` In preparation for reimplementing the previous kernel density estimation @@ -135,18 +135,18 @@ By incorporating all these ideas, we can create the following function to generate the extent of the kernel centered at a given point: ```scala - import geotrellis.spark.tiling._ +import geotrellis.spark.tiling._ - def pointFeatureToExtent[D](krnwdth: Double, ld: LayoutDefinition, ptf: PointFeature[D]): Extent = { - val p = ptf.geom +def pointFeatureToExtent[D](kwidth: Double, ld: LayoutDefinition, ptf: PointFeature[D]): Extent = { + val p = ptf.geom - Extent(p.x - kernelWidth * ld.cellwidth / 2, - p.y - kernelWidth * ld.cellheight / 2, - p.x + kernelWidth * ld.cellwidth / 2, - p.y + kernelWidth * ld.cellheight / 2) - } + Extent(p.x - kwidth * ld.cellwidth / 2, + p.y - kwidth * ld.cellheight / 2, + p.x + kwidth * ld.cellwidth / 2, + p.y + kwidth * ld.cellheight / 2) +} - def ptfToExtent[D](p: PointFeature[D]) = pointFeatureToExtent(9, ld, p) +def ptfToExtent[D](p: PointFeature[D]) = pointFeatureToExtent(9, ld, p) ``` When we consider the kernel extent of a point in the context of a @@ -159,17 +159,17 @@ in a given layout are indexed by `SpatialKey`s, which are effectively `(Int,Int)` pairs giving the (column,row) of each tile as follows: ``` - +-------+-------+-------+ +-------+ - | (0,0) | (1,0) | (2,0) | . . . | (6,0) | - +-------+-------+-------+ +-------+ - | (0,1) | (1,1) | (2,1) | . . . | (6,1) | - +-------+-------+-------+ +-------+ - . . . . . - . . . . . - . . . . . - +-------+-------+-------+ +-------+ - | (0,3) | (1,3) | (2,3) | . . . | (6,3) | - +-------+-------+-------+ +-------+ ++-------+-------+-------+ +-------+ +| (0,0) | (1,0) | (2,0) | . . . | (6,0) | ++-------+-------+-------+ +-------+ +| (0,1) | (1,1) | (2,1) | . . . | (6,1) | ++-------+-------+-------+ +-------+ + . . . . . + . . . . . + . . . . . ++-------+-------+-------+ +-------+ +| (0,3) | (1,3) | (2,3) | . . . | (6,3) | ++-------+-------+-------+ +-------+ ``` Specifically, in our running example, @@ -186,34 +186,34 @@ on each subtile, as indexed by their SpatialKeys. The following snippet accomplishes that. ```scala - def ptfToSpatialKey[D](ptf: PointFeature[D]): Seq[(SpatialKey,PointFeature[D])] = { - val ptextent = ptfToExtent(ptf) - val gridBounds = ld.mapTransform(ptextent) - - for { - (c, r) <- gridBounds.coords - if r < tl.totalRows - if c < tl.totalCols - } yield (SpatialKey(c,r), ptf) - } - - val keyfeatures: Map[SpatialKey, List[PointFeature[Double]]] = - pts - .flatMap(ptfToSpatialKey) - .groupBy(_._1) - .map { case (sk, v) => (sk, v.unzip._2) } +def ptfToSpatialKey[D](ptf: PointFeature[D]): Seq[(SpatialKey,PointFeature[D])] = { + val ptextent = ptfToExtent(ptf) + val gridBounds = ld.mapTransform(ptextent) + + for { + (c, r) <- gridBounds.coords + if r < tl.totalRows + if c < tl.totalCols + } yield (SpatialKey(c,r), ptf) +} + +val keyfeatures: Map[SpatialKey, List[PointFeature[Double]]] = + pts + .flatMap(ptfToSpatialKey) + .groupBy(_._1) + .map { case (sk, v) => (sk, v.unzip._2) } ``` Now, all the subtiles may be generated in the same fashion as the monolithic tile case above. ```scala - val keytiles = keyfeatures.map { case (sk, pfs) => - (sk, pfs.kernelDensity( - kern, - RasterExtent(ld.mapTransform(sk), tl.tileDimensions._1, tl.tileDimensions._2) - )) - } +val keytiles = keyfeatures.map { case (sk, pfs) => + (sk, pfs.kernelDensity( + kern, + RasterExtent(ld.mapTransform(sk), tl.tileDimensions._1, tl.tileDimensions._2) + )) +} ``` Finally, it is necessary to combine the results. Note that, in order to @@ -223,18 +223,18 @@ the full range. This is only necessary if it is important to generate a tile that spans the full extent. ```scala - import geotrellis.spark.stitch.TileLayoutStitcher - - val tileList = - for { - r <- 0 until ld.layoutRows - c <- 0 until ld.layoutCols - } yield { - val k = SpatialKey(c,r) - (k, keytiles.getOrElse(k, IntArrayTile.empty(tl.tileCols, tl.tileRows))) - } - - val stitched = TileLayoutStitcher.stitch(tileList)._1 +import geotrellis.spark.stitch.TileLayoutStitcher + +val tileList = + for { + r <- 0 until ld.layoutRows + c <- 0 until ld.layoutCols + } yield { + val k = SpatialKey(c,r) + (k, keytiles.getOrElse(k, IntArrayTile.empty(tl.tileCols, tl.tileRows))) + } + +val stitched = TileLayoutStitcher.stitch(tileList)._1 ``` It is now the case that `stitched` is identical to `kde.` @@ -262,19 +262,19 @@ will call the `parallelize()` method on a `SparkContext` object, which can be generated by the following set of statements. ```scala - import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.{SparkConf, SparkContext} - val conf = new SparkConf().setMaster("local").setAppName("Kernel Density") - val sc = new SparkContext(conf) +val conf = new SparkConf().setMaster("local").setAppName("Kernel Density") +val sc = new SparkContext(conf) ``` In our case, we have a collection of PointFeatures that we wish to parallelize, so we issue the command ```scala - import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.RDD - val pointRdd = sc.parallelize(pts, 10) +val pointRdd = sc.parallelize(pts, 10) ``` Here, the 10 indicates that we want to distribute the data, as 10 @@ -306,43 +306,50 @@ accumulated states of type `T`. In our case, `U = PointFeature[Double]` and the merging function is a tile adder. ```scala - import geotrellis.raster.density.KernelStamper - - def stampPointFeature(tile: MutableArrayTile, tup: (SpatialKey, PointFeature[Double])): MutableArrayTile = { - val (spatialKey, pointFeature) = tup - val tileExtent = ld.mapTransform(spatialKey) - val re = RasterExtent(tileExtent, tile) - val result = tile.copy.asInstanceOf[MutableArrayTile] - KernelStamper(result, kern).stampKernelDouble(re.mapToGrid(pointFeature.geom), pointFeature.data) - result - } - - import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp - - object Adder extends LocalTileBinaryOp { - def combine(z1: Int, z2: Int) = { - if (isNoData(z1)) { - z2 - } else if (isNoData(z2)) { - z1 - } else { - z1 + z2 - } - } - def combine(r1: Double, r2:Double) = { - if (isNoData(r1)) { - r2 - } else if (isNoData(r2)) { - r1 - } else { - r1 + r2 - } - } - } - - def sumTiles(t1: MutableArrayTile, t2: MutableArrayTile): MutableArrayTile = { - Adder(t1, t2).asInstanceOf[MutableArrayTile] - } + import geotrellis.raster.density.KernelStamper + + def stampPointFeature( + tile: MutableArrayTile, + tup: (SpatialKey, PointFeature[Double]) + ): MutableArrayTile = { + val (spatialKey, pointFeature) = tup + val tileExtent = ld.mapTransform(spatialKey) + val re = RasterExtent(tileExtent, tile) + val result = tile.copy.asInstanceOf[MutableArrayTile] + + KernelStamper(result, kern) + .stampKernelDouble(re.mapToGrid(pointFeature.geom), pointFeature.data) + + result + } + + import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp + + object Adder extends LocalTileBinaryOp { + def combine(z1: Int, z2: Int) = { + if (isNoData(z1)) { + z2 + } else if (isNoData(z2)) { + z1 + } else { + z1 + z2 + } + } + + def combine(r1: Double, r2:Double) = { + if (isNoData(r1)) { + r2 + } else if (isNoData(r2)) { + r1 + } else { + r1 + r2 + } + } + } + + def sumTiles(t1: MutableArrayTile, t2: MutableArrayTile): MutableArrayTile = { + Adder(t1, t2).asInstanceOf[MutableArrayTile] + } ``` Note that we require a custom Adder implementation because the built-in tile @@ -353,16 +360,17 @@ Creating the desired result is then a matter of the following series of operations on `pointRdd`: ```scala - val tileRdd: RDD[(SpatialKey, Tile)] = - pointRdd - .flatMap(ptfToSpatialKey) - .mapPartitions({ partition => - partition.map { case (spatialKey, pointFeature) => - (spatialKey, (spatialKey, pointFeature)) - } - }, preservesPartitioning = true) - .aggregateByKey(ArrayTile.empty(DoubleCellType, ld.tileCols, ld.tileRows))(stampPointFeature, sumTiles) - .mapValues{ tile: MutableArrayTile => tile.asInstanceOf[Tile] } +val tileRdd: RDD[(SpatialKey, Tile)] = + pointRdd + .flatMap(ptfToSpatialKey) + .mapPartitions({ partition => + partition.map { case (spatialKey, pointFeature) => + (spatialKey, (spatialKey, pointFeature)) + } + }, preservesPartitioning = true) + .aggregateByKey(ArrayTile.empty(DoubleCellType, ld.tileCols, ld.tileRows)) + (stampPointFeature, sumTiles) + .mapValues{ tile: MutableArrayTile => tile.asInstanceOf[Tile] } ``` The `mapPartitions` operation simply applies a transformation to an RDD @@ -375,13 +383,13 @@ to carry along a Metadata object that describes the context of the RDD. This is created like so: ```scala - val metadata = TileLayerMetadata(DoubleCellType, - ld, - ld.extent, - LatLng, - KeyBounds(SpatialKey(0,0), - SpatialKey(ld.layoutCols-1, - ld.layoutRows-1))) +val metadata = TileLayerMetadata(DoubleCellType, + ld, + ld.extent, + LatLng, + KeyBounds(SpatialKey(0,0), + SpatialKey(ld.layoutCols-1, + ld.layoutRows-1))) ``` To combine the RDD and the metadata, we write `val resultRdd = ContextRDD(tileRdd, metadata)`. @@ -420,26 +428,26 @@ of that project, execute the provided `sbt` script. Once SBT is loaded, the following commands can be executed: ```scala - project spark-etl - assembly +project spark-etl +assembly ``` This packages the required class files into a JAR file. Now, again from the GeoTrellis source tree root directory, issue the command ```bash - spark-shell --jars spark-etl/target/scala-2.11/geotrellis-spark-etl-assembly-[version].jar +spark-shell --jars spark-etl/target/scala-2.11/geotrellis-spark-etl-assembly-[version].jar ``` From the resulting interpreter prompt, perform the following imports: ```scala - import geotrellis.raster._ - import geotrellis.vector._ - import geotrellis.proj4._ - import geotrellis.spark._ - import geotrellis.spark.util._ - import geotrellis.spark.tiling._ +import geotrellis.raster._ +import geotrellis.vector._ +import geotrellis.proj4._ +import geotrellis.spark._ +import geotrellis.spark.util._ +import geotrellis.spark.tiling._ ``` It should then be possible to input the example code from above (excluding