diff --git a/build.sbt b/build.sbt index a04cadd66..51991d480 100644 --- a/build.sbt +++ b/build.sbt @@ -59,7 +59,7 @@ lazy val core = project circe("generic").value, circe("parser").value, circe("generic-extras").value, - frameless excludeAll ExclusionRule("com.github.mpilquist", "simulacrum"), + frameless excludeAll ExclusionRule(organization = "com.github.mpilquist"), `jts-core`, `spray-json`, geomesa("z3").value, diff --git a/core/src/main/scala/geotrellis/raster/BufferTile.scala b/core/src/main/scala/geotrellis/raster/BufferTile.scala new file mode 100644 index 000000000..a6a473a22 --- /dev/null +++ b/core/src/main/scala/geotrellis/raster/BufferTile.scala @@ -0,0 +1,411 @@ +/* + * Copyright 2021 Azavea + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package geotrellis.raster + +import geotrellis.raster.mapalgebra.focal.BufferedFocalMethods + import spire.syntax.cfor._ + +/** + * When combined with another BufferTile the two tiles will be aligned on the (0, 0) pixel of the tile center. + * The operation will be carried over all overlapping pixels. + * For instance: combining a tile padded with 5 pixels on all sides with a tile padded with 3 pixels on all sides will + * result in a buffer tile with 3 pixel padding on all sides. + * + * When combined with another BufferTile the operation will be executed over the maximum shared interior (the overlap of the two buffered regions). + * + * TODO: + * - What should .map do? Map the buffer pixels or not? 
+ * - make the toString method friendly + * - does a mutable version make sense? + * - toBytes needs to encode padding size? + */ +case class BufferTile( + sourceTile: Tile, + gridBounds: GridBounds[Int] +) extends Tile { + require( + gridBounds.colMin >=0 && gridBounds.rowMin >= 0 && gridBounds.colMax < sourceTile.cols && gridBounds.rowMax < sourceTile.rows, + s"Tile center bounds $gridBounds exceed underlying tile dimensions ${sourceTile.dimensions}" + ) + + val cols: Int = gridBounds.width + val rows: Int = gridBounds.height + + val cellType: CellType = sourceTile.cellType + + private def colMin: Int = gridBounds.colMin + private def rowMin: Int = gridBounds.rowMin + private def sourceCols: Int = sourceTile.cols + private def sourceRows: Int = sourceTile.rows + + def bufferTop: Int = gridBounds.rowMin + def bufferLeft: Int = gridBounds.colMin + def bufferRight: Int = sourceTile.cols - gridBounds.colMin - gridBounds.colMax // NOTE(review): colMax is inclusive, so this looks like it should be sourceTile.cols - gridBounds.colMax - 1 — verify + def bufferBottom: Int = sourceTile.rows - gridBounds.rowMin - gridBounds.rowMax // NOTE(review): rowMax is inclusive, so this looks like it should be sourceTile.rows - gridBounds.rowMax - 1 — verify + + /** + * Returns a [[Tile]] equivalent to this tile, except with cells of + * the given type. + * + * @param targetCellType The type of cells that the result should have + * @return The new Tile + */ + def convert(targetCellType: CellType): Tile = + mutable(targetCellType) + + def withNoData(noDataValue: Option[Double]): BufferTile = + BufferTile(sourceTile.withNoData(noDataValue), gridBounds) + + def interpretAs(newCellType: CellType): BufferTile = + BufferTile(sourceTile.interpretAs(newCellType), gridBounds) + + /** + * Fetch the datum at the given column and row of the tile. 
+ * + * @param col The column + * @param row The row + * @return The Int datum found at the given location + */ + def get(col: Int, row: Int): Int = { + val c = col + colMin + val r = row + rowMin + if(c < 0 || r < 0 || c >= sourceCols || r >= sourceRows) { + throw new IndexOutOfBoundsException(s"(col=$col, row=$row) is out of tile bounds") + } else { + sourceTile.get(c, r) + } + } + + /** + * Fetch the datum at the given column and row of the tile. + * + * @param col The column + * @param row The row + * @return The Double datum found at the given location + */ + def getDouble(col: Int, row: Int): Double = { + val c = col + colMin + val r = row + rowMin + + if(c < 0 || r < 0 || c >= sourceCols || r >= sourceRows) { + throw new IndexOutOfBoundsException(s"(col=$col, row=$row) is out of tile bounds") + } else { + sourceTile.getDouble(col + gridBounds.colMin, row + gridBounds.rowMin) + } + } + + /** + * Another name for the 'mutable' method on this class. + * + * @return An [[ArrayTile]] + */ + def toArrayTile: ArrayTile = mutable + + /** + * Return the [[MutableArrayTile]] equivalent of this tile. + * + * @return An MutableArrayTile + */ + def mutable(): MutableArrayTile = + mutable(cellType) + + /** + * Return the [[MutableArrayTile]] equivalent of this tile. + * + * @return An MutableArrayTile + */ + def mutable(targetCellType: CellType): MutableArrayTile = { + val tile = ArrayTile.alloc(targetCellType, cols, rows) + + if(!cellType.isFloatingPoint) { + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + tile.set(col, row, get(col, row)) + } + } + } else { + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + tile.setDouble(col, row, getDouble(col, row)) + } + } + } + + tile + } + + /** + * Return the data behind this tile as an array of integers. 
+ * + * @return The copy as an Array[Int] + */ + def toArray: Array[Int] = { + val arr = Array.ofDim[Int](cols * rows) + + var i = 0 + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + arr(i) = get(col, row) + i += 1 + } + } + + arr + } + + /** + * Return the data behind this tile as an array of doubles. + * + * @return The copy as an Array[Double] + */ + def toArrayDouble: Array[Double] = { + val arr = Array.ofDim[Double](cols * rows) + + var i = 0 + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + arr(i) = getDouble(col, row) + i += 1 + } + } + + arr + } + + /** + * Return the underlying data behind this tile as an array. + * + * @return An array of bytes + */ + def toBytes(): Array[Byte] = toArrayTile.toBytes + + /** + * Execute a function on each cell of the tile. The function + * returns Unit, so it presumably produces side-effects. + * + * @param f A function from Int to Unit + */ + def foreach(f: Int => Unit): Unit = { + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + f(get(col, row)) + } + } + } + + /** + * Execute a function on each cell of the tile. The function + * returns Unit, so it presumably produces side-effects. + * + * @param f A function from Double to Unit + */ + def foreachDouble(f: Double => Unit): Unit = { + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + f(getDouble(col, row)) + } + } + } + + /** + * Execute an [[IntTileVisitor]] at each cell of the present tile. + * + * @param visitor An IntTileVisitor + */ + def foreachIntVisitor(visitor: IntTileVisitor): Unit = { + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + visitor(col, row, get(col, row)) + } + } + } + + /** + * Execute a [[DoubleTileVisitor]] at each cell of the present tile. 
+ * + * @param visitor An DoubleTileVisitor + */ + def foreachDoubleVisitor(visitor: DoubleTileVisitor): Unit = { + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + visitor(col, row, getDouble(col, row)) + } + } + } + + /** + * Map each cell in the given tile to a new one, using the given + * function. + * + * @param f A function from Int to Int, executed at each point of the tile + * @return The result, a [[Tile]] + */ + def map(f: Int => Int): BufferTile = mapTile(_.map(f)) + + /** + * Map each cell in the given tile to a new one, using the given + * function. + * + * @param f A function from Double to Double, executed at each point of the tile + * @return The result, a [[Tile]] + */ + def mapDouble(f: Double => Double): BufferTile = mapTile(_.mapDouble(f)) + + /** + * Map an [[IntTileMapper]] over the present tile. + * + * @param mapper The mapper + * @return The result, a [[Tile]] + */ + def mapIntMapper(mapper: IntTileMapper): BufferTile = mapTile(_.mapIntMapper(mapper)) + + /** + * Map an [[DoubleTileMapper]] over the present tile. 
+ * + * @param mapper The mapper + * @return The result, a [[Tile]] + */ + def mapDoubleMapper(mapper: DoubleTileMapper): Tile = mapTile(_.mapDoubleMapper(mapper)) + + private def combine(other: BufferTile)(f: (Int, Int) => Int): Tile = { + if((this.gridBounds.width != other.gridBounds.width) || (this.gridBounds.height != other.gridBounds.height)) { + throw new GeoAttrsError("Cannot combine rasters with different dimensions: " + + s"${this.gridBounds.width}x${this.gridBounds.height} != ${other.gridBounds.width}x${other.gridBounds.height}") + } + + val bufferTop = math.min(this.bufferTop, other.bufferTop) + val bufferLeft = math.min(this.bufferLeft, other.bufferLeft) + val bufferRight = math.min(this.bufferRight, other.bufferRight) + val bufferBottom = math.min(this.bufferBottom, other.bufferBottom) + val cols = bufferLeft + gridBounds.width + bufferRight + val rows = bufferTop + gridBounds.height + bufferBottom + + val tile = ArrayTile.alloc(cellType.union(other.cellType), cols, rows) + + // index both tiles relative to (0, 0) pixel + cfor(-bufferTop)(_ < gridBounds.height + bufferRight, _ + 1) { row => + cfor(-bufferLeft)(_ < gridBounds.width + bufferRight, _ + 1) { col => + val leftV = this.get(col, row) + val rightV = other.get(col, row) + tile.set(col + bufferLeft, row + bufferTop, f(leftV, rightV)) + } + } + + if (bufferTop + bufferLeft + bufferRight + bufferBottom == 0) + tile + else + BufferTile(tile, GridBounds[Int]( + colMin = bufferLeft, + rowMin = bufferTop, + colMax = bufferLeft + gridBounds.width - 1, + rowMax = bufferTop + gridBounds.height - 1 + )) + } + + def combineDouble(other: BufferTile)(f: (Double, Double) => Double): Tile = { + if((this.gridBounds.width != other.gridBounds.width) || (this.gridBounds.height != other.gridBounds.height)) { + throw new GeoAttrsError("Cannot combine rasters with different dimensions: " + + s"${this.gridBounds.width}x${this.gridBounds.height} != ${other.gridBounds.width}x${other.gridBounds.height}") + } + + val 
bufferTop = math.min(this.bufferTop, other.bufferTop) + val bufferLeft = math.min(this.bufferLeft, other.bufferLeft) + val bufferRight = math.min(this.bufferRight, other.bufferRight) + val bufferBottom = math.min(this.bufferBottom, other.bufferBottom) + val cols = bufferLeft + gridBounds.width + bufferRight + val rows = bufferTop + gridBounds.height + bufferBottom + + val tile = ArrayTile.alloc(cellType.union(other.cellType), cols, rows) + + // index both tiles relative to (0, 0) pixel — NOTE(review): the row loop bound uses bufferRight (same in the Int combine above); it presumably should be gridBounds.height + bufferBottom — verify + cfor(-bufferTop)(_ < gridBounds.height + bufferRight, _ + 1) { row => + cfor(-bufferLeft)(_ < gridBounds.width + bufferRight, _ + 1) { col => + val leftV = this.getDouble(col, row) + val rightV = other.getDouble(col, row) + tile.setDouble(col + bufferLeft, row + bufferTop, f(leftV, rightV)) + } + } + + if (bufferTop + bufferLeft + bufferRight + bufferBottom == 0) + tile + else + BufferTile(tile, GridBounds[Int]( + colMin = bufferLeft, + rowMin = bufferTop, + colMax = bufferLeft + gridBounds.width - 1, + rowMax = bufferTop + gridBounds.height - 1)) + } + + /** + * Combine two tiles' cells into new cells using the given integer + * function. For every (x, y) cell coordinate, get each of the + * tiles' integer values, map them to a new value, and assign it to + * the output's (x, y) cell. + * + * @param other The other Tile + * @param f A function from (Int, Int) to Int + * @return The result, a Tile + */ + def combine(other: Tile)(f: (Int, Int) => Int): Tile = { + (this, other).assertEqualDimensions + + other match { + case bt: BufferTile => this.combine(bt)(f) + case _ => + val tile = ArrayTile.alloc(cellType.union(other.cellType), cols, rows) + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + tile.set(col, row, f(get(col, row), other.get(col, row))) + } + } + tile + } + } + + /** + * Combine two tiles' cells into new cells using the given double + * function. 
For every (x, y) cell coordinate, get each of the + * tiles' double values, map them to a new value, and assign it to + * the output's (x, y) cell. + * + * @param other The other Tile + * @param f A function from (Double, Double) to Double + * @return The result, a Tile + */ + def combineDouble(other: Tile)(f: (Double, Double) => Double): Tile = { + (this, other).assertEqualDimensions + + other match { + case bt: BufferTile => + this.combineDouble(bt)(f) + case _ => + val tile = ArrayTile.alloc(cellType, cols, rows) // NOTE(review): the Int combine uses cellType.union(other.cellType) here — should this too? verify + cfor(0)(_ < rows, _ + 1) { row => + cfor(0)(_ < cols, _ + 1) { col => + tile.setDouble(col, row, f(getDouble(col, row), other.getDouble(col, row))) + } + } + tile + } + } + + def mapTile(f: Tile => Tile): BufferTile = BufferTile(f(sourceTile), gridBounds) +} + +object BufferTile { + implicit class BufferTileOps(val self: BufferTile) extends BufferedFocalMethods +} diff --git a/core/src/main/scala/geotrellis/raster/mapalgebra/focal/BufferedFocalMethods.scala b/core/src/main/scala/geotrellis/raster/mapalgebra/focal/BufferedFocalMethods.scala new file mode 100644 index 000000000..bf1987d66 --- /dev/null +++ b/core/src/main/scala/geotrellis/raster/mapalgebra/focal/BufferedFocalMethods.scala @@ -0,0 +1,90 @@ +/* + * Copyright 2021 Azavea + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package geotrellis.raster.mapalgebra.focal + +import geotrellis.raster._ +import geotrellis.util.MethodExtensions + +trait BufferedFocalMethods extends MethodExtensions[BufferTile] { + + /** Computes the minimum value of a neighborhood */ + def focalMin(n: Neighborhood, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.focalMin(n, bounds, target)) + + /** Computes the maximum value of a neighborhood */ + def focalMax(n: Neighborhood, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.focalMax(n, bounds, target)) + + /** Computes the mode of a neighborhood */ + def focalMode(n: Neighborhood, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.focalMode(n, bounds, target)) + + /** Computes the median of a neighborhood */ + def focalMedian(n: Neighborhood, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.focalMedian(n, bounds, target)) + + /** Computes the mean of a neighborhood */ + def focalMean(n: Neighborhood, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.focalMean(n, bounds, target)) + + /** Computes the sum of a neighborhood */ + def focalSum(n: Neighborhood, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.focalSum(n, bounds, target)) + + /** Computes the standard deviation of a neighborhood */ + def focalStandardDeviation(n: Neighborhood, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.focalStandardDeviation(n, bounds, target)) + + /** Computes the next step of Conway's Game of Life */ + def focalConway(bounds: Option[GridBounds[Int]] = None): BufferTile = + self.mapTile(_.focalConway(bounds)) + + /** Computes the convolution of the raster for the 
given kernel */ + def convolve(kernel: Kernel, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.convolve(kernel, bounds, target)) + + /** + * Calculates spatial autocorrelation of cells based on the + * similarity to neighboring values. + */ + def tileMoransI(n: Neighborhood, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.tileMoransI(n, bounds, target)) + + /** + * Calculates global spatial autocorrelation of a raster based on + * the similarity to neighboring values. + */ + def scalarMoransI(n: Neighborhood, bounds: Option[GridBounds[Int]] = None): Double = + self.sourceTile.scalarMoransI(n, bounds) + + /** + * Calculates the slope of each cell in a raster. + * + * @param cs cellSize of the raster + * @param zFactor Number of map units to one elevation unit. + */ + def slope(cs: CellSize, zFactor: Double = 1.0, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.slope(cs, zFactor, bounds, target)) + + /** + * Calculates the aspect of each cell in a raster. 
+ * + * @param cs cellSize of the raster + */ + def aspect(cs: CellSize, bounds: Option[GridBounds[Int]] = None, target: TargetCell = TargetCell.All): BufferTile = + self.mapTile(_.aspect(cs, bounds, target)) +} diff --git a/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala b/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala index 6c4f38654..4c8fa341e 100644 --- a/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala +++ b/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala @@ -20,11 +20,13 @@ */ package org.apache.spark.sql.rf -import geotrellis.raster._ + +import geotrellis.raster.{ArrayTile, BufferTile, CellType, ConstantTile, GridBounds, Tile} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.parquet.ParquetReadSupport -import org.apache.spark.sql.types.{DataType, _} +import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String +import org.locationtech.rasterframes._ import org.locationtech.rasterframes.encoders.syntax._ import org.locationtech.rasterframes.ref.RasterRef import org.locationtech.rasterframes.tiles.{ProjectedRasterTile, ShowableTile} @@ -49,6 +51,7 @@ class TileUDT extends UserDefinedType[Tile] { StructField("cols", IntegerType, false), StructField("rows", IntegerType, false), StructField("cells", BinaryType, true), + StructField("gridBounds", gridBoundsEncoder[Int].schema, true), // make it parquet compliant, only expanded UDTs can be in a UDT schema StructField("ref", ParquetReadSupport.expandUDT(RasterRef.rasterRefEncoder.schema), true) )) @@ -59,22 +62,26 @@ class TileUDT extends UserDefinedType[Tile] { // TODO: review matches there case ref: RasterRef => val ct = UTF8String.fromString(ref.cellType.toString()) - InternalRow(ct, ref.cols, ref.rows, null, ref.toInternalRow) + InternalRow(ct, ref.cols, ref.rows, null, null, ref.toInternalRow) case ProjectedRasterTile(ref: RasterRef, _, _) => val ct = UTF8String.fromString(ref.cellType.toString()) - 
InternalRow(ct, ref.cols, ref.rows, null, ref.toInternalRow) + InternalRow(ct, ref.cols, ref.rows, null, null, ref.toInternalRow) case prt: ProjectedRasterTile => val tile = prt.tile val ct = UTF8String.fromString(tile.cellType.toString()) - InternalRow(ct, tile.cols, tile.rows, tile.toBytes(), null) + InternalRow(ct, tile.cols, tile.rows, tile.toBytes(), null, null) + case bt: BufferTile => + val tile = bt.sourceTile.toArrayTile() + val ct = UTF8String.fromString(tile.cellType.toString()) + InternalRow(ct, tile.cols, tile.rows, tile.toBytes(), bt.gridBounds.toInternalRow, null) case const: ConstantTile => // Must expand constant tiles so they can be interpreted properly in catalyst and Python. val tile = const.toArrayTile() val ct = UTF8String.fromString(tile.cellType.toString()) - InternalRow(ct, tile.cols, tile.rows, tile.toBytes(), null) + InternalRow(ct, tile.cols, tile.rows, tile.toBytes(), null, null) case tile => val ct = UTF8String.fromString(tile.cellType.toString()) - InternalRow(ct, tile.cols, tile.rows, tile.toBytes(), null) + InternalRow(ct, tile.cols, tile.rows, tile.toBytes(), null, null) } } @@ -82,11 +89,11 @@ class TileUDT extends UserDefinedType[Tile] { if (datum == null) return null val row = datum.asInstanceOf[InternalRow] - /** TODO: a compatible encoder for the ProjectedRasterTile */ + /** TODO: a compatible encoder for the ProjectedRasterTile? */ val tile: Tile = - if (! 
row.isNullAt(4)) { + if (!row.isNullAt(5)) { Try { - val ir = row.getStruct(4, 4) + val ir = row.getStruct(5, 5) val ref = ir.as[RasterRef] ref }/*.orElse { @@ -98,6 +105,13 @@ class TileUDT extends UserDefinedType[Tile] { .tile ) }*/.get + } else if(!row.isNullAt(4)) { + val ct = CellType.fromName(row.getString(0)) + val cols = row.getInt(1) + val rows = row.getInt(2) + val bytes = row.getBinary(3) + val gridBounds = row.getStruct(4, 5).as[GridBounds[Int]] + BufferTile(ArrayTile.fromBytes(bytes, ct, cols, rows), gridBounds) } else { val ct = CellType.fromName(row.getString(0)) val cols = row.getInt(1) diff --git a/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala b/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala index c8bfa3813..accca888d 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala @@ -26,4 +26,4 @@ import org.locationtech.rasterframes.functions._ * Mix-in for UDFs for working with Tiles in Spark DataFrames. 
* @since 4/3/17 */ -trait RasterFunctions extends TileFunctions with LocalFunctions with SpatialFunctions with AggregateFunctions +trait RasterFunctions extends TileFunctions with LocalFunctions with SpatialFunctions with AggregateFunctions with FocalFunctions diff --git a/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala index 25cadd0d2..3301bca70 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala @@ -24,7 +24,7 @@ package org.locationtech.rasterframes.encoders import org.locationtech.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics} import org.locationtech.jts.geom.Envelope import geotrellis.proj4.CRS -import geotrellis.raster.{CellSize, CellType, Dimensions, Raster, Tile, TileLayout, GridBounds, CellGrid} +import geotrellis.raster.{CellGrid, CellSize, CellType, Dimensions, GridBounds, Raster, Tile, TileLayout} import geotrellis.layer._ import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -32,10 +32,10 @@ import org.apache.spark.sql.catalyst.util.QuantileSummaries import org.locationtech.geomesa.spark.jts.encoders.SpatialEncoders import org.locationtech.rasterframes.model.{CellContext, LongExtent, TileContext, TileDataContext} import frameless.TypedEncoder +import geotrellis.raster.mapalgebra.focal.{Kernel, Neighborhood} import java.net.URI import java.sql.Timestamp - import scala.reflect.ClassTag import scala.reflect.runtime.universe._ @@ -54,6 +54,8 @@ trait StandardEncoders extends SpatialEncoders with TypedEncoders { implicit lazy val localCellStatsEncoder: ExpressionEncoder[LocalCellStatistics] = ExpressionEncoder() implicit lazy val uriEncoder: ExpressionEncoder[URI] = typedExpressionEncoder[URI] + implicit lazy val 
neighborhoodEncoder: ExpressionEncoder[Neighborhood] = typedExpressionEncoder[Neighborhood] + implicit lazy val kernelEncoder: ExpressionEncoder[Kernel] = typedExpressionEncoder[Kernel] implicit lazy val quantileSummariesEncoder: ExpressionEncoder[QuantileSummaries] = typedExpressionEncoder[QuantileSummaries] implicit lazy val envelopeEncoder: ExpressionEncoder[Envelope] = typedExpressionEncoder implicit lazy val longExtentEncoder: ExpressionEncoder[LongExtent] = typedExpressionEncoder diff --git a/core/src/main/scala/org/locationtech/rasterframes/encoders/TypedEncoders.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/TypedEncoders.scala index d0dc97f1b..dff2453b2 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/encoders/TypedEncoders.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/TypedEncoders.scala @@ -3,13 +3,14 @@ package org.locationtech.rasterframes.encoders import frameless._ import geotrellis.layer.{KeyBounds, LayoutDefinition, TileLayerMetadata} import geotrellis.proj4.CRS -import geotrellis.raster.{CellType, Dimensions, GridBounds, Raster, Tile, CellGrid} +import geotrellis.raster.mapalgebra.focal.{Kernel, Neighborhood} +import geotrellis.raster.{CellGrid, CellType, Dimensions, GridBounds, Raster, Tile} import geotrellis.vector.Extent import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.util.QuantileSummaries import org.apache.spark.sql.rf.{CrsUDT, RasterSourceUDT, TileUDT} import org.locationtech.jts.geom.Envelope -import org.locationtech.rasterframes.util.KryoSupport +import org.locationtech.rasterframes.util.{FocalNeighborhood, KryoSupport} import java.net.URI import java.nio.ByteBuffer @@ -33,6 +34,9 @@ trait TypedEncoders { implicit val uriInjection: Injection[URI, String] = Injection(_.toString, new URI(_)) implicit val uriTypedEncoder: TypedEncoder[URI] = TypedEncoder.usingInjection + implicit val neighborhoodInjection: Injection[Neighborhood, 
String] = Injection(FocalNeighborhood(_), FocalNeighborhood.fromString(_).get) + implicit val neighborhoodTypedEncoder: TypedEncoder[Neighborhood] = TypedEncoder.usingInjection + implicit val envelopeTypedEncoder: TypedEncoder[Envelope] = ManualTypedEncoder.newInstance[Envelope]( fields = List( @@ -81,6 +85,8 @@ trait TypedEncoders { implicit val tileTypedEncoder: TypedEncoder[Tile] = TypedEncoder.usingUserDefinedType[Tile] implicit def rasterTileTypedEncoder[T <: CellGrid[Int]: TypedEncoder]: TypedEncoder[Raster[T]] = TypedEncoder.usingDerivation + + implicit val kernelTypedEncoder: TypedEncoder[Kernel] = TypedEncoder.usingDerivation } object TypedEncoders extends TypedEncoders diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterFunction.scala similarity index 95% rename from core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterFunction.scala index 18d337bdc..425e6c4e7 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterFunction.scala @@ -31,7 +31,7 @@ import org.locationtech.rasterframes.expressions.DynamicExtractors._ import org.slf4j.LoggerFactory /** Operation combining two tiles or a tile and a scalar into a new tile. 
*/ -trait BinaryLocalRasterOp extends BinaryExpression with RasterResult { +trait BinaryRasterFunction extends BinaryExpression with RasterResult { @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName)) @@ -59,6 +59,7 @@ trait BinaryLocalRasterOp extends BinaryExpression with RasterResult { if(leftCtx.isDefined && rightCtx.isDefined && leftCtx != rightCtx) logger.warn(s"Both '${left}' and '${right}' provided an extent and CRS, but they are different. Left-hand side will be used.") + // TODO: extract BufferTile here to preserve the buffer op(leftTile, rightTile) case DoubleArg(d) => op(fpTile(leftTile), d) case IntegerArg(i) => op(leftTile, i) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala index e92326b03..1dcd15ce6 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions import geotrellis.proj4.CRS -import geotrellis.raster.{CellGrid, Raster, Tile} +import geotrellis.raster.{CellGrid, Neighborhood, Raster, Tile} import geotrellis.vector.Extent import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow @@ -38,6 +38,7 @@ import org.locationtech.rasterframes.model.{LazyCRS, LongExtent, TileContext} import org.locationtech.rasterframes.ref.{ProjectedRasterLike, RasterRef} import org.locationtech.rasterframes.tiles.ProjectedRasterTile import org.apache.spark.sql.rf.CrsUDT +import org.locationtech.rasterframes.util.FocalNeighborhood private[rasterframes] object DynamicExtractors { @@ -224,4 +225,9 @@ object DynamicExtractors { case c: Char => IntegerArg(c.toInt) } } + + lazy val neighborhoodExtractor: PartialFunction[DataType, Any => Neighborhood] = { + case _: StringType => (v: 
Any) => FocalNeighborhood.fromString(v.asInstanceOf[UTF8String].toString).get + case n if n.conformsToSchema(neighborhoodEncoder.schema) => { case ir: InternalRow => ir.as[Neighborhood] } + } } diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterFunction.scala similarity index 70% rename from core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterFunction.scala index 2904fe57d..6eb4e7a69 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterFunction.scala @@ -21,34 +21,27 @@ package org.locationtech.rasterframes.expressions -import com.typesafe.scalalogging.Logger +import org.locationtech.rasterframes.expressions.DynamicExtractors._ import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.UnaryExpression -import org.apache.spark.sql.types.DataType -import org.locationtech.rasterframes.expressions.DynamicExtractors._ -import org.slf4j.LoggerFactory - -/** Operation on a tile returning a tile. */ -trait UnaryLocalRasterOp extends UnaryExpression with RasterResult { - @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName)) - - def dataType: DataType = child.dataType +import org.locationtech.rasterframes.model.TileContext +/** Boilerplate for expressions operating on a single Tile-like . 
*/ +trait UnaryRasterFunction extends UnaryExpression { override def checkInputDataTypes(): TypeCheckResult = { if (!tileExtractor.isDefinedAt(child.dataType)) { TypeCheckFailure(s"Input type '${child.dataType}' does not conform to a raster type.") - } - else TypeCheckSuccess + } else TypeCheckSuccess } override protected def nullSafeEval(input: Any): Any = { - val (childTile, childCtx) = tileExtractor(child.dataType)(row(input)) - val result = op(childTile) - toInternalRow(result, childCtx) + // TODO: Ensure InternalRowTile is preserved + val (tile, ctx) = tileExtractor(child.dataType)(row(input)) + eval(tile, ctx) } - protected def op(child: Tile): Tile + protected def eval(tile: Tile, ctx: Option[TileContext]): Any } diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala index 8d2b532c8..dcb4871c8 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala @@ -21,27 +21,21 @@ package org.locationtech.rasterframes.expressions -import org.locationtech.rasterframes.expressions.DynamicExtractors._ +import com.typesafe.scalalogging.Logger import geotrellis.raster.Tile -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} -import org.apache.spark.sql.catalyst.expressions.UnaryExpression +import org.apache.spark.sql.types.DataType import org.locationtech.rasterframes.model.TileContext +import org.slf4j.LoggerFactory -/** Boilerplate for expressions operating on a single Tile-like . 
/** Operation on a tile returning a tile. */
trait UnaryRasterOp extends UnaryRasterFunction with RasterResult {
  @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName))

  // The result carries the same Tile-bearing schema as the input column.
  def dataType: DataType = child.dataType

  protected def eval(tile: Tile, ctx: Option[TileContext]): Any = {
    val transformed = op(tile)
    toInternalRow(transformed, ctx)
  }

  /** Tile-to-tile transformation supplied by the concrete expression. */
  protected def op(child: Tile): Tile
}
+case class ExtractTile(child: Expression) extends UnaryRasterFunction with CodegenFallback { def dataType: DataType = tileUDT override def nodeName: String = "rf_extract_tile" diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala index 52bc4074e..eb1fb9675 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala @@ -28,10 +28,10 @@ import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} import org.locationtech.rasterframes._ import org.locationtech.rasterframes.encoders._ -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import org.locationtech.rasterframes.model.TileContext -case class GetTileContext(child: Expression) extends UnaryRasterOp with CodegenFallback { +case class GetTileContext(child: Expression) extends UnaryRasterFunction with CodegenFallback { def dataType: DataType = tileContextEncoder.schema override def nodeName: String = "get_tile_context" diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/focalops/Aspect.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/focalops/Aspect.scala new file mode 100644 index 000000000..10ba6727d --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/focalops/Aspect.scala @@ -0,0 +1,75 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2021 Azavea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
@ExpressionDescription(
  usage = "_FUNC_(tile) - Performs aspect on tile.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile);
       ..."""
)
case class Aspect(child: Expression) extends UnaryRasterFunction with RasterResult with NullToValue with CodegenFallback {
  @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName))

  def na: Any = null

  def dataType: DataType = child.dataType

  override protected def nullSafeEval(input: Any): Any = {
    val (tile, ctx) = tileExtractor(child.dataType)(row(input))
    // Unwrap to the BufferTile when one is present so the focal op can use the padding.
    eval(extractBufferTile(tile), ctx)
  }

  protected def eval(tile: Tile, ctx: Option[TileContext]): Any = ctx match {
    case Some(ctx) => ctx.toProjectRasterTile(op(tile, ctx)).toInternalRow
    // BUG FIX: previously `new NotImplementedError(...)` was constructed but never
    // thrown, so the expression silently returned a Throwable instance as its value.
    case None => throw new NotImplementedError("Surface operation requires ProjectedRasterTile")
  }

  override def nodeName: String = Aspect.name

  /**
   * Computes aspect over the tile. The cell size is derived from the tile context
   * extent and the tile's (center) dimensions; the same cell size applies to both
   * the buffered and unbuffered cases, so it is computed once.
   */
  def op(t: Tile, ctx: TileContext): Tile = {
    val cellSize = CellSize(ctx.extent, cols = t.cols, rows = t.rows)
    t match {
      case bt: BufferTile => bt.aspect(cellSize)
      case _ => t.aspect(cellSize)
    }
  }
}

object Aspect {
  def name: String = "rf_aspect"
  def apply(tile: Column): Column = new Column(Aspect(tile.expr))
}
@ExpressionDescription(
  usage = "_FUNC_(tile, kernel) - Performs convolve on tile in the neighborhood.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * kernel - a focal operation kernel""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, kernel);
       ..."""
)
case class Convolve(left: Expression, right: Expression) extends BinaryExpression with RasterResult with CodegenFallback {
  @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName))

  override def nodeName: String = Convolve.name

  def dataType: DataType = left.dataType

  // Left operand must be raster-like; right must deserialize as a Kernel row.
  override def checkInputDataTypes(): TypeCheckResult =
    if (!tileExtractor.isDefinedAt(left.dataType))
      TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.")
    else if (!right.dataType.conformsToSchema(kernelEncoder.schema))
      TypeCheckFailure(s"Input type '${right.dataType}' does not conform to a kernel type.")
    else TypeCheckSuccess

  override protected def nullSafeEval(tileInput: Any, kernelInput: Any): Any = {
    val (sourceTile, tileCtx) = tileExtractor(left.dataType)(row(tileInput))
    val kernel = row(kernelInput).as[Kernel]
    toInternalRow(op(extractBufferTile(sourceTile), kernel), tileCtx)
  }

  /** Dispatch to the buffer-aware convolve when a BufferTile is available. */
  protected def op(t: Tile, kernel: Kernel): Tile = t match {
    case buffered: BufferTile => buffered.convolve(kernel)
    case plain => plain.convolve(kernel)
  }
}

object Convolve {
  def name: String = "rf_convolve"
  def apply(tile: Column, kernel: Column): Column = new Column(Convolve(tile.expr, kernel.expr))
}
@ExpressionDescription(
  usage = "_FUNC_(tile, neighborhood) - Performs focalMax on tile in the neighborhood.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * neighborhood - a focal operation neighborhood""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, 'square-1');
       ..."""
)
case class FocalMax(left: Expression, right: Expression) extends FocalNeighborhoodOp {
  override def nodeName: String = FocalMax.name

  /** Prefer the buffer-aware focalMax when the input carries padding. */
  protected def op(t: Tile, neighborhood: Neighborhood): Tile =
    t match {
      case buffered: BufferTile => buffered.focalMax(neighborhood)
      case plain => plain.focalMax(neighborhood)
    }
}

object FocalMax {
  def name: String = "rf_focal_max"
  def apply(tile: Column, neighborhood: Column): Column =
    new Column(FocalMax(tile.expr, neighborhood.expr))
}
@ExpressionDescription(
  usage = "_FUNC_(tile, neighborhood) - Performs focalMean on tile in the neighborhood.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * neighborhood - a focal operation neighborhood""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, 'square-1');
       ..."""
)
case class FocalMean(left: Expression, right: Expression) extends FocalNeighborhoodOp {
  override def nodeName: String = FocalMean.name

  /** Prefer the buffer-aware focalMean when the input carries padding. */
  protected def op(t: Tile, neighborhood: Neighborhood): Tile =
    t match {
      case buffered: BufferTile => buffered.focalMean(neighborhood)
      case plain => plain.focalMean(neighborhood)
    }
}

object FocalMean {
  def name: String = "rf_focal_mean"
  def apply(tile: Column, neighborhood: Column): Column =
    new Column(FocalMean(tile.expr, neighborhood.expr))
}
@ExpressionDescription(
  usage = "_FUNC_(tile, neighborhood) - Performs focalMedian on tile in the neighborhood.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * neighborhood - a focal operation neighborhood""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, 'square-1');
       ..."""
)
case class FocalMedian(left: Expression, right: Expression) extends FocalNeighborhoodOp {
  override def nodeName: String = FocalMedian.name

  /** Prefer the buffer-aware focalMedian when the input carries padding. */
  protected def op(t: Tile, neighborhood: Neighborhood): Tile =
    t match {
      case buffered: BufferTile => buffered.focalMedian(neighborhood)
      case plain => plain.focalMedian(neighborhood)
    }
}

object FocalMedian {
  def name: String = "rf_focal_median"
  def apply(tile: Column, neighborhood: Column): Column =
    new Column(FocalMedian(tile.expr, neighborhood.expr))
}
@ExpressionDescription(
  usage = "_FUNC_(tile, neighborhood) - Performs focalMin on tile in the neighborhood.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * neighborhood - a focal operation neighborhood""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, 'square-1');
       ..."""
)
case class FocalMin(left: Expression, right: Expression) extends FocalNeighborhoodOp {
  override def nodeName: String = FocalMin.name

  /** Prefer the buffer-aware focalMin when the input carries padding. */
  protected def op(t: Tile, neighborhood: Neighborhood): Tile =
    t match {
      case buffered: BufferTile => buffered.focalMin(neighborhood)
      case plain => plain.focalMin(neighborhood)
    }
}

object FocalMin {
  def name: String = "rf_focal_min"
  def apply(tile: Column, neighborhood: Column): Column =
    new Column(FocalMin(tile.expr, neighborhood.expr))
}
@ExpressionDescription(
  usage = "_FUNC_(tile, neighborhood) - Performs focalMode on tile in the neighborhood.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * neighborhood - a focal operation neighborhood""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, 'square-1');
       ..."""
)
case class FocalMode(left: Expression, right: Expression) extends FocalNeighborhoodOp {
  override def nodeName: String = FocalMode.name

  /** Prefer the buffer-aware focalMode when the input carries padding. */
  protected def op(t: Tile, neighborhood: Neighborhood): Tile =
    t match {
      case buffered: BufferTile => buffered.focalMode(neighborhood)
      case plain => plain.focalMode(neighborhood)
    }
}

object FocalMode {
  def name: String = "rf_focal_mode"
  def apply(tile: Column, neighborhood: Column): Column =
    new Column(FocalMode(tile.expr, neighborhood.expr))
}
@ExpressionDescription(
  usage = "_FUNC_(tile, neighborhood) - Performs focalMoransI on tile in the neighborhood.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * neighborhood - a focal operation neighborhood""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, 'square-1');
       ..."""
)
case class FocalMoransI(left: Expression, right: Expression) extends FocalNeighborhoodOp {
  override def nodeName: String = FocalMoransI.name

  // NOTE: the underlying GeoTrellis method is `tileMoransI` (per-cell Moran's I),
  // not a `focal*`-named method like the sibling expressions.
  protected def op(t: Tile, neighborhood: Neighborhood): Tile =
    t match {
      case buffered: BufferTile => buffered.tileMoransI(neighborhood)
      case plain => plain.tileMoransI(neighborhood)
    }
}

object FocalMoransI {
  def name: String = "rf_focal_moransi"
  def apply(tile: Column, neighborhood: Column): Column =
    new Column(FocalMoransI(tile.expr, neighborhood.expr))
}
/** Shared scaffolding for binary expressions of the form (tile, neighborhood) -> tile. */
trait FocalNeighborhoodOp extends BinaryExpression with RasterResult with CodegenFallback {
  @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName))

  /** The tile-bearing operand. */
  def left: Expression

  /** The neighborhood operand. */
  def right: Expression

  def dataType: DataType = left.dataType

  override def checkInputDataTypes(): TypeCheckResult =
    if (!tileExtractor.isDefinedAt(left.dataType))
      TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.")
    else if (!neighborhoodExtractor.isDefinedAt(right.dataType))
      TypeCheckFailure(s"Input type '${right.dataType}' does not conform to a string neighborhood type.")
    else TypeCheckSuccess

  override protected def nullSafeEval(tileInput: Any, neighborhoodInput: Any): Any = {
    val (sourceTile, tileCtx) = tileExtractor(left.dataType)(row(tileInput))
    val hood = neighborhoodExtractor(right.dataType)(neighborhoodInput)
    // Unwrap to BufferTile (when present) so concrete ops can exploit the padding.
    toInternalRow(op(extractBufferTile(sourceTile), hood), tileCtx)
  }

  /** Focal operation supplied by the concrete expression. */
  protected def op(child: Tile, neighborhood: Neighborhood): Tile
}
@ExpressionDescription(
  usage = "_FUNC_(tile, neighborhood) - Performs focalStandardDeviation on tile in the neighborhood.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * neighborhood - a focal operation neighborhood""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, 'square-1');
       ..."""
)
case class FocalStdDev(left: Expression, right: Expression) extends FocalNeighborhoodOp {
  override def nodeName: String = FocalStdDev.name

  /** Prefer the buffer-aware focalStandardDeviation when the input carries padding. */
  protected def op(t: Tile, neighborhood: Neighborhood): Tile =
    t match {
      case buffered: BufferTile => buffered.focalStandardDeviation(neighborhood)
      case plain => plain.focalStandardDeviation(neighborhood)
    }
}

object FocalStdDev {
  def name: String = "rf_focal_stddev"
  def apply(tile: Column, neighborhood: Column): Column =
    new Column(FocalStdDev(tile.expr, neighborhood.expr))
}
@ExpressionDescription(
  usage = "_FUNC_(tile, azimuth, altitude, zFactor) - Performs hillshade on tile.",
  arguments = """
  Arguments:
    * tile - a tile to apply operation
    * azimuth
    * altitude
    * zFactor""",
  examples = """
  Examples:
    > SELECT _FUNC_(tile, azimuth, altitude, zFactor);
       ..."""
)
case class Hillshade(first: Expression, second: Expression, third: Expression, fourth: Expression) extends QuaternaryExpression with RasterResult with CodegenFallback {
  @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName))

  override def nodeName: String = Hillshade.name

  def dataType: DataType = first.dataType

  val children: Seq[Expression] = Seq(first, second, third, fourth)

  // First operand must be raster-like; the remaining three must be numeric.
  override def checkInputDataTypes(): TypeCheckResult =
    if (!tileExtractor.isDefinedAt(first.dataType)) TypeCheckFailure(s"Input type '${first.dataType}' does not conform to a raster type.")
    else if (!children.tail.forall(expr => numberArgExtractor.isDefinedAt(expr.dataType))) {
      TypeCheckFailure(s"Input type '${second.dataType}', '${third.dataType}' or '${fourth.dataType}' do not conform to a numeric type.")
    } else TypeCheckSuccess

  override protected def nullSafeEval(tileInput: Any, azimuthInput: Any, altitudeInput: Any, zFactorInput: Any): Any = {
    val (tile, ctx) = tileExtractor(first.dataType)(row(tileInput))
    // Coerce each numeric argument to Double, accepting integral or floating inputs.
    val List(azimuth, altitude, zFactor) =
      children
        .tail
        .zip(List(azimuthInput, altitudeInput, zFactorInput))
        .map { case (expr, datum) =>
          numberArgExtractor(expr.dataType)(datum) match {
            case DoubleArg(value) => value
            case IntegerArg(value) => value.toDouble
          }
        }
    eval(extractBufferTile(tile), ctx, azimuth, altitude, zFactor)
  }

  protected def eval(tile: Tile, ctx: Option[TileContext], azimuth: Double, altitude: Double, zFactor: Double): Any = ctx match {
    case Some(ctx) => ctx.toProjectRasterTile(op(tile, ctx, azimuth, altitude, zFactor)).toInternalRow
    // BUG FIX: previously `new NotImplementedError(...)` was constructed but never
    // thrown, so the expression silently returned a Throwable instance as its value.
    case None => throw new NotImplementedError("Surface operation requires ProjectedRasterTile")
  }

  /**
   * Applies hillshade. The cell size is computed once from the context extent and
   * the tile's dimensions.
   * NOTE(review): for BufferTile the shading is applied via `mapTile` to the
   * underlying (padded) tile while the cell size is derived from `t.cols`/`t.rows`
   * — confirm these refer to the same grid as intended.
   */
  protected def op(t: Tile, ctx: TileContext, azimuth: Double, altitude: Double, zFactor: Double): Tile = {
    val cellSize = CellSize(ctx.extent, cols = t.cols, rows = t.rows)
    t match {
      case bt: BufferTile => bt.mapTile(_.hillshade(cellSize, azimuth, altitude, zFactor))
      case _ => t.hillshade(cellSize, azimuth, altitude, zFactor)
    }
  }
}

object Hillshade {
  def name: String = "rf_hillshade"
  def apply(tile: Column, azimuth: Column, altitude: Column, zFactor: Column): Column =
    new Column(Hillshade(tile.expr, azimuth.expr, altitude.expr, zFactor.expr))
}
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.focalops + +import com.typesafe.scalalogging.Logger +import geotrellis.raster.{BufferTile, CellSize, Tile} +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression, ExpressionDescription} +import org.apache.spark.sql.types.DataType +import org.locationtech.rasterframes.encoders.syntax._ +import org.locationtech.rasterframes.expressions.DynamicExtractors.{DoubleArg, IntegerArg, numberArgExtractor, tileExtractor} +import org.locationtech.rasterframes.expressions.{RasterResult, row} +import org.locationtech.rasterframes.model.TileContext +import org.slf4j.LoggerFactory + +@ExpressionDescription( + usage = "_FUNC_(tile, zFactor) - Performs slope on tile.", + arguments = """ + Arguments: + * tile - a tile to apply operation + * zFactor - a slope operation zFactor""", + examples = """ + Examples: + > SELECT _FUNC_(tile, 0.2); + ...""" +) +case class Slope(left: Expression, right: Expression) extends BinaryExpression with RasterResult with CodegenFallback { + @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName)) + + override def nodeName: String = Slope.name + + def dataType: DataType = left.dataType + + override def checkInputDataTypes(): TypeCheckResult = + if (!tileExtractor.isDefinedAt(left.dataType)) TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.") + else if (!numberArgExtractor.isDefinedAt(right.dataType)) { + TypeCheckFailure(s"Input type '${right.dataType}' does not conform to a numeric type.") + } else TypeCheckSuccess + + override protected def nullSafeEval(tileInput: Any, zFactorInput: Any): Any 
= { + val (tile, ctx) = tileExtractor(left.dataType)(row(tileInput)) + val zFactor = numberArgExtractor(right.dataType)(zFactorInput) match { + case DoubleArg(value) => value + case IntegerArg(value) => value.toDouble + } + eval(extractBufferTile(tile), ctx, zFactor) + } + protected def eval(tile: Tile, ctx: Option[TileContext], zFactor: Double): Any = ctx match { + case Some(ctx) => ctx.toProjectRasterTile(op(tile, ctx, zFactor)).toInternalRow + case None => new NotImplementedError("Surface operation requires ProjectedRasterTile") + } + + protected def op(t: Tile, ctx: TileContext, zFactor: Double): Tile = t match { + case bt: BufferTile => bt.slope(CellSize(ctx.extent, cols = t.cols, rows = t.rows), zFactor) + case _ => t.slope(CellSize(ctx.extent, cols = t.cols, rows = t.rows), zFactor) + } +} + +object Slope { + def name: String = "rf_slope" + def apply(tile: Column, zFactor: Column): Column = new Column(Slope(tile.expr, zFactor.expr)) +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/focalops/package.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/focalops/package.scala new file mode 100644 index 000000000..2221b4d68 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/focalops/package.scala @@ -0,0 +1,20 @@ +package org.locationtech.rasterframes.expressions + +import geotrellis.raster.Tile +import org.locationtech.rasterframes.ref.RasterRef +import org.locationtech.rasterframes.tiles.ProjectedRasterTile + +package object focalops extends Serializable { + private [focalops] def extractBufferTile(tile: Tile): Tile = tile match { + // if it is RasterRef, we want the BufferTile + case ref: RasterRef => ref.realizedTile + // if it is a ProjectedRasterTile, can we flatten it? 
+ case prt: ProjectedRasterTile => prt.tile match { + // if it is RasterRef, we can get what's inside + case rr: RasterRef => rr.realizedTile + // otherwise it is some tile + case _ => prt.tile + } + case _ => tile + } +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala index e29966854..1d9b82abc 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala @@ -44,7 +44,7 @@ import scala.util.control.NonFatal * * @since 9/6/18 */ -case class RasterSourceToRasterRefs(children: Seq[Expression], bandIndexes: Seq[Int], subtileDims: Option[Dimensions[Int]] = None) extends Expression +case class RasterSourceToRasterRefs(children: Seq[Expression], bandIndexes: Seq[Int], subtileDims: Option[Dimensions[Int]] = None, bufferSize: Short = 0) extends Expression with Generator with CodegenFallback with ExpectsInputTypes { def inputTypes: Seq[DataType] = Seq.fill(children.size)(rasterSourceUDT) @@ -57,7 +57,7 @@ case class RasterSourceToRasterRefs(children: Seq[Expression], bandIndexes: Seq[ } yield StructField(name, RasterRef.rasterRefEncoder.schema, true)) private def band2ref(src: RFRasterSource, grid: Option[GridBounds[Int]], extent: Option[Extent])(b: Int): RasterRef = - if (b < src.bandCount) RasterRef(src, b, extent, grid.map(Subgrid.apply)) else null + if (b < src.bandCount) RasterRef(src, b, extent, grid.map(Subgrid.apply), bufferSize) else null def eval(input: InternalRow): TraversableOnce[InternalRow] = try { @@ -88,6 +88,8 @@ object RasterSourceToRasterRefs { def apply(rrs: Column*): TypedColumn[Any, ProjectedRasterTile] = apply(None, Seq(0), rrs: _*) def apply(subtileDims: Option[Dimensions[Int]], bandIndexes: Seq[Int], rrs: Column*): 
TypedColumn[Any, ProjectedRasterTile] = new Column(new RasterSourceToRasterRefs(rrs.map(_.expr), bandIndexes, subtileDims)).as[ProjectedRasterTile] + def apply(subtileDims: Option[Dimensions[Int]], bandIndexes: Seq[Int], bufferSize: Short, rrs: Column*): TypedColumn[Any, ProjectedRasterTile] = + new Column(new RasterSourceToRasterRefs(rrs.map(_.expr), bandIndexes, subtileDims, bufferSize)).as[ProjectedRasterTile] private[rasterframes] def bandNames(basename: String, bandIndexes: Seq[Int]): Seq[String] = bandIndexes match { case Seq() => Seq.empty diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala index 85a7be8f9..8f28eb916 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala @@ -45,7 +45,7 @@ import scala.util.control.NonFatal * * @since 9/6/18 */ -case class RasterSourceToTiles(children: Seq[Expression], bandIndexes: Seq[Int], subtileDims: Option[Dimensions[Int]] = None) +case class RasterSourceToTiles(children: Seq[Expression], bandIndexes: Seq[Int], subtileDims: Option[Dimensions[Int]] = None, bufferSize: Short = 0) extends Expression with RasterResult with Generator with CodegenFallback with ExpectsInputTypes { @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName)) @@ -89,7 +89,9 @@ case class RasterSourceToTiles(children: Seq[Expression], bandIndexes: Seq[Int], object RasterSourceToTiles { def apply(rrs: Column*): TypedColumn[Any, ProjectedRasterTile] = apply(None, Seq(0), rrs: _*) def apply(subtileDims: Option[Dimensions[Int]], bandIndexes: Seq[Int], rrs: Column*): TypedColumn[Any, ProjectedRasterTile] = - new Column(new RasterSourceToTiles(rrs.map(_.expr), bandIndexes, 
subtileDims)).as[ProjectedRasterTile] + new Column(new RasterSourceToTiles(rrs.map(_.expr), bandIndexes, subtileDims, 0.toShort)).as[ProjectedRasterTile] + def apply(subtileDims: Option[Dimensions[Int]], bandIndexes: Seq[Int], bufferSize: Short, rrs: Column*): TypedColumn[Any, ProjectedRasterTile] = + new Column(new RasterSourceToTiles(rrs.map(_.expr), bandIndexes, subtileDims, bufferSize)).as[ProjectedRasterTile] } diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala index 153eeb5fa..19cbe3090 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala @@ -25,7 +25,7 @@ import geotrellis.raster.Tile import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} @ExpressionDescription( usage = "_FUNC_(tile) - Compute the absolute value of each cell.", @@ -37,7 +37,7 @@ import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterO > SELECT _FUNC_(tile); ...""" ) -case class Abs(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback { +case class Abs(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { override def nodeName: String = "rf_abs" def na: Any = null protected def op(t: Tile): Tile = t.localAbs() diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala index ff23eb646..7f231797b 100644 --- 
a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction import org.locationtech.rasterframes.expressions.DynamicExtractors @ExpressionDescription( @@ -43,7 +43,7 @@ import org.locationtech.rasterframes.expressions.DynamicExtractors > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Add(left: Expression, right: Expression) extends BinaryLocalRasterOp +case class Add(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_add" protected def op(left: Tile, right: Tile): Tile = left.localAdd(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala index e31dd17eb..300103154 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor 
import org.locationtech.rasterframes.util.DataBiasedOp @@ -45,7 +45,7 @@ import org.locationtech.rasterframes.util.DataBiasedOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class BiasedAdd(left: Expression, right: Expression) extends BinaryLocalRasterOp +case class BiasedAdd(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_biased_add" protected def op(left: Tile, right: Tile): Tile = DataBiasedOp.BiasedAdd(left, right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Defined.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Defined.scala index 1a7af9b25..035a5ad84 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Defined.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Defined.scala @@ -25,7 +25,7 @@ import geotrellis.raster.Tile import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} @ExpressionDescription( usage = "_FUNC_(tile) - Return a tile with zeros where the input is NoData, otherwise one.", @@ -37,7 +37,7 @@ import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterO > SELECT _FUNC_(tile); ...""" ) -case class Defined(child: Expression) extends UnaryLocalRasterOp +case class Defined(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { override def nodeName: String = "rf_local_data" def na: Any = null diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala index 
f90fb4225..ce0d0be1c 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(tile, rhs) - Performs cell-wise division between two tiles or a tile and a scalar.", @@ -41,7 +41,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Divide(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Divide(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_divide" protected def op(left: Tile, right: Tile): Tile = left.localDivide(right) protected def op(left: Tile, right: Double): Tile = left.localDivide(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala index c1804708f..b83fcee7e 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import 
org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(lhs, rhs) - Performs cell-wise equality test between two tiles.", @@ -39,7 +39,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Equal(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Equal(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_equal" protected def op(left: Tile, right: Tile): Tile = left.localEqual(right) protected def op(left: Tile, right: Double): Tile = left.localEqual(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala index 01d45e19d..21f57d1f6 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.DataType -import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} +import org.locationtech.rasterframes.expressions.{UnaryRasterOp, fpTile} @ExpressionDescription( usage = "_FUNC_(tile) - Performs cell-wise exponential.", @@ -38,7 +38,7 @@ import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} > SELECT _FUNC_(tile); ...""" ) -case class Exp(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class Exp(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "rf_exp" protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(math.E) @@ -59,7 
+59,7 @@ object Exp { > SELECT _FUNC_(tile); ...""" ) -case class Exp10(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class Exp10(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "rf_log10" override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(10.0) @@ -80,7 +80,7 @@ object Exp10 { > SELECT _FUNC_(tile); ...""" ) -case class Exp2(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class Exp2(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "rf_exp2" protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(2.0) @@ -101,7 +101,7 @@ object Exp2 { > SELECT _FUNC_(tile); ...""" ) -case class ExpM1(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class ExpM1(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "rf_expm1" protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(math.E).localSubtract(1.0) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala index b318329fc..e820f94f5 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(lhs, rhs) - Performs cell-wise greater-than (>) test between two tiles.", @@ -38,7 +38,7 @@ import 
org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Greater(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Greater(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_greater" protected def op(left: Tile, right: Tile): Tile = left.localGreater(right) protected def op(left: Tile, right: Double): Tile = left.localGreater(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala index e4d1dcfc1..dd33e3415 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(lhs, rhs) - Performs cell-wise greater-than-or-equal (>=) test between two tiles.", @@ -39,7 +39,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class GreaterEqual(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class GreaterEqual(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_greater_equal" protected def op(left: Tile, right: Tile): Tile = left.localGreaterOrEqual(right) protected def op(left: Tile, right: Double): 
Tile = left.localGreaterOrEqual(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala index 001688a1c..418ddf780 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala @@ -25,7 +25,7 @@ import geotrellis.raster.Tile import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} @ExpressionDescription( usage = "_FUNC_(tile) - Return the given tile or projected raster unchanged. Useful in debugging round-trip serialization across various language and memory boundaries.", @@ -37,7 +37,7 @@ import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterO > SELECT _FUNC_(tile); ...""" ) -case class Identity(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback { +case class Identity(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { override def nodeName: String = "rf_identity" def na: Any = null protected def op(t: Tile): Tile = t diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala index 76543e34e..8f5ac719f 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback 
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(lhs, rhs) - Performs cell-wise less-than (<) test between two tiles.", @@ -38,7 +38,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Less(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Less(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_less" protected def op(left: Tile, right: Tile): Tile = left.localLess(right) protected def op(left: Tile, right: Double): Tile = left.localLess(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala index 116b3c712..ae51ab2f1 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(lhs, rhs) - Performs cell-wise less-than-or-equal (<=) test between two tiles.", @@ -39,7 +39,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class LessEqual(left: 
Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class LessEqual(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_less_equal" protected def op(left: Tile, right: Tile): Tile = left.localLessOrEqual(right) protected def op(left: Tile, right: Double): Tile = left.localLessOrEqual(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala index c428cc922..2ebd84412 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.DataType -import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} +import org.locationtech.rasterframes.expressions.{UnaryRasterOp, fpTile} @ExpressionDescription( usage = "_FUNC_(tile) - Performs cell-wise natural logarithm.", @@ -38,7 +38,7 @@ import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} > SELECT _FUNC_(tile); ...""" ) -case class Log(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class Log(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "log" protected def op(tile: Tile): Tile = fpTile(tile).localLog() @@ -59,7 +59,7 @@ object Log { > SELECT _FUNC_(tile); ...""" ) -case class Log10(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class Log10(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "rf_log10" protected def op(tile: Tile): Tile = 
fpTile(tile).localLog10() @@ -80,7 +80,7 @@ object Log10 { > SELECT _FUNC_(tile); ...""" ) -case class Log2(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class Log2(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "rf_log2" protected def op(tile: Tile): Tile = fpTile(tile).localLog() / math.log(2.0) @@ -101,7 +101,7 @@ object Log2 { > SELECT _FUNC_(tile); ...""" ) -case class Log1p(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class Log1p(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "rf_log1p" protected def op(tile: Tile): Tile = fpTile(tile).localAdd(1.0).localLog() diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Max.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Max.scala index b68e49955..01019543f 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Max.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Max.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(tile, rhs) - Performs cell-wise maximum two tiles or a tile and a scalar.", @@ -41,7 +41,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Max(left: Expression, right:Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Max(left: Expression, right:Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName = "rf_local_max" 
protected def op(left: Tile, right: Tile): Tile = left.localMax(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Min.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Min.scala index 0af8b3117..171812929 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Min.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Min.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(tile, rhs) - Performs cell-wise minimum two tiles or a tile and a scalar.", @@ -41,7 +41,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Min(left: Expression, right:Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Min(left: Expression, right:Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName = "rf_local_min" protected def op(left: Tile, right: Tile): Tile = left.localMin(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala index 4dc7e8548..7bf3367d4 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import 
org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(tile, rhs) - Performs cell-wise multiplication between two tiles or a tile and a scalar.", @@ -41,7 +41,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Multiply(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Multiply(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_multiply" protected def op(left: Tile, right: Tile): Tile = left.localMultiply(right) protected def op(left: Tile, right: Double): Tile = left.localMultiply(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala index 90bf4b508..d4238c27f 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala @@ -25,7 +25,7 @@ import geotrellis.raster.Tile import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} @ExpressionDescription( usage = "_FUNC_(tile) - Round cell values to the nearest integer without changing the cell type.", @@ -37,7 +37,7 @@ import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterO > SELECT _FUNC_(tile); ...""" ) 
-case class Round(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback { +case class Round(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { override def nodeName: String = "rf_round" def na: Any = null protected def op(child: Tile): Tile = child.localRound() diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Sqrt.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Sqrt.scala index d8e86fb34..ad3ed376d 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Sqrt.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Sqrt.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.DataType -import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} +import org.locationtech.rasterframes.expressions.{UnaryRasterOp, fpTile} @ExpressionDescription( usage = "_FUNC_(tile) - Perform cell-wise square root", @@ -40,7 +40,7 @@ import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} > SELECT _FUNC_(tile) ... 
""" ) -case class Sqrt(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { +case class Sqrt(child: Expression) extends UnaryRasterOp with CodegenFallback { override val nodeName: String = "rf_sqrt" protected def op(tile: Tile): Tile = fpTile(tile).localPow(0.5) override def dataType: DataType = child.dataType diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala index 645049ce2..708e7e207 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(tile, rhs) - Performs cell-wise subtraction between two tiles or a tile and a scalar.", @@ -41,7 +41,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Subtract(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Subtract(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_subtract" protected def op(left: Tile, right: Tile): Tile = left.localSubtract(right) protected def op(left: Tile, right: Double): Tile = left.localSubtract(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Undefined.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Undefined.scala 
index fb146451f..bd533f4b7 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Undefined.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Undefined.scala @@ -25,7 +25,7 @@ import geotrellis.raster.Tile import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} @ExpressionDescription( usage = "_FUNC_(tile) - Return a tile with ones where the input is NoData, otherwise zero.", @@ -37,7 +37,7 @@ import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterO > SELECT _FUNC_(tile); ...""" ) -case class Undefined(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback { +case class Undefined(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { override def nodeName: String = "rf_local_no_data" def na: Any = null protected def op(child: Tile): Tile = child.localUndefined() diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala index 2cdc30292..9bab9b86b 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit -import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import 
org.locationtech.rasterframes.expressions.BinaryRasterFunction @ExpressionDescription( usage = "_FUNC_(lhs, rhs) - Performs cell-wise inequality test between two tiles.", @@ -39,7 +39,7 @@ import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp > SELECT _FUNC_(tile1, tile2); ...""" ) -case class Unequal(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { +case class Unequal(left: Expression, right: Expression) extends BinaryRasterFunction with CodegenFallback { override val nodeName: String = "rf_local_unequal" protected def op(left: Tile, right: Tile): Tile = left.localUnequal(right) protected def op(left: Tile, right: Double): Tile = left.localUnequal(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala index 1fd99725e..9fa191ae4 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala @@ -34,6 +34,7 @@ import org.locationtech.rasterframes.expressions.aggregates.CellCountAggregate.D import org.locationtech.rasterframes.expressions.aggregates._ import org.locationtech.rasterframes.expressions.generators._ import org.locationtech.rasterframes.expressions.localops._ +import org.locationtech.rasterframes.expressions.focalops._ import org.locationtech.rasterframes.expressions.tilestats._ import org.locationtech.rasterframes.expressions.transformers._ @@ -137,6 +138,19 @@ package object expressions { registry.registerExpression[LocalCountAggregate.LocalNoDataCellsUDAF]("rf_agg_local_no_data_cells") registry.registerExpression[LocalMeanAggregate]("rf_agg_local_mean") + registry.registerExpression[FocalMax](FocalMax.name) + registry.registerExpression[FocalMin](FocalMin.name) + registry.registerExpression[FocalMean](FocalMean.name) + registry.registerExpression[FocalMode](FocalMode.name) + 
registry.registerExpression[FocalMedian](FocalMedian.name) + registry.registerExpression[FocalMoransI](FocalMoransI.name) + registry.registerExpression[FocalStdDev](FocalStdDev.name) + registry.registerExpression[Convolve](Convolve.name) + + registry.registerExpression[Slope](Slope.name) + registry.registerExpression[Aspect](Aspect.name) + registry.registerExpression[Hillshade](Hillshade.name) + registry.registerExpression[Mask.MaskByDefined]("rf_mask") registry.registerExpression[Mask.InverseMaskByDefined]("rf_inverse_mask") registry.registerExpression[Mask.MaskByValue]("rf_mask_by_value") diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala index a27b78328..52dc8c1ed 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions.tilestats import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterFunction} import geotrellis.raster._ import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -40,7 +40,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile); 357""" ) -case class DataCells(child: Expression) extends UnaryRasterOp with CodegenFallback with NullToValue { +case class DataCells(child: Expression) extends UnaryRasterFunction with CodegenFallback with NullToValue { override def nodeName: String = "rf_data_cells" def dataType: DataType = LongType protected def eval(tile: Tile, ctx: Option[TileContext]): Any = DataCells.op(tile) diff --git 
a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala index 1fa187409..ebb2156d7 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala @@ -7,7 +7,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, TypedColumn} import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ import org.locationtech.rasterframes.isCellTrue -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import org.locationtech.rasterframes.model.TileContext import spire.syntax.cfor.cfor @@ -24,7 +24,7 @@ import spire.syntax.cfor.cfor true """ ) -case class Exists(child: Expression) extends UnaryRasterOp with CodegenFallback { +case class Exists(child: Expression) extends UnaryRasterFunction with CodegenFallback { override def nodeName: String = "exists" def dataType: DataType = BooleanType protected def eval(tile: Tile, ctx: Option[TileContext]): Any = Exists.op(tile) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala index a49888845..f553de047 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala @@ -7,7 +7,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, TypedColumn} import org.locationtech.rasterframes._ import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import 
org.locationtech.rasterframes.model.TileContext import spire.syntax.cfor.cfor @@ -24,7 +24,7 @@ import spire.syntax.cfor.cfor true """ ) -case class ForAll(child: Expression) extends UnaryRasterOp with CodegenFallback { +case class ForAll(child: Expression) extends UnaryRasterFunction with CodegenFallback { override def nodeName: String = "for_all" def dataType: DataType = BooleanType protected def eval(tile: Tile, ctx: Option[TileContext]): Any = ForAll.op(tile) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala index f796e6019..e03b96194 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions.tilestats import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterFunction} import geotrellis.raster._ import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -40,7 +40,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile); false""" ) -case class IsNoDataTile(child: Expression) extends UnaryRasterOp +case class IsNoDataTile(child: Expression) extends UnaryRasterFunction with CodegenFallback with NullToValue { override def nodeName: String = "rf_is_no_data_tile" def na: Any = true diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala index 2601bc4ae..556abd715 100644 --- 
a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions.tilestats import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterFunction} import geotrellis.raster._ import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -40,7 +40,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile); 12""" ) -case class NoDataCells(child: Expression) extends UnaryRasterOp with CodegenFallback with NullToValue { +case class NoDataCells(child: Expression) extends UnaryRasterFunction with CodegenFallback with NullToValue { override def nodeName: String = "rf_no_data_cells" def dataType: DataType = LongType protected def eval(tile: Tile, ctx: Option[TileContext]): Any = NoDataCells.op(tile) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala index 9e1861cda..9e3ff1f8c 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions.tilestats import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import geotrellis.raster._ import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import 
org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -40,7 +40,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile5); 2135.34""" ) -case class Sum(child: Expression) extends UnaryRasterOp with CodegenFallback { +case class Sum(child: Expression) extends UnaryRasterFunction with CodegenFallback { override def nodeName: String = "rf_tile_sum" def dataType: DataType = DoubleType protected def eval(tile: Tile, ctx: Option[TileContext]): Any = Sum.op(tile) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala index 567216ac5..a4a5fffa3 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( @@ -41,7 +41,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile); ...""" ) -case class TileHistogram(child: Expression) extends UnaryRasterOp with CodegenFallback { +case class TileHistogram(child: Expression) extends UnaryRasterFunction with CodegenFallback { override def nodeName: String = "rf_tile_histogram" protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileHistogram.converter(TileHistogram.op(tile)) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala 
b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala index 8d3cd285a..cbbe1a52c 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions.tilestats import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterFunction} import geotrellis.raster.{Tile, isData} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -40,7 +40,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile); 1""" ) -case class TileMax(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { +case class TileMax(child: Expression) extends UnaryRasterFunction with NullToValue with CodegenFallback { override def nodeName: String = "rf_tile_max" protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMax.op(tile) def dataType: DataType = DoubleType diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala index 5fb7b1805..2f0bdedb5 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions.tilestats import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, 
UnaryRasterFunction} import geotrellis.raster.{Tile, isData} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -40,7 +40,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile); -1""" ) -case class TileMean(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { +case class TileMean(child: Expression) extends UnaryRasterFunction with NullToValue with CodegenFallback { override def nodeName: String = "rf_tile_mean" protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMean.op(tile) def dataType: DataType = DoubleType diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala index 66698824e..c3d26fb4a 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions.tilestats import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterFunction} import geotrellis.raster.{Tile, isData} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -40,7 +40,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile); -1""" ) -case class TileMin(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { +case class TileMin(child: Expression) extends UnaryRasterFunction with NullToValue with CodegenFallback { override def nodeName: String = "rf_tile_min" protected def eval(tile: Tile, 
ctx: Option[TileContext]): Any = TileMin.op(tile) def dataType: DataType = DoubleType diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala index 2ef501faa..ebf6bf67c 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( @@ -41,7 +41,7 @@ import org.locationtech.rasterframes.model.TileContext > SELECT _FUNC_(tile); ...""" ) -case class TileStats(child: Expression) extends UnaryRasterOp with CodegenFallback { +case class TileStats(child: Expression) extends UnaryRasterFunction with CodegenFallback { override def nodeName: String = "rf_tile_stats" protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileStats.converter(TileStats.op(tile).orNull) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala index 5f54506df..76be3ba16 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala @@ -29,11 +29,11 @@ import org.apache.spark.sql.types.{DataType, StringType} import org.apache.spark.sql.{Column, TypedColumn} import 
org.apache.spark.unsafe.types.UTF8String import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import org.locationtech.rasterframes.model.TileContext import spire.syntax.cfor.cfor -abstract class DebugRender(asciiArt: Boolean) extends UnaryRasterOp with CodegenFallback with Serializable { +abstract class DebugRender(asciiArt: Boolean) extends UnaryRasterFunction with CodegenFallback with Serializable { import org.locationtech.rasterframes.expressions.transformers.DebugRender.TileAsMatrix def dataType: DataType = StringType diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RenderPNG.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RenderPNG.scala index a896a4342..9d3639910 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RenderPNG.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RenderPNG.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescript import org.apache.spark.sql.types.{BinaryType, DataType} import org.apache.spark.sql.{Column, TypedColumn} import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import org.locationtech.rasterframes.model.TileContext /** @@ -36,7 +36,7 @@ import org.locationtech.rasterframes.model.TileContext * @param child tile column * @param ramp color ramp to use for non-composite tiles. 
*/ -abstract class RenderPNG(child: Expression, ramp: Option[ColorRamp]) extends UnaryRasterOp with CodegenFallback with Serializable { +abstract class RenderPNG(child: Expression, ramp: Option[ColorRamp]) extends UnaryRasterFunction with CodegenFallback with Serializable { def dataType: DataType = BinaryType protected def eval(tile: Tile, ctx: Option[TileContext]): Any = { val png = ramp.map(tile.renderPng).getOrElse(tile.renderPng()) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala index 6e52ed9ca..a856b917b 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala @@ -22,7 +22,7 @@ package org.locationtech.rasterframes.expressions.transformers import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -37,7 +37,7 @@ import org.locationtech.rasterframes.model.TileContext Arguments: * tile - tile to convert""" ) -case class TileToArrayDouble(child: Expression) extends UnaryRasterOp with CodegenFallback { +case class TileToArrayDouble(child: Expression) extends UnaryRasterFunction with CodegenFallback { override def nodeName: String = "rf_tile_to_array_double" def dataType: DataType = DataTypes.createArrayType(DoubleType, false) protected def eval(tile: Tile, ctx: Option[TileContext]): Any = diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala 
b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala index 07b5dc58b..e6bbbd4a7 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.types.{DataType, DataTypes, IntegerType} import org.apache.spark.sql.{Column, TypedColumn} import org.locationtech.rasterframes.encoders.SparkBasicEncoders._ -import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.expressions.UnaryRasterFunction import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( @@ -37,7 +37,7 @@ import org.locationtech.rasterframes.model.TileContext Arguments: * tile - tile to convert""" ) -case class TileToArrayInt(child: Expression) extends UnaryRasterOp with CodegenFallback { +case class TileToArrayInt(child: Expression) extends UnaryRasterFunction with CodegenFallback { override def nodeName: String = "rf_tile_to_array_int" def dataType: DataType = DataTypes.createArrayType(IntegerType, false) protected def eval(tile: Tile, ctx: Option[TileContext]): Any = diff --git a/core/src/main/scala/org/locationtech/rasterframes/functions/FocalFunctions.scala b/core/src/main/scala/org/locationtech/rasterframes/functions/FocalFunctions.scala new file mode 100644 index 000000000..cdfe8e18d --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/functions/FocalFunctions.scala @@ -0,0 +1,95 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2020 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.functions + +import geotrellis.raster.Neighborhood +import geotrellis.raster.mapalgebra.focal.Kernel +import org.apache.spark.sql.Column +import org.apache.spark.sql.functions.lit +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.encoders.serialized_literal +import org.locationtech.rasterframes.expressions.focalops._ + +trait FocalFunctions { + def rf_focal_mean(tileCol: Column, neighborhood: Neighborhood): Column = + rf_focal_mean(tileCol, serialized_literal(neighborhood)) + + def rf_focal_mean(tileCol: Column, neighborhoodCol: Column): Column = + FocalMean(tileCol, neighborhoodCol) + + def rf_focal_median(tileCol: Column, neighborhood: Neighborhood): Column = + rf_focal_median(tileCol, serialized_literal(neighborhood)) + + def rf_focal_median(tileCol: Column, neighborhoodCol: Column): Column = + FocalMedian(tileCol, neighborhoodCol) + + def rf_focal_mode(tileCol: Column, neighborhood: Neighborhood): Column = + rf_focal_mode(tileCol, serialized_literal(neighborhood)) + + def rf_focal_mode(tileCol: Column, neighborhoodCol: Column): Column = + FocalMode(tileCol, neighborhoodCol) + + def rf_focal_max(tileCol: Column, neighborhood: Neighborhood): Column = + rf_focal_max(tileCol, serialized_literal(neighborhood)) + + def rf_focal_max(tileCol: Column, neighborhoodCol: Column): Column = + FocalMax(tileCol, neighborhoodCol) + + def rf_focal_min(tileCol: Column, neighborhood: Neighborhood): Column = + rf_focal_min(tileCol, 
serialized_literal(neighborhood)) + + def rf_focal_min(tileCol: Column, neighborhoodCol: Column): Column = + FocalMin(tileCol, neighborhoodCol) + + def rf_focal_stddev(tileCol: Column, neighborhood: Neighborhood): Column = + rf_focal_stddev(tileCol, serialized_literal(neighborhood)) + + def rf_focal_stddev(tileCol: Column, neighborhoodCol: Column): Column = + FocalStdDev(tileCol, neighborhoodCol) + + def rf_focal_moransi(tileCol: Column, neighborhood: Neighborhood): Column = + rf_focal_moransi(tileCol, serialized_literal(neighborhood)) + + def rf_focal_moransi(tileCol: Column, neighborhoodCol: Column): Column = + FocalMoransI(tileCol, neighborhoodCol) + + def rf_convolve(tileCol: Column, kernel: Kernel): Column = + rf_convolve(tileCol, serialized_literal(kernel)) + + def rf_convolve(tileCol: Column, kernelCol: Column): Column = + Convolve(tileCol, kernelCol) + + def rf_slope[T: Numeric](tileCol: Column, zFactor: T): Column = + rf_slope(tileCol, lit(zFactor)) + + def rf_slope(tileCol: Column, zFactorCol: Column): Column = + Slope(tileCol, zFactorCol) + + def rf_aspect(tileCol: Column): Column = + Aspect(tileCol) + + def rf_hillshade[T: Numeric](tileCol: Column, azimuth: T, altitude: T, zFactor: T): Column = + rf_hillshade(tileCol, lit(azimuth), lit(altitude), lit(zFactor)) + + def rf_hillshade(tileCol: Column, azimuth: Column, altitude: Column, zFactor: Column): Column = + Hillshade(tileCol, azimuth, altitude, zFactor) +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala index 04497f489..fbc567e9d 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala @@ -24,7 +24,7 @@ package org.locationtech.rasterframes.ref import com.typesafe.scalalogging.LazyLogging import frameless.TypedExpressionEncoder import geotrellis.proj4.CRS -import geotrellis.raster.{CellType, 
GridBounds, Tile} +import geotrellis.raster.{BufferTile, CellType, GridBounds, Tile} import geotrellis.vector.Extent import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.locationtech.rasterframes.tiles.ProjectedRasterTile @@ -34,35 +34,49 @@ import org.locationtech.rasterframes.tiles.ProjectedRasterTile * * @since 8/21/18 */ -case class RasterRef(source: RFRasterSource, bandIndex: Int, subextent: Option[Extent], subgrid: Option[Subgrid]) extends ProjectedRasterTile { +case class RasterRef(source: RFRasterSource, bandIndex: Int, subextent: Option[Extent], subgrid: Option[Subgrid], bufferSize: Short) extends ProjectedRasterTile { def tile: Tile = this def extent: Extent = subextent.getOrElse(source.extent) def crs: CRS = source.crs - def delegate = realizedTile + def delegate: BufferTile = realizedTile override def cols: Int = grid.width override def rows: Int = grid.height override def cellType: CellType = source.cellType - protected lazy val grid: GridBounds[Int] = - subgrid.map(_.toGridBounds).getOrElse(source.rasterExtent.gridBoundsFor(extent, true)) + protected lazy val grid: GridBounds[Int] = subgrid.map(_.toGridBounds).getOrElse(source.rasterExtent.gridBoundsFor(extent, true)) - lazy val realizedTile: Tile = { - RasterRef.log.trace(s"Fetching $extent ($grid) from band $bandIndex of $source") - source.read(grid, Seq(bandIndex)).tile.band(0) + lazy val realizedTile: BufferTile = { + RasterRef.log.trace(s"Fetching $extent ($grid) from band $bandIndex of $source with bufferSize: $bufferSize") + // Pixel bounds we would like to read, including buffer + val bufferedGrid = grid.buffer(bufferSize) + + // Pixel bounds we can read, including buffer + val possibleGrid = bufferedGrid.intersection(source.gridBounds).get + // Pixel bounds of center/non-buffer pixels in read tile + val tileCenterBounds = grid.offset( + colOffset = - possibleGrid.colMin, + rowOffset = - possibleGrid.rowMin + ) + + val raster = source.read(possibleGrid, 
Seq(bandIndex)).mapTile(_.band(0)) + BufferTile(raster.tile, tileCenterBounds) } - override def toString: String = s"RasterRef($source,$bandIndex,$cellType)" + override def toString: String = s"RasterRef($source, $bandIndex, $cellType, $subextent, $subgrid, $bufferSize)" } object RasterRef extends LazyLogging { private val log = logger def apply(source: RFRasterSource, bandIndex: Int): RasterRef = - RasterRef(source, bandIndex, None, None) + RasterRef(source, bandIndex, None, None, 0) def apply(source: RFRasterSource, bandIndex: Int, subextent: Extent, subgrid: GridBounds[Int]): RasterRef = - RasterRef(source, bandIndex, Some(subextent), Some(Subgrid(subgrid))) + RasterRef(source, bandIndex, Some(subextent), Some(Subgrid(subgrid)), 0) + + def apply(source: RFRasterSource, bandIndex: Int, subextent: Option[Extent], subgrid: Option[Subgrid]): RasterRef = + new RasterRef(source, bandIndex, subextent, subgrid, 0) implicit val rasterRefEncoder: ExpressionEncoder[RasterRef] = TypedExpressionEncoder[RasterRef].asInstanceOf[ExpressionEncoder[RasterRef]] diff --git a/core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala index 564664211..4bee10992 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala @@ -37,9 +37,10 @@ trait ProjectedRasterTile extends DelegatingTile with ProjectedRasterLike with D def tile: Tile def extent: Extent def crs: CRS + def delegate: Tile def projectedExtent: ProjectedExtent = ProjectedExtent(extent, crs) - def projectedRaster: ProjectedRaster[Tile] = ProjectedRaster[Tile](this, extent, crs) - def mapTile(f: Tile => Tile): ProjectedRasterTile = ProjectedRasterTile(f(this), extent, crs) + def projectedRaster: ProjectedRaster[Tile] = ProjectedRaster[Tile](delegate, extent, crs) + def mapTile(f: Tile => Tile): 
ProjectedRasterTile = ProjectedRasterTile(f(delegate), extent, crs) } object ProjectedRasterTile { @@ -55,8 +56,7 @@ object ProjectedRasterTile { } } - def unapply(prt: ProjectedRasterTile): Option[(Tile, Extent, CRS)] = - Some((prt.tile, prt.extent, prt.crs)) + def unapply(prt: ProjectedRasterTile): Option[(Tile, Extent, CRS)] = Some((prt.tile, prt.extent, prt.crs)) implicit lazy val projectedRasterTileEncoder: ExpressionEncoder[ProjectedRasterTile] = ExpressionEncoder() } diff --git a/core/src/main/scala/org/locationtech/rasterframes/util/package.scala b/core/src/main/scala/org/locationtech/rasterframes/util/package.scala index 4f91873d7..2bcaa53a6 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/util/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/package.scala @@ -185,6 +185,56 @@ package object util extends DataFrameRenderers { def apply() = mapping.keys.toSeq } + object FocalNeighborhood { + import scala.util.Try + import geotrellis.raster.Neighborhood + import geotrellis.raster.mapalgebra.focal._ + + // pattern matching and string interpolation works only since Scala 2.13 + def fromString(name: String): Try[Neighborhood] = Try { + name.toLowerCase().trim() match { + case s if s.startsWith("square-") => Square(Integer.parseInt(s.split("square-").last)) + case s if s.startsWith("circle-") => Circle(java.lang.Double.parseDouble(s.split("circle-").last)) + case s if s.startsWith("nesw-") => Nesw(Integer.parseInt(s.split("nesw-").last)) + case s if s.startsWith("wedge-") => { + val List(radius: Double, startAngle: Double, endAngle: Double) = + s + .split("wedge-") + .last + .split("-") + .toList + .map(java.lang.Double.parseDouble) + + Wedge(radius, startAngle, endAngle) + } + + case s if s.startsWith("annulus-") => { + val List(innerRadius: Double, outerRadius: Double) = + s + .split("annulus-") + .last + .split("-") + .toList + .map(java.lang.Double.parseDouble) + + Annulus(innerRadius, outerRadius) + } + case _ => throw 
new IllegalArgumentException(s"Unrecognized Neighborhood $name") + } + } + + def apply(neighborhood: Neighborhood): String = { + neighborhood match { + case Square(e) => s"square-$e" + case Circle(e) => s"circle-$e" + case Nesw(e) => s"nesw-$e" + case Wedge(radius, startAngle, endAngle) => s"nesw-$radius-$startAngle-$endAngle" + case Annulus(innerRadius, outerRadius) => s"annulus-$innerRadius-$outerRadius" + case _ => throw new IllegalArgumentException(s"Unrecognized Neighborhood ${neighborhood.toString}") + } + } + } + object ResampleMethod { import geotrellis.raster.resample.{ResampleMethod => GTResampleMethod, _} def unapply(name: String): Option[GTResampleMethod] = { @@ -217,7 +267,7 @@ package object util extends DataFrameRenderers { case Max => "max" case Min => "min" case Sum => "sum" - case _ => throw new IllegalArgumentException(s"Unrecogized ResampleMethod ${gtr.toString()}") + case _ => throw new IllegalArgumentException(s"Unrecognized ResampleMethod ${gtr.toString()}") } } } diff --git a/core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala b/core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala index 19e843875..953881cbd 100644 --- a/core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala @@ -43,8 +43,7 @@ import org.scalatest.matchers.should.Matchers import org.scalatest.matchers.{MatchResult, Matcher} import org.slf4j.LoggerFactory -trait TestEnvironment extends AnyFunSpec - with Matchers with Inspectors with Tolerance with RasterMatchers { +trait TestEnvironment extends AnyFunSpec with Matchers with Inspectors with Tolerance with RasterMatchers { @transient protected lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName)) @@ -57,16 +56,20 @@ trait TestEnvironment extends AnyFunSpec // allow 2 retries, should stabilize CI builds. 
https://spark.apache.org/docs/2.4.7/submitting-applications.html#master-urls def sparkMaster: String = "local[*, 2]" - def additionalConf = new SparkConf(false) - - implicit val spark: SparkSession = { - val session = SparkSession.builder - .master(sparkMaster) - .withKryoSerialization - .config(additionalConf) - .getOrCreate() - session.withRasterFrames - } + def additionalConf: SparkConf = + new SparkConf(false) + .set("spark.driver.port", "0") + .set("spark.hostPort", "0") + .set("spark.ui.enabled", "false") + + implicit val spark: SparkSession = + SparkSession + .builder + .master(sparkMaster) + .withKryoSerialization + .config(additionalConf) + .getOrCreate() + .withRasterFrames implicit def sc: SparkContext = spark.sparkContext diff --git a/core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala index 1b2b931e1..95fc4fb41 100644 --- a/core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala @@ -23,10 +23,9 @@ package org.locationtech.rasterframes.encoders import java.io.File import java.net.URI - import geotrellis.layer._ import geotrellis.proj4._ -import geotrellis.raster.{ArrayTile, CellType, Raster, Tile} +import geotrellis.raster.{ArrayTile, BufferTile, CellType, GridBounds, Raster, Tile} import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.spark.SparkConf import org.apache.spark.sql.Row @@ -43,7 +42,6 @@ import org.locationtech.rasterframes.tiles.ProjectedRasterTile */ class EncodingSpec extends TestEnvironment with TestData { - import spark.implicits._ describe("Spark encoding on standard types") { @@ -58,6 +56,19 @@ class EncodingSpec extends TestEnvironment with TestData { } } + it("should serialize BufferTile") { + val tileUDT = new TileUDT() + val tile = one.tile + val expected = BufferTile(tile, GridBounds(tile.dimensions)) + val 
actual = tileUDT.deserialize(tileUDT.serialize(expected)) + + assert(actual.isInstanceOf[BufferTile] === true) + val actualBufferTile = actual.asInstanceOf[BufferTile] + + actualBufferTile.gridBounds shouldBe expected.gridBounds + assertEqual(actualBufferTile.sourceTile, expected.sourceTile) + } + it("should code RDD[Tile]") { val rdd = sc.makeRDD(Seq(byteArrayTile: Tile, null)) val ds = rdd.toDF("tile") @@ -65,6 +76,20 @@ class EncodingSpec extends TestEnvironment with TestData { assert(ds.toDF.as[Tile].collect().head === byteArrayTile) } + it("should code RDD[BufferTile]") { + val tile = one.tile + val expected = BufferTile(tile, GridBounds(tile.dimensions)) + val ds = Seq(expected: Tile).toDS() + write(ds) + val actual = ds.toDF.as[Tile].first() + + assert(actual.isInstanceOf[BufferTile] === true) + val actualBufferTile = actual.asInstanceOf[BufferTile] + + actualBufferTile.gridBounds shouldBe expected.gridBounds + assertEqual(actualBufferTile.sourceTile, expected.sourceTile) + } + it("should code RDD[(Int, Tile)]") { val ds = Seq((1, byteArrayTile: Tile), (2, null)).toDS write(ds) diff --git a/core/src/test/scala/org/locationtech/rasterframes/functions/FocalFunctionsSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/functions/FocalFunctionsSpec.scala new file mode 100644 index 000000000..73271bb35 --- /dev/null +++ b/core/src/test/scala/org/locationtech/rasterframes/functions/FocalFunctionsSpec.scala @@ -0,0 +1,346 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2020 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.functions + +import geotrellis.raster.mapalgebra.focal.{Circle, Kernel, Square} +import geotrellis.raster.{BufferTile, CellSize} +import geotrellis.raster.testkit.RasterMatchers +import org.locationtech.rasterframes.ref.{RFRasterSource, RasterRef, Subgrid} +import org.locationtech.rasterframes.tiles.ProjectedRasterTile +import org.locationtech.rasterframes._ +import geotrellis.raster.Tile +import geotrellis.raster.mapalgebra.local.Implicits._ +import org.locationtech.rasterframes.encoders.serialized_literal + +import java.nio.file.Paths + +class FocalFunctionsSpec extends TestEnvironment with RasterMatchers { + + import spark.implicits._ + + describe("focal operations") { + lazy val path = + if(Paths.get("").toUri.toString.endsWith("core/")) Paths.get("src/test/resources/L8-B7-Elkton-VA.tiff").toUri + else Paths.get("core/src/test/resources/L8-B7-Elkton-VA.tiff").toUri + + lazy val src = RFRasterSource(path) + lazy val fullTile = src.read(src.extent).tile.band(0) + + // read a smaller region to read + lazy val subGridBounds = src.gridBounds.buffer(-10) + // read the region above, but buffered + lazy val bufferedRaster = new RasterRef(src, 0, None, Some(Subgrid(subGridBounds)), 10) + + lazy val bt = BufferTile(fullTile, subGridBounds) + lazy val btCellSize = CellSize(src.extent, bt.cols, bt.rows) + + lazy val df = Seq(Option(ProjectedRasterTile(bufferedRaster, src.extent, src.crs))).toDF("proj_raster").cache() + + it("should perform focal mean") { + 
checkDocs("rf_focal_mean") + val actual = + df + .select(rf_focal_mean($"proj_raster", Square(1))) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_focal_mean(proj_raster, 'square-1')") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.focalMean(Square(1)), actual) + assertEqual(fullTile.focalMean(Square(1)).crop(subGridBounds), actual) + } + it("should perform focal median") { + checkDocs("rf_focal_median") + val actual = + df + .select(rf_focal_median($"proj_raster", Square(1))) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_focal_median(proj_raster, 'square-1')") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.focalMedian(Square(1)), actual) + assertEqual(fullTile.focalMedian(Square(1)).crop(subGridBounds), actual) + } + it("should perform focal mode") { + checkDocs("rf_focal_mode") + val actual = + df + .select(rf_focal_mode($"proj_raster", Square(1))) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_focal_mode(proj_raster, 'square-1')") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.focalMode(Square(1)), actual) + assertEqual(fullTile.focalMode(Square(1)).crop(subGridBounds), actual) + } + it("should perform focal max") { + checkDocs("rf_focal_max") + val actual = + df + .select(rf_focal_max($"proj_raster", Square(1))) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_focal_max(proj_raster, 'square-1')") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.focalMax(Square(1)), actual) + assertEqual(fullTile.focalMax(Square(1)).crop(subGridBounds), actual) + } + it("should perform 
focal min") { + checkDocs("rf_focal_min") + val actual = + df + .select(rf_focal_min($"proj_raster", Square(1))) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_focal_min(proj_raster, 'square-1')") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.focalMin(Square(1)), actual) + assertEqual(fullTile.focalMin(Square(1)).crop(subGridBounds), actual) + } + it("should perform focal stddev") { + checkDocs("rf_focal_moransi") + val actual = + df + .select(rf_focal_stddev($"proj_raster", Square(1))) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_focal_stddev(proj_raster, 'square-1')") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.focalStandardDeviation(Square(1)), actual) + assertEqual(fullTile.focalStandardDeviation(Square(1)).crop(subGridBounds), actual) + } + it("should perform focal Moran's I") { + checkDocs("rf_focal_moransi") + val actual = + df + .select(rf_focal_moransi($"proj_raster", Square(1))) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_focal_moransi(proj_raster, 'square-1')") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.tileMoransI(Square(1)), actual) + assertEqual(fullTile.tileMoransI(Square(1)).crop(subGridBounds), actual) + } + it("should perform convolve") { + checkDocs("rf_convolve") + val actual = + df + .select(rf_convolve($"proj_raster", Kernel(Circle(2d)))) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .withColumn("kernel", serialized_literal(Kernel(Circle(2d)))) + .selectExpr(s"rf_convolve(proj_raster, kernel)") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + 
assertEqual(bt.convolve(Kernel(Circle(2d))), actual) + assertEqual(fullTile.convolve(Kernel(Circle(2d))).crop(subGridBounds), actual) + } + it("should perform slope") { + checkDocs("rf_slope") + val actual = + df + .select(rf_slope($"proj_raster", 1d)) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_slope(proj_raster, 1)") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.slope(btCellSize, 1d), actual) + assertEqual(fullTile.slope(btCellSize, 1d).crop(subGridBounds), actual) + } + it("should perform aspect") { + checkDocs("rf_aspect") + val actual = + df + .select(rf_aspect($"proj_raster")) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_aspect(proj_raster)") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.aspect(btCellSize), actual) + assertEqual(fullTile.aspect(btCellSize).crop(subGridBounds), actual) + } + it("should perform hillshade") { + checkDocs("rf_hillshade") + val actual = + df + .select(rf_hillshade($"proj_raster", 315, 45, 1)) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val actualExpr = + df + .selectExpr(s"rf_hillshade(proj_raster, 315, 45, 1)") + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + assertEqual(actual, actualExpr) + assertEqual(bt.mapTile(_.hillshade(btCellSize, 315, 45, 1)), actual) + assertEqual(fullTile.hillshade(btCellSize, 315, 45, 1).crop(subGridBounds), actual) + } + // that is the original use case + // to read a buffered source, perform a focal operation + // the followup functions would work with the buffered tile as + // with a regular tile without a buffer (all ops will work within the window) + it("should perform a focal operation and a valid local operation after that") { + val actual = + df + .select(rf_aspect($"proj_raster").as("aspect")) + 
.select(rf_local_add($"aspect", $"aspect")) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + val a: Tile = bt.aspect(btCellSize) + assertEqual(a.localAdd(a), actual) + } + + // if we read a buffered tile the local buffer would preserve the buffer information + // however rf_local_* functions don't preserve that type information + // and the Buffer Tile is upcasted into the Tile and stored as a regular tile (within the buffer, with the buffer lost) + // the follow up focal operation would be non buffered + it("should perform a local operation and a valid focal operation after that with the buffer lost") { + val actual = + df + .select(rf_local_add($"proj_raster", $"proj_raster") as "added") + .select(rf_aspect($"added")) + .as[Option[ProjectedRasterTile]] + .first() + .get + .tile + + // that's what we would like eventually + // val expected = bt.localAdd(bt) match { + // case b: BufferTile => b.aspect(btCellSize) + // case _ => throw new Exception("Not a Buffer Tile") + // } + + // that's what we have actually + // even though local ops can preserve the output tile + // we don't handle that + val expected = bt.localAdd(bt).aspect(btCellSize) + assertEqual(expected, actual) + } + } +} diff --git a/core/src/test/scala/org/locationtech/rasterframes/functions/LocalFunctionsSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/functions/LocalFunctionsSpec.scala index 3a7d13321..ee8940b61 100644 --- a/core/src/test/scala/org/locationtech/rasterframes/functions/LocalFunctionsSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/functions/LocalFunctionsSpec.scala @@ -21,7 +21,6 @@ package org.locationtech.rasterframes.functions -import geotrellis.raster.testkit.RasterMatchers import org.locationtech.rasterframes.TestEnvironment import geotrellis.raster._ import geotrellis.raster.testkit.RasterMatchers diff --git a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala 
b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala index aea2d13ae..f63cbc9fc 100644 --- a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala @@ -234,7 +234,7 @@ class RasterRefSpec extends TestEnvironment with TestData { import RasterRef.rasterRefEncoder // This shouldn't be required, but product encoder gets choosen. val r: RasterRef = subRaster val df = Seq(r).toDF() - val result = df.select(rf_tile(struct($"source", $"bandIndex", $"subextent", $"subgrid"))).first() + val result = df.select(rf_tile(struct($"source", $"bandIndex", $"subextent", $"subgrid", $"bufferSize"))).first() result.isInstanceOf[RasterRef] should be(false) assertEqual(r.tile.toArrayTile(), result) } @@ -242,7 +242,7 @@ class RasterRefSpec extends TestEnvironment with TestData { it("should resolve a RasterRefTile") { new Fixture { - val result = Seq(subRaster).toDF().select(rf_tile(struct($"source", $"bandIndex", $"subextent", $"subgrid"))).first() + val result = Seq(subRaster).toDF().select(rf_tile(struct($"source", $"bandIndex", $"subextent", $"subgrid", $"bufferSize"))).first() result.isInstanceOf[RasterRef] should be(false) assertEqual(subRaster.toArrayTile(), result) } diff --git a/datasource/src/main/scala/com/azavea/stac4s/api/client/search/package.scala b/datasource/src/main/scala/com/azavea/stac4s/api/client/search/package.scala deleted file mode 100644 index a383ff7b8..000000000 --- a/datasource/src/main/scala/com/azavea/stac4s/api/client/search/package.scala +++ /dev/null @@ -1,10 +0,0 @@ -package com.azavea.stac4s.api.client - -import com.azavea.stac4s.StacItem -import fs2.Stream - -package object search { - implicit class Stac4sClientOps[F[_]](val self: SttpStacClient[F]) extends AnyVal { - def search(filter: Option[SearchFilters]): Stream[F, StacItem] = filter.fold(self.search)(self.search) - } -} diff --git 
a/datasource/src/main/scala/org/apache/spark/sql/stac/GeometryUDT.scala b/datasource/src/main/scala/org/apache/spark/sql/stac/GeometryUDT.scala deleted file mode 100644 index 6421fe4b6..000000000 --- a/datasource/src/main/scala/org/apache/spark/sql/stac/GeometryUDT.scala +++ /dev/null @@ -1,14 +0,0 @@ -package org.apache.spark.sql.stac - -import org.locationtech.jts.geom._ -import org.apache.spark.sql.jts.AbstractGeometryUDT -import org.locationtech.jts.geom.Geometry - -class PointUDT extends AbstractGeometryUDT[Point]("point") -class MultiPointUDT extends AbstractGeometryUDT[MultiPoint]("multipoint") -class LineStringUDT extends AbstractGeometryUDT[LineString]("linestring") -class MultiLineStringUDT extends AbstractGeometryUDT[MultiLineString]("multilinestring") -class PolygonUDT extends AbstractGeometryUDT[Polygon]("polygon") -class MultiPolygonUDT extends AbstractGeometryUDT[MultiPolygon]("multipolygon") -class GeometryUDT extends AbstractGeometryUDT[Geometry]("geometry") -class GeometryCollectionUDT extends AbstractGeometryUDT[GeometryCollection]("geometrycollection") \ No newline at end of file diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala index bfe4bfb3e..a671d8618 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala @@ -52,7 +52,7 @@ package object datasource { private[rasterframes] def intParam(key: String, parameters: CaseInsensitiveStringMap): Option[Int] = - if(parameters.containsKey(key)) parameters.get(key).toInt.some + if(parameters.containsKey(key)) Option(parameters.get(key)).map(_.toInt) else None private[rasterframes] diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala 
b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala index 5515b7513..5ed034f71 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala @@ -36,15 +36,16 @@ import scala.util.Try class RasterSourceDataSource extends DataSourceRegister with RelationProvider { import RasterSourceDataSource._ - override def shortName(): String = SHORT_NAME - override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = { + def shortName(): String = SHORT_NAME + def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = { val bands = parameters.bandIndexes val tiling = parameters.tileDims.orElse(Some(NOMINAL_TILE_DIMS)) + val bufferSize = parameters.bufferSize val lazyTiles = parameters.lazyTiles val spatialIndex = parameters.spatialIndex val spec = parameters.pathSpec val catRef = spec.fold(_.registerAsTable(sqlContext), identity) - RasterSourceRelation(sqlContext, catRef, bands, tiling, lazyTiles, spatialIndex) + RasterSourceRelation(sqlContext, catRef, bands, tiling, bufferSize, lazyTiles, spatialIndex) } } @@ -54,6 +55,7 @@ object RasterSourceDataSource { final val PATHS_PARAM = "paths" final val BAND_INDEXES_PARAM = "band_indexes" final val TILE_DIMS_PARAM = "tile_dimensions" + final val BUFFER_SIZE_PARAM = "buffer_size" final val CATALOG_TABLE_PARAM = "catalog_table" final val CATALOG_TABLE_COLS_PARAM = "catalog_col_names" final val CATALOG_CSV_PARAM = "catalog_csv" @@ -110,20 +112,22 @@ object RasterSourceDataSource { def tokenize(csv: String): Seq[String] = csv.split(',').map(_.trim) def tileDims: Option[Dimensions[Int]] = - parameters.get(TILE_DIMS_PARAM) + parameters + .get(TILE_DIMS_PARAM) .map(tokenize(_).map(_.toInt)) .map { case Seq(cols, rows) => Dimensions(cols, rows)} - def 
bandIndexes: Seq[Int] = parameters - .get(BAND_INDEXES_PARAM) - .map(tokenize(_).map(_.toInt)) - .getOrElse(Seq(0)) + def bandIndexes: Seq[Int] = + parameters + .get(BAND_INDEXES_PARAM) + .map(tokenize(_).map(_.toInt)) + .getOrElse(Seq(0)) + + def lazyTiles: Boolean = parameters.get(LAZY_TILES_PARAM).forall(_.toBoolean) - def lazyTiles: Boolean = parameters - .get(LAZY_TILES_PARAM).forall(_.toBoolean) + def bufferSize: Short = parameters.get(BUFFER_SIZE_PARAM).map(_.toShort).getOrElse(0.toShort) // .getOrElse(-1.toShort) - def spatialIndex: Option[Int] = parameters - .get(SPATIAL_INDEX_PARTITIONS_PARAM).flatMap(p => Try(p.toInt).toOption) + def spatialIndex: Option[Int] = parameters.get(SPATIAL_INDEX_PARTITIONS_PARAM).flatMap(p => Try(p.toInt).toOption) def catalog: Option[RasterSourceCatalog] = { val paths = ( @@ -143,16 +147,18 @@ object RasterSourceDataSource { ) } - def catalogTableCols: Seq[String] = parameters - .get(CATALOG_TABLE_COLS_PARAM) - .map(tokenize(_).filter(_.nonEmpty).toSeq) - .getOrElse(Seq.empty) + def catalogTableCols: Seq[String] = + parameters + .get(CATALOG_TABLE_COLS_PARAM) + .map(tokenize(_).filter(_.nonEmpty).toSeq) + .getOrElse(Seq.empty) - def catalogTable: Option[RasterSourceCatalogRef] = parameters - .get(CATALOG_TABLE_PARAM) - .map(p => RasterSourceCatalogRef(p, catalogTableCols: _*)) + def catalogTable: Option[RasterSourceCatalogRef] = + parameters + .get(CATALOG_TABLE_PARAM) + .map(p => RasterSourceCatalogRef(p, catalogTableCols: _*)) - def pathSpec: Either[RasterSourceCatalog, RasterSourceCatalogRef] = { + def pathSpec: Either[RasterSourceCatalog, RasterSourceCatalogRef] = (catalog, catalogTable) match { case (Some(f), None) => Left(f) case (None, Some(p)) => Right(p) @@ -161,7 +167,6 @@ object RasterSourceDataSource { case _ => throw new IllegalArgumentException( "Only one of a set of file paths OR a paths table column may be provided.") } - } } /** Mixin for adding extension methods on DataFrameReader for 
RasterSourceDataSource-like readers. */ @@ -179,7 +184,7 @@ object RasterSourceDataSource { type TaggedReader = DataFrameReader @@ ReaderTag val reader: TaggedReader - protected def tmpTableName() = UUID.randomUUID().toString.replace("-", "") + protected def tmpTableName(): String = UUID.randomUUID().toString.replace("-", "") /** Set the zero-based band indexes to read. Defaults to Seq(0). */ def withBandIndexes(bandIndexes: Int*): TaggedReader = @@ -192,6 +197,11 @@ object RasterSourceDataSource { reader.option(RasterSourceDataSource.TILE_DIMS_PARAM, s"$cols,$rows") ) + def withBufferSize(bufferSize: Short): TaggedReader = + tag[ReaderTag][DataFrameReader]( + reader.option(RasterSourceDataSource.BUFFER_SIZE_PARAM, bufferSize) + ) + /** Indicate if tile reading should be delayed until cells are fetched. Defaults to `true`. */ def withLazyTiles(state: Boolean): TaggedReader = tag[ReaderTag][DataFrameReader]( diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala index 3b729df53..658f862f4 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala @@ -51,6 +51,7 @@ case class RasterSourceRelation( catalogTable: RasterSourceCatalogRef, bandIndexes: Seq[Int], subtileDims: Option[Dimensions[Int]], + bufferSize: Short, lazyTiles: Boolean, spatialIndexPartitions: Option[Int] ) extends BaseRelation with TableScan { @@ -127,7 +128,7 @@ case class RasterSourceRelation( // Expand RasterSource into multiple columns per band, and multiple rows per tile // There's some unintentional fragility here in that the structure of the expression // is expected to line up with our column structure here. 
- val refs = RasterSourceToRasterRefs(subtileDims, bandIndexes, srcs: _*) as refColNames + val refs = RasterSourceToRasterRefs(subtileDims, bandIndexes, bufferSize, srcs: _*) as refColNames // RasterSourceToRasterRef is a generator, which means you have to do the Tile conversion // in a separate select statement (Query planner doesn't know how many columns ahead of time). @@ -139,7 +140,7 @@ case class RasterSourceRelation( .select(extras ++ paths :+ refs: _*) .select(paths ++ refsToTiles ++ extras: _*) } else { - val tiles = RasterSourceToTiles(subtileDims, bandIndexes, srcs: _*) as tileColNames + val tiles = RasterSourceToTiles(subtileDims, bandIndexes, bufferSize, srcs: _*) as tileColNames withPaths.select((paths :+ tiles) ++ extras: _*) } diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiDataSource.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiDataSource.scala index bce9191be..47772072a 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiDataSource.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiDataSource.scala @@ -16,12 +16,11 @@ class StacApiDataSource extends TableProvider with DataSourceRegister { def getTable(structType: StructType, transforms: Array[Transform], map: util.Map[String, String]): Table = new StacApiTable() - override def shortName(): String = "stac-api" + def shortName(): String = StacApiDataSource.SHORT_NAME } object StacApiDataSource { final val SHORT_NAME = "stac-api" final val URI_PARAM = "uri" final val SEARCH_FILTERS_PARAM = "search-filters" - final val ASSET_LIMIT_PARAM = "asset-limit" } diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiPartition.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiPartition.scala index a11f85b8c..41842cab1 100644 --- 
a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiPartition.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiPartition.scala @@ -7,13 +7,12 @@ import com.azavea.stac4s.StacItem import geotrellis.store.util.BlockingThreadPool import sttp.client3.asynchttpclient.cats.AsyncHttpClientCatsBackend import com.azavea.stac4s.api.client._ -import eu.timepit.refined.types.numeric.NonNegInt import cats.effect.IO import sttp.model.Uri import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} -case class StacApiPartition(uri: Uri, searchFilters: SearchFilters, searchLimit: Option[NonNegInt]) extends InputPartition +case class StacApiPartition(uri: Uri, searchFilters: SearchFilters) extends InputPartition class StacApiPartitionReaderFactory extends PartitionReaderFactory { override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { @@ -25,24 +24,17 @@ class StacApiPartitionReaderFactory extends PartitionReaderFactory { } class StacApiPartitionReader(partition: StacApiPartition) extends PartitionReader[InternalRow] { - lazy val partitionValues: Iterator[StacItem] = { - implicit val cs = IO.contextShift(BlockingThreadPool.executionContext) - AsyncHttpClientCatsBackend - .resource[IO]() - .use { backend => - SttpStacClient(backend, partition.uri) - .search(partition.searchFilters) - .take(partition.searchLimit.map(_.value)) - .compile - .toList - } - .map(_.toIterator) - .unsafeRunSync() - } + + @transient private implicit lazy val cs = IO.contextShift(BlockingThreadPool.executionContext) + @transient private lazy val backend = AsyncHttpClientCatsBackend[IO]().unsafeRunSync() + @transient private lazy val partitionValues: Iterator[StacItem] = + SttpStacClient(backend, partition.uri) + .search(partition.searchFilters) + .toIterator(_.unsafeRunSync()) def next: Boolean = 
partitionValues.hasNext def get: InternalRow = partitionValues.next.toInternalRow - def close(): Unit = { } + def close(): Unit = backend.close().unsafeRunSync() } diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiScanBuilder.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiScanBuilder.scala index 30ed8c8fa..a7886f81e 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiScanBuilder.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiScanBuilder.scala @@ -8,12 +8,12 @@ import org.apache.spark.sql.connector.read.{Batch, InputPartition, PartitionRead import org.apache.spark.sql.types.StructType import sttp.model.Uri -class StacApiScanBuilder(uri: Uri, searchFilters: SearchFilters, searchLimit: Option[NonNegInt]) extends ScanBuilder { - override def build(): Scan = new StacApiBatchScan(uri, searchFilters, searchLimit) +class StacApiScanBuilder(uri: Uri, searchFilters: SearchFilters) extends ScanBuilder { + def build(): Scan = new StacApiBatchScan(uri, searchFilters) } /** Batch Reading Support. The schema is repeated here as it can change after column pruning, etc. */ -class StacApiBatchScan(uri: Uri, searchFilters: SearchFilters, searchLimit: Option[NonNegInt]) extends Scan with Batch { +class StacApiBatchScan(uri: Uri, searchFilters: SearchFilters) extends Scan with Batch { def readSchema(): StructType = stacItemEncoder.schema override def toBatch: Batch = this @@ -23,6 +23,6 @@ class StacApiBatchScan(uri: Uri, searchFilters: SearchFilters, searchLimit: Opti * To perform a distributed load, we'd need to know some internals about how the next page token is computed. * This can be a good idea for the STAC Spec extension. 
* */ - def planInputPartitions(): Array[InputPartition] = Array(StacApiPartition(uri, searchFilters, searchLimit)) + def planInputPartitions(): Array[InputPartition] = Array(StacApiPartition(uri, searchFilters)) def createReaderFactory(): PartitionReaderFactory = new StacApiPartitionReaderFactory() } diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiTable.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiTable.scala index 0db7a34f2..fe6a2e5e0 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiTable.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiTable.scala @@ -7,8 +7,8 @@ import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapabil import org.apache.spark.sql.connector.read.ScanBuilder import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap -import org.locationtech.rasterframes.datasource.stac.api.StacApiDataSource.{ASSET_LIMIT_PARAM, SEARCH_FILTERS_PARAM, URI_PARAM} -import org.locationtech.rasterframes.datasource.{intParam, jsonParam, uriParam} +import org.locationtech.rasterframes.datasource.stac.api.StacApiDataSource.{SEARCH_FILTERS_PARAM, URI_PARAM} +import org.locationtech.rasterframes.datasource.{jsonParam, uriParam} import sttp.model.Uri import scala.collection.JavaConverters._ @@ -24,7 +24,7 @@ class StacApiTable extends Table with SupportsRead { def capabilities(): util.Set[TableCapability] = Set(TableCapability.BATCH_READ).asJava def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = - new StacApiScanBuilder(options.uri, options.searchFilters, options.searchLimit) + new StacApiScanBuilder(options.uri, options.searchFilters) } object StacApiTable { @@ -35,7 +35,5 @@ object StacApiTable { jsonParam(SEARCH_FILTERS_PARAM, options) .flatMap(_.as[SearchFilters].toOption) .getOrElse(SearchFilters(limit = 
NonNegInt.from(30).toOption)) - - def searchLimit: Option[NonNegInt] = intParam(ASSET_LIMIT_PARAM, options).flatMap(NonNegInt.from(_).toOption) } } diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/ItemDatetimeCatalyst.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/ItemDatetimeCatalyst.scala index 0d6970200..d8692e96e 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/ItemDatetimeCatalyst.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/ItemDatetimeCatalyst.scala @@ -1,27 +1,34 @@ package org.locationtech.rasterframes.datasource.stac.api.encoders -import com.azavea.stac4s.ItemDatetime +import cats.data.Ior import frameless.SQLTimestamp import cats.syntax.option._ +import com.azavea.stac4s.{PointInTime, TimeRange} +import com.azavea.stac4s.types.ItemDatetime import java.time.Instant -case class ItemDatetimeCatalyst(start: SQLTimestamp, end: Option[SQLTimestamp], _type: ItemDatetimeCatalystType) +case class ItemDatetimeCatalyst(datetime: Option[SQLTimestamp], start: Option[SQLTimestamp], end: Option[SQLTimestamp], _type: ItemDatetimeCatalystType) object ItemDatetimeCatalyst { def toDatetime(dt: ItemDatetimeCatalyst): ItemDatetime = { - val ItemDatetimeCatalyst(start, endo, _type) = dt - (_type, endo) match { - case (ItemDatetimeCatalystType.PointInTime, _) => ItemDatetime.PointInTime(Instant.ofEpochMilli(start.us)) - case (ItemDatetimeCatalystType.TimeRange, Some(end)) => ItemDatetime.TimeRange(Instant.ofEpochMilli(start.us), Instant.ofEpochMilli(end.us)) - case err => throw new Exception(s"ItemDatetimeCatalyst decoding is not possible, $err") + dt match { + case ItemDatetimeCatalyst(Some(datetime), Some(start), Some(end), ItemDatetimeCatalystType.PointInTimeAndTimeRange) => + Ior.Both(PointInTime(Instant.ofEpochMilli(datetime.us)), TimeRange(Instant.ofEpochMilli(start.us), 
Instant.ofEpochMilli(end.us)))
+      case ItemDatetimeCatalyst(Some(datetime), _, _, ItemDatetimeCatalystType.PointInTime) =>
+        Ior.Left(PointInTime(Instant.ofEpochMilli(datetime.us)))
+      case ItemDatetimeCatalyst(_, Some(start), Some(end), ItemDatetimeCatalystType.TimeRange) =>
+        Ior.Right(TimeRange(Instant.ofEpochMilli(start.us), Instant.ofEpochMilli(end.us)))
+      case e => throw new Exception(s"ItemDatetimeCatalyst decoding is not possible, $e")
     }
   }
 
   def fromItemDatetime(dt: ItemDatetime): ItemDatetimeCatalyst = dt match {
-    case ItemDatetime.PointInTime(when) =>
-      ItemDatetimeCatalyst(SQLTimestamp(when.toEpochMilli), None, ItemDatetimeCatalystType.PointInTime)
-    case ItemDatetime.TimeRange(start, end) =>
-      ItemDatetimeCatalyst(SQLTimestamp(start.toEpochMilli), SQLTimestamp(end.toEpochMilli).some, ItemDatetimeCatalystType.PointInTime)
+    case Ior.Left(PointInTime(datetime)) =>
+      ItemDatetimeCatalyst(SQLTimestamp(datetime.toEpochMilli).some, None, None, ItemDatetimeCatalystType.PointInTime)
+    case Ior.Right(TimeRange(start, end)) =>
+      ItemDatetimeCatalyst(None, SQLTimestamp(start.toEpochMilli).some, SQLTimestamp(end.toEpochMilli).some, ItemDatetimeCatalystType.TimeRange)
+    case Ior.Both(PointInTime(datetime), TimeRange(start, end)) =>
+      ItemDatetimeCatalyst(SQLTimestamp(datetime.toEpochMilli).some, SQLTimestamp(start.toEpochMilli).some, SQLTimestamp(end.toEpochMilli).some, ItemDatetimeCatalystType.PointInTimeAndTimeRange)
   }
 }
diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/ItemDatetimeCatalystType.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/ItemDatetimeCatalystType.scala
index ab2da1117..31f88c2c8 100644
--- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/ItemDatetimeCatalystType.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/ItemDatetimeCatalystType.scala
@@ -4,6 +4,7 @@ sealed trait 
ItemDatetimeCatalystType { lazy val repr: String = this.getClass.ge object ItemDatetimeCatalystType { case object PointInTime extends ItemDatetimeCatalystType case object TimeRange extends ItemDatetimeCatalystType + case object PointInTimeAndTimeRange extends ItemDatetimeCatalystType def fromString(str: String): ItemDatetimeCatalystType = str match { case PointInTime.repr => PointInTime diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/StacSerializers.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/StacSerializers.scala index c5a8e2fd3..9f085a8c0 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/StacSerializers.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/encoders/StacSerializers.scala @@ -5,23 +5,24 @@ import io.circe.{Json, JsonObject} import io.circe.syntax._ import cats.syntax.either._ import com.azavea.stac4s._ +import com.azavea.stac4s.types.ItemDatetime import eu.timepit.refined.api.{RefType, Validate} import frameless.{Injection, SQLTimestamp, TypedEncoder, TypedExpressionEncoder} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.stac._ +import org.apache.spark.sql.jts.JTSTypes import java.time.Instant /** STAC API Dataframe relies on the Frameless Expressions derivation. 
*/ trait StacSerializers { /** GeoMesa UDTs, should be defined as implicits so frameless would pick them up */ - implicit val pointUDT: PointUDT = new PointUDT - implicit val multiPointUDT: MultiPointUDT = new MultiPointUDT - implicit val multiLineStringUDT: MultiLineStringUDT = new MultiLineStringUDT - implicit val polygonUDT: PolygonUDT = new PolygonUDT - implicit val multiPolygonUDT: MultiPolygonUDT = new MultiPolygonUDT - implicit val geometryUDT: GeometryUDT = new GeometryUDT - implicit val geometryCollectionUDT: GeometryCollectionUDT = new GeometryCollectionUDT + implicit val pointUDT = JTSTypes.PointTypeInstance + implicit val multiPointUDT = JTSTypes.MultiPointTypeInstance + implicit val multiLineStringUDT = JTSTypes.MultiLineStringTypeInstance + implicit val polygonUDT = JTSTypes.PolygonTypeInstance + implicit val multiPolygonUDT = JTSTypes.MultipolygonTypeInstance + implicit val geometryUDT = JTSTypes.GeometryTypeInstance + implicit val geometryCollectionUDT = JTSTypes.GeometryCollectionTypeInstance /** Injections to Encode stac4s objects */ implicit val stacLinkTypeInjection: Injection[StacLinkType, String] = Injection(_.repr, _.asJson.asUnsafe[StacLinkType]) diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/package.scala index b99515d38..d2834f963 100644 --- a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/package.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/stac/api/package.scala @@ -1,9 +1,11 @@ package org.locationtech.rasterframes.datasource.stac +import cats.Monad +import cats.syntax.functor._ import com.azavea.stac4s.api.client.SearchFilters import org.apache.spark.sql.{DataFrame, DataFrameReader} import io.circe.syntax._ -import fs2.Stream +import fs2.{Pull, Stream} import shapeless.tag import shapeless.tag.@@ import org.apache.spark.sql.SparkSession @@ 
-17,6 +19,7 @@ package object api { implicit class StacApiDataFrameReaderOps(val reader: StacApiDataFrameReader) extends AnyVal { def loadStac: StacApiDataFrame = tag[StacApiDataFrameTag][DataFrame](reader.load) + def loadStac(limit: Int): StacApiDataFrame = tag[StacApiDataFrameTag][DataFrame](reader.load.limit(limit)) } implicit class StacApiDataFrameOps(val df: StacApiDataFrame) extends AnyVal { @@ -38,7 +41,27 @@ package object api { } implicit class Fs2StreamOps[F[_], T](val self: Stream[F, T]) { - def take(n: Option[Int]): Stream[F, T] = n.fold(self)(self.take(_)) + /** Unsafe API to interop with the Spark API. */ + def toIterator(run: F[Option[(T, fs2.Stream[F, T])]] => Option[(T, fs2.Stream[F, T])]) + (implicit monad: Monad[F], compiler: Stream.Compiler[F, F]): Iterator[T] = new Iterator[T] { + private var head = self + private def nextF: F[Option[(T, fs2.Stream[F, T])]] = + head + .pull.uncons1 + .flatMap(Pull.output1) + .stream + .compile + .last + .map(_.flatten) + + def hasNext(): Boolean = run(nextF).nonEmpty + + def next(): T = { + val (item, tail) = run(nextF).get + this.head = tail + item + } + } } implicit class DataFrameReaderOps(val self: DataFrameReader) extends AnyVal { @@ -48,12 +71,11 @@ package object api { implicit class DataFrameReaderStacApiOps(val reader: DataFrameReader) extends AnyVal { def stacApi(): StacApiDataFrameReader = tag[StacApiDataFrameTag][DataFrameReader](reader.format(StacApiDataSource.SHORT_NAME)) - def stacApi(uri: String, filters: SearchFilters = SearchFilters(), searchLimit: Option[Int] = None): StacApiDataFrameReader = + def stacApi(uri: String, filters: SearchFilters = SearchFilters()): StacApiDataFrameReader = tag[StacApiDataFrameTag][DataFrameReader]( stacApi() .option(StacApiDataSource.URI_PARAM, uri) .option(StacApiDataSource.SEARCH_FILTERS_PARAM, filters.asJson.noSpaces) - .option(StacApiDataSource.ASSET_LIMIT_PARAM, searchLimit) ) } } diff --git a/datasource/src/test/scala/examples/BufferTiles.scala 
b/datasource/src/test/scala/examples/BufferTiles.scala new file mode 100644 index 000000000..66be3e979 --- /dev/null +++ b/datasource/src/test/scala/examples/BufferTiles.scala @@ -0,0 +1,68 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2020 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package examples + +import geotrellis.raster.mapalgebra.focal.Square +import org.apache.spark.sql._ +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.datasource.raster._ +import org.locationtech.rasterframes.tiles.ProjectedRasterTile + +object BufferTiles extends App { + + implicit val spark = + SparkSession + .builder() + .master("local[*]") + .appName("RasterFrames") + .withKryoSerialization + .getOrCreate() + .withRasterFrames + + spark.sparkContext.setLogLevel("ERROR") + + import spark.implicits._ + + val example = "https://raw.githubusercontent.com/locationtech/rasterframes/develop/core/src/test/resources/LC08_B7_Memphis_COG.tiff" + + val tile = + spark + .read + .raster + .from(example) + .withBufferSize(1) + .withTileDimensions(100, 100) + .load() + .limit(1) + .select($"proj_raster") + .select(rf_focal_max($"proj_raster", Square(1))) + // .select(rf_aspect($"proj_raster")) + // .select(rf_hillshade($"proj_raster", 315, 45, 1)) + .as[Option[ProjectedRasterTile]] + // .show(false) + .first() + + // 
tile.get.renderPng().write("/tmp/hillshade-buffered.png") + // tile.get.renderPng().write("/tmp/hillshade-nobuffered.png") + + // spark.stop() +} diff --git a/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala index fd3069e24..79c82b3ab 100644 --- a/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala @@ -40,10 +40,7 @@ import org.scalatest.BeforeAndAfter import scala.reflect.ClassTag - -class TileFeatureSupportSpec extends TestEnvironment - with TestData - with BeforeAndAfter { +class TileFeatureSupportSpec extends TestEnvironment with TestData with BeforeAndAfter { val strTF1 = TileFeature(squareIncrementingTile(3), List("data1")) val strTF2 = TileFeature(squareIncrementingTile(3), List("data2")) @@ -54,10 +51,8 @@ class TileFeatureSupportSpec extends TestEnvironment val geoms = Seq(ext2.toPolygon()) val maskOpts: Rasterizer.Options = Rasterizer.Options.DEFAULT - describe("TileFeatureSupport") { it("should support merge, prototype operations") { - val merged = strTF1.merge(strTF2) assert(merged.tile == strTF1.tile.merge(strTF2.tile)) assert(merged.data == List("data1", "data2")) diff --git a/datasource/src/test/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiDataSourceTest.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiDataSourceTest.scala index a778b5db9..93e1d0446 100644 --- a/datasource/src/test/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiDataSourceTest.scala +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/stac/api/StacApiDataSourceTest.scala @@ -25,9 +25,7 @@ import org.locationtech.rasterframes.datasource.raster._ import 
org.locationtech.rasterframes.datasource.stac.api.encoders._ import com.azavea.stac4s.StacItem import com.azavea.stac4s.api.client.{SearchFilters, SttpStacClient} -import cats.syntax.option._ import cats.effect.IO -import eu.timepit.refined.auto._ import geotrellis.store.util.BlockingThreadPool import org.apache.spark.sql.functions.explode import org.locationtech.rasterframes.TestEnvironment @@ -45,9 +43,10 @@ class StacApiDataSourceTest extends TestEnvironment { self => .read .stacApi( "https://franklin.nasa-hsi.azavea.com/", - filters = SearchFilters(items = List("aviris-l1-cogs_f130329t01p00r06_sc01")), - searchLimit = Some(1) - ).load + filters = SearchFilters(items = List("aviris-l1-cogs_f130329t01p00r06_sc01")) + ) + .load + .limit(1) results.rdd.partitions.length shouldBe 1 results.count() shouldBe 1L @@ -78,9 +77,10 @@ class StacApiDataSourceTest extends TestEnvironment { self => .read .stacApi( "https://franklin.nasa-hsi.azavea.com/", - filters = SearchFilters(items = List("aviris-l1-cogs_f130329t01p00r06_sc01")), - searchLimit = Some(1) - ).load + filters = SearchFilters(items = List("aviris-l1-cogs_f130329t01p00r06_sc01")) + ) + .load + .limit(1) results.rdd.partitions.length shouldBe 1 @@ -118,10 +118,9 @@ class StacApiDataSourceTest extends TestEnvironment { self => .read .stacApi( "https://franklin.nasa-hsi.azavea.com/", - filters = SearchFilters(items = List("aviris-l1-cogs_f130329t01p00r06_sc01")), - searchLimit = Some(1) + filters = SearchFilters(items = List("aviris-l1-cogs_f130329t01p00r06_sc01")) ) - .loadStac + .loadStac(limit = 1) // to preserve the STAC DataFrame type val assets = items @@ -149,7 +148,7 @@ class StacApiDataSourceTest extends TestEnvironment { self => it("should read from Astraea Earth service") { import spark.implicits._ - val results = spark.read.stacApi("https://eod-catalog-svc-prod.astraea.earth/", searchLimit = Some(1)).load + val results = spark.read.stacApi("https://eod-catalog-svc-prod.astraea.earth/").load.limit(1) // 
results.printSchema() @@ -178,8 +177,9 @@ class StacApiDataSourceTest extends TestEnvironment { self => val items = spark .read - .stacApi("https://eod-catalog-svc-prod.astraea.earth/", searchLimit = 1.some) + .stacApi("https://eod-catalog-svc-prod.astraea.earth/") .load + .limit(1) println(items.collect().toList.length) @@ -199,7 +199,11 @@ class StacApiDataSourceTest extends TestEnvironment { self => ignore("should fetch rasters from the Datacube STAC API service") { import spark.implicits._ - val items = spark.read.stacApi("https://datacube.services.geo.ca/api", filters = SearchFilters(collections=List("markham")), searchLimit = Some(1)).load + val items = spark + .read + .stacApi("https://datacube.services.geo.ca/api", filters = SearchFilters(collections=List("markham"))) + .load + .limit(1) println(items.collect().toList.length) diff --git a/docs/src/main/paradox/release-notes.md b/docs/src/main/paradox/release-notes.md index 9d738c6ad..6feed51b6 100644 --- a/docs/src/main/paradox/release-notes.md +++ b/docs/src/main/paradox/release-notes.md @@ -1,5 +1,13 @@ # Release Notes +## 0.10.x + +### 0.10.0 + +* Upgraded to Spark 3.1.2, Scala 2.12 and GeoTrellis 3.6.0 +* Added FocalOperations support +* Added STAC API DataFrames implementation + ## 0.9.x ### 0.9.1 diff --git a/project/RFDependenciesPlugin.scala b/project/RFDependenciesPlugin.scala index c3f830930..766934a06 100644 --- a/project/RFDependenciesPlugin.scala +++ b/project/RFDependenciesPlugin.scala @@ -52,7 +52,7 @@ object RFDependenciesPlugin extends AutoPlugin { val scaffeine = "com.github.blemale" %% "scaffeine" % "4.0.2" val `spray-json` = "io.spray" %% "spray-json" % "1.3.4" val `scala-logging` = "com.typesafe.scala-logging" %% "scala-logging" % "3.8.0" - val stac4s = "com.azavea.stac4s" %% "client" % "0.6.2" + val stac4s = "com.azavea.stac4s" %% "client" % "0.7.1" val sttpCatsCe2 = "com.softwaremill.sttp.client3" %% "async-http-client-backend-cats-ce2" % "3.3.6" val frameless = "org.typelevel" %% 
"frameless-dataset" % "0.10.1" } diff --git a/project/RFProjectPlugin.scala b/project/RFProjectPlugin.scala index 7aba41f30..c62ae3d02 100644 --- a/project/RFProjectPlugin.scala +++ b/project/RFProjectPlugin.scala @@ -20,7 +20,7 @@ object RFProjectPlugin extends AutoPlugin { scmInfo := Some(ScmInfo(url("https://github.com/locationtech/rasterframes"), "git@github.com:locationtech/rasterframes.git")), description := "RasterFrames brings the power of Spark DataFrames to geospatial raster data.", licenses += ("Apache-2.0", url("https://www.apache.org/licenses/LICENSE-2.0.html")), - scalaVersion := "2.12.13", + scalaVersion := "2.12.15", scalacOptions ++= Seq( "-target:jvm-1.8", "-feature", diff --git a/pyrasterframes/src/main/python/pyrasterframes/__init__.py b/pyrasterframes/src/main/python/pyrasterframes/__init__.py index add1c42da..65b0eaed4 100644 --- a/pyrasterframes/src/main/python/pyrasterframes/__init__.py +++ b/pyrasterframes/src/main/python/pyrasterframes/__init__.py @@ -117,6 +117,7 @@ def _raster_reader( source=None, catalog_col_names: Optional[List[str]] = None, band_indexes: Optional[List[int]] = None, + buffer_size: int = 0, tile_dimensions: Tuple[int] = (256, 256), lazy_tiles: bool = True, spatial_index_partitions=None, @@ -134,6 +135,7 @@ def _raster_reader( :param catalog_col_names: required if `source` is a DataFrame or CSV string. It is a list of strings giving the names of columns containing URIs to read. :param band_indexes: list of integers indicating which bands, zero-based, to read from the raster files specified; default is to read only the first band. :param tile_dimensions: tuple or list of two indicating the default tile dimension as (columns, rows). + :param buffer_size: buffer each tile read by this many cells on all sides. :param lazy_tiles: If true (default) only generate minimal references to tile contents; if false, fetch tile cell values. 
:param spatial_index_partitions: If true, partitions read tiles by a Z2 spatial index using the default shuffle partitioning. If a values > 0, the given number of partitions are created instead of the default. @@ -176,7 +178,8 @@ def temp_name(): options.update({ "band_indexes": to_csv(band_indexes), "tile_dimensions": to_csv(tile_dimensions), - "lazy_tiles": str(lazy_tiles) + "lazy_tiles": str(lazy_tiles), + "buffer_size": int(buffer_size) }) # Parse the `source` argument @@ -249,6 +252,21 @@ def temp_name(): .format("raster") \ .load(path, **options) +def _stac_api_reader( + df_reader: DataFrameReader, + uri: str, + filters: dict = None) -> DataFrame: + """ + :param uri: STAC API uri + :param filters: STAC API Search filters dict (bbox, datetime, intersects, collections, items, limit, query, next), see the STAC API Spec for more details https://github.com/radiantearth/stac-api-spec + """ + import json + + return df_reader \ + .format("stac-api") \ + .option("uri", uri) \ + .option("search-filters", json.dumps(filters)) \ + .load() def _geotiff_writer( df_writer: DataFrameWriter, @@ -302,3 +320,4 @@ def set_dims(parts): DataFrameReader.geotrellis = lambda df_reader, path: _layer_reader(df_reader, "geotrellis", path) DataFrameReader.geotrellis_catalog = lambda df_reader, path: _aliased_reader(df_reader, "geotrellis-catalog", path) DataFrameWriter.geotrellis = lambda df_writer, path: _aliased_writer(df_writer, "geotrellis", path) +DataFrameReader.stacapi = _stac_api_reader diff --git a/pyrasterframes/src/main/python/pyrasterframes/rasterfunctions.py b/pyrasterframes/src/main/python/pyrasterframes/rasterfunctions.py index 0c48e91af..b9b67e247 100644 --- a/pyrasterframes/src/main/python/pyrasterframes/rasterfunctions.py +++ b/pyrasterframes/src/main/python/pyrasterframes/rasterfunctions.py @@ -65,7 +65,6 @@ def to_jvm(ct): elif isinstance(cell_type_arg, CellType): return to_jvm(cell_type_arg.cell_type_name) - def rf_cell_types() -> List[CellType]: """Return a list of 
standard cell types"""
     return [CellType(str(ct)) for ct in _context_call('rf_cell_types')]
@@ -781,6 +780,67 @@ def rf_identity(tile_col: Column_type) -> Column:
     """Pass tile through unchanged"""
     return _apply_column_function('rf_identity', tile_col)
 
+def rf_focal_max(tile_col: Column_type, neighborhood: Union[str, Column_type]) -> Column:
+    """Compute the max value in its neighborhood of each cell"""
+    if isinstance(neighborhood, str):
+        neighborhood = lit(neighborhood)
+    return _apply_column_function('rf_focal_max', tile_col, neighborhood)
+
+def rf_focal_mean(tile_col: Column_type, neighborhood: Union[str, Column_type]) -> Column:
+    """Compute the mean value in its neighborhood of each cell"""
+    if isinstance(neighborhood, str):
+        neighborhood = lit(neighborhood)
+    return _apply_column_function('rf_focal_mean', tile_col, neighborhood)
+
+def rf_focal_median(tile_col: Column_type, neighborhood: Union[str, Column_type]) -> Column:
+    """Compute the median value in its neighborhood of each cell"""
+    if isinstance(neighborhood, str):
+        neighborhood = lit(neighborhood)
+    return _apply_column_function('rf_focal_median', tile_col, neighborhood)
+
+def rf_focal_min(tile_col: Column_type, neighborhood: Union[str, Column_type]) -> Column:
+    """Compute the min value in its neighborhood of each cell"""
+    if isinstance(neighborhood, str):
+        neighborhood = lit(neighborhood)
+    return _apply_column_function('rf_focal_min', tile_col, neighborhood)
+
+def rf_focal_mode(tile_col: Column_type, neighborhood: Union[str, Column_type]) -> Column:
+    """Compute the mode value in its neighborhood of each cell"""
+    if isinstance(neighborhood, str):
+        neighborhood = lit(neighborhood)
+    return _apply_column_function('rf_focal_mode', tile_col, neighborhood)
+
+def rf_focal_std_dev(tile_col: Column_type, neighborhood: Union[str, Column_type]) -> Column:
+    """Compute the standard deviation value in its neighborhood of each cell"""
+    if isinstance(neighborhood, str):
+        neighborhood = 
lit(neighborhood) + return _apply_column_function('rf_focal_std_dev', tile_col, neighborhood) + +def rf_moransI(tile_col: Column_type, neighborhood: Union[str, Column_type]) -> Column: + """Compute moransI in its neighborhood value of each cell""" + if isinstance(neighborhood, str): + neighborhood = lit(neighborhood) + return _apply_column_function('rf_focal_moransi', tile_col, neighborhood) + +def rf_aspect(tile_col: Column_type) -> Column: + """Calculates the aspect of each cell in an elevation raster""" + return _apply_column_function('rf_aspect', tile_col) + +def rf_slope(tile_col: Column_type, z_factor: Union[int, float, Column_type]) -> Column: + """Calculates slope of each cell in an elevation raster""" + if isinstance(z_factor, (int, float)): + z_factor = lit(z_factor) + return _apply_column_function('rf_slope', tile_col, z_factor) + +def rf_hillshade(tile_col: Column_type, azimuth: Union[int, float, Column_type], altitude: Union[int, float, Column_type], z_factor: Union[int, float, Column_type]) -> Column: + """Calculates the hillshade of each cell in an elevation raster""" + if isinstance(azimuth, (int, float)): + azimuth = lit(azimuth) + if isinstance(altitude, (int, float)): + altitude = lit(altitude) + if isinstance(z_factor, (int, float)): + z_factor = lit(z_factor) + return _apply_column_function('rf_hillshade', tile_col, azimuth, altitude, z_factor) def rf_resample(tile_col: Column_type, scale_factor: Union[int, float, Column_type]) -> Column: """Resample tile to different size based on scalar factor or tile whose dimension to match diff --git a/pyrasterframes/src/main/python/pyrasterframes/rf_types.py b/pyrasterframes/src/main/python/pyrasterframes/rf_types.py index 516a0eb2c..9366fe07e 100644 --- a/pyrasterframes/src/main/python/pyrasterframes/rf_types.py +++ b/pyrasterframes/src/main/python/pyrasterframes/rf_types.py @@ -371,7 +371,7 @@ def __repr__(self): class Tile(object): - def __init__(self, cells, cell_type=None): + def __init__(self, 
cells, cell_type=None, grid_bounds=None): if cell_type is None: # infer cell type from the cells dtype and whether or not it is masked ct = CellType.from_numpy_dtype(cells.dtype) @@ -390,6 +390,11 @@ def __init__(self, cells, cell_type=None): # if the value in the array is `nd_value`, it is masked as nodata self.cells = np.ma.masked_equal(self.cells, nd_value) + # is it a buffer tile? crop it on extraction to preserve the tile behavior + if grid_bounds is not None: + colmin, rowmin, colmax, rowmax = grid_bounds + self.cells = self.cells[rowmin:(rowmax+1), colmin:(colmax+1)] + def __eq__(self, other): if type(other) is type(self): return self.cell_type == other.cell_type and \ @@ -461,7 +466,7 @@ def sqlType(cls): StructField("xmax",DoubleType(), True), StructField("ymax",DoubleType(), True) ]) - subgrid = StructType([ + grid = StructType([ StructField("colMin", IntegerType(), True), StructField("rowMin", IntegerType(), True), StructField("colMax", IntegerType(), True), @@ -474,7 +479,7 @@ def sqlType(cls): ]),True), StructField("bandIndex", IntegerType(), True), StructField("subextent", extent ,True), - StructField("subgrid", subgrid, True), + StructField("subgrid", grid, True), ]) return StructType([ @@ -482,6 +487,7 @@ def sqlType(cls): StructField("cols", IntegerType(), False), StructField("rows", IntegerType(), False), StructField("cells", BinaryType(), True), + StructField("gridBounds", grid, True), StructField("ref", ref, True) ]) @@ -501,6 +507,7 @@ def serialize(self, tile): dims[0], dims[1], cells, + None, None ] @@ -533,7 +540,7 @@ def deserialize(self, datum): try: as_numpy = np.frombuffer(cell_data_bytes, dtype=cell_type.to_numpy_dtype()) reshaped = as_numpy.reshape((rows, cols)) - t = Tile(reshaped, cell_type) + t = Tile(reshaped, cell_type, datum.gridBounds) except ValueError as e: raise ValueError({ "cell_type": cell_type, @@ -541,7 +548,8 @@ def deserialize(self, datum): "rows": rows, "cell_data.length": len(cell_data_bytes), "cell_data.type": 
type(cell_data_bytes), - "cell_data.values": repr(cell_data_bytes) + "cell_data.values": repr(cell_data_bytes), + "grid_bounds": datum.gridBounds }, e) return t diff --git a/pyrasterframes/src/main/scala/org/locationtech/rasterframes/py/PyRFContext.scala b/pyrasterframes/src/main/scala/org/locationtech/rasterframes/py/PyRFContext.scala index 9c9ca9c4b..2e5bdd8f0 100644 --- a/pyrasterframes/src/main/scala/org/locationtech/rasterframes/py/PyRFContext.scala +++ b/pyrasterframes/src/main/scala/org/locationtech/rasterframes/py/PyRFContext.scala @@ -21,7 +21,6 @@ package org.locationtech.rasterframes.py import java.nio.ByteBuffer - import geotrellis.proj4.CRS import geotrellis.raster.{CellType, MultibandTile} import geotrellis.spark._ @@ -29,14 +28,14 @@ import geotrellis.layer._ import geotrellis.vector.Extent import org.apache.spark.sql._ import org.locationtech.rasterframes -import org.locationtech.rasterframes.util.ResampleMethod +import org.locationtech.rasterframes.util.{KryoSupport, ResampleMethod} import org.locationtech.rasterframes.extensions.RasterJoin import org.locationtech.rasterframes.model.LazyCRS -import org.locationtech.rasterframes.ref.{GDALRasterSource, RasterRef, RFRasterSource} -import org.locationtech.rasterframes.util.KryoSupport -import org.locationtech.rasterframes.{RasterFunctions, _} +import org.locationtech.rasterframes.ref.{GDALRasterSource, RFRasterSource, RasterRef} +import org.locationtech.rasterframes._ import spray.json._ import org.locationtech.rasterframes.util.JsonCodecs._ + import scala.collection.JavaConverters._ /** @@ -134,8 +133,6 @@ class PyRFContext(implicit sparkSession: SparkSession) extends RasterFunctions * Left spatial join managing reprojection and merging of `other`; uses joinExprs to conduct initial join then extent and CRS columns to determine if rows intersect */ def rasterJoin(df: DataFrame, other: DataFrame, joinExprs: Column, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column, 
resamplingMethod: String): DataFrame = { - - val m = resamplingMethod match { case ResampleMethod(mm) => mm case _ => throw new IllegalArgumentException(s"Incorrect resampling method passed: ${resamplingMethod}") @@ -143,7 +140,6 @@ class PyRFContext(implicit sparkSession: SparkSession) extends RasterFunctions RasterJoin(df, other, joinExprs, leftExtent, leftCRS, rightExtent, rightCRS, m, None) } - /** * Convenience functions for use in Python */ diff --git a/rf-notebook/src/main/docker/Dockerfile b/rf-notebook/src/main/docker/Dockerfile index be0c95a8b..f00dc5acb 100644 --- a/rf-notebook/src/main/docker/Dockerfile +++ b/rf-notebook/src/main/docker/Dockerfile @@ -1,6 +1,5 @@ -# jupyter/scipy-notebook isn't semantically versioned. -# We pick this arbitrary one from Sept 2019 because it's what latest was on Oct 17 2019. -FROM jupyter/scipy-notebook:7a0c7325e470 +# Python version compatible with Spark and GDAL 3.1.2 +FROM jupyter/scipy-notebook:python-3.8.8 LABEL maintainer="Astraea, Inc. " @@ -8,14 +7,14 @@ USER root RUN \ apt-get -y update && \ - apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \ + apt-get install --no-install-recommends -y openjdk-11-jdk ca-certificates-java && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -ENV APACHE_SPARK_VERSION 2.4.7 -ENV HADOOP_VERSION 2.7 +ENV APACHE_SPARK_VERSION 3.1.2 +ENV HADOOP_VERSION 3.2 # On MacOS compute this with `shasum -a 512` -ARG APACHE_SPARK_CHECKSUM="0f5455672045f6110b030ce343c049855b7ba86c0ecb5e39a075ff9d093c7f648da55ded12e72ffe65d84c32dcd5418a6d764f2d6295a3f894a4286cc80ef478" +ARG APACHE_SPARK_CHECKSUM="2385cb772f21b014ce2abd6b8f5e815721580d6e8bc42a26d70bbcdda8d303d886a6f12b36d40f6971b5547b70fae62b5a96146f0421cb93d4e51491308ef5d5" ARG APACHE_SPARK_FILENAME="spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" ARG APACHE_SPARK_REMOTE_PATH="spark-${APACHE_SPARK_VERSION}/${APACHE_SPARK_FILENAME}" @@ -33,7 +32,7 @@ RUN cd /usr/local && ln -s 
spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERS # Spark config ENV SPARK_HOME /usr/local/spark -ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip +ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info ENV RF_LIB_LOC=/usr/local/rasterframes diff --git a/rf-notebook/src/main/notebooks/Focal Operations.ipynb b/rf-notebook/src/main/notebooks/Focal Operations.ipynb new file mode 100644 index 000000000..262c685bf --- /dev/null +++ b/rf-notebook/src/main/notebooks/Focal Operations.ipynb @@ -0,0 +1,232 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Focal Operations with RastrFrames Notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Spark Environment" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pyrasterframes\n", + "from pyrasterframes.utils import create_rf_spark_session\n", + "import pyrasterframes.rf_ipython # enables nicer visualizations of pandas DF\n", + "from pyrasterframes.rasterfunctions import *\n", + "import pyspark.sql.functions as F" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by bash)\n", + "bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by bash)\n", + "WARNING: An illegal reflective access operation has occurred\n", + "WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/usr/local/spark-3.1.2-bin-hadoop3.2/jars/spark-unsafe_2.12-3.1.2.jar) to constructor java.nio.DirectByteBuffer(long,int)\n", + "WARNING: Please consider reporting this to the maintainers of 
org.apache.spark.unsafe.Platform\n", + "WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations\n", + "WARNING: All illegal access operations will be denied in a future release\n", + "21/09/30 03:19:33 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", + "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n", + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n" + ] + } + ], + "source": [ + "spark = create_rf_spark_session()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get a PySpark DataFrame from elevation raster\n", + "\n", + "Read a single scene of elevation into DataFrame or raster tiles.\n", + "Each tile overlaps its neighbor by \"buffer_size\" of pixels, providing focal operations neighbor information around tile edges.\n", + "You can configure the default size of these tiles, by passing a tuple of desired columns and rows as: `raster(uri, tile_dimensions=(96, 96))`. 
The default is `(256, 256)`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "uri = 'https://geotrellis-demo.s3.us-east-1.amazonaws.com/cogs/harrisburg-pa/elevation.tif'\n", + "df = spark.read.raster(uri, tile_dimensions=(512, 512), buffer_size=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- proj_raster_path: string (nullable = false)\n", + " |-- proj_raster: struct (nullable = true)\n", + " | |-- tile: tile (nullable = true)\n", + " | |-- extent: struct (nullable = true)\n", + " | | |-- xmin: double (nullable = false)\n", + " | | |-- ymin: double (nullable = false)\n", + " | | |-- xmax: double (nullable = false)\n", + " | | |-- ymax: double (nullable = false)\n", + " | |-- crs: crs (nullable = true)\n", + "\n" + ] + } + ], + "source": [ + "df.printSchema()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The extent struct tells us where in the [CRS](https://spatialreference.org/ref/sr-org/6842/) the tile data covers. The granule is split into arbitrary sized chunks. Each row is a different chunk. Let's see how many." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "81" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Focal Operations\n", + "Additional transformations are complished through use of column functions.\n", + "The functions used here are mapped to their Scala implementation and applied per row.\n", + "For each row the source elevation data is fetched only once before it's used as input." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Showing only top 5 rows
rf_crs(proj_raster)rf_extent(proj_raster)rf_aspect(proj_raster)rf_slope(proj_raster, 1)rf_hillshade(proj_raster, 315, 45, 1)
utm-CS{240929.2154, 4398599.0319, 256289.2154, 4401599.0319}
utm-CS{210209.2154, 4432319.0319, 225569.2154, 4447679.0319}
utm-CS{256289.2154, 4416959.0319, 271649.2154, 4432319.0319}
utm-CS{271649.2154, 4509119.0319, 287009.2154, 4524479.0319}
utm-CS{333089.2154, 4398599.0319, 341969.2154, 4401599.0319}
" + ], + "text/markdown": [ + "\n", + "_Showing only top 5 rows_.\n", + "\n", + "| rf_crs(proj_raster) | rf_extent(proj_raster) | rf_aspect(proj_raster) | rf_slope(proj_raster, 1) | rf_hillshade(proj_raster, 315, 45, 1) |\n", + "|---|---|---|---|---|\n", + "| utm-CS | {240929.2154, 4398599.0319, 256289.2154, 4401599.0319} | | | |\n", + "| utm-CS | {210209.2154, 4432319.0319, 225569.2154, 4447679.0319} | | | |\n", + "| utm-CS | {256289.2154, 4416959.0319, 271649.2154, 4432319.0319} | | | |\n", + "| utm-CS | {271649.2154, 4509119.0319, 287009.2154, 4524479.0319} | | | |\n", + "| utm-CS | {333089.2154, 4398599.0319, 341969.2154, 4401599.0319} | | | |" + ], + "text/plain": [ + "DataFrame[rf_crs(proj_raster): udt, rf_extent(proj_raster): struct, rf_aspect(proj_raster): struct,crs:udt>, rf_slope(proj_raster, 1): struct,crs:udt>, rf_hillshade(proj_raster, 315, 45, 1): struct,crs:udt>]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select(\n", + " rf_crs(df.proj_raster), \n", + " rf_extent(df.proj_raster), \n", + " rf_aspect(df.proj_raster), \n", + " rf_slope(df.proj_raster, z_factor=1), \n", + " rf_hillshade(df.proj_raster, azimuth=315, altitude=45, z_factor=1))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/rf-notebook/src/main/notebooks/STAC API Example.ipynb b/rf-notebook/src/main/notebooks/STAC API Example.ipynb new file mode 100644 index 000000000..3e5cf4e47 --- /dev/null +++ b/rf-notebook/src/main/notebooks/STAC API Example.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# STAC API with RasterFrames Notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Spark Environment" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pyrasterframes\n", + "from pyrasterframes.utils import create_rf_spark_session\n", + "import pyrasterframes.rf_ipython # enables nicer visualizations of pandas DF\n", + "from pyrasterframes.rasterfunctions import *\n", + "import pyspark.sql.functions as F\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by bash)\n", + "bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by bash)\n", + "WARNING: An illegal reflective access operation has occurred\n", + "WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/usr/local/spark-3.1.2-bin-hadoop3.2/jars/spark-unsafe_2.12-3.1.2.jar) to constructor java.nio.DirectByteBuffer(long,int)\n", + "WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform\n", + "WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations\n", + "WARNING: All illegal access operations will be denied in a future release\n", + "21/10/01 00:25:37 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", + "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n", + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). 
For SparkR, use setLogLevel(newLevel).\n" + ] + } + ], + "source": [ + "spark = create_rf_spark_session()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get a STAC API DataFrame\n", + "\n", + "Read a DataFrame that consists of STAC Items retrieved from the STAC API service." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# read assets from the landsat-8-l1-c1 collection\n", + "# due to the collection size and query parameters\n", + "# it makes sense to limit the amount of items retrieved from the STAC API\n", + "uri = 'https://earth-search.aws.element84.com/v0'\n", + "df = spark.read.stacapi(uri, {'collections': ['landsat-8-l1-c1']}).limit(100)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- id: string (nullable = true)\n", + " |-- stacVersion: string (nullable = true)\n", + " |-- stacExtensions: array (nullable = true)\n", + " | |-- element: string (containsNull = true)\n", + " |-- _type: string (nullable = true)\n", + " |-- geometry: geometry (nullable = true)\n", + " |-- bbox: struct (nullable = true)\n", + " | |-- xmin: double (nullable = true)\n", + " | |-- ymin: double (nullable = true)\n", + " | |-- xmax: double (nullable = true)\n", + " | |-- ymax: double (nullable = true)\n", + " |-- links: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- href: string (nullable = true)\n", + " | | |-- rel: string (nullable = true)\n", + " | | |-- _type: string (nullable = true)\n", + " | | |-- title: string (nullable = true)\n", + " | | |-- extensionFields: string (nullable = true)\n", + " |-- assets: map (nullable = true)\n", + " | |-- key: string\n", + " | |-- value: struct (valueContainsNull = true)\n", + " | | |-- href: string (nullable = true)\n", + " | | |-- title: string (nullable = true)\n", + " | | 
|-- description: string (nullable = true)\n", + " | | |-- roles: array (nullable = true)\n", + " | | | |-- element: string (containsNull = true)\n", + " | | |-- _type: string (nullable = true)\n", + " | | |-- extensionFields: string (nullable = true)\n", + " |-- collection: string (nullable = true)\n", + " |-- properties: struct (nullable = true)\n", + " | |-- datetime: struct (nullable = true)\n", + " | | |-- datetime: timestamp (nullable = true)\n", + " | | |-- start: timestamp (nullable = true)\n", + " | | |-- end: timestamp (nullable = true)\n", + " | | |-- _type: string (nullable = true)\n", + " | |-- title: string (nullable = true)\n", + " | |-- description: string (nullable = true)\n", + " | |-- created: timestamp (nullable = true)\n", + " | |-- updated: timestamp (nullable = true)\n", + " | |-- license: string (nullable = true)\n", + " | |-- providers: struct (nullable = true)\n", + " | | |-- head: struct (nullable = true)\n", + " | | | |-- name: string (nullable = true)\n", + " | | | |-- description: string (nullable = true)\n", + " | | | |-- roles: array (nullable = true)\n", + " | | | | |-- element: string (containsNull = true)\n", + " | | | |-- url: string (nullable = true)\n", + " | | |-- tail: array (nullable = true)\n", + " | | | |-- element: struct (containsNull = true)\n", + " | | | | |-- name: string (nullable = true)\n", + " | | | | |-- description: string (nullable = true)\n", + " | | | | |-- roles: array (nullable = true)\n", + " | | | | | |-- element: string (containsNull = true)\n", + " | | | | |-- url: string (nullable = true)\n", + " | |-- platform: string (nullable = true)\n", + " | |-- instruments: struct (nullable = true)\n", + " | | |-- head: string (nullable = true)\n", + " | | |-- tail: array (nullable = true)\n", + " | | | |-- element: string (containsNull = true)\n", + " | |-- constellation: string (nullable = true)\n", + " | |-- mission: string (nullable = true)\n", + " | |-- gsd: double (nullable = true)\n", + " | |-- 
extensionFields: string (nullable = true)\n", + "\n" + ] + } + ], + "source": [ + "df.printSchema()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each item in the DataFrame represents the entire STAC Item." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "100" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Showing only top 5 rows
idcollectiongeometry
LC08_L1TP_232093_20210716_20210717_01_T1landsat-8-l1-c1POLYGON ((-74.64766964714028 -46.3435154...
LC08_L1TP_232092_20210716_20210717_01_T1landsat-8-l1-c1POLYGON ((-74.07682865409966 -44.9166888...
LC08_L1TP_232091_20210716_20210717_01_T1landsat-8-l1-c1POLYGON ((-73.54155930424828 -43.4885910...
LC08_L1TP_232090_20210716_20210717_01_T1landsat-8-l1-c1POLYGON ((-73.02667875381594 -42.0589406...
LC08_L1TP_232089_20210716_20210717_01_T1landsat-8-l1-c1POLYGON ((-72.67424121162182 -40.6804236...
" + ], + "text/markdown": [ + "\n", + "_Showing only top 5 rows_.\n", + "\n", + "| id | collection | geometry |\n", + "|---|---|---|\n", + "| LC08_L1TP_232093_20210716_20210717_01_T1 | landsat-8-l1-c1 | POLYGON ((-74.64766964714028 -46.3435154... |\n", + "| LC08_L1TP_232092_20210716_20210717_01_T1 | landsat-8-l1-c1 | POLYGON ((-74.07682865409966 -44.9166888... |\n", + "| LC08_L1TP_232091_20210716_20210717_01_T1 | landsat-8-l1-c1 | POLYGON ((-73.54155930424828 -43.4885910... |\n", + "| LC08_L1TP_232090_20210716_20210717_01_T1 | landsat-8-l1-c1 | POLYGON ((-73.02667875381594 -42.0589406... |\n", + "| LC08_L1TP_232089_20210716_20210717_01_T1 | landsat-8-l1-c1 | POLYGON ((-72.67424121162182 -40.6804236... |" + ], + "text/plain": [ + "DataFrame[id: string, collection: string, geometry: udt]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select(df.id, df.collection, df.geometry)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To read rasters we don't need STAC Items, but we need STAC Item Assets.\n", + "Each STAC Item in the DataFrame can contain more than a single asset => to covert such STAC Item DataFrame into the STAC Item Assets DataFrame we need to explode the assets column. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# select the first Landsat STAC Item\n", + "# explode its assets \n", + "# select blue, red, green, and nir assets only\n", + "# name each asset link as the band column\n", + "assets = df \\\n", + " .limit(1) \\\n", + " .select(df.id, F.explode(df.assets)) \\\n", + " .filter(F.col(\"key\").isin([\"B2\", \"B3\", \"B4\", \"B5\"])) \\\n", + " .select(F.col(\"value.href\").alias(\"band\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- band: string (nullable = true)\n", + "\n" + ] + } + ], + "source": [ + "assets.printSchema()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "assets.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# read rasters from the exploded STAC Assets DataFrame\n", + "# select only the blue asset to speed up notebook\n", + "rs = spark.read.raster(assets.limit(1), tile_dimensions=(512, 512), buffer_size=2, catalog_col_names=[\"band\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "256" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rs.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- band_path: string 
(nullable = false)\n", + " |-- band: struct (nullable = true)\n", + " | |-- tile: tile (nullable = true)\n", + " | |-- extent: struct (nullable = true)\n", + " | | |-- xmin: double (nullable = false)\n", + " | | |-- ymin: double (nullable = false)\n", + " | | |-- xmax: double (nullable = false)\n", + " | | |-- ymax: double (nullable = false)\n", + " | |-- crs: crs (nullable = true)\n", + "\n" + ] + } + ], + "source": [ + "rs.printSchema()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Focal Operations\n", + "Additional transformations are complished through use of column functions.\n", + "The functions used here are mapped to their Scala implementation and applied per row.\n", + "For each row the source elevation data is fetched only once before it's used as input." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Showing only top 5 rows
rf_crs(band)rf_extent(band)rf_aspect(band)rf_slope(band, 1)rf_hillshade(band, 315, 45, 1)
utm-CS{488445.0, -5335365.0, 503805.0, -5320005.0}
utm-CS{657405.0, -5335365.0, 672765.0, -5320005.0}
utm-CS{688125.0, -5335365.0, 703485.0, -5320005.0}
utm-CS{642045.0, -5197125.0, 657405.0, -5181765.0}
utm-CS{549885.0, -5366085.0, 565245.0, -5350725.0}
" + ], + "text/markdown": [ + "\n", + "_Showing only top 5 rows_.\n", + "\n", + "| rf_crs(band) | rf_extent(band) | rf_aspect(band) | rf_slope(band, 1) | rf_hillshade(band, 315, 45, 1) |\n", + "|---|---|---|---|---|\n", + "| utm-CS | {488445.0, -5335365.0, 503805.0, -5320005.0} | | | |\n", + "| utm-CS | {657405.0, -5335365.0, 672765.0, -5320005.0} | | | |\n", + "| utm-CS | {688125.0, -5335365.0, 703485.0, -5320005.0} | | | |\n", + "| utm-CS | {642045.0, -5197125.0, 657405.0, -5181765.0} | | | |\n", + "| utm-CS | {549885.0, -5366085.0, 565245.0, -5350725.0} | | | |" + ], + "text/plain": [ + "DataFrame[rf_crs(band): udt, rf_extent(band): struct, rf_aspect(band): struct,crs:udt>, rf_slope(band, 1): struct,crs:udt>, rf_hillshade(band, 315, 45, 1): struct,crs:udt>]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rs.select(\n", + " rf_crs(rs.band), \n", + " rf_extent(rs.band), \n", + " rf_aspect(rs.band), \n", + " rf_slope(rs.band, z_factor=1), \n", + " rf_hillshade(rs.band, azimuth=315, altitude=45, z_factor=1))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}