Skip to content

Commit

Permalink
Rehome listWindows as methods on GeoTiffSegmentLayout
Browse files Browse the repository at this point in the history
  • Loading branch information
echeipesh committed Nov 16, 2017
1 parent a25bc19 commit ec8f8ec
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 107 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@

package geotrellis.raster.io.geotiff

import geotrellis.raster.GridBounds
import geotrellis.raster.TileLayout
import geotrellis.raster.{GridBounds, RasterExtent, TileLayout, PixelIsArea}
import geotrellis.raster.rasterize.Rasterizer
import geotrellis.vector.{Extent, Geometry}
import scala.collection.mutable
import spire.syntax.cfor._


/**
* This case class represents how the segments in a given [[GeoTiff]] are arranged.
Expand Down Expand Up @@ -186,6 +189,95 @@ case class GeoTiffSegmentLayout(totalCols: Int, totalRows: Int, tileLayout: Tile
finalizePartition()
partitions.result
}

/**
 * Pick a window edge length that evenly divides a segment edge.
 *
 * Called by `listWindows` when `maxSize` is smaller than the segment edge,
 * so that the chosen windows tile each segment exactly where possible.
 *
 * @param maxSize upper bound for the window edge length
 * @param segment segment edge length to be subdivided
 * @return the largest factor of `segment` that is > 1 and <= `maxSize`;
 *         if no such factor exists (for example `segment` is prime or
 *         `maxSize` < 2) gives up and returns `maxSize` unchanged
 */
private def bestWindowSize(maxSize: Int, segment: Int): Int = {
  // Scan downwards so the first divisor encountered is the largest one.
  // The previous search only looked at cofactors segment / i for
  // i < sqrt(segment), so factors at or below sqrt(segment) were never
  // found (e.g. segment = 256, maxSize = 20 yielded 20 instead of 16).
  val upper = math.min(maxSize, segment)
  (upper to 2 by -1)
    .find(candidate => segment % candidate == 0)
    .getOrElse(maxSize)
}

/**
 * List pixel windows covering the whole raster, sized from `maxSize` and
 * the segment dimensions of `tileLayout`.
 *
 * Per axis the window edge is chosen as follows:
 *  - `maxSize` at least twice the segment edge: the largest multiple of the
 *    segment edge that does not exceed `maxSize`;
 *  - `maxSize` at least the segment edge: exactly one segment edge;
 *  - otherwise: whatever `bestWindowSize` selects.
 *
 * @param maxSize upper bound for the window edge length, in pixels
 * @return the windows produced by `listWindows(cols, rows)` for the chosen
 *         column and row sizes
 */
def listWindows(maxSize: Int): Array[GridBounds] = {
  // Window edge length along one axis, given that axis' segment edge.
  def windowEdge(segmentEdge: Int): Int = segmentEdge match {
    case edge if maxSize >= edge * 2 =>
      math.floor(maxSize.toDouble / edge).toInt * edge
    case edge if maxSize >= edge =>
      edge
    case edge =>
      bestWindowSize(maxSize, edge)
  }

  listWindows(windowEdge(tileLayout.tileCols), windowEdge(tileLayout.tileRows))
}

/**
 * List all pixel windows that meet the given geometry.
 *
 * The window size is derived from `maxSize` and the segment dimensions of
 * `tileLayout` using the same per-axis rule as `listWindows(maxSize)`. The
 * geometry is then rasterized (partial cells included, `PixelIsArea`) onto a
 * coarse grid with one cell per window, and every visited cell is turned
 * into a window clipped to the raster bounds.
 *
 * @param maxSize  upper bound for the window edge length, in pixels
 * @param extent   extent used to build the rasterization grid; presumably
 *                 the map extent of the whole raster (callers pass the
 *                 GeoTiff tags' extent) -- confirm
 * @param geometry geometry selecting which windows to return
 * @return the distinct windows touched by `geometry`, in no particular order
 */
def listWindows(maxSize: Int, extent: Extent, geometry: Geometry): Array[GridBounds] = {
  val segCols = tileLayout.tileCols
  val segRows = tileLayout.tileRows

  val maxColSize: Int =
    if (maxSize >= segCols * 2) {
      math.floor(maxSize.toDouble / segCols).toInt * segCols
    } else if (maxSize >= segCols) {
      segCols
    } else bestWindowSize(maxSize, segCols)

  // The first test previously read `maxSize >= segRows`, which made the
  // `else if` branch unreachable and disagreed with the column logic above.
  // For positive sizes the resulting value is the same either way.
  val maxRowSize: Int =
    if (maxSize >= segRows * 2) {
      math.floor(maxSize.toDouble / segRows).toInt * segRows
    } else if (maxSize >= segRows) {
      segRows
    } else bestWindowSize(maxSize, segRows)

  // A set is used so that a window is reported once even if visited twice.
  val result = scala.collection.mutable.Set.empty[GridBounds]

  // NOTE(review): totalCols / maxColSize and totalRows / maxRowSize are
  // truncating integer divisions. When the raster size is not an exact
  // multiple of the window size, this grid has fewer cells than there are
  // windows across the raster, so cells may not line up with window
  // boundaries -- confirm whether rounding up is intended here.
  val re = RasterExtent(extent, math.max(totalCols/maxColSize,1), math.max(totalRows/maxRowSize,1))
  val options = Rasterizer.Options(includePartial=true, sampleType=PixelIsArea)

  Rasterizer.foreachCellByGeometry(geometry, re, options)({ (col: Int, row: Int) =>
    result +=
      GridBounds(
        col * maxColSize,
        row * maxRowSize,
        // Clip the last window in each direction to the raster bounds.
        math.min((col+1)*maxColSize - 1, totalCols-1),
        math.min((row+1)*maxRowSize - 1, totalRows-1)
      )
  })
  result.toArray
}

/**
 * List all pixel windows that cover a grid of given size.
 *
 * Windows are generated column by column, stepping `cols` pixels
 * horizontally and `rows` pixels vertically. Windows on the right and
 * bottom edges are clipped to `totalCols - 1` / `totalRows - 1`.
 *
 * @param cols window width in pixels; must be positive
 * @param rows window height in pixels; must be positive
 * @return the windows covering the `totalCols` x `totalRows` grid
 * @throws IllegalArgumentException if `cols` or `rows` is not positive
 */
def listWindows(cols: Int, rows: Int): Array[GridBounds] = {
  // A non-positive step would never advance the loops below and would keep
  // appending windows until memory is exhausted.
  require(cols > 0 && rows > 0, s"window size must be positive, got cols=$cols, rows=$rows")

  val result = scala.collection.mutable.ArrayBuilder.make[GridBounds]
  // Round up so partially filled edge windows are counted in the hint.
  val windowsAcross = math.ceil(totalCols.toDouble / cols).toInt
  val windowsDown = math.ceil(totalRows.toDouble / rows).toInt
  result.sizeHint(windowsAcross * windowsDown)

  cfor(0)(_ < totalCols, _ + cols) { col =>
    cfor(0)(_ < totalRows, _ + rows) { row =>
      result +=
        GridBounds(
          col,
          row,
          math.min(col + cols - 1, totalCols - 1),
          math.min(row + rows - 1, totalRows - 1)
        )
    }
  }
  result.result
}
}

/**
Expand Down
7 changes: 4 additions & 3 deletions s3/src/main/scala/geotrellis/spark/io/s3/S3GeoTiffRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,12 @@ object S3GeoTiffRDD extends LazyLogging {
.flatMap { case (objectRequest, (cols, rows)) =>
val bucket = objectRequest.getBucketName
val key = objectRequest.getKey
val layout = sourceGeoTiffInfo.getGeoTiffInfo(s"s3://$bucket/$key").segmentLayout.tileLayout
val maxSize = getMaxSize(options)

RasterReader
.listWindows(cols, rows, maxSize, layout.tileCols, layout.tileRows)
sourceGeoTiffInfo
.getGeoTiffInfo(s"s3://$bucket/$key")
.segmentLayout
.listWindows(maxSize)
.map((objectRequest, _))
}

Expand Down
17 changes: 7 additions & 10 deletions spark/src/main/scala/geotrellis/spark/io/GeoTiffInfoReader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ private [geotrellis] trait GeoTiffInfoReader extends LazyLogging {
geoTiffInfo
.flatMap { case (_, info) =>
RasterReader.listWindows(info.segmentLayout.totalCols, info.segmentLayout.totalRows, maxTileSize)
// TODO: really ?
// info.segmentLayout.listWindows(maxTileSize)
}
.length

Expand Down Expand Up @@ -76,24 +78,19 @@ private [geotrellis] trait GeoTiffInfoReader extends LazyLogging {
geometry: Option[Geometry]
)(implicit sc: SparkContext): RDD[(String, Array[GridBounds])] = {
geoTiffInfoRdd.flatMap({ uri =>
val md = getGeoTiffInfo(uri)
val cols = md.segmentLayout.totalCols
val rows = md.segmentLayout.totalRows
val segCols = md.segmentLayout.tileLayout.tileCols
val segRows = md.segmentLayout.tileLayout.tileRows
val cellType = md.cellType
val info = getGeoTiffInfo(uri)

val fileWindows =
val windows =
geometry match {
case Some(geometry) =>
val tags = getGeoTiffTags(uri)
val extent = tags.extent
RasterReader.listWindows(cols, rows, maxSize, extent, segCols, segRows, geometry)
info.segmentLayout.listWindows(maxSize, extent, geometry)
case None =>
RasterReader.listWindows(cols, rows, maxSize, segCols, segRows)
info.segmentLayout.listWindows(maxSize)
}

val partitions = md.segmentLayout.partitionWindowsBySegments(fileWindows, partitionBytes / cellType.bytes)
val partitions = info.segmentLayout.partitionWindowsBySegments(windows, partitionBytes / info.cellType.bytes)
partitions.map({ windows => (uri, windows)})
})
}
Expand Down
89 changes: 1 addition & 88 deletions spark/src/main/scala/geotrellis/spark/io/RasterReader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import geotrellis.proj4._
import geotrellis.raster._
import geotrellis.raster.io.geotiff._
import geotrellis.raster.io.geotiff.reader.GeoTiffReader
import geotrellis.raster.rasterize.Rasterizer
import geotrellis.spark._
import geotrellis.util.{ByteReader, StreamingByteReader}
import geotrellis.vector._
Expand Down Expand Up @@ -61,94 +60,8 @@ object RasterReader {
}
}

/**
 * Pick a window edge length that evenly divides a segment edge.
 *
 * Called by `listWindows` when `maxSize` is smaller than the segment edge.
 *
 * @param maxSize upper bound for the window edge length
 * @param segment segment edge length to be subdivided
 * @return the largest factor of `segment` that is > 1 and <= `maxSize`;
 *         if no such factor exists gives up and returns `maxSize` unchanged
 */
private def best(maxSize: Int, segment: Int): Int = {
  // Scan downwards so the first divisor encountered is the largest one.
  // The previous search only considered cofactors segment / i for
  // i < sqrt(segment) and therefore missed every factor at or below
  // sqrt(segment) (e.g. segment = 256, maxSize = 20 yielded 20, not 16).
  val upper = math.min(maxSize, segment)
  (upper to 2 by -1)
    .find(candidate => segment % candidate == 0)
    .getOrElse(maxSize)
}

/**
 * List pixel windows covering a `cols` x `rows` raster, sized from
 * `maxSize` and the segment dimensions.
 *
 * Per axis the window edge is: the largest multiple of the segment edge not
 * exceeding `maxSize` when `maxSize` is at least twice the segment edge; one
 * segment edge when `maxSize` is at least the segment edge; otherwise the
 * value selected by `best`.
 *
 * @param cols    raster width in pixels
 * @param rows    raster height in pixels
 * @param maxSize upper bound for the window edge length, in pixels
 * @param segCols segment width in pixels
 * @param segRows segment height in pixels
 * @return the windows produced by the four-argument `listWindows`
 */
def listWindows(
  cols: Int, rows: Int, maxSize: Int,
  segCols: Int, segRows: Int
): Array[GridBounds] = {
  // Window edge length along one axis, given that axis' segment edge.
  def windowEdge(segmentEdge: Int): Int = segmentEdge match {
    case edge if maxSize >= edge * 2 =>
      math.floor(maxSize.toDouble / edge).toInt * edge
    case edge if maxSize >= edge =>
      edge
    case edge =>
      best(maxSize, edge)
  }

  listWindows(cols, rows, windowEdge(segCols), windowEdge(segRows))
}

/**
 * List all pixel windows that meet the given geometry.
 *
 * The window size is derived from `maxSize` and the segment dimensions. The
 * geometry is rasterized onto a coarse grid with one cell per window, and
 * every visited cell is turned into a window clipped to the raster bounds.
 *
 * @param cols     raster width in pixels
 * @param rows     raster height in pixels
 * @param maxSize  upper bound for the window edge length, in pixels
 * @param extent   extent used to build the rasterization grid; presumably
 *                 the map extent of the whole raster -- confirm
 * @param segCols  segment width in pixels
 * @param segRows  segment height in pixels
 * @param geometry geometry selecting which windows to return
 * @param options  rasterizer options, `Rasterizer.Options.DEFAULT` unless given
 * @return one window per grid cell visited by the rasterizer
 */
def listWindows(
  cols: Int, rows: Int, maxSize: Int,
  extent: Extent, segCols: Int, segRows: Int, geometry: Geometry,
  options: Rasterizer.Options = Rasterizer.Options.DEFAULT
): Array[GridBounds] = {
  val maxColSize: Int =
    if (maxSize >= segCols * 2) {
      math.floor(maxSize.toDouble / segCols).toInt * segCols
    } else if (maxSize >= segCols) {
      segCols
    } else best(maxSize, segCols)

  // The first test previously read `maxSize >= segRows`, which made the
  // `else if` branch unreachable and disagreed with the column logic above.
  // For positive sizes the resulting value is the same either way.
  val maxRowSize: Int =
    if (maxSize >= segRows * 2) {
      math.floor(maxSize.toDouble / segRows).toInt * segRows
    } else if (maxSize >= segRows) {
      segRows
    } else best(maxSize, segRows)

  val result = scala.collection.mutable.ArrayBuffer[GridBounds]()

  // NOTE(review): cols / maxColSize and rows / maxRowSize are truncating
  // integer divisions, so when the raster size is not an exact multiple of
  // the window size the grid cells may not line up with window boundaries
  // -- confirm whether rounding up is intended here.
  val re = RasterExtent(extent, math.max(cols/maxColSize,1), math.max(rows/maxRowSize,1))

  Rasterizer.foreachCellByGeometry(geometry, re, options)({ (col: Int, row: Int) =>
    result +=
      GridBounds(
        col * maxColSize,
        row * maxRowSize,
        // Clip the last window in each direction to the raster bounds.
        math.min((col+1)*maxColSize - 1, cols-1),
        math.min((row+1)*maxRowSize - 1, rows-1)
      )
  })
  result.toArray
}

/**
 * List all pixel windows that cover a grid of given size.
 *
 * Windows are generated column by column, stepping `colSize` pixels
 * horizontally and `rowSize` pixels vertically. Windows on the right and
 * bottom edges are clipped to `cols - 1` / `rows - 1`.
 *
 * @param cols    grid width in pixels
 * @param rows    grid height in pixels
 * @param colSize window width in pixels; must be positive
 * @param rowSize window height in pixels; must be positive
 * @return the windows covering the `cols` x `rows` grid
 * @throws IllegalArgumentException if `colSize` or `rowSize` is not positive
 */
def listWindows(cols: Int, rows: Int, colSize: Int, rowSize: Int): Array[GridBounds] = {
  // A non-positive step would never advance the loops below and would keep
  // appending windows until memory is exhausted.
  require(
    colSize > 0 && rowSize > 0,
    s"window size must be positive, got colSize=$colSize, rowSize=$rowSize"
  )

  val result = scala.collection.mutable.ArrayBuffer[GridBounds]()
  cfor(0)(_ < cols, _ + colSize) { col =>
    cfor(0)(_ < rows, _ + rowSize) { row =>
      result +=
        GridBounds(
          col,
          row,
          math.min(col + colSize - 1, cols - 1),
          math.min(row + rowSize - 1, rows - 1)
        )
    }
  }
  result.toArray
}

/** List all pixel windows that cover a grid of given size */
@deprecated("use GeoTiffSegmentLayout.listWindows instead", "1.2")
def listWindows(cols: Int, rows: Int, maxTileSize: Option[Int]): Array[GridBounds] = {
val result = scala.collection.mutable.ArrayBuffer[GridBounds]()
maxTileSize match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,13 @@ object HadoopGeoTiffRDD extends LazyLogging {
val windows: RDD[(Path, GridBounds)] =
pathsToDimensions
.flatMap { case (objectRequest, (cols, rows)) =>
val info = HadoopGeoTiffInfoReader(objectRequest.toString, conf, options.tiffExtensions)
val layout = info.getGeoTiffInfo(objectRequest.toString).segmentLayout.tileLayout
val infoReader = HadoopGeoTiffInfoReader(objectRequest.toString, conf, options.tiffExtensions)
val maxSize = getMaxSize(options)

RasterReader
.listWindows(cols, rows, maxSize, layout.tileCols, layout.tileRows)
infoReader
.getGeoTiffInfo(objectRequest.toString)
.segmentLayout
.listWindows(maxSize)
.map((objectRequest, _))
}

Expand Down

0 comments on commit ec8f8ec

Please sign in to comment.