Skip to content

Commit

Permalink
Merge pull request #146 from jpolchlo/feature/list-of-geotiffs
Browse files Browse the repository at this point in the history
Allow loading multiple GeoTIFFs via `get`
  • Loading branch information
echeipesh committed May 2, 2017
2 parents bfda044 + 8419629 commit e92703e
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,21 @@ package geopyspark.geotrellis.io.geotiff
import geopyspark.geotrellis._

import geotrellis.proj4._
import geotrellis.spark.io.avro._
import geotrellis.spark.io.hadoop._
import geotrellis.spark.io.s3._
import geotrellis.spark.io.s3.testkit._

import scala.collection.JavaConverters._
import scala.collection.JavaConversions._

import java.net.URI
import java.util.Map
import scala.reflect._

import org.apache.spark._
import org.apache.hadoop.fs.Path


object GeoTiffRDD {
import Constants._

Expand Down Expand Up @@ -82,32 +84,34 @@ object GeoTiffRDD {
/** Load the GeoTIFF(s) at `path` into a RasterRDD using default read options.
  *
  * The URI scheme selects the backend: `s3` URIs are read through the S3
  * reader; every other scheme (file, hdfs, ...) falls back to Hadoop.
  *
  * @param sc      active SparkContext used to build the RDD
  * @param keyType key kind constant (e.g. projected extent vs. temporal)
  * @param path    URI string of the GeoTIFF source
  */
def get(
  sc: SparkContext,
  keyType: String,
  path: String
): RasterRDD[_] = {
  val uri = new URI(path)
  val scheme = uri.getScheme

  if (scheme == S3)
    getS3GeoTiffRDD(sc, keyType, uri, S3GeoTiffRDDOptions.default)
  else
    getHadoopGeoTiffRDD(sc, keyType, new Path(path), HadoopGeoTiffRDDOptions.default)
}

/** Load one or more GeoTIFFs into a single RasterRDD.
  *
  * Each path is dispatched by URI scheme (`s3` goes to the S3 reader,
  * everything else to Hadoop), then the per-path RDDs are unioned into one.
  * When `options` is empty the backend defaults are used; otherwise the
  * options map is applied to the matching backend's option type.
  *
  * @param sc      active SparkContext used to build the RDDs
  * @param keyType key kind constant; selects how the union is rebuilt
  *                (PROJECTEDEXTENT vs. TEMPORALPROJECTEDEXTENT)
  * @param paths   URI strings of the GeoTIFF sources; must be non-empty
  * @param options backend option overrides from the Python side; may be empty
  */
def get(
  sc: SparkContext,
  keyType: String,
  paths: java.util.List[String],
  options: java.util.Map[String, Any]
): RasterRDD[_] = {
  // reduce below throws an opaque UnsupportedOperationException on an empty
  // collection; fail fast with a clear message instead.
  require(!paths.isEmpty, "get requires at least one GeoTIFF path")

  // Explicit asScala conversion (JavaConverters) rather than the deprecated
  // implicit JavaConversions the surrounding file also imports.
  val rdds = paths.asScala.map { path =>
    val uri = new URI(path)
    uri.getScheme match {
      case S3 =>
        // Hoist the options selection so there is a single call per backend.
        val s3Options =
          if (options.isEmpty) S3GeoTiffRDDOptions.default
          else S3GeoTiffRDDOptions.setValues(options)
        getS3GeoTiffRDD(sc, keyType, uri, s3Options)
      case _ =>
        val hadoopOptions =
          if (options.isEmpty) HadoopGeoTiffRDDOptions.default
          else HadoopGeoTiffRDDOptions.setValues(options)
        getHadoopGeoTiffRDD(sc, keyType, new Path(uri), hadoopOptions)
    }
  }

  // Union the per-path RDDs; the casts are safe because keyType fixed the
  // concrete RasterRDD subtype produced above. Any other keyType value
  // raises MatchError here, matching the original behavior.
  rdds.reduce { (r1, r2) =>
    keyType match {
      case PROJECTEDEXTENT =>
        ProjectedRasterRDD(r1.asInstanceOf[ProjectedRasterRDD].rdd.union(r2.asInstanceOf[ProjectedRasterRDD].rdd))
      case TEMPORALPROJECTEDEXTENT =>
        TemporalRasterRDD(r1.asInstanceOf[TemporalRasterRDD].rdd.union(r2.asInstanceOf[TemporalRasterRDD].rdd))
    }
  }
}

private def getHadoopGeoTiffRDD(
Expand Down
28 changes: 21 additions & 7 deletions geopyspark/geotrellis/geotiff_rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

from geopyspark.geotrellis.rdd import RasterRDD
from functools import reduce

def get(geopysc,
rdd_type,
Expand Down Expand Up @@ -72,13 +73,26 @@ def get(geopysc,
options = kwargs

if options:
srdd = geotiff_rdd.get(geopysc.sc,
key,
uri,
options)
if isinstance(uri, list):
srdd = geotiff_rdd.get(geopysc.sc,
key,
uri,
options)
else:
srdd = geotiff_rdd.get(geopysc.sc,
key,
[uri],
options)
else:
srdd = geotiff_rdd.get(geopysc.sc,
key,
uri)
if isinstance(uri, list):
srdd = geotiff_rdd.get(geopysc.sc,
key,
uri,
{})
else:
srdd = geotiff_rdd.get(geopysc.sc,
key,
[uri],
{})

return RasterRDD(geopysc, rdd_type, srdd)

0 comments on commit e92703e

Please sign in to comment.