S3LayerReader.scala

/*
* Copyright 2016 Azavea
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package geotrellis.spark.store.s3

import geotrellis.layer._
import geotrellis.store._
import geotrellis.store.util._
import geotrellis.store.avro._
import geotrellis.store.index._
import geotrellis.store.s3._
import geotrellis.spark._
import geotrellis.spark.store._
import geotrellis.util._

import com.typesafe.scalalogging.LazyLogging
import org.apache.spark.SparkContext
import software.amazon.awssdk.services.s3.S3Client
import software.amazon.awssdk.services.s3.model._
import io.circe._

import scala.concurrent.ExecutionContext
import scala.reflect.ClassTag

/**
 * Handles reading raster RDDs and their metadata from S3.
 *
 * @param attributeStore  AttributeStore that contains metadata for the corresponding LayerId
 * @tparam K              Type of RDD key (e.g. SpatialKey)
 * @tparam V              Type of RDD value (e.g. Tile or MultibandTile)
 * @tparam M              Type of metadata associated with the RDD[(K,V)]
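 *
 * @example A minimal sketch of a filtered read through the FilteringLayerReader
 *          query API; the layer name, zoom level, and `areaOfInterest` extent
 *          below are placeholders:
 * {{{
 * val reader: S3LayerReader = ???
 * val layer =
 *   reader
 *     .query[SpatialKey, MultibandTile, TileLayerMetadata[SpatialKey]](LayerId("imagery", 0))
 *     .where(Intersects(areaOfInterest))
 *     .result
 * }}}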
*/
class S3LayerReader(
  val attributeStore: AttributeStore,
  s3Client: => S3Client = S3ClientProducer.get(),
  executionContext: => ExecutionContext = BlockingThreadPool.executionContext
)(implicit sc: SparkContext)
  extends FilteringLayerReader[LayerId] with LazyLogging {

  val defaultNumPartitions = sc.defaultParallelism

  def rddReader: S3RDDReader = new S3RDDReader(s3Client, executionContext)

  def read[
    K: AvroRecordCodec: Boundable: Decoder: ClassTag,
    V: AvroRecordCodec: ClassTag,
    M: Decoder: Component[?, Bounds[K]]
  ](id: LayerId, tileQuery: LayerQuery[K, M], numPartitions: Int, filterIndexOnly: Boolean) = {
    if (!attributeStore.layerExists(id)) throw new LayerNotFoundError(id)

    // Read the layer's header, metadata, key index, and Avro writer schema together.
    val LayerAttributes(header, metadata, keyIndex, writerSchema) = try {
      attributeStore.readLayerAttributes[S3LayerHeader, M, K](id)
    } catch {
      case e: AttributeNotFoundError => throw new LayerReadError(id).initCause(e)
      case e: NoSuchBucketException => throw new LayerReadError(id).initCause(e)
    }

    val bucket = header.bucket
    val prefix = header.key

    // Narrow the metadata's key bounds to the region selected by the query.
    val queryKeyBounds = tileQuery(metadata)
    val layerMetadata = metadata.setComponent[Bounds[K]](queryKeyBounds.foldLeft(EmptyBounds: Bounds[K])(_ combine _))

    // Map a space-filling curve index to its zero-padded S3 object key under the
    // layer prefix, and decompose query bounds into contiguous index ranges.
    val maxWidth = Index.digits(keyIndex.toIndex(keyIndex.keyBounds.maxKey))
    val keyPath = (index: BigInt) => makePath(prefix, Index.encode(index, maxWidth))
    val decompose = (bounds: KeyBounds[K]) => keyIndex.indexRanges(bounds)

    val rdd = rddReader.read[K, V](bucket, keyPath, queryKeyBounds, decompose, filterIndexOnly, Some(writerSchema), Some(numPartitions))

    new ContextRDD(rdd, layerMetadata)
  }
}
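
/**
 * Convenience constructors for [[S3LayerReader]]. A minimal sketch of
 * constructing a reader and reading a whole layer; the bucket, prefix, and
 * layer name are placeholders:
 *
 * {{{
 * implicit val sc: SparkContext = ???
 * val reader = S3LayerReader("my-bucket", "catalog-prefix", S3ClientProducer.get())
 * val layer = reader.read[SpatialKey, MultibandTile, TileLayerMetadata[SpatialKey]](LayerId("imagery", 0))
 * }}}
 */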
object S3LayerReader {
  def apply(attributeStore: AttributeStore, s3Client: => S3Client)(implicit sc: SparkContext): S3LayerReader =
    new S3LayerReader(attributeStore, s3Client)

  def apply(bucket: String, prefix: String, s3Client: => S3Client)(implicit sc: SparkContext): S3LayerReader = {
    val attStore = new S3AttributeStore(bucket, prefix, s3Client)
    apply(attStore, s3Client)
  }
}