-
Notifications
You must be signed in to change notification settings - Fork 362
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New opt-in partitioning strategy that may help with read optimization… #2855
Changes from 7 commits
718486f
69c33e7
c40619d
c598ed3
afafcdf
2dbe1a0
daa1350
5e034b3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,16 +21,12 @@ import geotrellis.spark.io._ | |
import geotrellis.spark.io.avro.codecs.KeyValueRecordCodec | ||
import geotrellis.spark.io.avro.{AvroEncoder, AvroRecordCodec} | ||
import geotrellis.spark.io.cassandra.conf.CassandraConfig | ||
import geotrellis.spark.io.index.MergeQueue | ||
import geotrellis.spark.io.index.{KeyIndex, MergeQueue} | ||
import geotrellis.spark.util.KryoWrapper | ||
|
||
import org.apache.avro.Schema | ||
import com.datastax.driver.core.querybuilder.QueryBuilder | ||
import com.datastax.driver.core.querybuilder.QueryBuilder.{eq => eqs} | ||
|
||
import scala.collection.JavaConverters._ | ||
import scala.reflect.ClassTag | ||
|
||
import java.math.BigInteger | ||
|
||
object CassandraCollectionReader { | ||
|
@@ -45,10 +41,13 @@ object CassandraCollectionReader { | |
decomposeBounds: KeyBounds[K] => Seq[(BigInt, BigInt)], | ||
filterIndexOnly: Boolean, | ||
writerSchema: Option[Schema] = None, | ||
keyIndex: KeyIndex[K], | ||
threads: Int = defaultThreadCount | ||
): Seq[(K, V)] = { | ||
if (queryKeyBounds.isEmpty) return Seq.empty[(K, V)] | ||
|
||
val indexStrategy = new CassandraIndexing[K](keyIndex, instance.cassandraConfig.tilesPerPartition) | ||
|
||
val includeKey = (key: K) => queryKeyBounds.includeKey(key) | ||
val _recordCodec = KeyValueRecordCodec[K, V] | ||
val kwWriterSchema = KryoWrapper(writerSchema) //Avro Schema is not Serializable | ||
|
@@ -58,18 +57,20 @@ object CassandraCollectionReader { | |
else | ||
queryKeyBounds.flatMap(decomposeBounds) | ||
|
||
val query = QueryBuilder.select("value") | ||
.from(keyspace, table) | ||
.where(eqs("key", QueryBuilder.bindMarker())) | ||
.and(eqs("name", layerId.name)) | ||
.and(eqs("zoom", layerId.zoom)) | ||
.toString | ||
val query = indexStrategy.queryValueStatement( | ||
instance.cassandraConfig.indexStrategy, | ||
keyspace, table, layerId.name, layerId.zoom | ||
) | ||
|
||
instance.withSessionDo { session => | ||
val statement = session.prepare(query) | ||
val statement = indexStrategy.prepareQuery(query)(session) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Made the evaluation of the session lazy. Maybe I'm overthinking this... but not evaluating my Cassandra sessions lazily has been something that's bitten me before in other Spark code. |
||
|
||
LayerReader.njoin[K, V](ranges.toIterator, threads){ index: BigInt => | ||
val row = session.execute(statement.bind(index: BigInteger)) | ||
val row = session.execute(indexStrategy.bindQuery( | ||
instance.cassandraConfig.indexStrategy, | ||
statement, index: BigInteger | ||
)) | ||
|
||
if (row.asScala.nonEmpty) { | ||
val bytes = row.one().getBytes("value").array() | ||
val recs = AvroEncoder.fromBinary(kwWriterSchema.value.getOrElse(_recordCodec.schema), bytes)(_recordCodec) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Was able to reduce the number of required bounds/evidence parameters to just the ClassTag itself... evidently that's all that is required to look up the
KeyIndex
. I'm curious as to whether the metadata in the attributes table that's already loaded in the
delete
implementation contains the required class name so that we could materialize the class/type required at runtime rather than having to rely upon an API change. Even if that information is available, though, I may need an assist using it. I'm not sure how I'd go from a reflective call to look up a class by name to a type constructor argument required for the
KeyIndex
lookup.