Summary: Added parallel read functionality.

Design doc: https://docs.google.com/document/d/1X11i1dV0V5Mf7G0UwNIuBApjEx0CitdtaQtK9vF-ws8/edit?usp=sharing

Disabled sort/limit pushdown, as it may work incorrectly with parallel read. Task to investigate sort/limit pushdown: https://memsql.atlassian.net/browse/PLAT-5893

Docs impact: none

Test Plan: All tests now try to use ReadFromAggregators and, where they can, ReadFromLeaves. Added a special workflow that tests ReadFromLeaves: https://webapp.io/memsql/commits?query=repo%3Asinglestore-spark-connector+id%3A26

Reviewers: carl, cchen, pmishchenko-ua

Reviewed By: carl

Subscribers: rob, jprice, engineering-list

JIRA Issues: PLAT-5844

Differential Revision: https://grizzly.internal.memcompute.com/D52565
commit 9c5ccf8 (1 parent: c78a5dc)
Showing 22 changed files with 988 additions and 338 deletions.
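
For context, a minimal sketch of how an application might opt into the new parallel read behavior. The option keys below are assumptions inferred from the SinglestoreOptions.ENABLE_PARALLEL_READ and PARALLEL_READ_FEATURES constants referenced in this diff; the database/table name is hypothetical.

// Sketch only: option keys and table name are assumptions, not taken from this commit.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("parallel-read-sketch").getOrCreate()

val df = spark.read
  .format("singlestore")
  .option("enableParallelRead", "automatic") // "disabled" | "automatic" | "forced"
  .option("parallelRead.Features", "ReadFromAggregators,ReadFromLeaves")
  .load("exampleDb.exampleTable") // hypothetical table

df.show()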
9 changes: 9 additions & 0 deletions
src/main/scala-sparkv3.0/com/singlestore/spark/MaxNumConcurrentTasks.scala
@@ -0,0 +1,9 @@
package org.apache.spark.scheduler

import org.apache.spark.rdd.RDD

object MaxNumConcurrentTasks {
  def get(rdd: RDD[_]): Int = {
    rdd.sparkContext.maxNumConcurrentTasks()
  }
}
13 changes: 13 additions & 0 deletions
src/main/scala-sparkv3.1/com/singlestore/spark/MaxNumConcurrentTasks.scala
@@ -0,0 +1,13 @@
package org.apache.spark.scheduler

import org.apache.spark.rdd.RDD

object MaxNumConcurrentTasks {
  def get(rdd: RDD[_]): Int = {
    val (_, resourceProfiles) =
      rdd.sparkContext.dagScheduler.getShuffleDependenciesAndResourceProfiles(rdd)
    val resourceProfile =
      rdd.sparkContext.dagScheduler.mergeResourceProfilesForStage(resourceProfiles)
    rdd.sparkContext.maxNumConcurrentTasks(resourceProfile)
  }
}
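
The two copies of MaxNumConcurrentTasks above exist because Spark changed this API between releases: in Spark 3.0, SparkContext.maxNumConcurrentTasks() takes no arguments, while Spark 3.1 computes the limit per ResourceProfile, so the shim merges the profiles for the RDD's stage first. As a hedged illustration of why the connector needs this number (the call site below is an assumption; this commit only adds the shim):

// Illustrative only: a parallel read from aggregators needs all result-table
// partitions consumed concurrently, so it is only safe when the cluster can
// schedule at least that many tasks at once. The helper below is hypothetical.
import org.apache.spark.rdd.RDD
import org.apache.spark.scheduler.MaxNumConcurrentTasks

def canRunAllPartitionsConcurrently(rdd: RDD[_], numPartitions: Int): Boolean =
  MaxNumConcurrentTasks.get(rdd) >= numPartitions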
145 changes: 145 additions & 0 deletions
src/main/scala/com/singlestore/spark/AggregatorParallelReadListener.scala
@@ -0,0 +1,145 @@
package com.singlestore.spark

import java.sql.Connection

import com.singlestore.spark.SQLGen.VariableList
import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{
  SparkListener,
  SparkListenerStageCompleted,
  SparkListenerStageSubmitted
}
import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils}
import org.apache.spark.sql.types.StructType

import scala.collection.mutable

class AggregatorParallelReadListener(applicationId: String) extends SparkListener with LazyLogging {
  // connectionsMap is a map from the result table name to the connection with which this table was created
  private val connectionsMap: mutable.Map[String, Connection] =
    new mutable.HashMap[String, Connection]()

  // rddInfos is a map from RDD id to the info needed to create the result table for this RDD
  private val rddInfos: mutable.Map[Int, SingleStoreRDDInfo] =
    new mutable.HashMap[Int, SingleStoreRDDInfo]()

  // SingleStoreRDDInfo is the information needed to create a result table
  private case class SingleStoreRDDInfo(query: String,
                                        variables: VariableList,
                                        schema: StructType,
                                        connectionOptions: JDBCOptions,
                                        materialized: Boolean,
                                        needsRepartition: Boolean)

  def addRDDInfo(rdd: SinglestoreRDD): Unit = {
    rddInfos.synchronized({
      rddInfos += (rdd.id -> SingleStoreRDDInfo(
        rdd.query,
        rdd.variables,
        rdd.schema,
        JdbcHelpers.getDDLJDBCOptions(rdd.options),
        rdd.parallelReadType.contains(ReadFromAggregatorsMaterialized),
        rdd.options.parallelReadRepartition
      ))
    })
  }

  def deleteRDDInfo(rdd: SinglestoreRDD): Unit = {
    rddInfos.synchronized({
      rddInfos -= rdd.id
    })
  }

  def isEmpty: Boolean = {
    rddInfos.synchronized({
      rddInfos.isEmpty
    })
  }

  override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = {
    stageSubmitted.stageInfo.rddInfos.foreach(rddInfo => {
      if (rddInfo.name == "SinglestoreRDD") {
        rddInfos
          .synchronized(
            rddInfos.get(rddInfo.id)
          )
          .foreach(singleStoreRDDInfo => {
            val stageId   = stageSubmitted.stageInfo.stageId
            val tableName = JdbcHelpers.getResultTableName(applicationId, stageId, rddInfo.id)

            // Create a connection and save it in the map
            val conn = JdbcUtils.createConnectionFactory(singleStoreRDDInfo.connectionOptions)()
            connectionsMap.synchronized(
              connectionsMap += (tableName -> conn)
            )

            // Create the result table
            JdbcHelpers.createResultTable(
              conn,
              tableName,
              singleStoreRDDInfo.query,
              singleStoreRDDInfo.schema,
              singleStoreRDDInfo.variables,
              singleStoreRDDInfo.materialized,
              singleStoreRDDInfo.needsRepartition
            )
          })
      }
    })
  }

  override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
    stageCompleted.stageInfo.rddInfos.foreach(rddInfo => {
      if (rddInfo.name == "SinglestoreRDD") {
        val stageId   = stageCompleted.stageInfo.stageId
        val tableName = JdbcHelpers.getResultTableName(applicationId, stageId, rddInfo.id)

        connectionsMap.synchronized(
          connectionsMap
            .get(tableName)
            .foreach(conn => {
              // Drop the result table
              JdbcHelpers.dropResultTable(conn, tableName)
              // Close the connection
              conn.close()
              // Delete the connection from the map
              connectionsMap -= tableName
            })
        )
      }
    })
  }
}

case object AggregatorParallelReadListenerAdder {
  // listeners is a map from a SparkContext to the listener associated with it
  private val listeners = new mutable.HashMap[SparkContext, AggregatorParallelReadListener]()

  def addRDD(rdd: SinglestoreRDD): Unit = {
    this.synchronized({
      val listener = listeners.getOrElse(
        rdd.sparkContext, {
          val newListener = new AggregatorParallelReadListener(rdd.sparkContext.applicationId)
          rdd.sparkContext.addSparkListener(newListener)
          listeners += (rdd.sparkContext -> newListener)
          newListener
        }
      )
      listener.addRDDInfo(rdd)
    })
  }

  def deleteRDD(rdd: SinglestoreRDD): Unit = {
    this.synchronized({
      listeners
        .get(rdd.sparkContext)
        .foreach(listener => {
          listener.deleteRDDInfo(rdd)
          if (listener.isEmpty) {
            listeners -= rdd.sparkContext
            rdd.sparkContext.removeSparkListener(listener)
          }
        })
    })
  }
}
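
To make the listener lifecycle concrete, a sketch of how a SinglestoreRDD might register with the adder; the wrapper below is hypothetical — only addRDD and deleteRDD come from the code above.

// Hypothetical wiring sketch: register on construction, unregister when done.
// addRDD installs one listener per SparkContext and records the info needed to
// create the result table when a stage using this RDD is submitted; deleteRDD
// removes the listener once it no longer tracks any RDDs.
def withParallelReadLifecycle[T](rdd: SinglestoreRDD)(body: => T): T = {
  AggregatorParallelReadListenerAdder.addRDD(rdd)
  try body
  finally AggregatorParallelReadListenerAdder.deleteRDD(rdd)
}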
26 changes: 26 additions & 0 deletions
src/main/scala/com/singlestore/spark/ParallelReadEnablement.scala
@@ -0,0 +1,26 @@
package com.singlestore.spark

sealed trait ParallelReadEnablement

case object Disabled  extends ParallelReadEnablement
case object Automatic extends ParallelReadEnablement
case object Forced    extends ParallelReadEnablement

object ParallelReadEnablement {
  def apply(value: String): ParallelReadEnablement = value.toLowerCase match {
    case "disabled"  => Disabled
    case "automatic" => Automatic
    case "forced"    => Forced

    // These two options are accepted for compatibility purposes
    case "false" => Disabled
    case "true"  => Automatic

    case _ =>
      throw new IllegalArgumentException(
        s"""Illegal argument for `${SinglestoreOptions.ENABLE_PARALLEL_READ}` option. Valid arguments are:
           | - "Disabled"
           | - "Automatic"
           | - "Forced"""".stripMargin)
  }
}
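
Usage follows directly from the match above: parsing is case-insensitive, and the legacy boolean spellings map onto the new values.

assert(ParallelReadEnablement("Automatic") == Automatic)
assert(ParallelReadEnablement("true") == Automatic)  // legacy spelling
assert(ParallelReadEnablement("FALSE") == Disabled)  // case-insensitive
// ParallelReadEnablement("on") throws IllegalArgumentException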
21 changes: 21 additions & 0 deletions
src/main/scala/com/singlestore/spark/ParallelReadType.scala
@@ -0,0 +1,21 @@
package com.singlestore.spark

sealed trait ParallelReadType

case object ReadFromLeaves extends ParallelReadType
case object ReadFromAggregators extends ParallelReadType
case object ReadFromAggregatorsMaterialized extends ParallelReadType

object ParallelReadType {
  def apply(value: String): ParallelReadType = value.toLowerCase match {
    case "readfromleaves"                  => ReadFromLeaves
    case "readfromaggregators"             => ReadFromAggregators
    case "readfromaggregatorsmaterialized" => ReadFromAggregatorsMaterialized
    case _ =>
      throw new IllegalArgumentException(
        s"""Illegal argument for `${SinglestoreOptions.PARALLEL_READ_FEATURES}` option. Valid arguments are:
           | - "ReadFromLeaves"
           | - "ReadFromAggregators"
           | - "ReadFromAggregatorsMaterialized"""".stripMargin)
  }
}
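
Since the option name PARALLEL_READ_FEATURES suggests the setting holds an ordered list of features to try, a small parsing sketch; the comma-separated format is an assumption — only the single-value parser appears in this diff.

// Assumption: the option value is a comma-separated, ordered list such as
// "ReadFromAggregators,ReadFromLeaves". Only ParallelReadType.apply is taken
// from the code above.
def parseFeatures(value: String): List[ParallelReadType] =
  value.split(",").map(_.trim).filter(_.nonEmpty).map(ParallelReadType.apply).toList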