Skip to content

Commit

Permalink
Tweaks for Spark 3; drop Spark 2.3
Browse files Browse the repository at this point in the history
  • Loading branch information
srowen committed Nov 24, 2019
1 parent a5fad93 commit 637d41f
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 13 deletions.
6 changes: 3 additions & 3 deletions .travis.yml
Expand Up @@ -5,10 +5,10 @@ script: mvn ${SPARK} -Dmaven.javadoc.skip=true -B clean compile
jdk: openjdk8
matrix:
include:
-    # Covers Spark 2.3.x
+    # Covers Spark 2.4.x + Scala 2.11
- env: SPARK=
# Covers Spark 2.4.x + Scala 2.12
- env: SPARK="-Pspark-2.4 -Pscala-2.12"
# Covers Spark 3.0.x + Scala 2.12
- env: SPARK="-Pspark-3.0 -Pscala-2.12"
cache:
directories:
- $HOME/.m2
Expand Up @@ -107,7 +107,7 @@ class RunKMeans(private val spark: SparkSession) {
val pipeline = new Pipeline().setStages(Array(assembler, kmeans))

val kmeansModel = pipeline.fit(data).stages.last.asInstanceOf[KMeansModel]
-    kmeansModel.computeCost(assembler.transform(data)) / data.count()
+    kmeansModel.summary.trainingCost
}

def clusteringScore1(data: DataFrame, k: Int): Double = {
Expand All @@ -126,7 +126,7 @@ class RunKMeans(private val spark: SparkSession) {
val pipeline = new Pipeline().setStages(Array(assembler, kmeans))

val kmeansModel = pipeline.fit(data).stages.last.asInstanceOf[KMeansModel]
-    kmeansModel.computeCost(assembler.transform(data)) / data.count()
+    kmeansModel.summary.trainingCost
}

def clusteringTake1(data: DataFrame): Unit = {
Expand Down Expand Up @@ -161,7 +161,7 @@ class RunKMeans(private val spark: SparkSession) {
val pipelineModel = pipeline.fit(data)

val kmeansModel = pipelineModel.stages.last.asInstanceOf[KMeansModel]
-    kmeansModel.computeCost(pipelineModel.transform(data)) / data.count()
+    kmeansModel.summary.trainingCost
}

def clusteringTake2(data: DataFrame): Unit = {
Expand Down Expand Up @@ -215,7 +215,7 @@ class RunKMeans(private val spark: SparkSession) {
val pipelineModel = pipeline.fit(data)

val kmeansModel = pipelineModel.stages.last.asInstanceOf[KMeansModel]
-    kmeansModel.computeCost(pipelineModel.transform(data)) / data.count()
+    kmeansModel.summary.trainingCost
}

def clusteringTake3(data: DataFrame): Unit = {
Expand Down
Expand Up @@ -225,7 +225,7 @@ class RunRisk(private val spark: SparkSession) {
val bandwidth = 1.06 * stddev * math.pow(samples.size, -.2)

// Using toList before toArray avoids a Scala bug
-    val domain = Range.Double(min, max, (max - min) / 100).toList.toArray
+    val domain = Range.BigDecimal(min, max, (max - min) / 100).map(_.toDouble).toList.toArray
val kd = new KernelDensity().
setSample(samples.toSeq.toDS.rdd).
setBandwidth(bandwidth)
Expand All @@ -248,7 +248,7 @@ class RunRisk(private val spark: SparkSession) {
val bandwidth = 1.06 * stddev * math.pow(count, -.2)

// Using toList before toArray avoids a Scala bug
-    val domain = Range.Double(min, max, (max - min) / 100).toList.toArray
+    val domain = Range.BigDecimal(min, max, (max - min) / 100).map(_.toDouble).toList.toArray
val kd = new KernelDensity().
setSample(samples.rdd).
setBandwidth(bandwidth)
Expand Down
8 changes: 4 additions & 4 deletions pom.xml
Expand Up @@ -198,7 +198,7 @@
<java.version>1.8</java.version>
<scala.minor.version>2.11</scala.minor.version>
<scala.complete.version>${scala.minor.version}.12</scala.complete.version>
-    <spark.version>2.3.3</spark.version>
+    <spark.version>2.4.4</spark.version>
</properties>

<dependencies>
Expand Down Expand Up @@ -416,16 +416,16 @@

<profiles>
<profile>
-      <id>spark-2.4</id>
+      <id>spark-3.0</id>
<properties>
-        <spark.version>2.4.3</spark.version>
+        <spark.version>3.0.0-preview</spark.version>
</properties>
</profile>
<profile>
<id>scala-2.12</id>
<properties>
<scala.minor.version>2.12</scala.minor.version>
-        <scala.complete.version>${scala.minor.version}.8</scala.complete.version>
+        <scala.complete.version>${scala.minor.version}.10</scala.complete.version>
</properties>
</profile>
</profiles>
Expand Down

0 comments on commit 637d41f

Please sign in to comment.