[SPARK-6765] Fix test code style for mllib.

So we can turn style checker on for test code.

Author: Reynold Xin <rxin@databricks.com>

Closes apache#5411 from rxin/test-style-mllib and squashes the following commits:

d8a2569 [Reynold Xin] [SPARK-6765] Fix test code style for mllib.
nemccarthy · Apr 8, 2015 · 66159c3
1 parent 8d812f9
commit 66159c3
Show file tree

Hide file tree

Showing 12 changed files with 40 additions and 28 deletions.
diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala
@@ -28,7 +28,7 @@ class TestParams extends Params {
   def setInputCol(value: String): this.type = { set(inputCol, value); this }
   def getInputCol: String = get(inputCol)
 
-  override def validate(paramMap: ParamMap) = {
+  override def validate(paramMap: ParamMap): Unit = {
     val m = this.paramMap ++ paramMap
     require(m(maxIter) >= 0)
     require(m.contains(inputCol))

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
@@ -102,7 +102,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext {
   def validateModelFit(
       piData: Array[Double],
       thetaData: Array[Array[Double]],
-      model: NaiveBayesModel) = {
+      model: NaiveBayesModel): Unit = {
     def closeFit(d1: Double, d2: Double, precision: Double): Boolean = {
       (d1 - d2).abs <= precision
     }

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
@@ -30,7 +30,7 @@ import org.apache.spark.streaming.TestSuiteBase
 class StreamingLogisticRegressionSuite extends FunSuite with TestSuiteBase {
 
   // use longer wait time to ensure job completion
-  override def maxWaitTimeMillis = 30000
+  override def maxWaitTimeMillis: Int = 30000
 
   // Test if we can accurately learn B for Y = logistic(BX) on streaming data
   test("parameter accuracy") {

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
@@ -199,9 +199,13 @@ class KMeansSuite extends FunSuite with MLlibTestSparkContext {
   test("k-means|| initialization") {
 
     case class VectorWithCompare(x: Vector) extends Ordered[VectorWithCompare] {
-      @Override def compare(that: VectorWithCompare): Int = {
-        if(this.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x) >
-          that.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x)) -1 else 1
+      override def compare(that: VectorWithCompare): Int = {
+        if (this.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x) >
+          that.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x)) {
+          -1
+        } else {
+          1
+        }
       }
     }
 

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.clustering
 
 import org.scalatest.FunSuite
 
-import org.apache.spark.mllib.linalg.{DenseMatrix, Matrix, Vectors}
+import org.apache.spark.mllib.linalg.{Vector, DenseMatrix, Matrix, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.mllib.util.TestingUtils._
 
@@ -141,7 +141,7 @@ private[clustering] object LDASuite {
     (terms.toArray, termWeights.toArray)
   }
 
-  def tinyCorpus = Array(
+  def tinyCorpus: Array[(Long, Vector)] = Array(
     Vectors.dense(1, 3, 0, 2, 8),
     Vectors.dense(0, 2, 1, 0, 4),
     Vectors.dense(2, 3, 12, 3, 1),

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala
@@ -27,7 +27,7 @@ import org.apache.spark.util.random.XORShiftRandom
 
 class StreamingKMeansSuite extends FunSuite with TestSuiteBase {
 
-  override def maxWaitTimeMillis = 30000
+  override def maxWaitTimeMillis: Int = 30000
 
   test("accuracy for single center and equivalence to grand average") {
     // set parameters
@@ -59,7 +59,7 @@ class StreamingKMeansSuite extends FunSuite with TestSuiteBase {
     // estimated center from streaming should exactly match the arithmetic mean of all data points
     // because the decay factor is set to 1.0
     val grandMean =
-      input.flatten.map(x => x.toBreeze).reduce(_+_) / (numBatches * numPoints).toDouble
+      input.flatten.map(x => x.toBreeze).reduce(_ + _) / (numBatches * numPoints).toDouble
     assert(model.latestModel().clusterCenters(0) ~== Vectors.dense(grandMean.toArray) absTol 1E-5)
   }
 

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala
@@ -181,7 +181,8 @@ class RandomRDDsSuite extends FunSuite with MLlibTestSparkContext with Serializa
       val poisson = RandomRDDs.poissonVectorRDD(sc, poissonMean, rows, cols, parts, seed)
       testGeneratedVectorRDD(poisson, rows, cols, parts, poissonMean, math.sqrt(poissonMean), 0.1)
 
-      val exponential = RandomRDDs.exponentialVectorRDD(sc, exponentialMean, rows, cols, parts, seed)
+      val exponential =
+        RandomRDDs.exponentialVectorRDD(sc, exponentialMean, rows, cols, parts, seed)
       testGeneratedVectorRDD(exponential, rows, cols, parts, exponentialMean, exponentialMean, 0.1)
 
       val gamma = RandomRDDs.gammaVectorRDD(sc, gammaShape, gammaScale, rows, cols, parts, seed)
@@ -197,7 +198,7 @@ private[random] class MockDistro extends RandomDataGenerator[Double] {
   // This allows us to check that each partition has a different seed
   override def nextValue(): Double = seed.toDouble
 
-  override def setSeed(seed: Long) = this.seed = seed
+  override def setSeed(seed: Long): Unit = this.seed = seed
 
   override def copy(): MockDistro = new MockDistro
 }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -203,6 +203,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext {
    * @param numProductBlocks number of product blocks to partition products into
    * @param negativeFactors whether the generated user/product factors can have negative entries
    */
+  // scalastyle:off
   def testALS(
       users: Int,
       products: Int,
@@ -216,6 +217,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext {
       numUserBlocks: Int = -1,
       numProductBlocks: Int = -1,
       negativeFactors: Boolean = true) {
+    // scalastyle:on
+
     val (sampledRatings, trueRatings, truePrefs) = ALSSuite.generateRatings(users, products,
       features, samplingRate, implicitPrefs, negativeWeights, negativeFactors)
 

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -35,7 +35,7 @@ private object RidgeRegressionSuite {
 
 class RidgeRegressionSuite extends FunSuite with MLlibTestSparkContext {
 
-  def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]) = {
+  def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]): Double = {
     predictions.zip(input).map { case (prediction, expected) =>
       (prediction - expected.label) * (prediction - expected.label)
     }.reduceLeft(_ + _) / predictions.size

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.streaming.TestSuiteBase
 class StreamingLinearRegressionSuite extends FunSuite with TestSuiteBase {
 
   // use longer wait time to ensure job completion
-  override def maxWaitTimeMillis = 20000
+  override def maxWaitTimeMillis: Int = 20000
 
   // Assert that two values are equal within tolerance epsilon
   def assertEqual(v1: Double, v2: Double, epsilon: Double) {

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala
@@ -95,16 +95,16 @@ object TestingUtils {
     /**
      * Comparison using absolute tolerance.
      */
-    def absTol(eps: Double): CompareDoubleRightSide = CompareDoubleRightSide(AbsoluteErrorComparison,
-      x, eps, ABS_TOL_MSG)
+    def absTol(eps: Double): CompareDoubleRightSide =
+      CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG)
 
     /**
      * Comparison using relative tolerance.
      */
-    def relTol(eps: Double): CompareDoubleRightSide = CompareDoubleRightSide(RelativeErrorComparison,
-      x, eps, REL_TOL_MSG)
+    def relTol(eps: Double): CompareDoubleRightSide =
+      CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG)
 
-    override def toString = x.toString
+    override def toString: String = x.toString
   }
 
   case class CompareVectorRightSide(
@@ -166,7 +166,7 @@ object TestingUtils {
         x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
       }, x, eps, REL_TOL_MSG)
 
-    override def toString = x.toString
+    override def toString: String = x.toString
   }
 
   case class CompareMatrixRightSide(
@@ -229,7 +229,7 @@ object TestingUtils {
         x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
       }, x, eps, REL_TOL_MSG)
 
-    override def toString = x.toString
+    override def toString: String = x.toString
   }
 
 }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala
@@ -88,16 +88,20 @@ class TestingUtilsSuite extends FunSuite {
     assert(!(17.8 ~= 17.59 absTol 0.2))
 
     // Comparisons of numbers very close to zero, and both side of zeros
-    assert(Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
-    assert(Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
-
-    assert(-Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
-    assert(Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+    assert(
+      Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+    assert(
+      Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+
+    assert(
+      -Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+    assert(
+      Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
   }
 
   test("Comparing vectors using relative error.") {
 
-    //Comparisons of two dense vectors
+    // Comparisons of two dense vectors
     assert(Vectors.dense(Array(3.1, 3.5)) ~== Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
     assert(Vectors.dense(Array(3.1, 3.5)) !~== Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
     assert(Vectors.dense(Array(3.1, 3.5)) ~= Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
@@ -130,7 +134,7 @@ class TestingUtilsSuite extends FunSuite {
 
   test("Comparing vectors using absolute error.") {
 
-    //Comparisons of two dense vectors
+    // Comparisons of two dense vectors
     assert(Vectors.dense(Array(3.1, 3.5, 0.0)) ~==
       Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)