Skip to content

Commit

Permalink
[SPARK-6765] Fix test code style for mllib.
Browse files Browse the repository at this point in the history
So that we can turn the style checker on for test code.

Author: Reynold Xin <rxin@databricks.com>

Closes apache#5411 from rxin/test-style-mllib and squashes the following commits:

d8a2569 [Reynold Xin] [SPARK-6765] Fix test code style for mllib.
  • Loading branch information
rxin committed Apr 8, 2015
1 parent 8d812f9 commit 66159c3
Show file tree
Hide file tree
Showing 12 changed files with 40 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class TestParams extends Params {
def setInputCol(value: String): this.type = { set(inputCol, value); this }
def getInputCol: String = get(inputCol)

override def validate(paramMap: ParamMap) = {
override def validate(paramMap: ParamMap): Unit = {
val m = this.paramMap ++ paramMap
require(m(maxIter) >= 0)
require(m.contains(inputCol))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext {
def validateModelFit(
piData: Array[Double],
thetaData: Array[Array[Double]],
model: NaiveBayesModel) = {
model: NaiveBayesModel): Unit = {
def closeFit(d1: Double, d2: Double, precision: Double): Boolean = {
(d1 - d2).abs <= precision
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import org.apache.spark.streaming.TestSuiteBase
class StreamingLogisticRegressionSuite extends FunSuite with TestSuiteBase {

// use longer wait time to ensure job completion
override def maxWaitTimeMillis = 30000
override def maxWaitTimeMillis: Int = 30000

// Test if we can accurately learn B for Y = logistic(BX) on streaming data
test("parameter accuracy") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,13 @@ class KMeansSuite extends FunSuite with MLlibTestSparkContext {
test("k-means|| initialization") {

case class VectorWithCompare(x: Vector) extends Ordered[VectorWithCompare] {
@Override def compare(that: VectorWithCompare): Int = {
if(this.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x) >
that.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x)) -1 else 1
override def compare(that: VectorWithCompare): Int = {
if (this.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x) >
that.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x)) {
-1
} else {
1
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.mllib.clustering

import org.scalatest.FunSuite

import org.apache.spark.mllib.linalg.{DenseMatrix, Matrix, Vectors}
import org.apache.spark.mllib.linalg.{Vector, DenseMatrix, Matrix, Vectors}
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.mllib.util.TestingUtils._

Expand Down Expand Up @@ -141,7 +141,7 @@ private[clustering] object LDASuite {
(terms.toArray, termWeights.toArray)
}

def tinyCorpus = Array(
def tinyCorpus: Array[(Long, Vector)] = Array(
Vectors.dense(1, 3, 0, 2, 8),
Vectors.dense(0, 2, 1, 0, 4),
Vectors.dense(2, 3, 12, 3, 1),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import org.apache.spark.util.random.XORShiftRandom

class StreamingKMeansSuite extends FunSuite with TestSuiteBase {

override def maxWaitTimeMillis = 30000
override def maxWaitTimeMillis: Int = 30000

test("accuracy for single center and equivalence to grand average") {
// set parameters
Expand Down Expand Up @@ -59,7 +59,7 @@ class StreamingKMeansSuite extends FunSuite with TestSuiteBase {
// estimated center from streaming should exactly match the arithmetic mean of all data points
// because the decay factor is set to 1.0
val grandMean =
input.flatten.map(x => x.toBreeze).reduce(_+_) / (numBatches * numPoints).toDouble
input.flatten.map(x => x.toBreeze).reduce(_ + _) / (numBatches * numPoints).toDouble
assert(model.latestModel().clusterCenters(0) ~== Vectors.dense(grandMean.toArray) absTol 1E-5)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,8 @@ class RandomRDDsSuite extends FunSuite with MLlibTestSparkContext with Serializa
val poisson = RandomRDDs.poissonVectorRDD(sc, poissonMean, rows, cols, parts, seed)
testGeneratedVectorRDD(poisson, rows, cols, parts, poissonMean, math.sqrt(poissonMean), 0.1)

val exponential = RandomRDDs.exponentialVectorRDD(sc, exponentialMean, rows, cols, parts, seed)
val exponential =
RandomRDDs.exponentialVectorRDD(sc, exponentialMean, rows, cols, parts, seed)
testGeneratedVectorRDD(exponential, rows, cols, parts, exponentialMean, exponentialMean, 0.1)

val gamma = RandomRDDs.gammaVectorRDD(sc, gammaShape, gammaScale, rows, cols, parts, seed)
Expand All @@ -197,7 +198,7 @@ private[random] class MockDistro extends RandomDataGenerator[Double] {
// This allows us to check that each partition has a different seed
override def nextValue(): Double = seed.toDouble

override def setSeed(seed: Long) = this.seed = seed
override def setSeed(seed: Long): Unit = this.seed = seed

override def copy(): MockDistro = new MockDistro
}
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext {
* @param numProductBlocks number of product blocks to partition products into
* @param negativeFactors whether the generated user/product factors can have negative entries
*/
// scalastyle:off
def testALS(
users: Int,
products: Int,
Expand All @@ -216,6 +217,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext {
numUserBlocks: Int = -1,
numProductBlocks: Int = -1,
negativeFactors: Boolean = true) {
// scalastyle:on

val (sampledRatings, trueRatings, truePrefs) = ALSSuite.generateRatings(users, products,
features, samplingRate, implicitPrefs, negativeWeights, negativeFactors)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ private object RidgeRegressionSuite {

class RidgeRegressionSuite extends FunSuite with MLlibTestSparkContext {

def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]) = {
def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]): Double = {
predictions.zip(input).map { case (prediction, expected) =>
(prediction - expected.label) * (prediction - expected.label)
}.reduceLeft(_ + _) / predictions.size
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import org.apache.spark.streaming.TestSuiteBase
class StreamingLinearRegressionSuite extends FunSuite with TestSuiteBase {

// use longer wait time to ensure job completion
override def maxWaitTimeMillis = 20000
override def maxWaitTimeMillis: Int = 20000

// Assert that two values are equal within tolerance epsilon
def assertEqual(v1: Double, v2: Double, epsilon: Double) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,16 @@ object TestingUtils {
/**
* Comparison using absolute tolerance.
*/
def absTol(eps: Double): CompareDoubleRightSide = CompareDoubleRightSide(AbsoluteErrorComparison,
x, eps, ABS_TOL_MSG)
def absTol(eps: Double): CompareDoubleRightSide =
CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG)

/**
* Comparison using relative tolerance.
*/
def relTol(eps: Double): CompareDoubleRightSide = CompareDoubleRightSide(RelativeErrorComparison,
x, eps, REL_TOL_MSG)
def relTol(eps: Double): CompareDoubleRightSide =
CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG)

override def toString = x.toString
override def toString: String = x.toString
}

case class CompareVectorRightSide(
Expand Down Expand Up @@ -166,7 +166,7 @@ object TestingUtils {
x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
}, x, eps, REL_TOL_MSG)

override def toString = x.toString
override def toString: String = x.toString
}

case class CompareMatrixRightSide(
Expand Down Expand Up @@ -229,7 +229,7 @@ object TestingUtils {
x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
}, x, eps, REL_TOL_MSG)

override def toString = x.toString
override def toString: String = x.toString
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,20 @@ class TestingUtilsSuite extends FunSuite {
assert(!(17.8 ~= 17.59 absTol 0.2))

// Comparisons of numbers very close to zero, and both side of zeros
assert(Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
assert(Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)

assert(-Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
assert(Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
assert(
Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
assert(
Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)

assert(
-Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
assert(
Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
}

test("Comparing vectors using relative error.") {

//Comparisons of two dense vectors
// Comparisons of two dense vectors
assert(Vectors.dense(Array(3.1, 3.5)) ~== Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
assert(Vectors.dense(Array(3.1, 3.5)) !~== Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
assert(Vectors.dense(Array(3.1, 3.5)) ~= Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
Expand Down Expand Up @@ -130,7 +134,7 @@ class TestingUtilsSuite extends FunSuite {

test("Comparing vectors using absolute error.") {

//Comparisons of two dense vectors
// Comparisons of two dense vectors
assert(Vectors.dense(Array(3.1, 3.5, 0.0)) ~==
Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)

Expand Down

0 comments on commit 66159c3

Please sign in to comment.