From aa0a9122c6c26dc4c5211fdf0a833afc8ae5734e Mon Sep 17 00:00:00 2001
From: Christopher Suchanek
Date: Mon, 13 May 2019 09:33:54 -0700
Subject: [PATCH 1/2] improved PercentileCalibratorTest

---
 .../feature/PercentileCalibratorTest.scala | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/PercentileCalibratorTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/PercentileCalibratorTest.scala
index 48f3ad5746..c1334c3aab 100644
--- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/PercentileCalibratorTest.scala
+++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/PercentileCalibratorTest.scala
@@ -34,7 +34,7 @@ import com.salesforce.op._
 import com.salesforce.op.features.types._
 import com.salesforce.op.features.Feature
 import com.salesforce.op.stages.base.unary.UnaryModel
-import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext}
+import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder, TestSparkContext}
 import com.salesforce.op.utils.spark.RichDataset._
 import com.salesforce.op.utils.spark.RichMetadata._
 import org.apache.spark.ml.{Estimator, Transformer}
@@ -49,9 +49,23 @@ import org.scalatest.FlatSpec
 import scala.util.Random
 
 @RunWith(classOf[JUnitRunner])
-class PercentileCalibratorTest extends FlatSpec with TestSparkContext {
+class PercentileCalibratorTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN, RealNN], PercentileCalibrator] {
   import spark.implicits._
 
+  val testData = Seq(10, 100, 1000).map(_.toRealNN)
+
+  val (inputData, testF) = TestFeatureBuilder(testData)
+
+  /**
+   * Estimator instance to be tested
+   */
+  override val estimator: PercentileCalibrator = new PercentileCalibrator().setInput(testF)
+
+  /**
+   * Expected result of the transformer applied on the Input Dataset
+   */
+  override val expectedResult: Seq[RealNN] = Seq(33.toRealNN, 66.toRealNN, 99.toRealNN)
+
   Spec[PercentileCalibrator] should "return a minimum calibrated score of 0 and max of 99 when buckets is 100" in {
     val data = (0 until 1000).map(i => i.toLong.toIntegral -> Random.nextDouble.toRealNN)
     val (scoresDF, f1, f2): (DataFrame, Feature[Integral], Feature[RealNN]) = TestFeatureBuilder(data)
@@ -151,4 +165,5 @@ class PercentileCalibratorTest extends FlatSpec with TestSparkContext {
     val indicesByPerc = scoresTransformed.orderBy(percentile.name, f2.name).collect(f1).deep
     indicesByProb should equal (indicesByPerc)
   }
+
 }

From a822a73d9afe8bf98bf09e749cf83eb346e83867 Mon Sep 17 00:00:00 2001
From: Christopher Suchanek
Date: Mon, 13 May 2019 09:34:58 -0700
Subject: [PATCH 2/2] code style

---
 .../impl/feature/PercentileCalibratorTest.scala | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/PercentileCalibratorTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/PercentileCalibratorTest.scala
index c1334c3aab..cc2378f08a 100644
--- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/PercentileCalibratorTest.scala
+++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/PercentileCalibratorTest.scala
@@ -50,6 +50,7 @@ import scala.util.Random
 
 @RunWith(classOf[JUnitRunner])
 class PercentileCalibratorTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN, RealNN], PercentileCalibrator] {
+
   import spark.implicits._
 
   val testData = Seq(10, 100, 1000).map(_.toRealNN)
@@ -66,7 +67,7 @@ class PercentileCalibratorTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN
    */
   override val expectedResult: Seq[RealNN] = Seq(33.toRealNN, 66.toRealNN, 99.toRealNN)
 
-  Spec[PercentileCalibrator] should "return a minimum calibrated score of 0 and max of 99 when buckets is 100" in {
+  it should "return a minimum calibrated score of 0 and max of 99 when buckets is 100" in {
     val data = (0 until 1000).map(i => i.toLong.toIntegral -> Random.nextDouble.toRealNN)
     val (scoresDF, f1, f2): (DataFrame, Feature[Integral], Feature[RealNN]) = TestFeatureBuilder(data)
     val percentile = f2.toPercentile()
@@ -75,8 +76,8 @@ class PercentileCalibratorTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN
     val scoresTransformed = model.asInstanceOf[Transformer].transform(scoresDF)
 
     percentile.name shouldBe percentile.originStage.getOutputFeatureName
-    scoresTransformed.select(min(percentile.name)).first.getDouble(0) should equal (0.0)
-    scoresTransformed.select(max(percentile.name)).first.getDouble(0) should equal (99.0)
+    scoresTransformed.select(min(percentile.name)).first.getDouble(0) should equal(0.0)
+    scoresTransformed.select(max(percentile.name)).first.getDouble(0) should equal(99.0)
   }
 
   it should "produce the calibration map metadata" in {
@@ -103,7 +104,7 @@ class PercentileCalibratorTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN
     val model = percentile.originStage.asInstanceOf[Estimator[_]].fit(scoresDF)
     val scoresTransformed = model.asInstanceOf[Transformer].transform(scoresDF)
 
-    scoresTransformed.select(max(percentile.name)).first.getDouble(0) should equal (99.0)
+    scoresTransformed.select(max(percentile.name)).first.getDouble(0) should equal(99.0)
   }
 
   it should "return a maximum calibrated score of 99 when calibrating with less than 100" in {
@@ -114,7 +115,7 @@ class PercentileCalibratorTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN
     val model = percentile.originStage.asInstanceOf[Estimator[_]].fit(scoresDF)
     val scoresTransformed = model.asInstanceOf[Transformer].transform(scoresDF)
 
-    scoresTransformed.select(max(percentile.name)).first.getDouble(0) should equal (99.0)
+    scoresTransformed.select(max(percentile.name)).first.getDouble(0) should equal(99.0)
   }
 
   it should "return all scores from 0 to 99 in increments of 1" in {
@@ -131,7 +132,7 @@ class PercentileCalibratorTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN
 
     val checkSet = (0 to 99).map(_.toReal).toSet
 
-    scoreCounts.collect(percentile).toSet should equal (checkSet)
+    scoreCounts.collect(percentile).toSet should equal(checkSet)
   }
 
   it should "return a uniform distribution of scores" in {
@@ -163,7 +164,7 @@ class PercentileCalibratorTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN
 
     val indicesByProb = scoresTransformed.orderBy(f2.name).collect(f1).deep
     val indicesByPerc = scoresTransformed.orderBy(percentile.name, f2.name).collect(f1).deep
-    indicesByProb should equal (indicesByPerc)
+    indicesByProb should equal(indicesByPerc)
   }
 
 }
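
For reference, a minimal sketch (not part of either patch) of the fit/transform round trip these tests exercise, built only from calls that already appear in the diff. It assumes it runs inside a suite that supplies the implicit SparkSession, as TestSparkContext / OpEstimatorSpec do; the local names df, scores, model and calibrated are illustrative.

  import com.salesforce.op._
  import com.salesforce.op.features.types._
  import com.salesforce.op.test.TestFeatureBuilder
  import org.apache.spark.ml.{Estimator, Transformer}

  // Build a tiny dataset with a single RealNN score feature (same values as testData above).
  val (df, scores) = TestFeatureBuilder(Seq(10, 100, 1000).map(_.toRealNN))

  // toPercentile() wires a PercentileCalibrator stage onto the feature.
  val percentile = scores.toPercentile()

  // Fit the calibrator and transform the data; the output column holds calibrated
  // scores in the 0-99 range, which for this 3-row input the base spec expects to
  // come out as 33, 66 and 99 (the expectedResult declared in the test).
  val model = percentile.originStage.asInstanceOf[Estimator[_]].fit(df)
  val calibrated = model.asInstanceOf[Transformer].transform(df)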