From f3f1fd2dcd413c7fc79f5a42849701d54b44724b Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Mon, 15 Apr 2019 16:01:01 -0700 Subject: [PATCH 01/11] match up names for OpLDA and test --- .../scala/com/salesforce/op/stages/impl/feature/OpLdaTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLdaTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLdaTest.scala index 903b98aadf..a1ccd078de 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLdaTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLdaTest.scala @@ -42,7 +42,7 @@ import org.scalatest.{Assertions, FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) -class OpLdaTest extends FlatSpec with TestSparkContext { +class OpLDATest extends FlatSpec with TestSparkContext { val inputData = Seq( (0.0, Vectors.sparse(11, Array(0, 1, 2, 4, 5, 6, 7, 10), Array(1.0, 2.0, 6.0, 2.0, 3.0, 1.0, 1.0, 3.0))), From 1bf74c08e84a65998583722ddb87d45dff1b1338 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Mon, 15 Apr 2019 16:02:12 -0700 Subject: [PATCH 02/11] rename OpLDATest everywhere --- .../op/stages/impl/feature/{OpLdaTest.scala => OpLDATest.scala} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename core/src/test/scala/com/salesforce/op/stages/impl/feature/{OpLdaTest.scala => OpLDATest.scala} (98%) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLdaTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLDATest.scala similarity index 98% rename from core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLdaTest.scala rename to core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLDATest.scala index a1ccd078de..df35c4192e 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLdaTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLDATest.scala @@ -78,7 +78,7 @@ class OpLDATest extends FlatSpec with TestSparkContext { .toSeq .map(_.getAs[Vector](0)) - Spec[OpLdaTest] should "convert document term vectors into topic vectors" in { + Spec[OpLDATest] should "convert document term vectors into topic vectors" in { val f2Vec = new OpLDA().setInput(f2).setK(k).setSeed(seed).setMaxIter(maxIter) val testTransformedData = f2Vec.fit(inputDS).transform(inputDS) val output = f2Vec.getOutput() From 9a5953a83977e8bd3a692d3a966d9f17691b08d5 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Mon, 15 Apr 2019 16:05:09 -0700 Subject: [PATCH 03/11] rename StandardScalar test to match --- ...tandardScalerTest.scala => OpScalarStandardScalerTest.scala} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename core/src/test/scala/com/salesforce/op/stages/impl/feature/{OpStandardScalerTest.scala => OpScalarStandardScalerTest.scala} (99%) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStandardScalerTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpScalarStandardScalerTest.scala similarity index 99% rename from core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStandardScalerTest.scala rename to core/src/test/scala/com/salesforce/op/stages/impl/feature/OpScalarStandardScalerTest.scala index 583556699c..a74ba04443 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStandardScalerTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpScalarStandardScalerTest.scala @@ -45,7 +45,7 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) -class OpStandardScalerTest extends FlatSpec with TestSparkContext { +class OpScalarStandardScalerTest extends FlatSpec with TestSparkContext { import spark.implicits._ // TODO: use TestFeatureBuilder instead From 26058eebb04dae9b7021c7a6028d8e0cb8836140 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Mon, 15 Apr 2019 16:07:14 -0700 Subject: [PATCH 04/11] rename TextMapPivotVectorizer test to match --- ...pVectorizerTest.scala => TextMapPivotVectorizerTest.scala} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename core/src/test/scala/com/salesforce/op/stages/impl/feature/{TextMapVectorizerTest.scala => TextMapPivotVectorizerTest.scala} (99%) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/TextMapVectorizerTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/TextMapPivotVectorizerTest.scala similarity index 99% rename from core/src/test/scala/com/salesforce/op/stages/impl/feature/TextMapVectorizerTest.scala rename to core/src/test/scala/com/salesforce/op/stages/impl/feature/TextMapPivotVectorizerTest.scala index fb388f9e9c..42f8c1203a 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/TextMapVectorizerTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/TextMapPivotVectorizerTest.scala @@ -44,9 +44,9 @@ import org.slf4j.LoggerFactory @RunWith(classOf[JUnitRunner]) -class TextMapVectorizerTest extends FlatSpec with TestSparkContext with AttributeAsserts { +class TextMapPivotVectorizerTest extends FlatSpec with TestSparkContext with AttributeAsserts { - val log = LoggerFactory.getLogger(classOf[TextMapVectorizerTest]) + val log = LoggerFactory.getLogger(classOf[TextMapPivotVectorizerTest]) lazy val (dataSet, top, bot) = TestFeatureBuilder("top", "bot", Seq( From ddc8de1441bcf1bd716223f13ab8f3f79fb30076 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Mon, 15 Apr 2019 16:09:48 -0700 Subject: [PATCH 05/11] rename Transmogrifier test to match --- .../{TransmogrifyTest.scala => TransmogrifierTest.scala} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename core/src/test/scala/com/salesforce/op/stages/impl/feature/{TransmogrifyTest.scala => TransmogrifierTest.scala} (97%) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/TransmogrifyTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/TransmogrifierTest.scala similarity index 97% rename from core/src/test/scala/com/salesforce/op/stages/impl/feature/TransmogrifyTest.scala rename to core/src/test/scala/com/salesforce/op/stages/impl/feature/TransmogrifierTest.scala index dd00952311..54df05a7b5 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/TransmogrifyTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/TransmogrifierTest.scala @@ -42,7 +42,7 @@ import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) -class TransmogrifyTest extends FlatSpec with PassengerSparkFixtureTest with AttributeAsserts { +class TransmogrifierTest extends FlatSpec with PassengerSparkFixtureTest with AttributeAsserts { val inputFeatures = Array[OPFeature](heightNoWindow, weight, gender) From 41c513f4f5375ee59693c04b35366fd36fd64148 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Mon, 15 Apr 2019 17:08:23 -0700 Subject: [PATCH 06/11] test for OpLDA not OpLDATest --- .../scala/com/salesforce/op/stages/impl/feature/OpLDATest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLDATest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLDATest.scala index df35c4192e..091bd316ab 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLDATest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpLDATest.scala @@ -78,7 +78,7 @@ class OpLDATest extends FlatSpec with TestSparkContext { .toSeq .map(_.getAs[Vector](0)) - Spec[OpLDATest] should "convert document term vectors into topic vectors" in { + Spec[OpLDA] should "convert document term vectors into topic vectors" in { val f2Vec = new OpLDA().setInput(f2).setK(k).setSeed(seed).setMaxIter(maxIter) val testTransformedData = f2Vec.fit(inputDS).transform(inputDS) val output = f2Vec.getOutput() From 038a0b0afd16ea3db1bfeaf78d3f6b911c0f9a82 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Wed, 17 Apr 2019 12:31:05 -0700 Subject: [PATCH 07/11] opindextostringnofiltertest extends optransformerspec --- .../feature/OpIndexToStringNoFilterTest.scala | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala index d7b9f1da5f..c48c8e99c0 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala @@ -32,50 +32,51 @@ package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.types._ -import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} +import com.salesforce.op.test.{OpTransformerSpec, TestFeatureBuilder} import com.salesforce.op.utils.spark.RichDataset._ import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner -import org.scalatest.{Assertions, FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) -class OpIndexToStringNoFilterTest extends FlatSpec with TestSparkContext { +class OpIndexToStringNoFilterTest extends OpTransformerSpec[Text, OpIndexToStringNoFilter] { - val (ds, indF) = TestFeatureBuilder(Seq(0.0, 2.0, 1.0, 0.0, 0.0, 1.0).map(_.toRealNN)) + val (inputData, indF) = TestFeatureBuilder(Seq(0.0, 2.0, 1.0, 0.0, 0.0, 1.0).map(_.toRealNN)) val labels = Array("a", "c", "b") + override val transformer: OpIndexToStringNoFilter = new OpIndexToStringNoFilter().setInput(indF).setLabels(labels) val expected = Array("a", "b", "c", "a", "a", "c").map(_.toText) + override val expectedResult: Seq[Text] = Array("a", "b", "c", "a", "a", "c").map(_.toText) val labelsNew = Array("a", "c") val expectedNew = Array("a", OpIndexToStringNoFilter.unseenDefault, "c", "a", "a", "c").map(_.toText) Spec[OpIndexToStringNoFilter] should "correctly deindex a numeric column" in { val indexToStr = new OpIndexToStringNoFilter().setInput(indF).setLabels(labels) - val strs = indexToStr.transform(ds).collect(indexToStr.getOutput()) + val strs = indexToStr.transform(inputData).collect(indexToStr.getOutput()) strs shouldBe expected } it should "correctly deindex a numeric column (shortcut)" in { val str = indF.deindexed(labels) - val strs = str.originStage.asInstanceOf[OpIndexToStringNoFilter].transform(ds).collect(str) + val strs = str.originStage.asInstanceOf[OpIndexToStringNoFilter].transform(inputData).collect(str) strs shouldBe expected val str2 = indF.deindexed(labels, handleInvalid = IndexToStringHandleInvalid.Error) - val strs2 = str2.originStage.asInstanceOf[OpIndexToString].transform(ds).collect(str2) + val strs2 = str2.originStage.asInstanceOf[OpIndexToString].transform(inputData).collect(str2) strs2 shouldBe expected } it should "correctly deindex even if the lables list does not match the number of indicies" in { val indexToStr = new OpIndexToStringNoFilter().setInput(indF).setLabels(labelsNew) - val strs = indexToStr.transform(ds).collect(indexToStr.getOutput()) + val strs = indexToStr.transform(inputData).collect(indexToStr.getOutput()) strs shouldBe expectedNew } Spec[OpIndexToString] should "correctly deindex a numeric column" in { val indexToStr = new OpIndexToString().setInput(indF).setLabels(labels) - val strs = indexToStr.transform(ds).collect(indexToStr.getOutput()) + val strs = indexToStr.transform(inputData).collect(indexToStr.getOutput()) strs shouldBe expected } From cf21083e8f04e8899e7c1bf50a42bc188087c4a3 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Wed, 17 Apr 2019 13:36:48 -0700 Subject: [PATCH 08/11] split into separate tests for nofilter --- .../feature/OpIndexToStringNoFilterTest.scala | 43 ++-------------- .../impl/feature/OpIndexToStringTest.scala | 51 +++++++++++++++++++ 2 files changed, 55 insertions(+), 39 deletions(-) create mode 100644 core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala index c48c8e99c0..bccaf4ce94 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala @@ -30,54 +30,19 @@ package com.salesforce.op.stages.impl.feature -import com.salesforce.op._ import com.salesforce.op.features.types._ import com.salesforce.op.test.{OpTransformerSpec, TestFeatureBuilder} -import com.salesforce.op.utils.spark.RichDataset._ import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class OpIndexToStringNoFilterTest extends OpTransformerSpec[Text, OpIndexToStringNoFilter] { - val (inputData, indF) = TestFeatureBuilder(Seq(0.0, 2.0, 1.0, 0.0, 0.0, 1.0).map(_.toRealNN)) - val labels = Array("a", "c", "b") - override val transformer: OpIndexToStringNoFilter = new OpIndexToStringNoFilter().setInput(indF).setLabels(labels) - val expected = Array("a", "b", "c", "a", "a", "c").map(_.toText) - override val expectedResult: Seq[Text] = Array("a", "b", "c", "a", "a", "c").map(_.toText) - - val labelsNew = Array("a", "c") - val expectedNew = Array("a", OpIndexToStringNoFilter.unseenDefault, "c", "a", "a", "c").map(_.toText) - - Spec[OpIndexToStringNoFilter] should "correctly deindex a numeric column" in { - val indexToStr = new OpIndexToStringNoFilter().setInput(indF).setLabels(labels) - val strs = indexToStr.transform(inputData).collect(indexToStr.getOutput()) - - strs shouldBe expected - } + val labels = Array("a", "c") - it should "correctly deindex a numeric column (shortcut)" in { - val str = indF.deindexed(labels) - val strs = str.originStage.asInstanceOf[OpIndexToStringNoFilter].transform(inputData).collect(str) - strs shouldBe expected - - val str2 = indF.deindexed(labels, handleInvalid = IndexToStringHandleInvalid.Error) - val strs2 = str2.originStage.asInstanceOf[OpIndexToString].transform(inputData).collect(str2) - strs2 shouldBe expected - } - - it should "correctly deindex even if the lables list does not match the number of indicies" in { - val indexToStr = new OpIndexToStringNoFilter().setInput(indF).setLabels(labelsNew) - val strs = indexToStr.transform(inputData).collect(indexToStr.getOutput()) - - strs shouldBe expectedNew - } - - Spec[OpIndexToString] should "correctly deindex a numeric column" in { - val indexToStr = new OpIndexToString().setInput(indF).setLabels(labels) - val strs = indexToStr.transform(inputData).collect(indexToStr.getOutput()) + override val transformer: OpIndexToStringNoFilter = new OpIndexToStringNoFilter().setInput(indF).setLabels(labels) - strs shouldBe expected - } + override val expectedResult: Seq[Text] = + Array("a", OpIndexToStringNoFilter.unseenDefault, "c", "a", "a", "c").map(_.toText) } diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala new file mode 100644 index 0000000000..88c0701eca --- /dev/null +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package com.salesforce.op.stages.impl.feature + +import com.salesforce.op.features.types._ +import com.salesforce.op.test.{OpTransformerSpec, TestFeatureBuilder} +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class OpIndexToStringTest extends OpTransformerSpec[Text, OpIndexToString] { + + val (inputData, indF) = TestFeatureBuilder(Seq(0.0, 2.0, 1.0, 0.0, 0.0, 1.0).map(_.toRealNN)) + val labels = Array("a", "c", "b") + + override val expectedResult: Seq[Text] = Array("a", "b", "c", "a", "a", "c").map(_.toText) + + override val transformer: OpIndexToString = new OpIndexToString().setInput(indF).setLabels(labels) + + it should "getLabels" in { + transformer.getLabels shouldBe labels + } +} From 432ee35cc5b7c54b3e7aebad652d12452a159c98 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Wed, 17 Apr 2019 14:42:39 -0700 Subject: [PATCH 09/11] fix tests --- .../feature/OpIndexToStringNoFilterTest.scala | 7 +++++++ .../impl/feature/OpIndexToStringTest.scala | 21 +++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala index bccaf4ce94..a9ea5e6ec4 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala @@ -32,6 +32,7 @@ package com.salesforce.op.stages.impl.feature import com.salesforce.op.features.types._ import com.salesforce.op.test.{OpTransformerSpec, TestFeatureBuilder} +import com.salesforce.op.utils.spark.RichDataset._ import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner @@ -45,4 +46,10 @@ class OpIndexToStringNoFilterTest extends OpTransformerSpec[Text, OpIndexToStrin override val expectedResult: Seq[Text] = Array("a", OpIndexToStringNoFilter.unseenDefault, "c", "a", "a", "c").map(_.toText) + + it should "correctly deindex a numeric column using shortcut" in { + val str2 = indF.deindexed(labels, handleInvalid = IndexToStringHandleInvalid.NoFilter) + val strs2 = str2.originStage.asInstanceOf[OpIndexToStringNoFilter].transform(inputData).collect(str2) + strs2 shouldBe expectedResult + } } diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala index 88c0701eca..47d41da9d9 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala @@ -31,19 +31,32 @@ package com.salesforce.op.stages.impl.feature import com.salesforce.op.features.types._ -import com.salesforce.op.test.{OpTransformerSpec, TestFeatureBuilder} +import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} +import com.salesforce.op.utils.spark.RichDataset._ import org.junit.runner.RunWith +import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) -class OpIndexToStringTest extends OpTransformerSpec[Text, OpIndexToString] { +class OpIndexToStringTest extends FlatSpec with TestSparkContext { val (inputData, indF) = TestFeatureBuilder(Seq(0.0, 2.0, 1.0, 0.0, 0.0, 1.0).map(_.toRealNN)) val labels = Array("a", "c", "b") - override val expectedResult: Seq[Text] = Array("a", "b", "c", "a", "a", "c").map(_.toText) + val expectedResult: Seq[Text] = Array("a", "b", "c", "a", "a", "c").map(_.toText) - override val transformer: OpIndexToString = new OpIndexToString().setInput(indF).setLabels(labels) + val transformer: OpIndexToString = new OpIndexToString().setInput(indF).setLabels(labels) + + Spec[OpIndexToString] should "correctly deindex a numeric column" in { + val strs = transformer.transform(inputData).collect(transformer.getOutput()) + strs shouldBe expectedResult + } + + it should "correctly deindex a numeric column (shortcut)" in { + val str = indF.deindexed(labels, handleInvalid = IndexToStringHandleInvalid.Error) + val strs = str.originStage.asInstanceOf[OpIndexToString].transform(inputData).collect(str) + strs shouldBe expectedResult + } it should "getLabels" in { transformer.getLabels shouldBe labels From 44ecb7a0110f27c0ded6e6f4ef58eea06f166f3c Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Wed, 17 Apr 2019 15:16:54 -0700 Subject: [PATCH 10/11] upgrade to use OpEstimatorSpec --- .../feature/OpStringIndexerNoFilterTest.scala | 40 ++++++++----------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStringIndexerNoFilterTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStringIndexerNoFilterTest.scala index b7590e02ce..38ee8a2acd 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStringIndexerNoFilterTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStringIndexerNoFilterTest.scala @@ -32,52 +32,46 @@ package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.types._ +import com.salesforce.op.stages.base.unary.UnaryModel import com.salesforce.op.stages.impl.feature.StringIndexerHandleInvalid.Skip import com.salesforce.op.stages.sparkwrappers.generic.SwUnaryModel -import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} +import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder} import com.salesforce.op.utils.spark.RichDataset._ import org.apache.spark.ml.feature.StringIndexerModel import org.junit.runner.RunWith -import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) -class OpStringIndexerNoFilterTest extends FlatSpec with TestSparkContext { +class OpStringIndexerNoFilterTest extends OpEstimatorSpec[RealNN, UnaryModel[Text, RealNN], OpStringIndexerNoFilter[Text]] { val txtData = Seq("a", "b", "c", "a", "a", "c").map(_.toText) - val (ds, txtF) = TestFeatureBuilder(txtData) - val expected = Array(0.0, 2.0, 1.0, 0.0, 0.0, 1.0).map(_.toRealNN) + val (inputData, txtF) = TestFeatureBuilder(txtData) + override val expectedResult: Seq[RealNN] = Array(0.0, 2.0, 1.0, 0.0, 0.0, 1.0).map(_.toRealNN) + + override val estimator: OpStringIndexerNoFilter[Text] = new OpStringIndexerNoFilter[Text]().setInput(txtF) val txtDataNew = Seq("a", "b", "c", "a", "a", "c", "d", "e").map(_.toText) - val (dsNew, txtFNew ) = TestFeatureBuilder(txtDataNew) + val (dsNew, txtFNew) = TestFeatureBuilder(txtDataNew) val expectedNew = Array(0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 3.0).map(_.toRealNN) - - Spec[OpStringIndexerNoFilter[_]] should "correctly index a text column" in { - val stringIndexer = new OpStringIndexerNoFilter[Text]().setInput(txtF) - val indices = stringIndexer.fit(ds).transform(ds).collect(stringIndexer.getOutput()) - - indices shouldBe expected - } - it should "correctly index a text column (shortcut)" in { val indexed = txtF.indexed() - val indices = indexed.originStage.asInstanceOf[OpStringIndexerNoFilter[_]].fit(ds).transform(ds).collect(indexed) - indices shouldBe expected + val indices = indexed.originStage.asInstanceOf[OpStringIndexerNoFilter[_]].fit(inputData).transform(inputData).collect(indexed) + indices shouldBe expectedResult val indexed2 = txtF.indexed(handleInvalid = Skip) - val indicesfit = indexed2.originStage.asInstanceOf[OpStringIndexer[_]].fit(ds) - val indices2 = indicesfit.transform(ds).collect(indexed2) + val indicesfit = indexed2.originStage.asInstanceOf[OpStringIndexer[_]].fit(inputData) + val indices2 = indicesfit.transform(inputData).collect(indexed2) val indices3 = indicesfit.asInstanceOf[SwUnaryModel[Text, RealNN, StringIndexerModel]] .setInput(txtFNew).transform(dsNew).collect(indexed2) - indices2 shouldBe expected - indices3 shouldBe expected + indices2 shouldBe expectedResult + indices3 shouldBe expectedResult } it should "correctly deinxed a numeric column" in { val indexed = txtF.indexed() - val indices = indexed.originStage.asInstanceOf[OpStringIndexerNoFilter[_]].fit(ds).transform(ds) + val indices = indexed.originStage.asInstanceOf[OpStringIndexerNoFilter[_]].fit(inputData).transform(inputData) val deindexed = indexed.deindexed() val deindexedData = deindexed.originStage.asInstanceOf[OpIndexToStringNoFilter] .transform(indices).collect(deindexed) @@ -85,9 +79,7 @@ class OpStringIndexerNoFilterTest extends FlatSpec with TestSparkContext { } it should "assign new strings to the unseen string category" in { - val stringIndexer = new OpStringIndexerNoFilter[Text]().setInput(txtF) - val indices = stringIndexer.fit(ds).setInput(txtFNew).transform(dsNew).collect(stringIndexer.getOutput()) - + val indices = estimator.fit(inputData).setInput(txtFNew).transform(dsNew).collect(estimator.getOutput()) indices shouldBe expectedNew } } From 17f7381219432cbd9d47b619e7f8595c7545c229 Mon Sep 17 00:00:00 2001 From: Chris Rupley Date: Wed, 17 Apr 2019 15:47:49 -0700 Subject: [PATCH 11/11] fix my style --- .../feature/OpIndexToStringNoFilterTest.scala | 10 +++++----- .../impl/feature/OpIndexToStringTest.scala | 18 +++++++++--------- .../feature/OpStringIndexerNoFilterTest.scala | 6 ++++-- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala index a9ea5e6ec4..c04937af45 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringNoFilterTest.scala @@ -47,9 +47,9 @@ class OpIndexToStringNoFilterTest extends OpTransformerSpec[Text, OpIndexToStrin override val expectedResult: Seq[Text] = Array("a", OpIndexToStringNoFilter.unseenDefault, "c", "a", "a", "c").map(_.toText) - it should "correctly deindex a numeric column using shortcut" in { - val str2 = indF.deindexed(labels, handleInvalid = IndexToStringHandleInvalid.NoFilter) - val strs2 = str2.originStage.asInstanceOf[OpIndexToStringNoFilter].transform(inputData).collect(str2) - strs2 shouldBe expectedResult - } + it should "correctly deindex a numeric column using shortcut" in { + val str2 = indF.deindexed(labels, handleInvalid = IndexToStringHandleInvalid.NoFilter) + val strs2 = str2.originStage.asInstanceOf[OpIndexToStringNoFilter].transform(inputData).collect(str2) + strs2 shouldBe expectedResult + } } diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala index 47d41da9d9..e1637ebde6 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpIndexToStringTest.scala @@ -47,16 +47,16 @@ class OpIndexToStringTest extends FlatSpec with TestSparkContext { val transformer: OpIndexToString = new OpIndexToString().setInput(indF).setLabels(labels) - Spec[OpIndexToString] should "correctly deindex a numeric column" in { - val strs = transformer.transform(inputData).collect(transformer.getOutput()) - strs shouldBe expectedResult - } + Spec[OpIndexToString] should "correctly deindex a numeric column" in { + val strs = transformer.transform(inputData).collect(transformer.getOutput()) + strs shouldBe expectedResult + } - it should "correctly deindex a numeric column (shortcut)" in { - val str = indF.deindexed(labels, handleInvalid = IndexToStringHandleInvalid.Error) - val strs = str.originStage.asInstanceOf[OpIndexToString].transform(inputData).collect(str) - strs shouldBe expectedResult - } + it should "correctly deindex a numeric column (shortcut)" in { + val str = indF.deindexed(labels, handleInvalid = IndexToStringHandleInvalid.Error) + val strs = str.originStage.asInstanceOf[OpIndexToString].transform(inputData).collect(str) + strs shouldBe expectedResult + } it should "getLabels" in { transformer.getLabels shouldBe labels diff --git a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStringIndexerNoFilterTest.scala b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStringIndexerNoFilterTest.scala index 38ee8a2acd..f2e01d12d8 100644 --- a/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStringIndexerNoFilterTest.scala +++ b/core/src/test/scala/com/salesforce/op/stages/impl/feature/OpStringIndexerNoFilterTest.scala @@ -43,7 +43,8 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) -class OpStringIndexerNoFilterTest extends OpEstimatorSpec[RealNN, UnaryModel[Text, RealNN], OpStringIndexerNoFilter[Text]] { +class OpStringIndexerNoFilterTest extends + OpEstimatorSpec[RealNN, UnaryModel[Text, RealNN], OpStringIndexerNoFilter[Text]] { val txtData = Seq("a", "b", "c", "a", "a", "c").map(_.toText) val (inputData, txtF) = TestFeatureBuilder(txtData) @@ -57,7 +58,8 @@ class OpStringIndexerNoFilterTest extends OpEstimatorSpec[RealNN, UnaryModel[Tex it should "correctly index a text column (shortcut)" in { val indexed = txtF.indexed() - val indices = indexed.originStage.asInstanceOf[OpStringIndexerNoFilter[_]].fit(inputData).transform(inputData).collect(indexed) + val indices = indexed.originStage.asInstanceOf[OpStringIndexerNoFilter[_]] + .fit(inputData).transform(inputData).collect(indexed) indices shouldBe expectedResult val indexed2 = txtF.indexed(handleInvalid = Skip)