From 6e3e4e017c75a075c13ff30ab8ceea9276902999 Mon Sep 17 00:00:00 2001 From: merzouk Date: Sun, 9 May 2021 16:25:44 +0200 Subject: [PATCH 1/3] LogisticRegressionTest contains error to clear --- .../github/jsarni/CaraStage/CaraStage.scala | 3 +- .../jsarni/CaraStage/CaraStageMapper.scala | 3 +- .../ModelStage/LogisticRegression.scala | 3 +- .../CaraStage/ModelStage/TestStage.scala | 25 ------------ .../jsarni/CaraStage/CaraStageTest.scala | 32 ++++++++++++++++ .../CaraStage/LogisticRegressionTest.scala | 38 +++++++++++++++++++ .../PipelineParser/CaraParserTest.scala | 18 +-------- 7 files changed, 76 insertions(+), 46 deletions(-) delete mode 100644 src/main/scala/io/github/jsarni/CaraStage/ModelStage/TestStage.scala create mode 100644 src/test/scala/io/github/jsarni/CaraStage/CaraStageTest.scala create mode 100644 src/test/scala/io/github/jsarni/CaraStage/LogisticRegressionTest.scala diff --git a/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala b/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala index f69f67d..a0c0260 100644 --- a/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala +++ b/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala @@ -1,11 +1,12 @@ package io.github.jsarni.CaraStage import org.apache.spark.ml.PipelineStage +import scala.util.Try trait CaraStage { //TODO: Add builder function - def build(): PipelineStage + def build(): Try[PipelineStage] // Function to get methode by name and do invoke with the right params types and values def GetMethode(lr : PipelineStage, field : Any, field_name : String) = { diff --git a/src/main/scala/io/github/jsarni/CaraStage/CaraStageMapper.scala b/src/main/scala/io/github/jsarni/CaraStage/CaraStageMapper.scala index 516101b..0f153b7 100644 --- a/src/main/scala/io/github/jsarni/CaraStage/CaraStageMapper.scala +++ b/src/main/scala/io/github/jsarni/CaraStage/CaraStageMapper.scala @@ -1,13 +1,12 @@ package io.github.jsarni.CaraStage import io.github.jsarni.CaraStage.DatasetStage.CaraDataset -import io.github.jsarni.CaraStage.ModelStage.{CaraModel, LogisticRegression, TestStage} +import io.github.jsarni.CaraStage.ModelStage.{CaraModel, LogisticRegression} trait CaraStageMapper { def mapModelStage(stageDescription: CaraStageDescription): CaraModel = { stageDescription.stageName match { - case "TestStage" => TestStage(stageDescription.params) case "LogisticRegression" => LogisticRegression(stageDescription.params) case _ => throw new Exception(s"${stageDescription.stageName} is not a valid Model stage name. Please verify the corresponding Yaml File") diff --git a/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala b/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala index 2729654..3c66823 100644 --- a/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala +++ b/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala @@ -2,6 +2,7 @@ package io.github.jsarni.CaraStage.ModelStage import io.github.jsarni.CaraStage.Annotation.MapperConstructor import org.apache.spark.ml.PipelineStage import org.apache.spark.ml.classification.{LogisticRegression => log} +import scala.util.Try @@ -31,7 +32,7 @@ case class LogisticRegression(MaxIter: Option[Int], RegParam: Option[Double], El ) } - override def build(): PipelineStage = { + override def build(): Try[PipelineStage] = Try { val lr = new log() val definedFields = this.getClass.getDeclaredFields.filter(f => f.get(this).asInstanceOf[Option[Any]].isDefined) val names = definedFields.map(f => f.getName) diff --git a/src/main/scala/io/github/jsarni/CaraStage/ModelStage/TestStage.scala b/src/main/scala/io/github/jsarni/CaraStage/ModelStage/TestStage.scala deleted file mode 100644 index ecd9547..0000000 --- a/src/main/scala/io/github/jsarni/CaraStage/ModelStage/TestStage.scala +++ /dev/null @@ -1,25 +0,0 @@ -package io.github.jsarni.CaraStage.ModelStage - -import io.github.jsarni.CaraStage.Annotation.MapperConstructor -import org.apache.spark.ml.PipelineStage - - -case class TestStage(MaxIter: Option[Int], RegParam: Option[Double], ElasticNetParam: Option[Double]) - extends CaraModel { - - @MapperConstructor - def this(params: Map[String, String]) = { - this( - params.get("MaxIter").map(_.toInt), - params.get("RegParam").map(_.toDouble), - params.get("ElasticNetParam").map(_.toDouble) - ) - } - - override def build(): PipelineStage = ??? -} - -object TestStage { - def apply(params: Map[String, String]): TestStage = new TestStage(params) -} - diff --git a/src/test/scala/io/github/jsarni/CaraStage/CaraStageTest.scala b/src/test/scala/io/github/jsarni/CaraStage/CaraStageTest.scala new file mode 100644 index 0000000..f7480ea --- /dev/null +++ b/src/test/scala/io/github/jsarni/CaraStage/CaraStageTest.scala @@ -0,0 +1,32 @@ +package io.github.jsarni.CaraStage + +import io.github.jsarni.TestBase +import io.github.jsarni.CaraStage.ModelStage.{CaraModel, LogisticRegression} +import org.apache.spark.ml.classification.{LogisticRegression => log} +// +//import java.io.FileNotFoundException +//import scala.io.Source +//import scala.util.Try + +class CaraStageTest extends TestBase { + + "GetMethode" should("Return the appropriate methode by it's") + + + "LogisticRegression" should("Create an lr model and set all parameters with there args values") in { +// val basicParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial") + val allParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" + , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" + , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") + val r = new log() +// println(r.getClass.getMethod("GetMaxIter", Int.getClass )) + +// val lr = LogisticRegression(basicParams) + val lr1 = LogisticRegression( allParams) +// print(lr1.asInstanceOf[log]) +// lr.build() + val model = lr1.build() + model + + } +} diff --git a/src/test/scala/io/github/jsarni/CaraStage/LogisticRegressionTest.scala b/src/test/scala/io/github/jsarni/CaraStage/LogisticRegressionTest.scala new file mode 100644 index 0000000..0a38f01 --- /dev/null +++ b/src/test/scala/io/github/jsarni/CaraStage/LogisticRegressionTest.scala @@ -0,0 +1,38 @@ +package io.github.jsarni.CaraStage + +import io.github.jsarni.TestBase +import io.github.jsarni.CaraStage.ModelStage.{CaraModel, LogisticRegression} +import org.apache.spark.ml.classification.{LogisticRegression => log} +// +//import java.io.FileNotFoundException +//import scala.io.Source +import scala.util.{Try,Success, Failure} + +class LogisticRegressionTest extends TestBase { + + + "build" should("Create an lr model and set all parameters with there args values") in { + val basicParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial") + val allParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" + , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" + , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") + val lr = LogisticRegression( basicParams) + val model = lr.build().asInstanceOf[log] + + model.getMaxIter shouldBe 10 + model.getRegParam shouldBe 0.3 + model.getElasticNetParam shouldBe 0.1 + model.getFamily shouldBe "multinomial" + + } + "GetMethode" should("Return the appropriate methode by it's name") in { + val allParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" + , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" + , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") + val lg = new log() + val lr = LogisticRegression( allParams) +// val model =Success(lr.build()).asInstanceOf[log] +// println(lr.GetMethode(lg,10,"MaxIter").getName) + } +} + diff --git a/src/test/scala/io/github/jsarni/PipelineParser/CaraParserTest.scala b/src/test/scala/io/github/jsarni/PipelineParser/CaraParserTest.scala index 500ddfb..28e3b92 100644 --- a/src/test/scala/io/github/jsarni/PipelineParser/CaraParserTest.scala +++ b/src/test/scala/io/github/jsarni/PipelineParser/CaraParserTest.scala @@ -2,7 +2,7 @@ package io.github.jsarni.PipelineParser import io.github.jsarni.TestBase import io.github.jsarni.CaraStage.CaraStageDescription -import io.github.jsarni.CaraStage.ModelStage.{TestStage, LogisticRegression} +import io.github.jsarni.CaraStage.ModelStage.{ LogisticRegression} import io.github.jsarni.CaraYaml.{DatasetYaml, ModelYaml} import org.codehaus.jackson.JsonNode import io.github.jsarni.PipelineParser.CaraParser @@ -102,22 +102,6 @@ class CaraParserTest extends TestBase { val res = modelParser.parseStageMap(stageDesc) - res.isInstanceOf[TestStage] shouldBe true - res.asInstanceOf[TestStage].MaxIter shouldBe params.get("MaxIter").map(_.toInt) - res.asInstanceOf[TestStage].RegParam shouldBe params.get("RegParam").map(_.toDouble) - res.asInstanceOf[TestStage].ElasticNetParam shouldBe params.get("ElasticNetParam").map(_.toDouble) - } - "LogisticRegression" should("Create an lr model and set all parameters with there args values") in { - val basicParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial") - val allParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" - , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" - , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") - - val lr = LogisticRegression( basicParams) - val lr1 = LogisticRegression( allParams) - lr.build() - lr1.build() - } } From 656579e6b81920c4ae22e142f2bfe52abb1b3c3a Mon Sep 17 00:00:00 2001 From: merzouk Date: Sun, 9 May 2021 21:09:26 +0200 Subject: [PATCH 2/3] Finalize LogisticRegression's class and tests --- .../github/jsarni/CaraStage/CaraStage.scala | 4 +- .../ModelStage/LogisticRegression.scala | 16 +++---- .../jsarni/PipelineParser/CaraParser.scala | 2 +- .../jsarni/CaraStage/CaraStageTest.scala | 32 ------------- .../CaraStage/LogisticRegressionTest.scala | 38 --------------- .../ModelStage/LogisticRegressionTest.scala | 47 +++++++++++++++++++ 6 files changed, 58 insertions(+), 81 deletions(-) delete mode 100644 src/test/scala/io/github/jsarni/CaraStage/CaraStageTest.scala delete mode 100644 src/test/scala/io/github/jsarni/CaraStage/LogisticRegressionTest.scala create mode 100644 src/test/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegressionTest.scala diff --git a/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala b/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala index a0c0260..cf4c097 100644 --- a/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala +++ b/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala @@ -26,8 +26,8 @@ trait CaraStage { case _ : java.lang.Short => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Short].getClass ) case _ : java.lang.Character => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Char].getClass ) case _ : java.lang.Byte => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Byte].getClass ) - case _ :java.lang.Long => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Long].getClass) - case _: java.lang.Integer => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Int].getClass) + case _ : java.lang.Long => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Long].getClass) + case _ : java.lang.Integer => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Int].getClass) case _ : java.lang.String => lr.getClass.getMethod(MethodeName, field.getClass ) } } diff --git a/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala b/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala index 3c66823..bb76a7b 100644 --- a/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala +++ b/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala @@ -1,7 +1,7 @@ package io.github.jsarni.CaraStage.ModelStage import io.github.jsarni.CaraStage.Annotation.MapperConstructor import org.apache.spark.ml.PipelineStage -import org.apache.spark.ml.classification.{LogisticRegression => log} +import org.apache.spark.ml.classification.{LogisticRegression => SparkLR} import scala.util.Try @@ -18,22 +18,22 @@ case class LogisticRegression(MaxIter: Option[Int], RegParam: Option[Double], El params.get("MaxIter").map(_.toInt), params.get("RegParam").map(_.toDouble), params.get("ElasticNetParam").map(_.toDouble), - params.get("Family").map(_.toString), - params.get("FeaturesCol").map(_.toString), + params.get("Family"), + params.get("FeaturesCol"), params.get("FitIntercept").map(_.toBoolean), - params.get("PredictionCol").map(_.toString), - params.get("ProbabilityCol").map(_.toString), - params.get("RawPredictionCol").map(_.toString), + params.get("PredictionCol"), + params.get("ProbabilityCol"), + params.get("RawPredictionCol"), params.get("Standardization").map(_.toBoolean), params.get("Thresholds").map(_.split(",").map(_.toDouble)), params.get("Tol").map(_.toDouble), - params.get("WeightCol").map(_.toString) + params.get("WeightCol") ) } override def build(): Try[PipelineStage] = Try { - val lr = new log() + val lr = new SparkLR() val definedFields = this.getClass.getDeclaredFields.filter(f => f.get(this).asInstanceOf[Option[Any]].isDefined) val names = definedFields.map(f => f.getName) val values = definedFields.map(f => f.get(this)) diff --git a/src/main/scala/io/github/jsarni/PipelineParser/CaraParser.scala b/src/main/scala/io/github/jsarni/PipelineParser/CaraParser.scala index 6b0b46c..9492611 100644 --- a/src/main/scala/io/github/jsarni/PipelineParser/CaraParser.scala +++ b/src/main/scala/io/github/jsarni/PipelineParser/CaraParser.scala @@ -66,7 +66,7 @@ class CaraParser(caraYaml: CaraYaml) extends ParserUtils with CaraStageMapper{ } private[PipelineParser] def buildStages(stagesList: List[CaraStage]): Try[List[PipelineStage]] = { - Try(stagesList.map(_.build())) + Try(stagesList.map(_.build().get)) } private[PipelineParser] def buildPipeline(mlStages: List[PipelineStage]): Try[Pipeline] = { diff --git a/src/test/scala/io/github/jsarni/CaraStage/CaraStageTest.scala b/src/test/scala/io/github/jsarni/CaraStage/CaraStageTest.scala deleted file mode 100644 index f7480ea..0000000 --- a/src/test/scala/io/github/jsarni/CaraStage/CaraStageTest.scala +++ /dev/null @@ -1,32 +0,0 @@ -package io.github.jsarni.CaraStage - -import io.github.jsarni.TestBase -import io.github.jsarni.CaraStage.ModelStage.{CaraModel, LogisticRegression} -import org.apache.spark.ml.classification.{LogisticRegression => log} -// -//import java.io.FileNotFoundException -//import scala.io.Source -//import scala.util.Try - -class CaraStageTest extends TestBase { - - "GetMethode" should("Return the appropriate methode by it's") - - - "LogisticRegression" should("Create an lr model and set all parameters with there args values") in { -// val basicParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial") - val allParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" - , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" - , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") - val r = new log() -// println(r.getClass.getMethod("GetMaxIter", Int.getClass )) - -// val lr = LogisticRegression(basicParams) - val lr1 = LogisticRegression( allParams) -// print(lr1.asInstanceOf[log]) -// lr.build() - val model = lr1.build() - model - - } -} diff --git a/src/test/scala/io/github/jsarni/CaraStage/LogisticRegressionTest.scala b/src/test/scala/io/github/jsarni/CaraStage/LogisticRegressionTest.scala deleted file mode 100644 index 0a38f01..0000000 --- a/src/test/scala/io/github/jsarni/CaraStage/LogisticRegressionTest.scala +++ /dev/null @@ -1,38 +0,0 @@ -package io.github.jsarni.CaraStage - -import io.github.jsarni.TestBase -import io.github.jsarni.CaraStage.ModelStage.{CaraModel, LogisticRegression} -import org.apache.spark.ml.classification.{LogisticRegression => log} -// -//import java.io.FileNotFoundException -//import scala.io.Source -import scala.util.{Try,Success, Failure} - -class LogisticRegressionTest extends TestBase { - - - "build" should("Create an lr model and set all parameters with there args values") in { - val basicParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial") - val allParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" - , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" - , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") - val lr = LogisticRegression( basicParams) - val model = lr.build().asInstanceOf[log] - - model.getMaxIter shouldBe 10 - model.getRegParam shouldBe 0.3 - model.getElasticNetParam shouldBe 0.1 - model.getFamily shouldBe "multinomial" - - } - "GetMethode" should("Return the appropriate methode by it's name") in { - val allParams = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" - , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" - , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") - val lg = new log() - val lr = LogisticRegression( allParams) -// val model =Success(lr.build()).asInstanceOf[log] -// println(lr.GetMethode(lg,10,"MaxIter").getName) - } -} - diff --git a/src/test/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegressionTest.scala b/src/test/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegressionTest.scala new file mode 100644 index 0000000..d9c4e49 --- /dev/null +++ b/src/test/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegressionTest.scala @@ -0,0 +1,47 @@ +package io.github.jsarni.CaraStage.ModelStage + +import io.github.jsarni.TestBase +import org.apache.spark.ml.classification.{LogisticRegression => SparkLR} +import scala.util.{Failure, Success, Try} + + +class LogisticRegressionTest extends TestBase { + + "build" should("Create an lr model and set all parameters with there args values or set default ones") in { + val params = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" + , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" + , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") + val lr = LogisticRegression( params) + + val expectedResult = List( + new SparkLR().setMaxIter(10).setRegParam(0.3).setElasticNetParam(0.1).setFamily("multinomial").setFeaturesCol("FeatureColname").setFitIntercept(true) + .setPredictionCol("Age").setProbabilityCol("ProbaColname").setRawPredictionCol("RawPredictColname").setStandardization(true).setTol(0.13) + .setWeightCol("WeightColname") + ) + val res = List (lr.build().get) + val resParameters = res.map(_.extractParamMap().toSeq.map(_.value)) + val expectedParameters = expectedResult.map(_.extractParamMap().toSeq.map(_.value)) + + resParameters.head should contain theSameElementsAs expectedParameters.head + + val lrWithTwoParams = new SparkLR().setRegParam(0.8).setStandardization(false) + + lrWithTwoParams.getMaxIter shouldBe 100 + lrWithTwoParams.getFamily shouldBe "auto" + lrWithTwoParams.getTol shouldBe 0.000001 + + } + "GetMethode" should("Return the appropriate methode by it's name") in { + val params = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" + , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" + , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") + val lr = LogisticRegression(params) + val model =lr.build().get.asInstanceOf[SparkLR] + + lr.GetMethode(model,10,"MaxIter").getName shouldBe "setMaxIter" + lr.GetMethode(model,0.0,"RegParam").getName shouldBe "setRegParam" + lr.GetMethode(model, false ,"Standardization").getName shouldBe "setStandardization" + + } +} + From 58149aa481dcb4aeaa1abfd65d8418a2960f6668 Mon Sep 17 00:00:00 2001 From: merzouk Date: Sat, 15 May 2021 13:43:04 +0200 Subject: [PATCH 3/3] refactor names to caml case and correct spaces --- .../github/jsarni/CaraStage/CaraStage.scala | 40 +++++----- .../ModelStage/LogisticRegression.scala | 2 +- .../ModelStage/LogisticRegressionTest.scala | 74 ++++++++++++++----- 3 files changed, 75 insertions(+), 41 deletions(-) diff --git a/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala b/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala index cf4c097..4b36dfa 100644 --- a/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala +++ b/src/main/scala/io/github/jsarni/CaraStage/CaraStage.scala @@ -1,34 +1,34 @@ package io.github.jsarni.CaraStage import org.apache.spark.ml.PipelineStage +import java.lang.reflect.Method import scala.util.Try trait CaraStage { - //TODO: Add builder function def build(): Try[PipelineStage] // Function to get methode by name and do invoke with the right params types and values - def GetMethode(lr : PipelineStage, field : Any, field_name : String) = { - val MethodeName = "set"+field_name + def getMethode(stage : PipelineStage, field : Any, fieldName : String): Method = { + val methodeName = "set" + fieldName field match { - case _ : Any if field.getClass == Array[Double]().getClass => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Array[Double]].getClass ) - case _ : Any if field.getClass == Array[String]().getClass => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Array[String]].getClass ) - case _ : Any if field.getClass == Array[Float]().getClass => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Array[Float]].getClass ) - case _ : Any if field.getClass == Array[Short]().getClass => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Array[Short]].getClass ) - case _ : Any if field.getClass == Array[Char]().getClass => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Array[Char]].getClass ) - case _ : Any if field.getClass == Array[Byte]().getClass => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Array[Byte]].getClass ) - case _ : Any if field.getClass == Array[Long]().getClass => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Array[Long]].getClass ) - case _ : Any if field.getClass == Array[Int]().getClass => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Array[Int]].getClass ) - case _ : java.lang.Boolean => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Boolean].getClass ) - case _ : java.lang.Double => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Double].getClass ) - case _ : java.lang.Float => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Float].getClass ) - case _ : java.lang.Short => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Short].getClass ) - case _ : java.lang.Character => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Char].getClass ) - case _ : java.lang.Byte => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Byte].getClass ) - case _ : java.lang.Long => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Long].getClass) - case _ : java.lang.Integer => lr.getClass.getMethod(MethodeName, field.asInstanceOf[Int].getClass) - case _ : java.lang.String => lr.getClass.getMethod(MethodeName, field.getClass ) + case _ : Any if field.getClass == Array[Double]().getClass => stage.getClass.getMethod(methodeName, field.asInstanceOf[Array[Double]].getClass ) + case _ : Any if field.getClass == Array[String]().getClass => stage.getClass.getMethod(methodeName, field.asInstanceOf[Array[String]].getClass ) + case _ : Any if field.getClass == Array[Float]().getClass => stage.getClass.getMethod(methodeName, field.asInstanceOf[Array[Float]].getClass ) + case _ : Any if field.getClass == Array[Short]().getClass => stage.getClass.getMethod(methodeName, field.asInstanceOf[Array[Short]].getClass ) + case _ : Any if field.getClass == Array[Char]().getClass => stage.getClass.getMethod(methodeName, field.asInstanceOf[Array[Char]].getClass ) + case _ : Any if field.getClass == Array[Byte]().getClass => stage.getClass.getMethod(methodeName, field.asInstanceOf[Array[Byte]].getClass ) + case _ : Any if field.getClass == Array[Long]().getClass => stage.getClass.getMethod(methodeName, field.asInstanceOf[Array[Long]].getClass ) + case _ : Any if field.getClass == Array[Int]().getClass => stage.getClass.getMethod(methodeName, field.asInstanceOf[Array[Int]].getClass ) + case _ : java.lang.Boolean => stage.getClass.getMethod(methodeName, field.asInstanceOf[Boolean].getClass ) + case _ : java.lang.Double => stage.getClass.getMethod(methodeName, field.asInstanceOf[Double].getClass ) + case _ : java.lang.Float => stage.getClass.getMethod(methodeName, field.asInstanceOf[Float].getClass ) + case _ : java.lang.Short => stage.getClass.getMethod(methodeName, field.asInstanceOf[Short].getClass ) + case _ : java.lang.Character => stage.getClass.getMethod(methodeName, field.asInstanceOf[Char].getClass ) + case _ : java.lang.Byte => stage.getClass.getMethod(methodeName, field.asInstanceOf[Byte].getClass ) + case _ : java.lang.Long => stage.getClass.getMethod(methodeName, field.asInstanceOf[Long].getClass) + case _ : java.lang.Integer => stage.getClass.getMethod(methodeName, field.asInstanceOf[Int].getClass) + case _ : java.lang.String => stage.getClass.getMethod(methodeName, field.getClass ) } } diff --git a/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala b/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala index bb76a7b..68a5d20 100644 --- a/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala +++ b/src/main/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegression.scala @@ -38,7 +38,7 @@ case class LogisticRegression(MaxIter: Option[Int], RegParam: Option[Double], El val names = definedFields.map(f => f.getName) val values = definedFields.map(f => f.get(this)) val zipFields = names zip values - zipFields.map(f=> GetMethode(lr,f._2 match {case Some(s) => s },f._1).invoke(lr,(f._2 match {case Some(value) => value.asInstanceOf[f._2.type ] }))) + zipFields.map(f=> getMethode(lr,f._2 match {case Some(s) => s },f._1).invoke(lr,(f._2 match {case Some(value) => value.asInstanceOf[f._2.type ] }))) lr } diff --git a/src/test/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegressionTest.scala b/src/test/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegressionTest.scala index d9c4e49..e24cfa8 100644 --- a/src/test/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegressionTest.scala +++ b/src/test/scala/io/github/jsarni/CaraStage/ModelStage/LogisticRegressionTest.scala @@ -2,45 +2,79 @@ package io.github.jsarni.CaraStage.ModelStage import io.github.jsarni.TestBase import org.apache.spark.ml.classification.{LogisticRegression => SparkLR} -import scala.util.{Failure, Success, Try} class LogisticRegressionTest extends TestBase { - "build" should("Create an lr model and set all parameters with there args values or set default ones") in { - val params = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" - , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" - , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") - val lr = LogisticRegression( params) + "build" should "Create an lr model and set all parameters with there args values or set default ones" in { + val params = Map( + "MaxIter" -> "10", + "RegParam" -> "0.3", + "ElasticNetParam" -> "0.1", + "Family" -> "multinomial", + "FeaturesCol" -> "FeatureColname", + "FitIntercept" -> "True", + "PredictionCol" -> "Age", + "ProbabilityCol" -> "ProbaColname", + "RawPredictionCol"-> "RawPredictColname", + "Standardization" -> "True", + "Tol" -> "0.13", + "WeightCol" -> "WeightColname" + ) + val lr = LogisticRegression(params) + val lrWithTwoParams = new SparkLR() + .setRegParam(0.8) + .setStandardization(false) val expectedResult = List( - new SparkLR().setMaxIter(10).setRegParam(0.3).setElasticNetParam(0.1).setFamily("multinomial").setFeaturesCol("FeatureColname").setFitIntercept(true) - .setPredictionCol("Age").setProbabilityCol("ProbaColname").setRawPredictionCol("RawPredictColname").setStandardization(true).setTol(0.13) + new SparkLR() + .setMaxIter(10) + .setRegParam(0.3) + .setElasticNetParam(0.1) + .setFamily("multinomial") + .setFeaturesCol("FeatureColname") + .setFitIntercept(true) + .setPredictionCol("Age") + .setProbabilityCol("ProbaColname") + .setRawPredictionCol("RawPredictColname") + .setStandardization(true).setTol(0.13) .setWeightCol("WeightColname") ) - val res = List (lr.build().get) + lr.build().isSuccess shouldBe true + + val res = List(lr.build().get) val resParameters = res.map(_.extractParamMap().toSeq.map(_.value)) val expectedParameters = expectedResult.map(_.extractParamMap().toSeq.map(_.value)) resParameters.head should contain theSameElementsAs expectedParameters.head - val lrWithTwoParams = new SparkLR().setRegParam(0.8).setStandardization(false) - +// Test default values of unset params lrWithTwoParams.getMaxIter shouldBe 100 lrWithTwoParams.getFamily shouldBe "auto" lrWithTwoParams.getTol shouldBe 0.000001 } - "GetMethode" should("Return the appropriate methode by it's name") in { - val params = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1", "Family" -> "multinomial" ,"FeaturesCol" -> "FeatureColname" - , "FitIntercept" -> "True", "PredictionCol" -> "Age", "ProbabilityCol" -> "ProbaColname", "RawPredictionCol"-> "RawPredictColname" - , "Standardization" -> "True" , "Tol" -> "0.13", "WeightCol" -> "WeightColname") - val lr = LogisticRegression(params) - val model =lr.build().get.asInstanceOf[SparkLR] + "GetMethode" should "Return the appropriate methode by it's name" in { + val params = Map( + "MaxIter" -> "10", + "RegParam" -> "0.3", + "ElasticNetParam" -> "0.1", + "Family" -> "multinomial", + "FeaturesCol" -> "FeatureColname", + "FitIntercept" -> "True", + "PredictionCol" -> "Age", + "ProbabilityCol" -> "ProbaColname", + "RawPredictionCol"-> "RawPredictColname", + "Standardization" -> "True", + "Tol" -> "0.13", + "WeightCol" -> "WeightColname" + ) + val caraLr = LogisticRegression(params) + val model =caraLr.build().get.asInstanceOf[SparkLR] - lr.GetMethode(model,10,"MaxIter").getName shouldBe "setMaxIter" - lr.GetMethode(model,0.0,"RegParam").getName shouldBe "setRegParam" - lr.GetMethode(model, false ,"Standardization").getName shouldBe "setStandardization" + caraLr.getMethode(model,10,"MaxIter").getName shouldBe "setMaxIter" + caraLr.getMethode(model,0.0,"RegParam").getName shouldBe "setRegParam" + caraLr.getMethode(model, false ,"Standardization").getName shouldBe "setStandardization" } }