Skip to content

Commit

Permalink
finalize class LinearRegression plus tests (#11)
Browse files Browse the repository at this point in the history
Co-authored-by: merzouk <merzoukoumedda@gmail.com>
  • Loading branch information
merzouk13 and merzouk committed Jun 5, 2021
1 parent cbcdd91 commit b0c71fa
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 0 deletions.
Expand Up @@ -17,6 +17,8 @@ trait CaraStageMapper {
stageDescription.stageName match {
case "LogisticRegression" =>
LogisticRegression(stageDescription.params)
case "LinearRegression" =>
LinearRegression(stageDescription.params)
case _ => throw
new Exception(s"${stageDescription.stageName} is not a valid Cara Stage name. Please verify your Yaml File")
}
Expand Down
@@ -0,0 +1,53 @@
package io.github.jsarni.CaraStage.ModelStage
import io.github.jsarni.CaraStage.Annotation.MapperConstructor
import org.apache.spark.ml.PipelineStage
import org.apache.spark.ml.regression.{LinearRegression => SparkLR}
import scala.util.Try



/**
 * Cara stage describing a Spark ML linear regression estimator.
 *
 * Every constructor parameter is optional. [[build]] creates a fresh Spark estimator and
 * forwards only the parameters that are defined; the others are left exactly as Spark
 * initialised them.
 *
 * The parameter names are significant: `build` passes each field name as-is to
 * `getMethode` (inherited, not defined in this file) to look up the setter to invoke.
 * NOTE(review): presumably this resolves `set<FieldName>` on the estimator -- confirm in
 * `CaraModel` before renaming any field.
 */
case class LinearRegression(
  MaxIter: Option[Int],
  RegParam: Option[Double],
  ElasticNetParam: Option[Double],
  LabelCol: Option[String],
  Loss: Option[String],
  FitIntercept: Option[Boolean],
  PredictionCol: Option[String],
  FeaturesCol: Option[String],
  Solver: Option[String],
  Standardization: Option[Boolean],
  Tol: Option[Double],
  WeightCol: Option[String]
) extends CaraModel {

  /**
   * Creates the stage from the raw string parameters of a stage description.
   *
   * Keys are looked up case-sensitively under the field names; a missing key yields `None`.
   * Numeric and boolean values are parsed with `toInt` / `toDouble` / `toBoolean`, so a
   * malformed value makes this constructor throw.
   *
   * @param params parameter name -> raw string value
   */
  @MapperConstructor
  def this(params: Map[String, String]) =
    this(
      params.get("MaxIter").map(_.toInt),
      params.get("RegParam").map(_.toDouble),
      params.get("ElasticNetParam").map(_.toDouble),
      params.get("LabelCol"),
      params.get("Loss"),
      params.get("FitIntercept").map(_.toBoolean),
      params.get("PredictionCol"),
      params.get("FeaturesCol"),
      params.get("Solver"),
      params.get("Standardization").map(_.toBoolean),
      params.get("Tol").map(_.toDouble),
      params.get("WeightCol")
    )

  /**
   * Instantiates the Spark estimator and applies every defined parameter to it.
   *
   * @return `Success` holding the configured estimator, or `Failure` wrapping whatever the
   *         reflective field read, the setter lookup or the setter invocation threw
   */
  override def build(): Try[PipelineStage] = Try {
    val lr = new SparkLR()

    // Read each declared field exactly once. The reflective read stays inside a plain
    // lambda, as in the previous implementation, so the access to this class's own
    // private fields is performed from the same place as before.
    val declaredValues = this.getClass.getDeclaredFields.map(field => field.getName -> field.get(this))

    // Keep only fields holding a defined Option. Anything else (None, or a field that is
    // not an Option at all) is skipped instead of failing on a cast.
    val definedParams = declaredValues.collect { case (name, Some(value)) => name -> value }

    // Side effects only: look up the setter for each parameter and apply it.
    definedParams.foreach { case (name, value) =>
      // Method.invoke takes Object arguments; values coming out of an Option are already boxed.
      getMethode(lr, value, name).invoke(lr, value.asInstanceOf[AnyRef])
    }

    lr
  }
}
/** Companion providing a factory that builds a [[LinearRegression]] from raw string parameters. */
object LinearRegression {

  /**
   * Delegates to the `Map`-based auxiliary constructor of the class.
   *
   * @param params parameter name -> raw string value
   * @return the stage created by `new LinearRegression(params)`
   */
  def apply(params: Map[String, String]): LinearRegression =
    new LinearRegression(params)
}





@@ -0,0 +1,81 @@
package io.github.jsarni.CaraStage.ModelStage
import org.apache.spark.ml.regression.{LinearRegression => SparkLR}
import io.github.jsarni.TestBase

/** Tests for the Cara `LinearRegression` stage and the Spark estimator it builds. */
class LinearRegressionTest extends TestBase {

  // Every supported parameter, given as raw strings the way a stage description supplies them.
  private val allParams = Map(
    "MaxIter" -> "10",
    "RegParam" -> "0.3",
    "ElasticNetParam" -> "0.1",
    "FeaturesCol" -> "FeatureColname",
    "FitIntercept" -> "True",
    "PredictionCol" -> "Age",
    "Standardization" -> "True",
    "Tol" -> "0.13",
    "WeightCol" -> "WeightColname",
    "Loss" -> "huber",
    "Solver" -> "normal",
    "LabelCol" -> "LabelCol"
  )

  "build" should "Create an lr model and set all parameters with there args values or set default ones" in {
    val expected = new SparkLR()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.1)
      .setFeaturesCol("FeatureColname")
      .setFitIntercept(true)
      .setPredictionCol("Age")
      .setStandardization(true)
      .setTol(0.13)
      .setWeightCol("WeightColname")
      .setLoss("huber")
      .setSolver("normal")
      .setLabelCol("LabelCol")

    // Build once and reuse the result for every assertion below.
    val built = LinearRegression(allParams).build()
    built.isSuccess shouldBe true

    // Compare (parameter name -> value) pairs rather than bare values, so a value that was
    // applied to the wrong parameter cannot make the comparison pass by accident.
    val builtParams = built.get.extractParamMap().toSeq.map(pair => pair.param.name -> pair.value)
    val expectedParams = expected.extractParamMap().toSeq.map(pair => pair.param.name -> pair.value)
    builtParams should contain theSameElementsAs expectedParams

    // Test default values of unset params: go through the Cara stage itself so that
    // build() is what is being checked, not a hand-built Spark estimator.
    val partiallyConfigured =
      LinearRegression(Map("RegParam" -> "0.8", "Standardization" -> "false"))
        .build()
        .get
        .asInstanceOf[SparkLR]

    partiallyConfigured.getRegParam shouldBe 0.8
    partiallyConfigured.getStandardization shouldBe false
    partiallyConfigured.getMaxIter shouldBe 100
    partiallyConfigured.getLoss shouldBe "squaredError"
    partiallyConfigured.getTol shouldBe 0.000001
  }

  "GetMethode" should "Return the appropriate methode by it's name" in {
    val caraLr = LinearRegression(allParams)
    val model = caraLr.build().get.asInstanceOf[SparkLR]

    // The sample value passed alongside each name has the type of the corresponding parameter.
    caraLr.getMethode(model, 10, "MaxIter").getName shouldBe "setMaxIter"
    caraLr.getMethode(model, 0.0, "RegParam").getName shouldBe "setRegParam"
    caraLr.getMethode(model, false, "Standardization").getName shouldBe "setStandardization"
  }
}

0 comments on commit b0c71fa

Please sign in to comment.