Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/yaml parser #9

Merged
merged 7 commits into from
May 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ class CaraParser(caraYaml: CaraYaml) extends ParserUtils with CaraStageMapper{
for {
pipeline <- parsePipeline()
evaluator <- parseEvaluator()
} yield CaraPipeline(pipeline, evaluator)
tunerDesc <- parseTuner()
} yield CaraPipeline(pipeline, evaluator, tunerDesc)
}

private[PipelineParser] def parsePipeline(): Try[Pipeline] = {
Expand Down Expand Up @@ -142,3 +143,7 @@ class CaraParser(caraYaml: CaraYaml) extends ParserUtils with CaraStageMapper{
}

}

/**
 * Companion of the `CaraParser` class.
 *
 * Offers a factory so call sites can write `CaraParser(caraYaml)` rather than
 * `new CaraParser(caraYaml)`.
 */
object CaraParser {

  /**
   * Creates a parser for the supplied [[CaraYaml]].
   *
   * @param caraYaml the value forwarded unchanged to the `CaraParser` constructor
   * @return a freshly constructed `CaraParser`
   */
  def apply(caraYaml: CaraYaml): CaraParser = {
    val parser = new CaraParser(caraYaml)
    parser
  }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.github.jsarni.PipelineParser

import io.github.jsarni.CaraStage.TuningStage.TuningStageDescription
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.evaluation.Evaluator

case class CaraPipeline(pipeline: Pipeline, evaluator: Evaluator)
/**
 * Groups the three values that `CaraParser` yields together.
 *
 * @param pipeline  the Spark ML `Pipeline`
 * @param evaluator the Spark ML `Evaluator`
 * @param tuner     the `TuningStageDescription` carried alongside the pipeline and evaluator
 */
case class CaraPipeline(pipeline: Pipeline, evaluator: Evaluator, tuner: TuningStageDescription)
29 changes: 15 additions & 14 deletions src/test/scala/io/github/jsarni/PipelineParser/CaraParserTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class CaraParserTest extends TestBase {
"extractTuner" should "return parse the yaml description file to a json object" in {
val caraPath = getClass.getResource("/cara.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val myJson = caraYaml.loadFile()

Expand All @@ -36,7 +36,7 @@ class CaraParserTest extends TestBase {

"parseSingleStageMap" should "parse a CaraStageDescription to a CaraStage " in {
val caraPath = getClass.getResource("/cara.yaml").getPath
val caraParser = new CaraParser(CaraYaml(caraPath))
val caraParser = CaraParser(CaraYaml(caraPath))

val params = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1")
val stageDesc =
Expand All @@ -55,7 +55,7 @@ class CaraParserTest extends TestBase {

"parseStages" should "parse a list of CaraStageDescription to the corresponding list of CaraStage" in {
val caraPath = getClass.getResource("/cara.yaml").getPath
val caraParser = new CaraParser(CaraYaml(caraPath))
val caraParser = CaraParser(CaraYaml(caraPath))

val params1 = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1")
val params2 = Map("MaxIter" -> "20", "FitIntercept" -> "False", "ProbabilityCol" -> "col1")
Expand All @@ -75,7 +75,7 @@ class CaraParserTest extends TestBase {

"buildStages" should "build a list PipelineStages out of a list of CaraStages" in {
val caraPath = getClass.getResource("/cara.yaml").getPath
val caraParser = new CaraParser(CaraYaml(caraPath))
val caraParser = CaraParser(CaraYaml(caraPath))

val params1 = Map("MaxIter" -> "10", "RegParam" -> "0.3", "ElasticNetParam" -> "0.1")
val params2 = Map("MaxIter" -> "20", "FitIntercept" -> "False", "ProbabilityCol" -> "col1")
Expand All @@ -102,7 +102,7 @@ class CaraParserTest extends TestBase {

"buildPipeline" should "build a Spark ML Pipeline out of a list of PipelineStages" in {
val caraPath = getClass.getResource("/cara.yaml").getPath
val caraParser = new CaraParser(CaraYaml(caraPath))
val caraParser = CaraParser(CaraYaml(caraPath))

val stagesList = List(
new SparkLR().setMaxIter(10).setRegParam(0.3).setElasticNetParam(0.1)
Expand All @@ -118,7 +118,7 @@ class CaraParserTest extends TestBase {
"parsePipeline" should "build the described Pipeline of the Yaml File" in {
val caraPath = getClass.getResource("/cara_for_build.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)


val parsePipeline = PrivateMethod[Try[Pipeline]]('parsePipeline)
Expand All @@ -133,7 +133,7 @@ class CaraParserTest extends TestBase {
"extractTuner" should "get the correct Evaluator Name from the Yaml File" in {
val caraPath = getClass.getResource("/cara_for_build.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val myJson = caraYaml.loadFile()

Expand All @@ -147,7 +147,7 @@ class CaraParserTest extends TestBase {
it should "Raise an exception if there is no evaluator specified" in {
val caraPath = getClass.getResource("/cara_zero_evaluator.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val myJson = caraYaml.loadFile()

Expand All @@ -160,7 +160,7 @@ class CaraParserTest extends TestBase {
it should "Raise an exception if there is more than one evaluator specified" in {
val caraPath = getClass.getResource("/cara_two_evaluator.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val myJson = caraYaml.loadFile()

Expand All @@ -173,7 +173,7 @@ class CaraParserTest extends TestBase {
"parseEvaluator" should "build the described evaluator of the Yaml File" in {
val caraPath = getClass.getResource("/cara_for_build.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val parseEvaluator = PrivateMethod[Try[Evaluator]]('parseEvaluator)
val res = caraParser.invokePrivate(parseEvaluator())
Expand All @@ -185,7 +185,7 @@ class CaraParserTest extends TestBase {
"extractTuner" should "get the correct Tuner Description from the Yaml File" in {
val caraPath = getClass.getResource("/cara_for_build.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val myJson = caraYaml.loadFile()

Expand All @@ -199,7 +199,7 @@ class CaraParserTest extends TestBase {
it should "raise an exception ilf there is more than one tuner in the Yaml File" in {
val caraPath = getClass.getResource("/cara_two_evaluator.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val myJson = caraYaml.loadFile()

Expand All @@ -213,7 +213,7 @@ class CaraParserTest extends TestBase {
"parseTuner" should "build the described Tuner of the Yaml File" in {
val caraPath = getClass.getResource("/cara_for_build.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val myJson = caraYaml.loadFile()

Expand All @@ -227,7 +227,7 @@ class CaraParserTest extends TestBase {
"build" should "build the described Pipeline of the Yaml File" in {
val caraPath = getClass.getResource("/cara_for_build.yaml").getPath
val caraYaml = CaraYaml(caraPath)
val caraParser = new CaraParser(caraYaml)
val caraParser = CaraParser(caraYaml)

val res = caraParser.build()

Expand All @@ -237,5 +237,6 @@ class CaraParserTest extends TestBase {
res.get.evaluator.isInstanceOf[RegressionEvaluator] shouldBe true
res.get.pipeline.getStages.map(_.extractParamMap().toSeq.map(_.value)).head should contain theSameElementsAs
exprectedRes.getStages.map(_.extractParamMap().toSeq.map(_.value)).head
res.get.tuner shouldBe TuningStageDescription("CrossValidator", "NumFolds", "3")
}
}