-
Notifications
You must be signed in to change notification settings - Fork 82
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* wip: validator * add small validator in train * add config validation tool
- Loading branch information
Showing
18 changed files
with
273 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package ai.metarank.mode.validate | ||
|
||
/** Outcome of a single validation step: either a success marker or a failure carrying a reason.
  *
  * Extends `Product with Serializable` so that collections mixing both variants are inferred as
  * `CheckResult` instead of a compound `Product with Serializable with CheckResult` type.
  */
sealed trait CheckResult extends Product with Serializable

object CheckResult {

  /** The check passed. */
  case object SuccessfulCheck extends CheckResult

  /** The check failed.
    *
    * @param reason human-readable explanation of what went wrong
    */
  final case class FailedCheck(reason: String) extends CheckResult
}
40 changes: 40 additions & 0 deletions
40
src/main/scala/ai/metarank/mode/validate/ConfigValidator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package ai.metarank.mode.validate | ||
|
||
import ai.metarank.mode.validate.CheckResult._ | ||
import ai.metarank.util.Logging | ||
import io.circe.{Json, JsonObject} | ||
import io.circe.yaml.parser.parse | ||
|
||
/** Shallow structural validation of a YAML config: the document must be a YAML object whose
  * `interactions` and `features` entries are both non-empty lists.
  */
object ConfigValidator extends Logging {

  /** Parses `file` as YAML and checks the two required sections in order.
    *
    * @param file the YAML document contents (not a path)
    * @return `SuccessfulCheck` when both sections are non-empty lists, otherwise the first
    *         `FailedCheck` encountered (syntax error, non-object document, or a bad section)
    */
  def check(file: String): CheckResult =
    parse(file).fold[CheckResult](
      failure => FailedCheck(s"yaml syntax error: ${failure}"),
      document =>
        document.asObject.fold[CheckResult](FailedCheck("config file is not an YAML dictionary")) { root =>
          logger.info("config file is a YAML object")
          // 'features' is only inspected once 'interactions' has passed.
          val interactions = checkNonEmpty(root, "interactions")
          if (interactions == SuccessfulCheck) checkNonEmpty(root, "features") else interactions
        }
    )

  /** Verifies that `section` is present in `obj` and holds a non-empty list.
    *
    * @param obj     the top-level config object
    * @param section key of the section to inspect
    * @return `SuccessfulCheck` for a non-empty list; a `FailedCheck` when the key is missing,
    *         the value is not a list, or the list is empty
    */
  def checkNonEmpty(obj: JsonObject, section: String): CheckResult =
    obj(section).fold[CheckResult](FailedCheck(s"'$section' section is missing in config")) { value =>
      logger.info(s"$section section exists")
      value.asArray match {
        case None => FailedCheck(s"'$section' section is not a list")
        case Some(items) if items.nonEmpty =>
          logger.info(s"$section section is not empty")
          SuccessfulCheck
        case Some(_) => FailedCheck(s"'$section' section is empty")
      }
    }

}
52 changes: 52 additions & 0 deletions
52
src/main/scala/ai/metarank/mode/validate/EventFileValidator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package ai.metarank.mode.validate | ||
|
||
import ai.metarank.mode.validate.CheckResult.{FailedCheck, SuccessfulCheck} | ||
import ai.metarank.model.Event | ||
import ai.metarank.model.Event.{InteractionEvent, MetadataEvent, RankingEvent} | ||
import ai.metarank.util.Logging | ||
import better.files.File | ||
import org.apache.commons.io.IOUtils | ||
import io.circe.parser._ | ||
|
||
import scala.collection.JavaConverters._ | ||
import java.nio.charset.StandardCharsets | ||
import java.util.zip.GZIPInputStream | ||
|
||
/** Validates an events dataset stored as one JSON event per line, optionally GZip-compressed. */
object EventFileValidator extends Logging {

  /** Reads every line of `file` (decompressing when the extension is `gz`/`gzip`) and validates
    * the decoded events with [[checkContents]].
    *
    * No exceptions are caught here, so failures while opening or reading the file propagate
    * to the caller.
    *
    * @param file dataset file; supported extensions are `gz`, `gzip`, `json` and `jsonl`
    * @return the result of [[checkContents]], or a `FailedCheck` for an unsupported extension
    */
  def check(file: File): CheckResult = {
    file.extension(includeDot = false) match {
      case Some("gz") | Some("gzip") =>
        logger.info("GZip compression detected")
        checkContents(readGzipLines(file))
      case Some("json") | Some("jsonl") =>
        logger.info("No compression detected")
        checkContents(file.lineIterator.toList)
      case other => FailedCheck(s"content type $other is not supported")
    }
  }

  /** Reads all lines of a GZip-compressed UTF-8 file, always releasing the underlying streams. */
  private def readGzipLines(file: File): List[String] = {
    val raw = file.newFileInputStream
    try {
      // The GZIPInputStream constructor reads the header and may throw; the outer finally
      // still closes the raw file stream in that case.
      val gzip = new GZIPInputStream(raw)
      try IOUtils.lineIterator(gzip, StandardCharsets.UTF_8).asScala.toList
      finally gzip.close()
    } finally raw.close()
  }

  /** Decodes each line as an `Event`, logs per-type counts, and checks dataset consistency.
    *
    * @param lines raw JSON lines, one event per line
    * @return `SuccessfulCheck` only when at least one metadata, one ranking and one interaction
    *         event were decoded and no line failed to decode; `FailedCheck` otherwise
    */
  def checkContents(lines: List[String]): CheckResult = {
    val parsed   = lines.map(line => decode[Event](line))
    val metadata = parsed.collect { case Right(m: MetadataEvent) => m }
    val ints     = parsed.collect { case Right(i: InteractionEvent) => i }
    val rankings = parsed.collect { case Right(r: RankingEvent) => r }
    val failed   = parsed.collect { case Left(x) => x }
    logger.info(s"total events: ${parsed.size}")
    logger.info(s"metadata events: ${metadata.size}")
    logger.info(s"interaction events: ${ints.size}")
    logger.info(s"ranking events: ${rankings.size}")
    logger.info(s"failed parsing events: ${failed.size}")
    if (metadata.nonEmpty && rankings.nonEmpty && ints.nonEmpty && failed.isEmpty) {
      SuccessfulCheck
    } else {
      FailedCheck("Problems with event consistency")
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package ai.metarank.mode.validate | ||
|
||
import ai.metarank.util.Logging | ||
import better.files.File | ||
import cats.effect.{ExitCode, IO, IOApp} | ||
|
||
/** Command-line entry point of the validator tool: checks either a config file or an events
  * dataset, depending on the arguments.
  */
object Validate extends IOApp with Logging {

  /** Raised inside `IO` when a validation check reports a failure. */
  case class ValidationError(msg: String) extends Exception(msg)

  /** Dispatches on the command-line arguments.
    *
    * @param args `--config <path>`, `--data <path>`, `--help`, or empty (prints help)
    * @return `ExitCode.Success` when the selected action completes; any other argument shape
    *         fails the `IO` with an `IllegalArgumentException`
    */
  override def run(args: List[String]): IO[ExitCode] = {
    val action: IO[Unit] = args match {
      case "--config" :: configPath :: Nil => checkConfig(File(configPath))
      case "--data" :: dataPath :: Nil     => checkData(File(dataPath))
      case "--help" :: Nil                 => printHelp()
      case Nil                             => printHelp()
      case other =>
        IO.raiseError(new IllegalArgumentException(s"argument $other is not supported, use '--help' for help"))
    }
    action.map(_ => ExitCode.Success)
  }

  /** Logs the usage text. */
  def printHelp(): IO[Unit] = IO {
    logger.info("Metarank validator tool")
    logger.info("Usage: metarank validate <options>")
    logger.info("")
    logger.info("Possible options:")
    logger.info(" --config <path> - Validate feature configuration file")
    logger.info(" --data <path> - Validate historical events dataset")
    logger.info(" --help - This help")
  }

  /** Validates the config file at `cfg`.
    *
    * Reading and parsing are suspended in `IO`, so nothing touches the file system until the
    * returned effect runs, and read errors surface as a failed `IO` instead of being thrown
    * while the effect is constructed.
    *
    * @return an effect that logs success, or fails with [[ValidationError]]
    */
  def checkConfig(cfg: File): IO[Unit] =
    IO(ConfigValidator.check(cfg.contentAsString)).flatMap(result => report("Config file is valid", result))

  /** Validates the events dataset at `ds`; the file is only read when the returned effect runs.
    *
    * @return an effect that logs success, or fails with [[ValidationError]]
    */
  def checkData(ds: File): IO[Unit] =
    IO(EventFileValidator.check(ds)).flatMap(result => report("Data file is valid", result))

  /** Logs `successMessage` for a successful check, or raises the failure reason as an error. */
  private def report(successMessage: String, result: CheckResult): IO[Unit] = result match {
    case CheckResult.SuccessfulCheck     => IO { logger.info(successMessage) }
    case CheckResult.FailedCheck(reason) => IO.raiseError(ValidationError(reason))
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
src/test/scala/ai/metarank/mode/validate/ConfigValidatorTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
package ai.metarank.mode.validate | ||
|
||
import ai.metarank.mode.validate.CheckResult.{FailedCheck, SuccessfulCheck} | ||
import org.scalatest.flatspec.AnyFlatSpec | ||
import org.scalatest.matchers.should.Matchers | ||
|
||
/** Exercises `ConfigValidator.check` against well-formed and malformed YAML configs. */
class ConfigValidatorTest extends AnyFlatSpec with Matchers {

  // Well-formed `interactions` section. The nested indentation is significant in YAML:
  // `weight` must align with `name` to belong to the same list item.
  private val interactions =
    """interactions:
      |  - name: click
      |    weight: 1""".stripMargin

  // Well-formed `features` section.
  private val features =
    """features:
      |  - name: foo
      |    type: number
      |    scope: item
      |    source: metadata.foo""".stripMargin

  /** Joins top-level YAML sections into a single document. */
  private def config(sections: String*): String = sections.mkString("\n")

  it should "accept valid file" in {
    ConfigValidator.check(config(interactions, features)) shouldBe SuccessfulCheck
  }

  it should "fail on empty file" in {
    ConfigValidator.check("") shouldBe a[FailedCheck]
  }

  it should "fail on missing interactions" in {
    ConfigValidator.check(config(features)) shouldBe a[FailedCheck]
  }

  it should "fail on empty interactions" in {
    ConfigValidator.check(config("interactions:", features)) shouldBe a[FailedCheck]
  }

  it should "fail on interactions being non-list" in {
    ConfigValidator.check(config("interactions: true", features)) shouldBe a[FailedCheck]
  }

  it should "fail on missing features" in {
    ConfigValidator.check(config(interactions)) shouldBe a[FailedCheck]
  }

  it should "fail on empty features" in {
    ConfigValidator.check(config(interactions, "features:")) shouldBe a[FailedCheck]
  }

  it should "fail on non-list features" in {
    ConfigValidator.check(config(interactions, "features: true")) shouldBe a[FailedCheck]
  }
}
Oops, something went wrong.