Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a convention for loading yaml config files from the classpath #6

Merged
merged 6 commits into from May 10, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -14,7 +14,7 @@ Usage: data-validator [options]

--version
--verbose Print additional debug output.
--config <value> required validator config .yaml filename
--config <value> required validator config .yaml filename, prefix w/ 'classpath:' to load configuration from JVM classpath/resources, ex. '--config classpath:/config.yaml'
--jsonReport <value> optional JSON report filename
--htmlReport <value> optional HTML report filename
--vars k1=v1,k2=v2... other arguments
Expand Down
30 changes: 28 additions & 2 deletions src/main/scala/com/target/data_validator/ConfigParser.scala
Expand Up @@ -7,7 +7,7 @@ import io.circe._
import io.circe.generic.auto._
import io.circe.yaml.parser

import scala.io.Source
import scala.io.{BufferedSource, Source}
import scala.util.{Failure, Success, Try}

object ConfigParser extends LazyLogging {
Expand All @@ -28,9 +28,35 @@ object ConfigParser extends LazyLogging {
json.as[ValidatorConfig]
}

/**
  * Reads the entire contents of `buffer` into a string and closes the source.
  *
  * The close happens in a `finally` block so the underlying stream is released
  * even when reading fails part-way through (for example on an I/O error).
  *
  * @param buffer source to drain; it is closed by this method
  * @return everything read from `buffer`
  */
private def bufferContentsAsString(buffer: BufferedSource): String = {
  try {
    buffer.mkString
  } finally {
    // Always release the handle, whether or not mkString succeeded.
    buffer.close()
  }
}

/**
  * Reads the file at `filename` from the file system and returns its contents.
  *
  * @param filename path handed to `Source.fromFile`
  * @return the contents of the file as a single string
  */
private def loadFromFile(filename: String): String = {
  logger.info(s"Attempting to load `$filename` from file system")
  bufferContentsAsString(Source.fromFile(filename))
}

/**
  * Reads the classpath resource named `filename` and returns its contents.
  *
  * The name is passed unchanged to `getClass.getResourceAsStream`.
  *
  * @param filename resource name to look up on the classpath
  * @return the contents of the resource as a single string
  * @throws java.io.FileNotFoundException if no such resource exists
  */
private def loadFromClasspath(filename: String): String = {
  logger.info(s"Attempting to load `$filename` from classpath")
  // getResourceAsStream reports a missing resource by returning null rather than
  // throwing; turn that into a descriptive error instead of a message-less NPE later.
  val stream = Option(getClass.getResourceAsStream(filename)).getOrElse {
    throw new java.io.FileNotFoundException(s"Resource `$filename` not found on classpath")
  }
  bufferContentsAsString(Source.fromInputStream(stream))
}

def parseFile(filename: String, cliMap: Map[String, String]): Either[Error, ValidatorConfig] = {
logger.info(s"Parsing `$filename`")
Try(Source.fromFile(filename).mkString) match {

Try {
if (filename.startsWith("classpath:")) {
loadFromClasspath(filename.stripPrefix("classpath:"))
} else {
loadFromFile(filename)
}
} match {
case Success(contents) => parse(contents)
case Failure(thr) => Left[Error, ValidatorConfig](DecodingFailure.fromThrowable(thr, List.empty))
}
Expand Down
6 changes: 5 additions & 1 deletion src/main/scala/com/target/data_validator/Main.scala
Expand Up @@ -135,7 +135,11 @@ object Main extends LazyLogging with EventLog {
c.copy(verbose = true)).text("Print additional debug output.")

opt[String]("config").action((fn, c) =>
c.copy(configFilename = fn)).text("required validator config .yaml filename")
c.copy(configFilename = fn))
.text(
"required validator config .yaml filename, " +
"prefix w/ 'classpath:' to load configuration from JVM classpath/resources, " +
"ex. '--config classpath:/config.yaml'")

opt[String]("jsonReport").action((fn, c) =>
c.copy(jsonReport = Some(fn))).text("optional JSON report filename")
Expand Down
39 changes: 39 additions & 0 deletions src/test/resources/test_config.yaml
@@ -0,0 +1,39 @@
# Test fixture for ConfigParserSpec's parseFile tests, which load it both by
# file-system path and from the classpath.
# Those tests expect it to decode to the spec's `expectedConfiguration` value.
numKeyCols: 2
numErrorsToReport: 742
email:
smtpHost: smtpHost
subject: subject
from: from
to:
- to
detailedErrors: true
vars:
- name: foo
value: bar

outputs:
- filename: /user/home/sample.json

- pipe: /apps/dv2kafka.py
ignoreError: true
tables:
- db: foo
table: bar
keyColumns:
- one
- two
checks:
- type: rowCount
minNumRows: 10294
- type: nullCheck
column: mdse_item_i
- orcFile: LocalFile.orc
condition: "foo < 10"
checks:
- type: nullCheck
column: start_d
- parquetFile: LocFile.parquet
condition: "bar < 10"
checks:
- type: nullCheck
column: end_d
75 changes: 49 additions & 26 deletions src/test/scala/com/target/data_validator/ConfigParserSpec.scala
Expand Up @@ -10,6 +10,31 @@ class ConfigParserSpec extends FunSpec with BeforeAndAfterAll {
// Silence is golden!
override def beforeAll(): Unit = TestingSparkSession.configTestLog4j("OFF", "OFF")

// The configuration the parsing tests in this spec compare their results against.
val expectedConfiguration = {
  val email = Some(EmailConfig("smtpHost", "subject", "from", List("to")))
  val variables = Some(List(NameValue("foo", Json.fromString("bar"))))
  val outputs = Some(
    List[ValidatorOutput](
      FileOutput("/user/home/sample.json", None),
      PipeOutput("/apps/dv2kafka.py", Some(true))
    )
  )

  val hiveTable = ValidatorHiveTable(
    "foo",
    "bar",
    Some(List("one", "two")),
    None,
    List(MinNumRows(10294), NullCheck("mdse_item_i")) // scalastyle:ignore magic.number
  )
  val orcTable =
    ValidatorOrcFile("LocalFile.orc", None, Some("foo < 10"), List(NullCheck("start_d")))
  val parquetTable =
    ValidatorParquetFile("LocFile.parquet", None, Some("bar < 10"), List(NullCheck("end_d")))

  ValidatorConfig(
    2,
    742, // scalastyle:ignore magic.number
    email,
    detailedErrors = true,
    variables,
    outputs,
    List(hiveTable, orcTable, parquetTable)
  )
}

describe("ConfigParser") {

describe("parse") {
Expand Down Expand Up @@ -58,32 +83,30 @@ class ConfigParserSpec extends FunSpec with BeforeAndAfterAll {
| column: end_d
""".stripMargin)

assert(config == Right(
ValidatorConfig(
2,
742, // scalastyle:ignore magic.number
Some(EmailConfig("smtpHost", "subject", "from", List("to"))),
detailedErrors = true,
Some(List(NameValue("foo", Json.fromString("bar")))),
Some(
List[ValidatorOutput](
FileOutput("/user/home/sample.json", None),
PipeOutput("/apps/dv2kafka.py", Some(true))
)
),
List(
ValidatorHiveTable(
"foo",
"bar",
Some(List("one", "two")),
None,
List(MinNumRows(10294), NullCheck("mdse_item_i")) // scalastyle:ignore magic.number
),
ValidatorOrcFile("LocalFile.orc", None, Some("foo < 10"), List(NullCheck("start_d"))),
ValidatorParquetFile("LocFile.parquet", None, Some("bar < 10"), List(NullCheck("end_d")))
)
)
))
assert(config == Right(expectedConfiguration))
}

}

describe("parseFile") {

  it("should support loading config files by path") {
    // No prefix: the name is passed to parseFile as a plain path.
    val fromPath = ConfigParser.parseFile("src/test/resources/test_config.yaml", Map.empty)
    assert(fromPath == Right(expectedConfiguration))
  }

  it("should support classpath configuration loading with the prefix 'classpath:'") {
    val fromClasspath = ConfigParser.parseFile("classpath:/test_config.yaml", Map.empty)
    assert(fromClasspath == Right(expectedConfiguration))
  }

  it("should not confuse classpath and non classpath file loading") {
    // A file-system path behind the classpath prefix, and a bare resource name
    // without the prefix: both are asserted to fail.
    for (path <- Seq("classpath:src/test/resources/test_config.yaml", "test_config.yaml")) {
      val result = ConfigParser.parseFile(path, Map.empty)
      assert(result.isLeft)
    }
  }

}
Expand Down