Skip to content

Commit c6c552d

Browse files
andrew-coleman authored and vbarua committed
fix(spark): sporadic failures in Hive test suite (#540)
The builds were sporadically failing with the error: io.substrait.spark.HiveTableSuite > Insert into Hive table FAILED org.scalatest.exceptions.TestFailedException: Expected 4, but got 5. It turns out that the row values were not being correctly serialised / deserialised into the Hive table, so even in the cases where the test passed (based on the number of rows added), the values inserted were still incorrect. This commit fixes that, and adds an extra check in the test case(s) to assert that the correct values are inserted. Signed-off-by: Andrew Coleman <andrew_coleman@uk.ibm.com>
1 parent 26e1e3e commit c6c552d

File tree

3 files changed

+7
-3
lines changed

3 files changed

+7
-3
lines changed

spark/src/main/scala/io/substrait/spark/logical/ToLogicalPlan.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ class ToLogicalPlan(spark: SparkSession = SparkSession.builder().getOrCreate())
470470
case WriteOp.INSERT if isHive =>
471471
withChild(child) {
472472
InsertIntoHiveTable(
473-
table,
473+
catalogTable(write.getNames.asScala, ToSparkType.toStructType(write.getTableSchema)),
474474
Map.empty,
475475
child,
476476
write.getCreateMode == CreateMode.REPLACE_IF_EXISTS,
@@ -554,8 +554,8 @@ class ToLogicalPlan(spark: SparkSession = SparkSession.builder().getOrCreate())
554554
val loc = spark.conf.get(StaticSQLConf.WAREHOUSE_PATH.key)
555555
val storage = CatalogStorageFormat(
556556
locationUri = Some(URI.create(f"$loc/$table")),
557-
inputFormat = None,
558-
outputFormat = None,
557+
inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"),
558+
outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"),
559559
serde = None,
560560
compressed = false,
561561
properties = Map.empty

spark/src/test/scala/io/substrait/spark/HiveTableSuite.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ class HiveTableSuite extends SparkFunSuite {
9595
// and again...
9696
spark.sessionState.executePlan(plan).executedPlan.execute()
9797
assertResult(5)(spark.sql("select * from test").count())
98+
// check it has inserted the correct values
99+
assertResult(3)(spark.sql("select * from test where ID = 1003 and VALUE = 'again'").count())
98100
}
99101

100102
test("Create Table As Select") {

spark/src/test/scala/io/substrait/spark/LocalFiles.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ class LocalFiles extends SharedSparkSession {
218218
// and again...
219219
spark.sessionState.executePlan(plan).executedPlan.execute()
220220
assertResult(5)(spark.sql("select * from test").count())
221+
// check it has inserted the correct values
222+
assertResult(3)(spark.sql("select * from test where ID = 1003 and VALUE = 'again'").count())
221223
}
222224

223225
test("Append to CSV file") {

0 commit comments

Comments (0)