Skip to content

Commit

Permalink
[HUDI-5111] Improve integration test coverage (apache#7092)
Browse files: browse the repository at this point in the history

Co-authored-by: Raymond Xu <2701446+xushiyan@users.noreply.github.com>
  • Loading branch information
2 people authored and Alexey Kudinkin committed Dec 14, 2022
1 parent 4cd1722 commit 09119fb
Show file tree
Hide file tree
Showing 19 changed files with 238 additions and 52 deletions.
Expand Up @@ -45,9 +45,21 @@ dag_content:
delete_input_data: false
type: ValidateDatasetNode
deps: first_insert
first_presto_query:
config:
execute_itr_count: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 30000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 5
delete_input_data: true
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -71,15 +71,15 @@ dag_content:
deps: first_delete
second_validate:
config:
validate_once_every_itr : 5
execute_itr_count: 20
validate_hive: true
delete_input_data: true
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: second_hive_sync
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 7600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 30
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 7600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
19 changes: 16 additions & 3 deletions docker/demo/config/test-suite/deltastreamer-medium-clustering.yaml
Expand Up @@ -17,7 +17,7 @@
# to be used with test-aggressive-clean-archival.properties

dag_name: deltastreamer-medium-clustering.yaml
dag_rounds: 20
dag_rounds: 15
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -62,14 +62,27 @@ dag_content:
deps: first_upsert
second_validate:
config:
validate_once_every_itr: 3
validate_hive: false
delete_input_data: true
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 15
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 20
execute_itr_count: 15
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -17,7 +17,7 @@
# to be used with test-aggressive-clean-archival.properties

dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 20
dag_rounds: 15
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -68,9 +68,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 15
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 20
execute_itr_count: 15
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
14 changes: 13 additions & 1 deletion docker/demo/config/test-suite/deltastreamer-non-partitioned.yaml
Expand Up @@ -56,8 +56,20 @@ dag_content:
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 6
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 11000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 6
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: detlastreamer-long-running-example.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
16 changes: 14 additions & 2 deletions docker/demo/config/test-suite/simple-clustering.yaml
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: simple-clustering.yaml
dag_rounds: 30
dag_rounds: 15
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
Expand Down Expand Up @@ -54,11 +54,23 @@ dag_content:
deps: first_delete
first_cluster:
config:
execute_itr_count: 25
execute_itr_count: 10
type: ClusteringNode
deps: first_validate
second_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_cluster
first_presto_query:
config:
validate_once_every_itr: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 8300
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
12 changes: 12 additions & 0 deletions docker/demo/config/test-suite/simple-deltastreamer.yaml
Expand Up @@ -68,3 +68,15 @@ dag_content:
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
validate_once_every_itr: 3
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 9600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
14 changes: 13 additions & 1 deletion docker/demo/config/test-suite/spark-immutable-dataset.yaml
Expand Up @@ -45,9 +45,21 @@ dag_content:
delete_input_data: false
type: ValidateDatasetNode
deps: first_insert
first_presto_query:
config:
execute_itr_count: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 48000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 5
delete_input_data: true
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Expand Up @@ -48,6 +48,18 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 6
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 6000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 6
Expand Down
16 changes: 14 additions & 2 deletions docker/demo/config/test-suite/spark-long-running.yaml
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: cow-spark-deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_rounds: 20
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
Expand Down Expand Up @@ -49,9 +49,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 30
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 189000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 30
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query

0 comments on commit 09119fb

Please sign in to comment.