From 093b1ad5ab35ed763076d538060a653c4b63d1b6 Mon Sep 17 00:00:00 2001 From: Bharathy Date: Fri, 2 Dec 2022 09:10:37 +0530 Subject: [PATCH] Fix remote_txn on PG15 In remote_txn, testcases which kill remote processes on data nodes, tend to rollback transactions and as part of the process, WARNINGS/ERROR are reported to client. Client however reports WARNINGS/ERROR in different order intermittently. This is an issue specific to psql utility. Placing psql in gdb and trying to diagnise the problem does not reproduce the issue. This patch fixes the tests by not reporting WARNINGS. Fixes #4837 --- .github/gh_matrix_builder.py | 4 +-- tsl/test/expected/remote_txn.out | 47 +++++++++++++++++++------------- tsl/test/sql/remote_txn.sql | 24 ++++++++++++++-- 3 files changed, 51 insertions(+), 24 deletions(-) diff --git a/.github/gh_matrix_builder.py b/.github/gh_matrix_builder.py index 5918d2b8c34..5846d640e90 100755 --- a/.github/gh_matrix_builder.py +++ b/.github/gh_matrix_builder.py @@ -155,9 +155,7 @@ def macos_config(overrides): # below tests are tracked as part of #4972 "installcheck_args": "SKIPS='dist_move_chunk' " # below tests are tracked as part of #4835 - "IGNORES='telemetry_stats partialize_finalize " - # below tests are tracked as part of #4837 - "remote_txn'", + "IGNORES='telemetry_stats partialize_finalize'", } ) ) diff --git a/tsl/test/expected/remote_txn.out b/tsl/test/expected/remote_txn.out index 6451e622a2d..64a3a5b0d64 100644 --- a/tsl/test/expected/remote_txn.out +++ b/tsl/test/expected/remote_txn.out @@ -194,6 +194,10 @@ FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4; --the next few statements inject faults before the commit. They should all fail --and be rolled back with no unresolved state +-- We set min message level to "error" since order of warnings generated +-- is different due to timing issues but check that the transaction was +-- rolled back after the commit. +SET client_min_messages TO error; BEGIN; SELECT remote_node_killer_set_event('pre-commit', 'loopback'); remote_node_killer_set_event @@ -208,16 +212,14 @@ BEGIN; (1 row) SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (20003,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); -NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (20003,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') remote_exec ------------- (1 row) COMMIT; -WARNING: kill event: pre-commit -WARNING: transaction rollback on data node "loopback" failed ERROR: [loopback]: terminating connection due to administrator command +RESET client_min_messages; SELECT debug_waitpoint_release('remote_conn_xact_end'); debug_waitpoint_release ------------------------- @@ -243,6 +245,10 @@ SELECT count(*) FROM pg_prepared_xacts; 0 (1 row) +-- We set min message level to "error" since order of warnings generated +-- is different due to timing issues but check that the transaction was +-- rolled back after the commit. +SET client_min_messages TO error; BEGIN; SELECT remote_node_killer_set_event('waiting-commit', 'loopback'); remote_node_killer_set_event @@ -257,7 +263,6 @@ BEGIN; (1 row) SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (20004,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); -NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (20004,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') remote_exec ------------- @@ -272,9 +277,8 @@ NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (20004,1,'bleh', '2001-01-0 (1 row) COMMIT; -WARNING: kill event: waiting-commit -WARNING: transaction rollback on data node "loopback" failed ERROR: [loopback]: terminating connection due to administrator command +RESET client_min_messages; SELECT debug_waitpoint_release('remote_conn_xact_end'); debug_waitpoint_release ------------------------- @@ -571,10 +575,10 @@ SELECT count(*) FROM "S 1"."T 1" WHERE "C 1" = 10003; --during waiting-prepare-transaction the data node process could die before or after --executing the prepare transaction. To be safe to either case rollback using heal_server. -SELECT true FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback')); - bool ------- - t +SELECT * FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback')); + remote_txn_heal_data_node +--------------------------- + 0 (1 row) SELECT count(*) FROM pg_prepared_xacts; @@ -800,10 +804,10 @@ SELECT debug_waitpoint_release('remote_conn_xact_end'); --at this point the commit prepared might or might not have been executed before --the data node process was killed. --but in any case, healing the server will bring it into a known state -SELECT true FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback')); - bool ------- - t +SELECT * FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback')); + remote_txn_heal_data_node +--------------------------- + 1 (1 row) --heal does not use the connection cache, so unaffected @@ -910,9 +914,12 @@ SELECT count(*) FROM "S 1"."T 1" WHERE "C 1" = 81; ALTER TABLE "S 1"."T 1" DROP CONSTRAINT t1_pkey, ADD CONSTRAINT t1_pkey PRIMARY KEY ("C 1") DEFERRABLE INITIALLY DEFERRED; --test ROLLBACK TRANSACTION on failure in PREPARE TRANSACTION. +-- We set min message level to "error" since order of warnings generated +-- is different due to timing issues but check that the transaction was +-- rolled back after the commit. +SET client_min_messages TO error; BEGIN; SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); -NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') remote_exec ------------- @@ -926,8 +933,8 @@ NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-0 (1 row) COMMIT; -WARNING: transaction rollback on data node "loopback" failed ERROR: [loopback]: duplicate key value violates unique constraint "t1_pkey" +RESET client_min_messages; --connection should be removed since PREPARE TRANSACTION failed SELECT node_name, connection_status, transaction_status, transaction_depth, processing FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4; @@ -945,16 +952,18 @@ SELECT count(*) FROM pg_prepared_xacts; --this has an error on the second connection. So should force conn1 to prepare transaction --ok and then have the txn fail on conn2. Thus conn1 would do a ROLLBACK PREPARED. --conn2 would do a ROLLBACK TRANSACTION. +-- We set min message level to "error" since order of warnings generated +-- is different due to timing issues but check that the transaction was +-- rolled back after the commit. +SET client_min_messages TO error; BEGIN; SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (10010,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); -NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (10010,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') remote_exec ------------- (1 row) SELECT test.remote_exec('{loopback2}', $$ INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); -NOTICE: [loopback2]: INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') remote_exec ------------- @@ -970,8 +979,8 @@ NOTICE: [loopback2]: INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01- (2 rows) COMMIT; -WARNING: transaction rollback on data node "loopback2" failed ERROR: [loopback2]: duplicate key value violates unique constraint "t1_pkey" +RESET client_min_messages; --one connection should remain and be idle SELECT node_name, connection_status, transaction_status, transaction_depth, processing FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4; diff --git a/tsl/test/sql/remote_txn.sql b/tsl/test/sql/remote_txn.sql index a79b18d9501..248b0eb973c 100644 --- a/tsl/test/sql/remote_txn.sql +++ b/tsl/test/sql/remote_txn.sql @@ -136,11 +136,16 @@ FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4; --the next few statements inject faults before the commit. They should all fail --and be rolled back with no unresolved state +-- We set min message level to "error" since order of warnings generated +-- is different due to timing issues but check that the transaction was +-- rolled back after the commit. +SET client_min_messages TO error; BEGIN; SELECT remote_node_killer_set_event('pre-commit', 'loopback'); SELECT debug_waitpoint_enable('remote_conn_xact_end'); SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (20003,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); COMMIT; +RESET client_min_messages; SELECT debug_waitpoint_release('remote_conn_xact_end'); -- Failed connection should be cleared @@ -150,6 +155,10 @@ FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4; SELECT count(*) FROM "S 1"."T 1" WHERE "C 1" = 20003; SELECT count(*) FROM pg_prepared_xacts; +-- We set min message level to "error" since order of warnings generated +-- is different due to timing issues but check that the transaction was +-- rolled back after the commit. +SET client_min_messages TO error; BEGIN; SELECT remote_node_killer_set_event('waiting-commit', 'loopback'); SELECT debug_waitpoint_enable('remote_conn_xact_end'); @@ -158,6 +167,7 @@ BEGIN; SELECT node_name, connection_status, transaction_status, transaction_depth, processing FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4; COMMIT; +RESET client_min_messages; SELECT debug_waitpoint_release('remote_conn_xact_end'); --connection failed during commit, so should be cleared from the cache @@ -293,7 +303,7 @@ SELECT count(*) FROM "S 1"."T 1" WHERE "C 1" = 10003; --during waiting-prepare-transaction the data node process could die before or after --executing the prepare transaction. To be safe to either case rollback using heal_server. -SELECT true FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback')); +SELECT * FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback')); SELECT count(*) FROM pg_prepared_xacts; SELECT count(*) from _timescaledb_catalog.remote_txn; @@ -382,7 +392,7 @@ SELECT debug_waitpoint_release('remote_conn_xact_end'); --at this point the commit prepared might or might not have been executed before --the data node process was killed. --but in any case, healing the server will bring it into a known state -SELECT true FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback')); +SELECT * FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback')); --heal does not use the connection cache, so unaffected SELECT node_name, connection_status, transaction_status, transaction_depth, processing @@ -436,11 +446,16 @@ ALTER TABLE "S 1"."T 1" DROP CONSTRAINT t1_pkey, ADD CONSTRAINT t1_pkey PRIMARY KEY ("C 1") DEFERRABLE INITIALLY DEFERRED; --test ROLLBACK TRANSACTION on failure in PREPARE TRANSACTION. +-- We set min message level to "error" since order of warnings generated +-- is different due to timing issues but check that the transaction was +-- rolled back after the commit. +SET client_min_messages TO error; BEGIN; SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); SELECT node_name, connection_status, transaction_status, transaction_depth, processing FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4; COMMIT; +RESET client_min_messages; --connection should be removed since PREPARE TRANSACTION failed SELECT node_name, connection_status, transaction_status, transaction_depth, processing @@ -451,6 +466,10 @@ SELECT count(*) FROM pg_prepared_xacts; --this has an error on the second connection. So should force conn1 to prepare transaction --ok and then have the txn fail on conn2. Thus conn1 would do a ROLLBACK PREPARED. --conn2 would do a ROLLBACK TRANSACTION. +-- We set min message level to "error" since order of warnings generated +-- is different due to timing issues but check that the transaction was +-- rolled back after the commit. +SET client_min_messages TO error; BEGIN; SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (10010,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); SELECT test.remote_exec('{loopback2}', $$ INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$); @@ -459,6 +478,7 @@ BEGIN; SELECT node_name, connection_status, transaction_status, transaction_depth, processing FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4; COMMIT; +RESET client_min_messages; --one connection should remain and be idle SELECT node_name, connection_status, transaction_status, transaction_depth, processing