Skip to content

Commit

Permalink
Fix remote_txn on PG15
Browse files Browse the repository at this point in the history
In remote_txn, testcases which kill remote processes on data nodes,
tend to rollback transactions and as part of the process, WARNINGS/ERROR
are reported to client. Client however reports WARNINGS/ERROR in different
order intermittently. This is an issue specific to psql utility. Placing
psql in gdb and trying to diagnise the problem does not reproduce the issue.

This patch fixes the tests by not reporting WARNINGS.

Fixes timescale#4837
  • Loading branch information
sb230132 committed Dec 2, 2022
1 parent 1a806e2 commit 093b1ad
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 24 deletions.
4 changes: 1 addition & 3 deletions .github/gh_matrix_builder.py
Expand Up @@ -155,9 +155,7 @@ def macos_config(overrides):
# below tests are tracked as part of #4972
"installcheck_args": "SKIPS='dist_move_chunk' "
# below tests are tracked as part of #4835
"IGNORES='telemetry_stats partialize_finalize "
# below tests are tracked as part of #4837
"remote_txn'",
"IGNORES='telemetry_stats partialize_finalize'",
}
)
)
Expand Down
47 changes: 28 additions & 19 deletions tsl/test/expected/remote_txn.out
Expand Up @@ -194,6 +194,10 @@ FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4;

--the next few statements inject faults before the commit. They should all fail
--and be rolled back with no unresolved state
-- We set min message level to "error" since order of warnings generated
-- is different due to timing issues but check that the transaction was
-- rolled back after the commit.
SET client_min_messages TO error;
BEGIN;
SELECT remote_node_killer_set_event('pre-commit', 'loopback');
remote_node_killer_set_event
Expand All @@ -208,16 +212,14 @@ BEGIN;
(1 row)

SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (20003,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (20003,1,'bleh', '2001-01-01', '2001-01-01', 'bleh')
remote_exec
-------------

(1 row)

COMMIT;
WARNING: kill event: pre-commit
WARNING: transaction rollback on data node "loopback" failed
ERROR: [loopback]: terminating connection due to administrator command
RESET client_min_messages;
SELECT debug_waitpoint_release('remote_conn_xact_end');
debug_waitpoint_release
-------------------------
Expand All @@ -243,6 +245,10 @@ SELECT count(*) FROM pg_prepared_xacts;
0
(1 row)

-- We set min message level to "error" since order of warnings generated
-- is different due to timing issues but check that the transaction was
-- rolled back after the commit.
SET client_min_messages TO error;
BEGIN;
SELECT remote_node_killer_set_event('waiting-commit', 'loopback');
remote_node_killer_set_event
Expand All @@ -257,7 +263,6 @@ BEGIN;
(1 row)

SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (20004,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (20004,1,'bleh', '2001-01-01', '2001-01-01', 'bleh')
remote_exec
-------------

Expand All @@ -272,9 +277,8 @@ NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (20004,1,'bleh', '2001-01-0
(1 row)

COMMIT;
WARNING: kill event: waiting-commit
WARNING: transaction rollback on data node "loopback" failed
ERROR: [loopback]: terminating connection due to administrator command
RESET client_min_messages;
SELECT debug_waitpoint_release('remote_conn_xact_end');
debug_waitpoint_release
-------------------------
Expand Down Expand Up @@ -571,10 +575,10 @@ SELECT count(*) FROM "S 1"."T 1" WHERE "C 1" = 10003;

--during waiting-prepare-transaction the data node process could die before or after
--executing the prepare transaction. To be safe to either case rollback using heal_server.
SELECT true FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback'));
bool
------
t
SELECT * FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback'));
remote_txn_heal_data_node
---------------------------
0
(1 row)

SELECT count(*) FROM pg_prepared_xacts;
Expand Down Expand Up @@ -800,10 +804,10 @@ SELECT debug_waitpoint_release('remote_conn_xact_end');
--at this point the commit prepared might or might not have been executed before
--the data node process was killed.
--but in any case, healing the server will bring it into a known state
SELECT true FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback'));
bool
------
t
SELECT * FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback'));
remote_txn_heal_data_node
---------------------------
1
(1 row)

--heal does not use the connection cache, so unaffected
Expand Down Expand Up @@ -910,9 +914,12 @@ SELECT count(*) FROM "S 1"."T 1" WHERE "C 1" = 81;
ALTER TABLE "S 1"."T 1" DROP CONSTRAINT t1_pkey,
ADD CONSTRAINT t1_pkey PRIMARY KEY ("C 1") DEFERRABLE INITIALLY DEFERRED;
--test ROLLBACK TRANSACTION on failure in PREPARE TRANSACTION.
-- We set min message level to "error" since order of warnings generated
-- is different due to timing issues but check that the transaction was
-- rolled back after the commit.
SET client_min_messages TO error;
BEGIN;
SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh')
remote_exec
-------------

Expand All @@ -926,8 +933,8 @@ NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-0
(1 row)

COMMIT;
WARNING: transaction rollback on data node "loopback" failed
ERROR: [loopback]: duplicate key value violates unique constraint "t1_pkey"
RESET client_min_messages;
--connection should be removed since PREPARE TRANSACTION failed
SELECT node_name, connection_status, transaction_status, transaction_depth, processing
FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4;
Expand All @@ -945,16 +952,18 @@ SELECT count(*) FROM pg_prepared_xacts;
--this has an error on the second connection. So should force conn1 to prepare transaction
--ok and then have the txn fail on conn2. Thus conn1 would do a ROLLBACK PREPARED.
--conn2 would do a ROLLBACK TRANSACTION.
-- We set min message level to "error" since order of warnings generated
-- is different due to timing issues but check that the transaction was
-- rolled back after the commit.
SET client_min_messages TO error;
BEGIN;
SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (10010,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
NOTICE: [loopback]: INSERT INTO "S 1"."T 1" VALUES (10010,1,'bleh', '2001-01-01', '2001-01-01', 'bleh')
remote_exec
-------------

(1 row)

SELECT test.remote_exec('{loopback2}', $$ INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
NOTICE: [loopback2]: INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh')
remote_exec
-------------

Expand All @@ -970,8 +979,8 @@ NOTICE: [loopback2]: INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-
(2 rows)

COMMIT;
WARNING: transaction rollback on data node "loopback2" failed
ERROR: [loopback2]: duplicate key value violates unique constraint "t1_pkey"
RESET client_min_messages;
--one connection should remain and be idle
SELECT node_name, connection_status, transaction_status, transaction_depth, processing
FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4;
Expand Down
24 changes: 22 additions & 2 deletions tsl/test/sql/remote_txn.sql
Expand Up @@ -136,11 +136,16 @@ FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4;

--the next few statements inject faults before the commit. They should all fail
--and be rolled back with no unresolved state
-- We set min message level to "error" since order of warnings generated
-- is different due to timing issues but check that the transaction was
-- rolled back after the commit.
SET client_min_messages TO error;
BEGIN;
SELECT remote_node_killer_set_event('pre-commit', 'loopback');
SELECT debug_waitpoint_enable('remote_conn_xact_end');
SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (20003,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
COMMIT;
RESET client_min_messages;
SELECT debug_waitpoint_release('remote_conn_xact_end');

-- Failed connection should be cleared
Expand All @@ -150,6 +155,10 @@ FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4;
SELECT count(*) FROM "S 1"."T 1" WHERE "C 1" = 20003;
SELECT count(*) FROM pg_prepared_xacts;

-- We set min message level to "error" since order of warnings generated
-- is different due to timing issues but check that the transaction was
-- rolled back after the commit.
SET client_min_messages TO error;
BEGIN;
SELECT remote_node_killer_set_event('waiting-commit', 'loopback');
SELECT debug_waitpoint_enable('remote_conn_xact_end');
Expand All @@ -158,6 +167,7 @@ BEGIN;
SELECT node_name, connection_status, transaction_status, transaction_depth, processing
FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4;
COMMIT;
RESET client_min_messages;
SELECT debug_waitpoint_release('remote_conn_xact_end');

--connection failed during commit, so should be cleared from the cache
Expand Down Expand Up @@ -293,7 +303,7 @@ SELECT count(*) FROM "S 1"."T 1" WHERE "C 1" = 10003;

--during waiting-prepare-transaction the data node process could die before or after
--executing the prepare transaction. To be safe to either case rollback using heal_server.
SELECT true FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback'));
SELECT * FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback'));
SELECT count(*) FROM pg_prepared_xacts;
SELECT count(*) from _timescaledb_catalog.remote_txn;

Expand Down Expand Up @@ -382,7 +392,7 @@ SELECT debug_waitpoint_release('remote_conn_xact_end');
--at this point the commit prepared might or might not have been executed before
--the data node process was killed.
--but in any case, healing the server will bring it into a known state
SELECT true FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback'));
SELECT * FROM _timescaledb_internal.remote_txn_heal_data_node((SELECT OID FROM pg_foreign_server WHERE srvname = 'loopback'));

--heal does not use the connection cache, so unaffected
SELECT node_name, connection_status, transaction_status, transaction_depth, processing
Expand Down Expand Up @@ -436,11 +446,16 @@ ALTER TABLE "S 1"."T 1" DROP CONSTRAINT t1_pkey,
ADD CONSTRAINT t1_pkey PRIMARY KEY ("C 1") DEFERRABLE INITIALLY DEFERRED;

--test ROLLBACK TRANSACTION on failure in PREPARE TRANSACTION.
-- We set min message level to "error" since order of warnings generated
-- is different due to timing issues but check that the transaction was
-- rolled back after the commit.
SET client_min_messages TO error;
BEGIN;
SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
SELECT node_name, connection_status, transaction_status, transaction_depth, processing
FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4;
COMMIT;
RESET client_min_messages;

--connection should be removed since PREPARE TRANSACTION failed
SELECT node_name, connection_status, transaction_status, transaction_depth, processing
Expand All @@ -451,6 +466,10 @@ SELECT count(*) FROM pg_prepared_xacts;
--this has an error on the second connection. So should force conn1 to prepare transaction
--ok and then have the txn fail on conn2. Thus conn1 would do a ROLLBACK PREPARED.
--conn2 would do a ROLLBACK TRANSACTION.
-- We set min message level to "error" since order of warnings generated
-- is different due to timing issues but check that the transaction was
-- rolled back after the commit.
SET client_min_messages TO error;
BEGIN;
SELECT test.remote_exec('{loopback}', $$ INSERT INTO "S 1"."T 1" VALUES (10010,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
SELECT test.remote_exec('{loopback2}', $$ INSERT INTO "S 1"."T 1" VALUES (10001,1,'bleh', '2001-01-01', '2001-01-01', 'bleh') $$);
Expand All @@ -459,6 +478,7 @@ BEGIN;
SELECT node_name, connection_status, transaction_status, transaction_depth, processing
FROM _timescaledb_internal.show_connection_cache() ORDER BY 1,4;
COMMIT;
RESET client_min_messages;

--one connection should remain and be idle
SELECT node_name, connection_status, transaction_status, transaction_depth, processing
Expand Down

0 comments on commit 093b1ad

Please sign in to comment.