From d29af94a0d637ff77b62055757ef07a38772cddb Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Wed, 4 Mar 2026 11:53:27 +0500 Subject: [PATCH 1/6] Replace wal_sender_timeout-based liveness with TCP keepalive. The apply worker previously relied on wal_sender_timeout as both a server-side disconnect trigger and an indirect keepalive pressure on the subscriber. This caused spurious disconnects in two scenarios: a flood of 'w' messages keeping the subscriber too busy to send 'r' feedback in time, and large transactions whose apply time exceeded wal_sender_timeout. The workaround was maybe_send_feedback(), which force-sent 'r' after every 10 'w' messages or wal_sender_timeout/2, whichever came first. This was a fragile band-aid that coupled subscriber behavior to a server GUC it cannot control. Replace the entire mechanism with a clean two-layer model: - TCP keepalive (keepalives_idle=10, keepalives_interval=5, keepalives_count=3) is the primary liveness detector on both sides. A dead network or crashed host is detected in ~25 seconds. - wal_sender_timeout=0 is set on replication connections so the walsender never disconnects due to missing 'r' feedback. Liveness on the server side is now handled entirely by TCP keepalive. - spock.apply_idle_timeout (default 300s) is a subscriber-side safety net for a hung-but-connected walsender whose TCP keepalive probes are answered by the kernel but sends no data. Set to 0 to disable. Fix a bug in last_receive_timestamp handling: it was updated unconditionally after every PQgetCopyData call, including when r==0 (no data available). Each 1-second WL_TIMEOUT spin silently reset the timer, making apply_idle_timeout never fire. Move the update to after the r==0 guard so it reflects actual data receipt only. Remove maybe_send_feedback() as it is no longer needed. --- include/spock.h | 1 + src/spock.c | 40 ++++++++++++++++++++++--- src/spock_apply.c | 76 +++++++++++------------------------------------ 3 files changed, 55 insertions(+), 62 deletions(-) diff --git a/include/spock.h b/include/spock.h index 891d616b..61383625 100644 --- a/include/spock.h +++ b/include/spock.h @@ -55,6 +55,7 @@ extern int spock_feedback_frequency; extern bool check_all_uc_indexes; extern bool spock_enable_quiet_mode; extern int log_origin_change; +extern int spock_apply_idle_timeout; extern char *shorten_hash(const char *str, int maxlen); extern void gen_slot_name(Name slot_name, char *dbname, diff --git a/src/spock.c b/src/spock.c index f23c9f94..e9bb12a8 100644 --- a/src/spock.c +++ b/src/spock.c @@ -148,6 +148,7 @@ int spock_feedback_frequency; bool check_all_uc_indexes = false; bool spock_enable_quiet_mode = false; int log_origin_change = SPOCK_ORIGIN_NONE; +int spock_apply_idle_timeout = 300; static emit_log_hook_type prev_emit_log_hook = NULL; static Checkpoint_hook_type prev_Checkpoint_hook = NULL; @@ -317,7 +318,7 @@ get_spock_table_oid(const char *table) return reloid; } -#define CONN_PARAM_ARRAY_SIZE 9 +#define CONN_PARAM_ARRAY_SIZE 10 static PGconn * spock_connect_base(const char *connstr, const char *appname, @@ -357,17 +358,32 @@ spock_connect_base(const char *connstr, const char *appname, vals[i] = "1"; i++; keys[i] = "keepalives_idle"; - vals[i] = "20"; + vals[i] = "10"; i++; keys[i] = "keepalives_interval"; - vals[i] = "20"; + vals[i] = "5"; i++; keys[i] = "keepalives_count"; - vals[i] = "5"; + vals[i] = "3"; i++; keys[i] = "replication"; vals[i] = replication ? "database" : NULL; i++; + /* + * For replication connections, disable the server-side walsender timeout. + * Liveness detection is handled by TCP keepalives (keepalives_idle / + * keepalives_interval / keepalives_count above) on both sides, and by + * spock.apply_idle_timeout on the subscriber side as a safety net for a + * hung-but-connected walsender. Leaving wal_sender_timeout active would + * cause spurious disconnects whenever the subscriber is legitimately busy + * applying a large transaction and cannot send 'r' feedback in time. + */ + if (replication) + { + keys[i] = "options"; + vals[i] = "-c wal_sender_timeout=0"; + i++; + } keys[i] = NULL; vals[i] = NULL; @@ -1223,6 +1239,22 @@ _PG_init(void) PGC_SUSET, 0, NULL, NULL, NULL); + DefineCustomIntVariable("spock.apply_idle_timeout", + "Maximum idle time in seconds before apply worker reconnects", + "Safety net for detecting a hung walsender that keeps the " + "TCP connection alive but stops sending data. The timer " + "resets on any received message. Set to 0 to disable and " + "rely solely on TCP keepalive for liveness detection.", + &spock_apply_idle_timeout, + 300, + 0, + INT_MAX, + PGC_SIGHUP, + GUC_UNIT_S, + NULL, + NULL, + NULL); + if (IsBinaryUpgrade) return; diff --git a/src/spock_apply.c b/src/spock_apply.c index 17e5bc99..4d22b48a 100644 --- a/src/spock_apply.c +++ b/src/spock_apply.c @@ -229,8 +229,6 @@ static bool should_log_exception(bool failed); static ApplyReplayEntry * apply_replay_entry_create(int r, char *buf); static void apply_replay_entry_free(ApplyReplayEntry * entry); static void apply_replay_queue_reset(void); -static void maybe_send_feedback(PGconn *applyconn, XLogRecPtr lsn_to_send, - TimestampTz *last_receive_timestamp); static void append_feedback_position(XLogRecPtr recvpos); static void get_feedback_position(XLogRecPtr *recvpos, XLogRecPtr *writepos, XLogRecPtr *flushpos, XLogRecPtr *max_recvpos); @@ -2967,25 +2965,24 @@ apply_work(PGconn *streamConn) } /* - * The walsender is supposed to ping us for a status update every - * wal_sender_timeout / 2 milliseconds. If we don't get those, we - * assume that we have lost the connection. - * - * Note: keepalive configuration is supposed to cover this but is - * apparently unreliable. + * Connection liveness is handled by TCP keepalive (primary) + * and PQstatus == CONNECTION_BAD (above). The idle timeout + * below is a safety net for the case where the walsender + * process is alive but hung -- TCP probes succeed because the + * kernel ACKs them, but no data is being sent. */ - if (rc & WL_TIMEOUT) + if (rc & WL_TIMEOUT && spock_apply_idle_timeout > 0) { TimestampTz timeout; timeout = TimestampTzPlusMilliseconds(last_receive_timestamp, - (wal_sender_timeout * 3) / 2); + (long) spock_apply_idle_timeout * 1000L); if (GetCurrentTimestamp() > timeout) { MySpockWorker->worker_status = SPOCK_WORKER_STATUS_STOPPED; - elog(ERROR, "SPOCK %s: terminating apply due to missing " - "walsender ping", - MySubscription->name); + elog(ERROR, "SPOCK %s: no data received for %d seconds, " + "reconnecting (spock.apply_idle_timeout)", + MySubscription->name, spock_apply_idle_timeout); } } @@ -3049,8 +3046,6 @@ apply_work(PGconn *streamConn) /* We are not in replay mode so receive from the stream */ r = PQgetCopyData(applyconn, &buf, 1); - last_receive_timestamp = GetCurrentTimestamp(); - /* Check for errors */ if (r == -1) { @@ -3082,6 +3077,14 @@ apply_work(PGconn *streamConn) break; } + /* + * We received actual data. Update the idle-timeout clock + * only here, after confirming r > 0, so that a WL_TIMEOUT + * spin with no incoming data does not silently reset the + * timer and mask a hung walsender. + */ + last_receive_timestamp = GetCurrentTimestamp(); + /* * We have a valid message, create an apply queue entry * but don't add it to the queue yet. @@ -3111,16 +3114,6 @@ apply_work(PGconn *streamConn) end_lsn = pq_getmsgint64(msg); pq_getmsgint64(msg); /* sendTime */ - /* - * Call maybe_send_feedback before last_received is - * updated. This ordering guarantees that feedback LSN - * never advertises a position beyond what has actually - * been received and processed. Prevents skipping over - * unapplied changes due to premature flush LSN. - */ - maybe_send_feedback(applyconn, last_received, - &last_receive_timestamp); - if (last_received < start_lsn) last_received = start_lsn; @@ -4106,39 +4099,6 @@ apply_replay_queue_reset(void) MemoryContextReset(ApplyReplayContext); } -/* - * Check if we should send feedback based on message count or timeout. - * Resets internal state if feedback is sent. - */ -static void -maybe_send_feedback(PGconn *applyconn, XLogRecPtr lsn_to_send, - TimestampTz *last_receive_timestamp) -{ - static int w_message_count = 0; - TimestampTz now = GetCurrentTimestamp(); - - w_message_count++; - - /* - * Send feedback if wal_sender_timeout/2 has passed or after - * spock.feedback_frequency 'w' messages. - */ - if (TimestampDifferenceExceeds(*last_receive_timestamp, now, wal_sender_timeout / 2) || - w_message_count >= spock_feedback_frequency) - { - elog(DEBUG2, "SPOCK %s: force sending feedback after %d 'w' messages or timeout", - MySubscription->name, w_message_count); - - /* - * We need to send feedback to the walsender process to avoid remote - * wal_sender_timeout. - */ - send_feedback(applyconn, lsn_to_send, now, true); - *last_receive_timestamp = now; - w_message_count = 0; - } -} - /* * Advance the replication origin for forwarded transactions. * From de48404be293f1cffb8058cab8ff0fefa7fc3453 Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Tue, 31 Mar 2026 11:57:50 -0700 Subject: [PATCH 2/6] Remove feedback_frequency GUC, not used --- docs/configuring.md | 9 --------- include/spock.h | 1 - src/spock.c | 17 ----------------- 3 files changed, 27 deletions(-) diff --git a/docs/configuring.md b/docs/configuring.md index 7021ba88..5c8579fb 100644 --- a/docs/configuring.md +++ b/docs/configuring.md @@ -191,15 +191,6 @@ keepalive options, etc. the upstream server disappears unexpectedly. To disable them add `keepalives = 0` to `spock.extra_connection_options`. -#### `spock.feedback_frequency` - -Controls how many WAL messages the apply worker processes before sending -an LSN feedback packet to the provider. Lower values increase feedback -overhead due to synchronous socket flushes; higher values reduce overhead -during bulk catch-up. There is a time-based guard (wal_sender_timeout / 2) -that ensures connection liveness regardless of this setting. The default -is 200. - ### `spock.include_ddl_repset` `spock.include_ddl_repset` enables spock to automatically add tables to diff --git a/include/spock.h b/include/spock.h index 61383625..291b0975 100644 --- a/include/spock.h +++ b/include/spock.h @@ -51,7 +51,6 @@ extern bool allow_ddl_from_functions; extern int restart_delay_default; extern int restart_delay_on_exception; extern int spock_replay_queue_size; /* Deprecated - no longer used */ -extern int spock_feedback_frequency; extern bool check_all_uc_indexes; extern bool spock_enable_quiet_mode; extern int log_origin_change; diff --git a/src/spock.c b/src/spock.c index e9bb12a8..d30a3dcb 100644 --- a/src/spock.c +++ b/src/spock.c @@ -144,7 +144,6 @@ bool allow_ddl_from_functions = false; int restart_delay_default; int restart_delay_on_exception; int spock_replay_queue_size; /* Deprecated - no longer used */ -int spock_feedback_frequency; bool check_all_uc_indexes = false; bool spock_enable_quiet_mode = false; int log_origin_change = SPOCK_ORIGIN_NONE; @@ -1183,22 +1182,6 @@ _PG_init(void) NULL, NULL); - DefineCustomIntVariable("spock.feedback_frequency", - "Number of WAL messages between feedback to provider", - "Controls how often the apply worker sends LSN feedback " - "to the provider during replication. Lower values increase " - "feedback overhead; the time-based guard (wal_sender_timeout/2) " - "ensures liveness regardless of this setting.", - &spock_feedback_frequency, - 200, - 1, - INT_MAX, - PGC_SIGHUP, - 0, - NULL, - NULL, - NULL); - DefineCustomEnumVariable("spock.readonly", gettext_noop("Controls cluster read-only mode."), NULL, From c0d4423eac1ea8297df431ca3b5f67d313a375c1 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 2 Apr 2026 21:17:05 +0500 Subject: [PATCH 3/6] Remove wal_sender_timeout=0 from code. Also document conservative wal_sender_timeout and apply_idle_timeout settings. --- docs/configuring.md | 28 ++++++++++++++++++++++++++++ src/spock.c | 15 --------------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/docs/configuring.md b/docs/configuring.md index 5c8579fb..de7240a3 100644 --- a/docs/configuring.md +++ b/docs/configuring.md @@ -191,6 +191,34 @@ keepalive options, etc. the upstream server disappears unexpectedly. To disable them add `keepalives = 0` to `spock.extra_connection_options`. +### `wal_sender_timeout` + +For Spock replication, set `wal_sender_timeout` to a conservative value such +as `5min` (300000ms) on each node in `postgresql.conf`: + +``` +wal_sender_timeout = '5min' +``` + +The default PostgreSQL value of `60s` can cause spurious disconnects when +the subscriber is busy applying a large transaction and cannot send feedback +in time. A higher value gives the apply worker enough headroom while still +detecting truly dead connections. Liveness detection is primarily handled by +TCP keepalives, and `spock.apply_idle_timeout` provides an additional +subscriber-side safety net. + +### `spock.apply_idle_timeout` + +Maximum idle time (in seconds) before the apply worker reconnects to the +provider. This acts as a safety net for detecting a hung walsender that keeps +the TCP connection alive but stops sending data. The timer resets on any +received message. Set to `0` to disable and rely solely on TCP keepalive for +liveness detection. Default: `300` (5 minutes). + +``` +spock.apply_idle_timeout = 300 +``` + ### `spock.include_ddl_repset` `spock.include_ddl_repset` enables spock to automatically add tables to diff --git a/src/spock.c b/src/spock.c index d30a3dcb..8e58d494 100644 --- a/src/spock.c +++ b/src/spock.c @@ -368,21 +368,6 @@ spock_connect_base(const char *connstr, const char *appname, keys[i] = "replication"; vals[i] = replication ? "database" : NULL; i++; - /* - * For replication connections, disable the server-side walsender timeout. - * Liveness detection is handled by TCP keepalives (keepalives_idle / - * keepalives_interval / keepalives_count above) on both sides, and by - * spock.apply_idle_timeout on the subscriber side as a safety net for a - * hung-but-connected walsender. Leaving wal_sender_timeout active would - * cause spurious disconnects whenever the subscriber is legitimately busy - * applying a large transaction and cannot send 'r' feedback in time. - */ - if (replication) - { - keys[i] = "options"; - vals[i] = "-c wal_sender_timeout=0"; - i++; - } keys[i] = NULL; vals[i] = NULL; From 20eaf46896a761da0062eefafdb1b79157ea1cf8 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Fri, 3 Apr 2026 11:50:18 +0500 Subject: [PATCH 4/6] Quote paths in Makefile to handle spaces in directory names. --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index c0b0f9aa..56bc9b3d 100644 --- a/Makefile +++ b/Makefile @@ -19,8 +19,8 @@ SRCS := $(wildcard src/*.c) \ OBJS = $(filter-out src/spock_output.o, $(SRCS:.c=.o)) PG_CPPFLAGS += -I$(libpq_srcdir) \ - -I$(realpath include) \ - -I$(realpath src/compat/$(PGVER)) \ + '-I$(realpath include)' \ + '-I$(realpath src/compat/$(PGVER))' \ -Werror=implicit-function-declaration SHLIB_LINK += $(libpq) $(filter -lintl, $(LIBS)) @@ -38,11 +38,11 @@ include $(PGXS) spock_output.o: src/spock_output.c $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< -spock_version=$(shell grep "^\#define \" $(realpath include/spock.h) | cut -d'"' -f2) +spock_version=$(shell grep "^\#define \" "$(realpath include/spock.h)" | cut -d'"' -f2) requires = control_path = $(abspath $(srcdir))/spock.control spock.control: spock.control.in include/spock.h - sed 's/__SPOCK_VERSION__/$(spock_version)/;s/__REQUIRES__/$(requires)/' $(realpath $(srcdir)/spock.control.in) > $(control_path) + sed 's/__SPOCK_VERSION__/$(spock_version)/;s/__REQUIRES__/$(requires)/' "$(realpath $(srcdir)/spock.control.in)" > "$(control_path)" all: spock.control From 894ac52bd12f650a1441a2dea7ec6875e515577c Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Fri, 3 Apr 2026 09:48:01 -0700 Subject: [PATCH 5/6] Remove no longer used maybe_send_feedback() --- src/spock_apply.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/spock_apply.c b/src/spock_apply.c index 4d22b48a..2ca69640 100644 --- a/src/spock_apply.c +++ b/src/spock_apply.c @@ -3012,13 +3012,6 @@ apply_work(PGconn *streamConn) */ if (spock_readonly == READONLY_ALL) { - /* - * Send feedback to keep walsender alive - we may avoid it - * with introduction of TCP keepalive approach. - */ - maybe_send_feedback(applyconn, last_received, - &last_receive_timestamp); - /* * In case of an exception we can't break out of the loop * because exception processing code may also modify the From 518a9d8ecb9dada515ab668bc0d1a3ab68ef7afe Mon Sep 17 00:00:00 2001 From: Mason Sharp Date: Fri, 3 Apr 2026 09:48:35 -0700 Subject: [PATCH 6/6] Adjust recently added quoting --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 56bc9b3d..e8e5600e 100644 --- a/Makefile +++ b/Makefile @@ -19,8 +19,8 @@ SRCS := $(wildcard src/*.c) \ OBJS = $(filter-out src/spock_output.o, $(SRCS:.c=.o)) PG_CPPFLAGS += -I$(libpq_srcdir) \ - '-I$(realpath include)' \ - '-I$(realpath src/compat/$(PGVER))' \ + -I"$(realpath include)" \ + -I"$(realpath src/compat/$(PGVER))" \ -Werror=implicit-function-declaration SHLIB_LINK += $(libpq) $(filter -lintl, $(LIBS))