This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Properly update retry_last_ts when hitting the maximum retry interval #16156

Merged (4 commits, Aug 23, 2023)
1 change: 1 addition & 0 deletions changelog.d/16156.bugfix
@@ -0,0 +1 @@
Fix a bug introduced in 1.87 where Synapse would send an excessive number of federation requests to servers which have been offline for a long time. Contributed by Nico.
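To make the bug concrete, here is a standalone sketch of the failure mode (editorial illustration only, not part of this PR and not Synapse's actual retry code; the function names, the doubling factor, and the cap value are all invented for the example). When the stored row is only refreshed on a strict increase of the backoff interval, a destination pinned at the cap stops having its `retry_last_ts` updated, and the backoff check eventually always allows another request:

MAX_RETRY_INTERVAL_MS = 24 * 60 * 60 * 1000  # illustrative cap, standing in for destination_max_retry_interval_ms


def record_failure_pre_fix(
    retry_last_ts: int, retry_interval: int, now_ms: int
) -> tuple[int, int]:
    """Pre-fix upsert semantics in miniature: only persist when the backoff
    interval strictly grows."""
    new_interval = min(max(retry_interval * 2, 1000), MAX_RETRY_INTERVAL_MS)
    if new_interval > retry_interval:
        return now_ms, new_interval  # row updated
    return retry_last_ts, retry_interval  # row untouched, so the timestamp goes stale


def blocked(retry_last_ts: int, retry_interval: int, now_ms: int) -> bool:
    """Is the destination still inside its backoff window?"""
    return now_ms - retry_last_ts < retry_interval


# A destination already at the cap fails again much later. The failure does not
# refresh retry_last_ts, so the backoff window never moves forward and the
# destination soon stops being blocked at all, which is the request flood
# described in the changelog entry.
ts, interval = 1_000, MAX_RETRY_INTERVAL_MS
now = 10 * MAX_RETRY_INTERVAL_MS
ts, interval = record_failure_pre_fix(ts, interval, now_ms=now)
assert ts == 1_000  # not refreshed
assert not blocked(ts, interval, now_ms=now)  # further requests are allowed

The real backoff check lives in `synapse.util.retryutils`; the sketch only shows the interaction between the capped interval and the skipped update.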
4 changes: 3 additions & 1 deletion synapse/storage/databases/main/transactions.py
@@ -242,6 +242,8 @@ def _set_destination_retry_timings_txn(
    ) -> None:
        # Upsert retry time interval if retry_interval is zero (i.e. we're
        # resetting it) or greater than the existing retry interval.
+       # We also upsert when the new retry interval is the same as the existing one,
+       # since that will be the case when `destination_max_retry_interval` is reached.
        #
        # WARNING: This is executed in autocommit, so we shouldn't add any more
        # SQL calls in here (without being very careful).
@@ -257,7 +259,7 @@ def _set_destination_retry_timings_txn(
            WHERE
                EXCLUDED.retry_interval = 0
                OR destinations.retry_interval IS NULL
-               OR destinations.retry_interval < EXCLUDED.retry_interval
+               OR destinations.retry_interval <= EXCLUDED.retry_interval
            """

        txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
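The one-character change above is easiest to read as the plain-Python equivalent of the WHERE clause that guards the upsert (again an illustrative sketch with invented argument names, not a code path Synapse executes):

from typing import Optional


def should_update_row(existing_interval: Optional[int], new_interval: int) -> bool:
    """Mirror of the WHERE clause guarding the upsert.

    Before this PR the last comparison used `<`, so a destination pinned at
    `destination_max_retry_interval` (new == existing) never had its row, and
    therefore its retry_last_ts, refreshed.
    """
    return (
        new_interval == 0  # EXCLUDED.retry_interval = 0: resetting after a success
        or existing_interval is None  # no retry timings recorded yet
        or existing_interval <= new_interval  # the fix: also update when equal
    )

With `<=`, a failure at the cap still rewrites the row, so `retry_last_ts` keeps tracking the most recent failure and the destination stays backed off between attempts.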
51 changes: 51 additions & 0 deletions tests/util/test_retryutils.py
@@ -108,3 +108,54 @@ def test_limiter(self) -> None:

        new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
        self.assertIsNone(new_timings)

    def test_max_retry_interval(self) -> None:
        """Test that the `destination_max_retry_interval` setting works as expected."""
        store = self.hs.get_datastores().main

        destination_max_retry_interval_ms = (
            self.hs.config.federation.destination_max_retry_interval_ms
        )

        self.get_success(get_retry_limiter("test_dest", self.clock, store))
        self.pump(1)

        failure_ts = self.clock.time_msec()

        # Simulate reaching destination_max_retry_interval
        self.get_success(
            store.set_destination_retry_timings(
                "test_dest",
                failure_ts=failure_ts,
                retry_last_ts=failure_ts,
                retry_interval=destination_max_retry_interval_ms,
            )
        )

        # Check it fails
        self.get_failure(
            get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination
        )

        # Get past retry_interval so we can try again, then raise an error inside
        # the limiter to continue the backoff
        self.reactor.advance(destination_max_retry_interval_ms / 1000 + 1)
        limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
        self.pump(1)
        try:
            with limiter:
                self.pump(1)
                raise AssertionError("argh")
        except AssertionError:
            pass

        self.pump()

        # retry_interval does not increase and stays at destination_max_retry_interval_ms
        new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
        assert new_timings is not None
        self.assertEqual(new_timings.retry_interval, destination_max_retry_interval_ms)

        # Check it fails
        self.get_failure(
            get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination
        )
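As a sanity check on what the new test asserts, here is the post-fix behaviour in isolation (the same kind of standalone, editorial sketch as before, self-contained and with invented names and values; only the comparison differs from the pre-fix version):

MAX_RETRY_INTERVAL_MS = 24 * 60 * 60 * 1000  # illustrative cap


def record_failure_post_fix(
    retry_last_ts: int, retry_interval: int, now_ms: int
) -> tuple[int, int]:
    """Post-fix upsert semantics in miniature: also persist when the interval is
    unchanged (i.e. already at the cap), so retry_last_ts always reflects the
    most recent failure."""
    new_interval = min(max(retry_interval * 2, 1000), MAX_RETRY_INTERVAL_MS)
    if new_interval >= retry_interval:
        return now_ms, new_interval
    return retry_last_ts, retry_interval


# A destination already at the cap fails again at t=1_000_000 ms: the timestamp
# is refreshed even though the interval cannot grow any further.
ts, interval = record_failure_post_fix(
    retry_last_ts=0, retry_interval=MAX_RETRY_INTERVAL_MS, now_ms=1_000_000
)
assert ts == 1_000_000
assert interval == MAX_RETRY_INTERVAL_MS  # stays capped, as the test asserts

This mirrors the test's final assertions: the interval stays pinned at the configured maximum while the timestamp moves forward, so `NotRetryingDestination` is raised again straight after the failed attempt.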