
Avoid deep recursion in appservice recovery (#5885)
Hopefully, this will fix a stack overflow when recovering an appservice.

The recursion here leads to a huge chain of deferred callbacks, which then
overflows the stack when the chain completes. `inlineCallbacks` does a better
job of this if we use iteration instead.

Clean up the code a bit too, while we're there.
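
For context, here is a minimal sketch, not part of this commit, contrasting the two shapes under Twisted's `inlineCallbacks`; the `send_next` helper and the item queue are invented for illustration:

```python
from twisted.internet import defer


def send_next(item):
    # Stand-in for an asynchronous send such as txn.send(); illustrative only.
    return defer.succeed(item)


@defer.inlineCallbacks
def drain_recursively(items):
    # Recursive shape: each level nests another inlineCallbacks Deferred and
    # generator inside the previous one, so a long queue builds a deep chain
    # of callbacks that can exhaust the stack.
    if not items:
        return
    yield send_next(items.pop())
    yield drain_recursively(items)


@defer.inlineCallbacks
def drain_iteratively(items):
    # Iterative shape (what the reworked retry() uses): the same generator is
    # resumed once per item, so no nested chain accumulates.
    while items:
        yield send_next(items.pop())
```

With a queue of a few thousand already-fired items, the recursive form should blow through Python's recursion limit, while the iterative form completes with constant stack depth.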
richvdh committed Aug 20, 2019
1 parent c886f97 commit baa3f4a
Showing 2 changed files with 26 additions and 18 deletions.
1 change: 1 addition & 0 deletions changelog.d/5885.bugfix
@@ -0,0 +1 @@
+Fix stack overflow when recovering an appservice which had an outage.
43 changes: 25 additions & 18 deletions synapse/appservice/scheduler.py
@@ -224,7 +224,9 @@ def _retry():
                 "as-recoverer-%s" % (self.service.id,), self.retry
             )
 
-        self.clock.call_later((2 ** self.backoff_counter), _retry)
+        delay = 2 ** self.backoff_counter
+        logger.info("Scheduling retries on %s in %fs", self.service.id, delay)
+        self.clock.call_later(delay, _retry)
 
     def _backoff(self):
         # cap the backoff to be around 8.5min => (2^9) = 512 secs
@@ -234,25 +236,30 @@ def _backoff(self):
 
     @defer.inlineCallbacks
     def retry(self):
+        logger.info("Starting retries on %s", self.service.id)
         try:
-            txn = yield self.store.get_oldest_unsent_txn(self.service)
-            if txn:
+            while True:
+                txn = yield self.store.get_oldest_unsent_txn(self.service)
+                if not txn:
+                    # nothing left: we're done!
+                    self.callback(self)
+                    return
+
                 logger.info(
                     "Retrying transaction %s for AS ID %s", txn.id, txn.service.id
                 )
                 sent = yield txn.send(self.as_api)
-                if sent:
-                    yield txn.complete(self.store)
-                    # reset the backoff counter and retry immediately
-                    self.backoff_counter = 1
-                    yield self.retry()
-                else:
-                    self._backoff()
-            else:
-                self._set_service_recovered()
-        except Exception as e:
-            logger.exception(e)
-            self._backoff()
-
-    def _set_service_recovered(self):
-        self.callback(self)
+                if not sent:
+                    break
+
+                yield txn.complete(self.store)
+
+                # reset the backoff counter and then process the next transaction
+                self.backoff_counter = 1
+
+        except Exception:
+            logger.exception("Unexpected error running retries")
+
+        # we didn't manage to send all of the transactions before we got an error of
+        # some flavour: reschedule the next retry.
+        self._backoff()
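
The first hunk computes the retry delay as `2 ** self.backoff_counter`, and `_backoff()` (per its comment) caps the counter so the delay tops out around 512 seconds. A quick arithmetic sketch, not Synapse code, with the cap of 9 taken from that comment:

```python
# Enumerate the delays 2**counter for counter = 1..9, as implied by
# "delay = 2 ** self.backoff_counter" and the cap noted in _backoff().
delays = [2 ** counter for counter in range(1, 10)]
print(delays)             # [2, 4, 8, 16, 32, 64, 128, 256, 512]
print(delays[-1] / 60.0)  # ~8.5 minutes at the cap
```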
