Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Merge pull request #3875 from matrix-org/erikj/extra_timeouts
Browse files Browse the repository at this point in the history
Add an awful secondary timeout to fix wedged requests
  • Loading branch information
erikjohnston committed Sep 14, 2018
2 parents 024be6c + 335b23a commit c30cfff
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 0 deletions.
1 change: 1 addition & 0 deletions changelog.d/3875.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Mitigate outbound federation randomly becoming wedged
11 changes: 11 additions & 0 deletions synapse/http/matrixfederationclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
)
from synapse.http.endpoint import matrix_federation_endpoint
from synapse.util import logcontext
from synapse.util.async_helpers import timeout_no_seriously
from synapse.util.logcontext import make_deferred_yieldable
from synapse.util.metrics import Measure

Expand Down Expand Up @@ -228,6 +229,16 @@ def _request(self, destination, method, path,
)
request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor())

# Sometimes the timeout above doesn't work, so let's hack yet
# another layer of timeouts in, in the vain hope that at some
# point the world made sense and this really really really
# should work.
request_deferred = timeout_no_seriously(
request_deferred,
timeout=_sec_timeout * 2,
reactor=self.hs.get_reactor(),
)

with Measure(self.clock, "outbound_request"):
response = yield make_deferred_yieldable(
request_deferred,
Expand Down
51 changes: 51 additions & 0 deletions synapse/util/async_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,54 @@ def _cancelled_to_timed_out_error(value, timeout):
value.trap(CancelledError)
raise DeferredTimeoutError(timeout, "Deferred")
return value


def timeout_no_seriously(deferred, timeout, reactor):
    """Wrap `deferred` in a new deferred that always fires once `timeout` passes.

    The built-in twisted deferred addTimeout (and the method above)
    completely fail to time things out under some unknown circumstances.
    Let's try a different way of timing things out and maybe that will make
    things work?!

    Rather than relying on `deferred` itself firing when it is cancelled, a
    separate deferred is returned and failed directly by the timer, so the
    caller is released even if cancelling `deferred` has no effect.

    TODO: Kill this with fire.

    Args:
        deferred: the deferred to wrap. It is cancelled when the timeout
            fires, and extra callbacks are added to it.
        timeout: delay passed to `reactor.callLater` before timing out.
        reactor: object providing `callLater`, used to schedule the timeout.

    Returns:
        A new deferred which receives the result (or failure) of `deferred`,
        or a `DeferredTimeoutError` failure if the timeout fires first.
    """

    new_d = defer.Deferred()

    # Held in a one-element list so the nested functions can mutate it.
    timed_out = [False]

    def time_it_out():
        timed_out[0] = True

        try:
            deferred.cancel()
        finally:
            # Always fail the wrapper, even if the canceller raised or the
            # cancel did not cause `deferred` to fire; otherwise the caller
            # would wait forever. Any exception from cancel() still
            # propagates to the reactor after this.
            if not new_d.called:
                new_d.errback(DeferredTimeoutError(timeout, "Deferred"))

    delayed_call = reactor.callLater(timeout, time_it_out)

    def convert_cancelled(value):
        # Only rewrite the result if *we* cancelled the deferred; a
        # cancellation from elsewhere is passed through untouched.
        if timed_out[0]:
            return _cancelled_to_timed_out_error(value, timeout)
        return value

    deferred.addBoth(convert_cancelled)

    def cancel_timeout(result):
        # stop the pending call to cancel the deferred if it's been fired
        if delayed_call.active():
            delayed_call.cancel()
        return result

    deferred.addBoth(cancel_timeout)

    # Forward the outcome to the wrapper, unless the timeout already fired it.
    def success_cb(val):
        if not new_d.called:
            new_d.callback(val)

    def failure_cb(val):
        if not new_d.called:
            new_d.errback(val)

    deferred.addCallbacks(success_cb, failure_cb)

    return new_d

0 comments on commit c30cfff

Please sign in to comment.