Skip to content

Commit

Permalink
Limit the size of .well-known lookups.
Browse files Browse the repository at this point in the history
This is a port of matrix-org/synapse#8950 and matrix-org/synapse#9108.

This is not a straight port; the code is simplified a bit:

* Type hints are removed since sydent supports Python 2.7.
* The size of the response is not returned, only the bytes.
  • Loading branch information
clokep committed Mar 17, 2021
1 parent 4d96e71 commit 0523511
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 2 deletions.
101 changes: 101 additions & 0 deletions sydent/http/httpcommon.py
Expand Up @@ -15,8 +15,14 @@
# limitations under the License.

import logging
from io import BytesIO

import twisted.internet.ssl
from twisted.internet import defer, protocol
from twisted.internet.protocol import connectionDone
from twisted.web._newclient import ResponseDone
from twisted.web.http import PotentialDataLoss
from twisted.web.iweb import UNKNOWN_LENGTH

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -62,3 +68,98 @@ def makeTrustRoot(self):
return twisted.internet._sslverify.OpenSSLCertificateAuthorities([caCert.original])
else:
return twisted.internet.ssl.OpenSSLDefaultPaths()



class BodyExceededMaxSize(Exception):
    """Raised when an HTTP body is larger than the maximum size allowed."""


class _DiscardBodyWithMaxSizeProtocol(protocol.Protocol):
    """A protocol which immediately errors upon receiving data.

    No part of the body is kept: the first time data arrives, or the
    connection is lost, the deferred is failed with BodyExceededMaxSize and
    the connection is aborted.

    Args:
        deferred: The Deferred to errback with BodyExceededMaxSize.
    """

    def __init__(self, deferred):
        self.deferred = deferred

    def _maybe_fail(self):
        """
        Report a max size exceeded error and disconnect the first time this
        is called. Subsequent calls are no-ops.
        """
        if not self.deferred.called:
            self.deferred.errback(BodyExceededMaxSize())
            # Close the connection (forcefully) since all the data will get
            # discarded anyway.
            self.transport.abortConnection()

    # NOTE: no return annotations on these callbacks -- `-> None` is a syntax
    # error on Python 2.7, which this code base still has to support.
    def dataReceived(self, data):
        """Fail the deferred; the received data is deliberately ignored."""
        self._maybe_fail()

    def connectionLost(self, reason):
        """Fail the deferred if no data arrived before the connection closed."""
        self._maybe_fail()


class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
    """A protocol which reads body to a stream, erroring if the body reaches a maximum size.

    Args:
        deferred: The Deferred which is called back with the body (as bytes)
            once the response finishes, or errbacked with
            BodyExceededMaxSize / the connection failure otherwise.
        max_size: The number of received bytes at which reading is aborted,
            or None to read without a limit.
    """

    def __init__(self, deferred, max_size):
        self.stream = BytesIO()
        self.deferred = deferred
        self.length = 0
        self.max_size = max_size

    # NOTE: no return annotations on these callbacks -- `-> None` is a syntax
    # error on Python 2.7, which this code base still has to support.
    def dataReceived(self, data):
        """Buffer a chunk of the body, failing once the size limit is hit."""
        # If the deferred was called, bail early.
        if self.deferred.called:
            return

        self.length += len(data)
        # The first time the maximum size is reached, error and cancel the
        # connection. dataReceived might be called again if data was received
        # in the meantime.
        if self.max_size is not None and self.length >= self.max_size:
            self.deferred.errback(BodyExceededMaxSize())
            # Close the connection (forcefully) since all the data will get
            # discarded anyway.
            self.transport.abortConnection()
            # Do not buffer the chunk that tripped the limit: nothing will
            # ever read it.
            return

        self.stream.write(data)

    def connectionLost(self, reason=connectionDone):
        """Resolve the deferred with the buffered body, or with the failure."""
        # If the maximum size was already exceeded, there's nothing to do.
        if self.deferred.called:
            return

        # PotentialDataLoss is treated like a clean finish and the bytes
        # received so far are returned.
        # stolen from https://github.com/twisted/treq/pull/49/files
        # http://twistedmatrix.com/trac/ticket/4840
        if reason.check(ResponseDone, PotentialDataLoss):
            self.deferred.callback(self.stream.getvalue())
        else:
            self.deferred.errback(reason)


def read_body_with_max_size(response, max_size):
    """
    Read the body of a HTTP response, optionally enforcing a maximum size.

    If the maximum size is reached, the returned Deferred will resolve to a
    Failure with a BodyExceededMaxSize exception.

    Args:
        response: The HTTP response to read from.
        max_size: The maximum body size to allow, or None for no limit.

    Returns:
        A Deferred which resolves to the read body.
    """
    result = defer.Deferred()

    # A Content-Length header which already announces a body larger than the
    # limit means there is no point in downloading anything.
    if max_size is None or response.length == UNKNOWN_LENGTH:
        oversized = False
    else:
        oversized = response.length > max_size

    if oversized:
        body_protocol = _DiscardBodyWithMaxSizeProtocol(result)
    else:
        body_protocol = _ReadBodyWithMaxSizeProtocol(result, max_size)

    response.deliverBody(body_protocol)
    return result
9 changes: 7 additions & 2 deletions sydent/http/matrixfederationagent.py
Expand Up @@ -26,11 +26,12 @@
from twisted.internet import defer
from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
from twisted.internet.interfaces import IStreamClientEndpoint
from twisted.web.client import URI, Agent, HTTPConnectionPool, RedirectAgent, readBody
from twisted.web.client import URI, Agent, HTTPConnectionPool, RedirectAgent
from twisted.web.http import stringToDatetime
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent

from sydent.http.httpcommon import BodyExceededMaxSize, read_body_with_max_size
from sydent.http.srvresolver import SrvResolver, pick_server_from_list
from sydent.util.ttlcache import TTLCache

Expand All @@ -46,6 +47,9 @@
# cap for .well-known cache period
WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600

# The maximum size (in bytes) to allow a well-known file to be.
WELL_KNOWN_MAX_SIZE = 50 * 1024 # 50 KiB

logger = logging.getLogger(__name__)
well_known_cache = TTLCache('well-known')

Expand Down Expand Up @@ -316,7 +320,7 @@ def _do_get_well_known(self, server_name):
logger.info("Fetching %s", uri_str)
try:
response = yield self._well_known_agent.request(b"GET", uri)
body = yield readBody(response)
body = yield read_body_with_max_size(response, WELL_KNOWN_MAX_SIZE)
if response.code != 200:
raise Exception("Non-200 response %s" % (response.code, ))

Expand All @@ -334,6 +338,7 @@ def _do_get_well_known(self, server_name):
cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD
cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER)
defer.returnValue((None, cache_period))
return

result = parsed_body["m.server"].encode("ascii")

Expand Down

0 comments on commit 0523511

Please sign in to comment.