Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Reduce the memory usage of previewing media files #9421

Merged
merged 5 commits into from
Feb 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/9421.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reduce the amount of memory used when generating the URL preview of a file that is larger than the `max_spider_size`.
26 changes: 12 additions & 14 deletions synapse/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
)
from twisted.web.http import PotentialDataLoss
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent, IBodyProducer, IResponse
from twisted.web.iweb import UNKNOWN_LENGTH, IAgent, IBodyProducer, IResponse

from synapse.api.errors import Codes, HttpResponseException, SynapseError
from synapse.http import QuieterFileBodyProducer, RequestTimedOutError, redact_uri
Expand Down Expand Up @@ -407,6 +407,9 @@ async def request(
agent=self.agent,
data=body_producer,
headers=headers,
# Avoid buffering the body in treq since we do not reuse
# response bodies.
unbuffered=True,
**self._extra_treq_args,
) # type: defer.Deferred

Expand Down Expand Up @@ -699,18 +702,6 @@ async def get_file(

resp_headers = dict(response.headers.getAllRawHeaders())

if (
b"Content-Length" in resp_headers
and max_size
and int(resp_headers[b"Content-Length"][0]) > max_size
):
logger.warning("Requested URL is too large > %r bytes" % (max_size,))
raise SynapseError(
502,
"Requested file is too large > %r bytes" % (max_size,),
Codes.TOO_LARGE,
)

if response.code > 299:
logger.warning("Got %d when downloading %s" % (response.code, url))
raise SynapseError(502, "Got error %d" % (response.code,), Codes.UNKNOWN)
Expand Down Expand Up @@ -777,7 +768,9 @@ def dataReceived(self, data: bytes) -> None:
# in the meantime.
if self.max_size is not None and self.length >= self.max_size:
self.deferred.errback(BodyExceededMaxSize())
self.transport.loseConnection()
# Close the connection (forcefully) since all the data will get
# discarded anyway.
self.transport.abortConnection()

def connectionLost(self, reason: Failure) -> None:
# If the maximum size was already exceeded, there's nothing to do.
Expand Down Expand Up @@ -811,6 +804,11 @@ def read_body_with_max_size(
Returns:
A Deferred which resolves to the length of the read body.
"""
# If the Content-Length header gives a size larger than the maximum allowed
# size, do not bother downloading the body.
if max_size is not None and response.length != UNKNOWN_LENGTH:
if response.length > max_size:
return defer.fail(BodyExceededMaxSize())

d = defer.Deferred()
response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size))
Expand Down
9 changes: 5 additions & 4 deletions tests/http/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from twisted.python.failure import Failure
from twisted.web.client import ResponseDone
from twisted.web.iweb import UNKNOWN_LENGTH

from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size

Expand All @@ -27,12 +28,12 @@
class ReadBodyWithMaxSizeTests(TestCase):
def setUp(self):
"""Start reading the body, returns the response, result and proto"""
self.response = Mock()
response = Mock(length=UNKNOWN_LENGTH)
self.result = BytesIO()
self.deferred = read_body_with_max_size(self.response, self.result, 6)
self.deferred = read_body_with_max_size(response, self.result, 6)

# Fish the protocol out of the response.
self.protocol = self.response.deliverBody.call_args[0][0]
self.protocol = response.deliverBody.call_args[0][0]
self.protocol.transport = Mock()

def _cleanup_error(self):
Expand Down Expand Up @@ -88,7 +89,7 @@ def test_additional_data(self):
self.protocol.dataReceived(b"1234567890")
self.assertIsInstance(self.deferred.result, Failure)
self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize)
self.protocol.transport.loseConnection.assert_called_once()
self.protocol.transport.abortConnection.assert_called_once()

# More data might have come in.
self.protocol.dataReceived(b"1234567890")
Expand Down