From 9c1f69fc7d22c8f8f4967da9af68538c2be9ea47 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 2 Sep 2020 15:38:19 -0400 Subject: [PATCH 01/10] Add test gif. --- tests/rest/media/v1/test_media_storage.py | 36 ++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index f4f3e5677791..68ddbc49a8f6 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -120,6 +120,11 @@ class _TestImage: extension = attr.ib(type=bytes) expected_cropped = attr.ib(type=Optional[bytes]) expected_scaled = attr.ib(type=Optional[bytes]) + expected_found = attr.ib(default=True, type=bool) + + +with open("test.gif", "rb") as f: + data = f.read() @parameterized_class( @@ -161,6 +166,8 @@ class _TestImage: None, ), ), + # an empty file + (_TestImage(b"", b"image/gif", b".gif", None, None, False,),), ], ) class MediaRepoTests(unittest.HomeserverTestCase): @@ -303,12 +310,16 @@ def test_disposition_none(self): self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None) def test_thumbnail_crop(self): - self._test_thumbnail("crop", self.test_image.expected_cropped) + self._test_thumbnail( + "crop", self.test_image.expected_cropped, self.test_image.expected_found + ) def test_thumbnail_scale(self): - self._test_thumbnail("scale", self.test_image.expected_scaled) + self._test_thumbnail( + "scale", self.test_image.expected_scaled, self.test_image.expected_found + ) - def _test_thumbnail(self, method, expected_body): + def _test_thumbnail(self, method, expected_body, expected_found): params = "?width=32&height=32&method=" + method request, channel = self.make_request( "GET", self.media_id + params, shorthand=False @@ -325,11 +336,16 @@ def _test_thumbnail(self, method, expected_body): ) self.pump() - self.assertEqual(channel.code, 200) - if expected_body is not None: - self.assertEqual( - channel.result["body"], expected_body, channel.result["body"] - ) + if expected_found: + self.assertEqual(channel.code, 200) + if expected_body is not None: + self.assertEqual( + channel.result["body"], expected_body, channel.result["body"] + ) + else: + # ensure that the result is at least some valid image + Image.open(BytesIO(channel.result["body"])) else: - # ensure that the result is at least some valid image - Image.open(BytesIO(channel.result["body"])) + # A 404 with no body. + self.assertEqual(channel.code, 404) + self.assertEqual(channel.result["body"], "") From d7d34548b0235ea1cb87a62eceaaa749a8921e32 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 2 Sep 2020 15:59:38 -0400 Subject: [PATCH 02/10] Do not 500 when generating thumbnails of empty files. --- synapse/rest/media/v1/media_repository.py | 66 ++++++++++++++++++++--- synapse/rest/media/v1/thumbnailer.py | 14 ++++- tests/rest/media/v1/test_media_storage.py | 1 - 3 files changed, 70 insertions(+), 11 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 6fb4039e9877..c453648f4801 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -53,7 +53,7 @@ from .preview_url_resource import PreviewUrlResource from .storage_provider import StorageProviderWrapper from .thumbnail_resource import ThumbnailResource -from .thumbnailer import Thumbnailer +from .thumbnailer import Thumbnailer, ThumbnailError from .upload_resource import UploadResource logger = logging.getLogger(__name__) @@ -460,13 +460,29 @@ def _generate_thumbnail(self, thumbnailer, t_width, t_height, t_method, t_type): return t_byte_source async def generate_local_exact_thumbnail( - self, media_id, t_width, t_height, t_method, t_type, url_cache - ): + self, + media_id: str, + t_width: int, + t_height: int, + t_method: str, + t_type: str, + url_cache: str, + ) -> Optional[str]: input_path = await self.media_storage.ensure_media_is_in_local_cache( FileInfo(None, media_id, url_cache=url_cache) ) - thumbnailer = Thumbnailer(input_path) + try: + thumbnailer = Thumbnailer(input_path) + except ThumbnailError: + logger.warning( + "Unable to generate a thumbnail for local media %s using a method of %s and type of %s", + media_id, + t_method, + t_type, + ) + return None + t_byte_source = await defer_to_thread( self.hs.get_reactor(), self._generate_thumbnail, @@ -506,14 +522,35 @@ async def generate_local_exact_thumbnail( return output_path + # Source not found, cannot return a thumbnail. + return None + async def generate_remote_exact_thumbnail( - self, server_name, file_id, media_id, t_width, t_height, t_method, t_type - ): + self, + server_name: str, + file_id: str, + media_id: str, + t_width: int, + t_height: int, + t_method: str, + t_type: str, + ) -> Optional[str]: input_path = await self.media_storage.ensure_media_is_in_local_cache( FileInfo(server_name, file_id, url_cache=False) ) - thumbnailer = Thumbnailer(input_path) + try: + thumbnailer = Thumbnailer(input_path) + except ThumbnailError: + logger.warning( + "Unable to generate a thumbnail for remote media %s from %s using a method of %s and type of %s", + media_id, + server_name, + t_method, + t_type, + ) + return None + t_byte_source = await defer_to_thread( self.hs.get_reactor(), self._generate_thumbnail, @@ -559,6 +596,9 @@ async def generate_remote_exact_thumbnail( return output_path + # Source not found, cannot return a thumbnail. + return None + async def _generate_thumbnails( self, server_name: Optional[str], @@ -590,7 +630,17 @@ async def _generate_thumbnails( FileInfo(server_name, file_id, url_cache=url_cache) ) - thumbnailer = Thumbnailer(input_path) + try: + thumbnailer = Thumbnailer(input_path) + except ThumbnailError: + logger.warning( + "Unable to generate thumbnails for remote media %s from %s using a method of %s and type of %s", + media_id, + server_name, + media_type, + ) + return None + m_width = thumbnailer.width m_height = thumbnailer.height diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 7126997134d2..e41df767fdb2 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -15,7 +15,7 @@ import logging from io import BytesIO -from PIL import Image as Image +from PIL import Image logger = logging.getLogger(__name__) @@ -31,12 +31,22 @@ } +class ThumbnailError(Exception): + """An error occurred generating a thumbnail.""" + + class Thumbnailer(object): FORMATS = {"image/jpeg": "JPEG", "image/png": "PNG"} def __init__(self, input_path): - self.image = Image.open(input_path) + try: + self.image = Image.open(input_path) + except OSError: + # If an error occurs opening the image, a thumbnail won't be able to + # be generated. + raise ThumbnailError() + self.width, self.height = self.image.size self.transpose_method = None try: diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 68ddbc49a8f6..729537e050de 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -348,4 +348,3 @@ def _test_thumbnail(self, method, expected_body, expected_found): else: # A 404 with no body. self.assertEqual(channel.code, 404) - self.assertEqual(channel.result["body"], "") From 02715b6cbd3b6c6ec93b53b264d8e644a8b0154f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 2 Sep 2020 16:12:44 -0400 Subject: [PATCH 03/10] Add a changelog. --- changelog.d/8236.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/8236.bugfix diff --git a/changelog.d/8236.bugfix b/changelog.d/8236.bugfix new file mode 100644 index 000000000000..6f048710159f --- /dev/null +++ b/changelog.d/8236.bugfix @@ -0,0 +1 @@ +Fix a longstanding bug where files that could not be thumbnailed would result in an Internal Server Error. From 1223f3cde6669c8dcb997866759a45796f64891f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 2 Sep 2020 16:45:25 -0400 Subject: [PATCH 04/10] Don't return JSON from non-JSON APIs. --- synapse/rest/media/v1/_base.py | 32 ++++++++++++++++++----- tests/rest/media/v1/test_media_storage.py | 1 + 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 20ddb9550b29..2a56c8c1ea4c 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -21,6 +21,7 @@ from twisted.internet.interfaces import IConsumer from twisted.protocols.basic import FileSender +from twisted.web.server import Request from synapse.api.errors import Codes, SynapseError, cs_error from synapse.http.server import finish_request, respond_with_json @@ -69,13 +70,30 @@ def parse_media_id(request): ) -def respond_404(request): - respond_with_json( - request, - 404, - cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), - send_cors=True, - ) +def respond_404(request: Request): + respond_with_code(request, 404) + + +def respond_with_code(request: Request, code: int): + """ + Sends an empty response with a status code. + + Args: + request: The http request to respond to. + code: The HTTP response code. + """ + # could alternatively use request.notifyFinish() and flip a flag when + # the Deferred fires, but since the flag is RIGHT THERE it seems like + # a waste. + if request._disconnected: + logger.warning( + "Not sending response to request %s, already disconnected.", request + ) + return + + request.setResponseCode(code) + request.setHeader(b"Content-Length", b"0") + finish_request(request) async def respond_with_file( diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 729537e050de..9b4397d65a52 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -348,3 +348,4 @@ def _test_thumbnail(self, method, expected_body, expected_found): else: # A 404 with no body. self.assertEqual(channel.code, 404) + self.assertNotIn("body", channel.result) From 32bd5c3351a29080adf0ade3724e15da7ea7038f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 2 Sep 2020 16:45:41 -0400 Subject: [PATCH 05/10] Return a 400 error if the thumbnail cannot be generated. --- synapse/rest/media/v1/media_repository.py | 4 ++-- synapse/rest/media/v1/thumbnail_resource.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index c453648f4801..145b32f8d5c6 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -522,7 +522,7 @@ async def generate_local_exact_thumbnail( return output_path - # Source not found, cannot return a thumbnail. + # Could not generate thumbnail. return None async def generate_remote_exact_thumbnail( @@ -596,7 +596,7 @@ async def generate_remote_exact_thumbnail( return output_path - # Source not found, cannot return a thumbnail. + # Could not generate thumbnail. return None async def _generate_thumbnails( diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index a83535b97b5e..529b323e3a41 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -23,6 +23,7 @@ FileInfo, parse_media_id, respond_404, + respond_with_code, respond_with_file, respond_with_responder, ) @@ -173,7 +174,7 @@ async def _select_or_generate_local_thumbnail( await respond_with_file(request, desired_type, file_path) else: logger.warning("Failed to generate thumbnail") - respond_404(request) + respond_with_code(400) async def _select_or_generate_remote_thumbnail( self, @@ -235,7 +236,7 @@ async def _select_or_generate_remote_thumbnail( await respond_with_file(request, desired_type, file_path) else: logger.warning("Failed to generate thumbnail") - respond_404(request) + respond_with_code(400) async def _respond_remote_thumbnail( self, request, server_name, media_id, width, height, method, m_type From 04c019dc4976927210d144215c85110c9acaf6ae Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 3 Sep 2020 10:30:36 -0400 Subject: [PATCH 06/10] Revert some changes. --- synapse/rest/media/v1/_base.py | 32 +++++---------------- synapse/rest/media/v1/thumbnail_resource.py | 5 ++-- tests/rest/media/v1/test_media_storage.py | 11 +++++-- 3 files changed, 18 insertions(+), 30 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 2a56c8c1ea4c..20ddb9550b29 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -21,7 +21,6 @@ from twisted.internet.interfaces import IConsumer from twisted.protocols.basic import FileSender -from twisted.web.server import Request from synapse.api.errors import Codes, SynapseError, cs_error from synapse.http.server import finish_request, respond_with_json @@ -70,30 +69,13 @@ def parse_media_id(request): ) -def respond_404(request: Request): - respond_with_code(request, 404) - - -def respond_with_code(request: Request, code: int): - """ - Sends an empty response with a status code. - - Args: - request: The http request to respond to. - code: The HTTP response code. - """ - # could alternatively use request.notifyFinish() and flip a flag when - # the Deferred fires, but since the flag is RIGHT THERE it seems like - # a waste. - if request._disconnected: - logger.warning( - "Not sending response to request %s, already disconnected.", request - ) - return - - request.setResponseCode(code) - request.setHeader(b"Content-Length", b"0") - finish_request(request) +def respond_404(request): + respond_with_json( + request, + 404, + cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + send_cors=True, + ) async def respond_with_file( diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 529b323e3a41..a83535b97b5e 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -23,7 +23,6 @@ FileInfo, parse_media_id, respond_404, - respond_with_code, respond_with_file, respond_with_responder, ) @@ -174,7 +173,7 @@ async def _select_or_generate_local_thumbnail( await respond_with_file(request, desired_type, file_path) else: logger.warning("Failed to generate thumbnail") - respond_with_code(400) + respond_404(request) async def _select_or_generate_remote_thumbnail( self, @@ -236,7 +235,7 @@ async def _select_or_generate_remote_thumbnail( await respond_with_file(request, desired_type, file_path) else: logger.warning("Failed to generate thumbnail") - respond_with_code(400) + respond_404(request) async def _respond_remote_thumbnail( self, request, server_name, media_id, width, height, method, m_type diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 9b4397d65a52..0c45d5acf3c6 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -130,7 +130,7 @@ class _TestImage: @parameterized_class( ("test_image",), [ - # smol png + # smoll png ( _TestImage( unhexlify( @@ -348,4 +348,11 @@ def _test_thumbnail(self, method, expected_body, expected_found): else: # A 404 with no body. self.assertEqual(channel.code, 404) - self.assertNotIn("body", channel.result) + self.assertEqual( + channel.json_body, + { + "errcode": "M_NOT_FOUND", + "error": "Not found [b'example.com', b'12345?width=32&height=32&method=%s']" + % method, + }, + ) From cf8facb6f393a7da47be0372e15c11eff8118337 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 3 Sep 2020 10:49:19 -0400 Subject: [PATCH 07/10] Raise a 400 error from Synapse. --- synapse/rest/media/v1/thumbnail_resource.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index a83535b97b5e..30421b663a72 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -16,6 +16,7 @@ import logging +from synapse.api.errors import SynapseError from synapse.http.server import DirectServeJsonResource, set_cors_headers from synapse.http.servlet import parse_integer, parse_string @@ -173,7 +174,7 @@ async def _select_or_generate_local_thumbnail( await respond_with_file(request, desired_type, file_path) else: logger.warning("Failed to generate thumbnail") - respond_404(request) + raise SynapseError(400, "Failed to generate thumbnail.") async def _select_or_generate_remote_thumbnail( self, @@ -235,7 +236,7 @@ async def _select_or_generate_remote_thumbnail( await respond_with_file(request, desired_type, file_path) else: logger.warning("Failed to generate thumbnail") - respond_404(request) + raise SynapseError(400, "Failed to generate thumbnail.") async def _respond_remote_thumbnail( self, request, server_name, media_id, width, height, method, m_type From f650c01c1f7b90d7969695e93e04f3f377639ea8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 3 Sep 2020 11:53:57 -0400 Subject: [PATCH 08/10] Remove debug code. --- tests/rest/media/v1/test_media_storage.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 0c45d5acf3c6..159adb0e3509 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -123,10 +123,6 @@ class _TestImage: expected_found = attr.ib(default=True, type=bool) -with open("test.gif", "rb") as f: - data = f.read() - - @parameterized_class( ("test_image",), [ From 81a0b0fc9f7e7d08e0fb9b81538521629534f492 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 3 Sep 2020 12:03:04 -0400 Subject: [PATCH 09/10] Fix comment. --- tests/rest/media/v1/test_media_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 159adb0e3509..5f897d49cf47 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -342,7 +342,7 @@ def _test_thumbnail(self, method, expected_body, expected_found): # ensure that the result is at least some valid image Image.open(BytesIO(channel.result["body"])) else: - # A 404 with no body. + # A 404 with a JSON body. self.assertEqual(channel.code, 404) self.assertEqual( channel.json_body, From b5b96ea65731f3eb86d5c06b7defd1ab3c23d004 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 9 Sep 2020 07:42:18 -0400 Subject: [PATCH 10/10] Add original error to log lines. --- synapse/rest/media/v1/media_repository.py | 15 +++++++++------ synapse/rest/media/v1/thumbnailer.py | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 20a14a9eea40..69f353d46f9e 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -474,12 +474,13 @@ async def generate_local_exact_thumbnail( try: thumbnailer = Thumbnailer(input_path) - except ThumbnailError: + except ThumbnailError as e: logger.warning( - "Unable to generate a thumbnail for local media %s using a method of %s and type of %s", + "Unable to generate a thumbnail for local media %s using a method of %s and type of %s: %s", media_id, t_method, t_type, + e, ) return None @@ -541,13 +542,14 @@ async def generate_remote_exact_thumbnail( try: thumbnailer = Thumbnailer(input_path) - except ThumbnailError: + except ThumbnailError as e: logger.warning( - "Unable to generate a thumbnail for remote media %s from %s using a method of %s and type of %s", + "Unable to generate a thumbnail for remote media %s from %s using a method of %s and type of %s: %s", media_id, server_name, t_method, t_type, + e, ) return None @@ -632,12 +634,13 @@ async def _generate_thumbnails( try: thumbnailer = Thumbnailer(input_path) - except ThumbnailError: + except ThumbnailError as e: logger.warning( - "Unable to generate thumbnails for remote media %s from %s using a method of %s and type of %s", + "Unable to generate thumbnails for remote media %s from %s using a method of %s and type of %s: %s", media_id, server_name, media_type, + e, ) return None diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 93dd77e474ea..457ad6031ce2 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -42,10 +42,10 @@ class Thumbnailer: def __init__(self, input_path): try: self.image = Image.open(input_path) - except OSError: + except OSError as e: # If an error occurs opening the image, a thumbnail won't be able to # be generated. - raise ThumbnailError() + raise ThumbnailError from e self.width, self.height = self.image.size self.transpose_method = None