Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Do not fail completely if oEmbed autodiscovery fails. #15092

Merged
merged 2 commits into from Feb 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/15092.bugfix
@@ -0,0 +1 @@
Fix a long-standing bug where a URL preview would break if the discovered oEmbed failed to download.
33 changes: 23 additions & 10 deletions synapse/rest/media/v1/preview_url_resource.py
Expand Up @@ -163,6 +163,10 @@ class PreviewUrlResource(DirectServeJsonResource):
7. Stores the result in the database cache.
4. Returns the result.

If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
does not fail. As much information as possible is returned.

The in-memory cache expires after 1 hour.

Expired entries in the database cache (and their associated media files) are
Expand Down Expand Up @@ -364,16 +368,25 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
oembed_url = self._oembed.autodiscover_from_html(tree)
og_from_oembed: JsonDict = {}
if oembed_url:
oembed_info = await self._handle_url(
oembed_url, user, allow_data_urls=True
)
(
og_from_oembed,
author_name,
expiration_ms,
) = await self._handle_oembed_response(
url, oembed_info, expiration_ms
)
try:
oembed_info = await self._handle_url(
oembed_url, user, allow_data_urls=True
)
except Exception as e:
# Fetching the oEmbed info failed, don't block the entire URL preview.
logger.warning(
"oEmbed fetch failed during URL preview: %s errored with %s",
oembed_url,
e,
)
else:
(
og_from_oembed,
author_name,
expiration_ms,
) = await self._handle_oembed_response(
url, oembed_info, expiration_ms
)

# Parse Open Graph information from the HTML in case the oEmbed
# response failed or is incomplete.
Expand Down
44 changes: 41 additions & 3 deletions tests/rest/media/v1/test_url_preview.py
Expand Up @@ -660,7 +660,7 @@ def test_nonexistent_image(self) -> None:
"""If the preview image doesn't exist, ensure some data is returned."""
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

end_content = (
result = (
b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
)

Expand All @@ -681,8 +681,8 @@ def test_nonexistent_image(self) -> None:
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
b'Content-Type: text/html; charset="utf8"\r\n\r\n'
)
% (len(end_content),)
+ end_content
% (len(result),)
+ result
)

self.pump()
Expand All @@ -691,6 +691,44 @@ def test_nonexistent_image(self) -> None:
# The image should not be in the result.
self.assertNotIn("og:image", channel.json_body)

def test_oembed_failure(self) -> None:
"""If the autodiscovered oEmbed URL fails, ensure some data is returned."""
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

result = b"""
<title>oEmbed Autodiscovery Fail</title>
<link rel="alternate" type="application/json+oembed"
href="http://example.com/oembed?url=http%3A%2F%2Fmatrix.org&format=json"
title="matrixdotorg" />
"""

channel = self.make_request(
"GET",
"preview_url?url=http://matrix.org",
shorthand=False,
await_result=False,
)
self.pump()

client = self.reactor.tcpClients[0][2].buildProtocol(None)
server = AccumulatingProtocol()
server.makeConnection(FakeTransport(client, self.reactor))
client.makeConnection(FakeTransport(server, self.reactor))
client.dataReceived(
(
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
b'Content-Type: text/html; charset="utf8"\r\n\r\n'
)
% (len(result),)
+ result
)

self.pump()
self.assertEqual(channel.code, 200)

# The image should not be in the result.
self.assertEqual(channel.json_body["og:title"], "oEmbed Autodiscovery Fail")

def test_data_url(self) -> None:
"""
Requesting to preview a data URL is not supported.
Expand Down