Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Register media URLs via regex. #16419

Merged
9 commits merged on Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/16419.misc
@@ -0,0 +1 @@
Update registration of media repository URLs.
2 changes: 1 addition & 1 deletion synapse/http/server.py
Expand Up @@ -266,7 +266,7 @@ class HttpServer(Protocol):
def register_paths(
self,
method: str,
path_patterns: Iterable[Pattern],
path_patterns: Iterable[Pattern[str]],
callback: ServletCallback,
servlet_classname: str,
) -> None:
Expand Down
48 changes: 4 additions & 44 deletions synapse/media/_base.py
Expand Up @@ -26,11 +26,11 @@
from twisted.protocols.basic import FileSender
from twisted.web.server import Request

from synapse.api.errors import Codes, SynapseError, cs_error
from synapse.api.errors import Codes, cs_error
from synapse.http.server import finish_request, respond_with_json
from synapse.http.site import SynapseRequest
from synapse.logging.context import make_deferred_yieldable
from synapse.util.stringutils import is_ascii, parse_and_validate_server_name
from synapse.util.stringutils import is_ascii

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -84,52 +84,12 @@
]


def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
    """Extract the server name, media ID and optional file name from a request.

    All three values are read from the path segments in `request.postpath`.
    The server name is additionally passed through
    `parse_and_validate_server_name`.

    Args:
        request: The `Request` whose `postpath` is parsed.

    Returns:
        A `(server_name, media_id, file_name)` tuple. `file_name` is taken
        from the final path segment when more than two segments are present;
        it is None otherwise, or when that segment is not valid UTF-8.

    Raises:
        SynapseError(404): if parsing or validation fail for any reason
    """
    try:
        # The type on postpath seems incorrect in Twisted 21.2.0.
        segments: List[bytes] = request.postpath  # type: ignore
        assert segments

        # Unpacking fails (and is reported as a 404 below) when fewer than two
        # segments were supplied.
        raw_server_name, raw_media_id = segments[:2]
        server_name = raw_server_name.decode("utf-8")
        media_id = raw_media_id.decode("utf8")

        # Raises if the server name is invalid.
        parse_and_validate_server_name(server_name)

        # Users may append e.g. /test.png to the URL, which helps clients that
        # parse the URL to see the content type.
        file_name: Optional[str] = None
        has_trailing_segment = len(segments) > 2
        if has_trailing_segment:
            try:
                trailing = segments[-1].decode("utf-8")
                file_name = urllib.parse.unquote(trailing)
            except UnicodeDecodeError:
                # An undecodable file name is ignored rather than rejected.
                pass

        return server_name, media_id, file_name
    except Exception:
        raise SynapseError(
            404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
        )


def respond_404(request: SynapseRequest) -> None:
assert request.path is not None
respond_with_json(
request,
404,
cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
cs_error("Not found '%s'" % (request.path.decode(),), code=Codes.NOT_FOUND),
send_cors=True,
)

Expand Down
10 changes: 9 additions & 1 deletion synapse/media/media_repository.py
Expand Up @@ -48,6 +48,7 @@
from synapse.media.media_storage import MediaStorage
from synapse.media.storage_provider import StorageProviderWrapper
from synapse.media.thumbnailer import Thumbnailer, ThumbnailError
from synapse.media.url_previewer import UrlPreviewer
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.types import UserID
from synapse.util.async_helpers import Linearizer
Expand Down Expand Up @@ -114,7 +115,7 @@ def __init__(self, hs: "HomeServer"):
)
storage_providers.append(provider)

self.media_storage = MediaStorage(
self.media_storage: MediaStorage = MediaStorage(
self.hs, self.primary_base_path, self.filepaths, storage_providers
)

Expand Down Expand Up @@ -142,6 +143,13 @@ def __init__(self, hs: "HomeServer"):
MEDIA_RETENTION_CHECK_PERIOD_MS,
)

if hs.config.media.url_preview_enabled:
self.url_previewer: Optional[UrlPreviewer] = UrlPreviewer(
hs, self, self.media_storage
)
else:
self.url_previewer = None

def _start_update_recently_accessed(self) -> Deferred:
return run_as_background_process(
"update_recently_accessed_media", self._update_recently_accessed
Expand Down
13 changes: 6 additions & 7 deletions synapse/rest/media/config_resource.py
Expand Up @@ -14,17 +14,19 @@
# limitations under the License.
#

import re
from typing import TYPE_CHECKING

from synapse.http.server import DirectServeJsonResource, respond_with_json
from synapse.http.server import respond_with_json
from synapse.http.servlet import RestServlet
from synapse.http.site import SynapseRequest

if TYPE_CHECKING:
from synapse.server import HomeServer


class MediaConfigResource(DirectServeJsonResource):
isLeaf = True
class MediaConfigResource(RestServlet):
PATTERNS = [re.compile("/_matrix/media/(r0|v3|v1)/config$")]

def __init__(self, hs: "HomeServer"):
super().__init__()
Expand All @@ -33,9 +35,6 @@ def __init__(self, hs: "HomeServer"):
self.auth = hs.get_auth()
self.limits_dict = {"m.upload.size": config.media.max_upload_size}

async def _async_render_GET(self, request: SynapseRequest) -> None:
async def on_GET(self, request: SynapseRequest) -> None:
await self.auth.get_user_by_req(request)
respond_with_json(request, 200, self.limits_dict, send_cors=True)

async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
respond_with_json(request, 200, {}, send_cors=True)
40 changes: 26 additions & 14 deletions synapse/rest/media/download_resource.py
Expand Up @@ -13,16 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import TYPE_CHECKING
import re
from typing import TYPE_CHECKING, Optional

from synapse.http.server import (
DirectServeJsonResource,
set_corp_headers,
set_cors_headers,
)
from synapse.http.servlet import parse_boolean
from synapse.http.server import set_corp_headers, set_cors_headers
from synapse.http.servlet import RestServlet, parse_boolean
from synapse.http.site import SynapseRequest
from synapse.media._base import parse_media_id, respond_404
from synapse.media._base import respond_404
from synapse.util.stringutils import parse_and_validate_server_name

if TYPE_CHECKING:
from synapse.media.media_repository import MediaRepository
Expand All @@ -31,15 +29,28 @@
logger = logging.getLogger(__name__)


class DownloadResource(DirectServeJsonResource):
isLeaf = True
class DownloadResource(RestServlet):
PATTERNS = [
re.compile(
"/_matrix/media/(r0|v3|v1)/download/(?P<server_name>[^/]*)/(?P<media_id>[^/]*)(/(?P<file_name>[^/]*))?$"
)
]

def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"):
super().__init__()
self.media_repo = media_repo
self._is_mine_server_name = hs.is_mine_server_name

async def _async_render_GET(self, request: SynapseRequest) -> None:
async def on_GET(
self,
request: SynapseRequest,
server_name: str,
media_id: str,
file_name: Optional[str] = None,
) -> None:
# Validate the server name, raising if invalid
parse_and_validate_server_name(server_name)

set_cors_headers(request)
set_corp_headers(request)
request.setHeader(
Expand All @@ -58,9 +69,8 @@ async def _async_render_GET(self, request: SynapseRequest) -> None:
b"Referrer-Policy",
b"no-referrer",
)
server_name, media_id, name = parse_media_id(request)
if self._is_mine_server_name(server_name):
await self.media_repo.get_local_media(request, media_id, name)
await self.media_repo.get_local_media(request, media_id, file_name)
else:
allow_remote = parse_boolean(request, "allow_remote", default=True)
if not allow_remote:
Expand All @@ -72,4 +82,6 @@ async def _async_render_GET(self, request: SynapseRequest) -> None:
respond_404(request)
return

await self.media_repo.get_remote_media(request, server_name, media_id, name)
await self.media_repo.get_remote_media(
request, server_name, media_id, file_name
)
33 changes: 22 additions & 11 deletions synapse/rest/media/media_repository_resource.py
Expand Up @@ -15,7 +15,7 @@
from typing import TYPE_CHECKING

from synapse.config._base import ConfigError
from synapse.http.server import UnrecognizedRequestResource
from synapse.http.server import HttpServer, JsonResource

from .config_resource import MediaConfigResource
from .download_resource import DownloadResource
Expand All @@ -27,7 +27,7 @@
from synapse.server import HomeServer


class MediaRepositoryResource(UnrecognizedRequestResource):
class MediaRepositoryResource(JsonResource):
"""File uploading and downloading.

Uploads are POSTed to a resource which returns a token which is used to GET
Expand Down Expand Up @@ -70,24 +70,35 @@ class MediaRepositoryResource(UnrecognizedRequestResource):
width and height are close to the requested size and the aspect matches
the requested size. The client should scale the image if it needs to fit
within a given rectangle.

This gets mounted at various points under /_matrix/media, including:
* /_matrix/media/r0
* /_matrix/media/v1
* /_matrix/media/v3
"""

def __init__(self, hs: "HomeServer"):
# If we're not configured to use it, raise if we somehow got here.
if not hs.config.media.can_load_media_repo:
raise ConfigError("Synapse is not configured to use a media repo.")

super().__init__()
JsonResource.__init__(self, hs, canonical_json=False)
self.register_servlets(self, hs)

@staticmethod
def register_servlets(http_server: HttpServer, hs: "HomeServer") -> None:
media_repo = hs.get_media_repository()

self.putChild(b"upload", UploadResource(hs, media_repo))
self.putChild(b"download", DownloadResource(hs, media_repo))
self.putChild(
b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage)
# Note that many of these should not exist as v1 endpoints, but empirically
# a lot of traffic still goes to them.

UploadResource(hs, media_repo).register(http_server)
DownloadResource(hs, media_repo).register(http_server)
ThumbnailResource(hs, media_repo, media_repo.media_storage).register(
http_server
)
if hs.config.media.url_preview_enabled:
self.putChild(
b"preview_url",
PreviewUrlResource(hs, media_repo, media_repo.media_storage),
PreviewUrlResource(hs, media_repo, media_repo.media_storage).register(
http_server
)
self.putChild(b"config", MediaConfigResource(hs))
MediaConfigResource(hs).register(http_server)
26 changes: 9 additions & 17 deletions synapse/rest/media/preview_url_resource.py
Expand Up @@ -13,24 +13,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import TYPE_CHECKING

from synapse.http.server import (
DirectServeJsonResource,
respond_with_json,
respond_with_json_bytes,
)
from synapse.http.servlet import parse_integer, parse_string
from synapse.http.server import respond_with_json_bytes
from synapse.http.servlet import RestServlet, parse_integer, parse_string
from synapse.http.site import SynapseRequest
from synapse.media.media_storage import MediaStorage
from synapse.media.url_previewer import UrlPreviewer

if TYPE_CHECKING:
from synapse.media.media_repository import MediaRepository
from synapse.server import HomeServer


class PreviewUrlResource(DirectServeJsonResource):
class PreviewUrlResource(RestServlet):
"""
The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API
for URLs which outputs Open Graph (https://ogp.me/) responses (with some Matrix
Expand All @@ -48,7 +44,7 @@ class PreviewUrlResource(DirectServeJsonResource):
* Matrix cannot be used to distribute the metadata between homeservers.
"""

isLeaf = True
PATTERNS = [re.compile("/_matrix/media/(r0|v3|v1)/preview_url$")]

def __init__(
self,
Expand All @@ -62,20 +58,16 @@ def __init__(
self.clock = hs.get_clock()
self.media_repo = media_repo
self.media_storage = media_storage
assert self.media_repo.url_previewer is not None
self.url_previewer = self.media_repo.url_previewer

self._url_previewer = UrlPreviewer(hs, media_repo, media_storage)

async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
request.setHeader(b"Allow", b"OPTIONS, GET")
respond_with_json(request, 200, {}, send_cors=True)

async def _async_render_GET(self, request: SynapseRequest) -> None:
async def on_GET(self, request: SynapseRequest) -> None:
# XXX: if get_user_by_req fails, what should we do in an async render?
requester = await self.auth.get_user_by_req(request)
url = parse_string(request, "url", required=True)
ts = parse_integer(request, "ts")
if ts is None:
ts = self.clock.time_msec()

og = await self._url_previewer.preview(url, requester.user, ts)
og = await self.url_previewer.preview(url, requester.user, ts)
respond_with_json_bytes(request, 200, og, send_cors=True)