Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add metrics to track /messages response time by room size #13545

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13533.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Track HTTP response times over 10 seconds from `/messages` (`synapse_room_message_list_rest_servlet_response_time_seconds`).
1 change: 1 addition & 0 deletions changelog.d/13545.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update metrics to track `/messages` response time by room size.
79 changes: 78 additions & 1 deletion synapse/rest/client/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
""" This module contains REST servlets to do with rooms: /rooms/<paths> """
import logging
import re
from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple
from enum import Enum
from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple, Type, TypeVar
from urllib import parse as urlparse

from prometheus_client.core import Histogram

from twisted.web.server import Request

from synapse import event_auth
Expand Down Expand Up @@ -61,6 +64,69 @@
logger = logging.getLogger(__name__)


T_RoomSize = TypeVar("T_RoomSize", bound="_RoomSize")


class _RoomSize(Enum):
    """
    Enum to differentiate sizes of rooms. This is a pretty good approximation
    about how hard it will be to get events in the room. We could also look at
    room "complexity".

    The member values are short, human-readable strings.
    """

    # This doesn't necessarily mean the room is a DM, just that there is a DM
    # amount of people there.
    DM_SIZE = "direct_message_size"
    SMALL = "small"
    SUBSTANTIAL = "substantial"
    LARGE = "large"

    @classmethod
    def from_member_count(cls: Type["_RoomSize"], member_count: int) -> "_RoomSize":
        """Classify a room by how many members it has.

        Args:
            member_count: Number of members in the room.

        Returns:
            `DM_SIZE` for 2 or fewer members, `SMALL` for fewer than 100,
            `SUBSTANTIAL` for fewer than 1000, and `LARGE` otherwise.
        """
        if member_count <= 2:
            return cls.DM_SIZE
        if member_count < 100:
            return cls.SMALL
        if member_count < 1000:
            return cls.SUBSTANTIAL
        return cls.LARGE
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved


# This is an extra metric on top of `synapse_http_server_response_time_seconds`
# which times the same sort of thing but this one allows us to see values
# greater than 10s. We use a separate dedicated histogram with its own buckets
# so that we don't increase the cardinality of the general one because it's
# multiplied across hundreds of servlets.
messsages_response_timer = Histogram(
    name="synapse_room_message_list_rest_servlet_response_time_seconds",
    documentation="sec",
    # Observations are split by a single `room_size` label so that the tiny
    # rooms, which can always respond quickly, don't drown out the bigger
    # rooms we are actually trying to optimise.
    labelnames=["room_size"],
    # Upper bounds in seconds. The buckets deliberately run well past 10s so
    # that very slow responses remain distinguishable from each other.
    buckets=(
        0.005,
        0.01,
        0.025,
        0.05,
        0.1,
        0.25,
        0.5,
        1.0,
        2.5,
        5.0,
        10.0,
        30.0,
        60.0,
        120.0,
        180.0,
        "+Inf",
    ),
)


class TransactionRestServlet(RestServlet):
def __init__(self, hs: "HomeServer"):
super().__init__()
Expand Down Expand Up @@ -556,13 +622,18 @@ class RoomMessageListRestServlet(RestServlet):
def __init__(self, hs: "HomeServer") -> None:
    """Keep references to the homeserver services this servlet uses.

    Args:
        hs: The homeserver that the clock, pagination handler, auth and
            main datastore are fetched from.
    """
    super().__init__()
    self._hs = hs
    # Supplies the `time_msec()` timestamps that on_GET takes to time the
    # request.
    self.clock = hs.get_clock()
    self.pagination_handler = hs.get_pagination_handler()
    # Used by on_GET to resolve the requester from the incoming request.
    self.auth = hs.get_auth()
    # Main datastore; on_GET reads the room's joined-member count from it.
    self.store = hs.get_datastores().main

async def on_GET(
self, request: SynapseRequest, room_id: str
) -> Tuple[int, JsonDict]:
processing_start_time = self.clock.time_msec()
# Fire and forget and hope that we get a result by the end.
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
room_member_count_co = self.store.get_number_joined_users_in_room(room_id)
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved

requester = await self.auth.get_user_by_req(request, allow_guest=True)
pagination_config = await PaginationConfig.from_request(
self.store, request, default_limit=10
Expand Down Expand Up @@ -593,6 +664,12 @@ async def on_GET(
event_filter=event_filter,
)

processing_end_time = self.clock.time_msec()
room_member_count = await room_member_count_co
messsages_response_timer.labels(
room_size=_RoomSize.from_member_count(room_member_count)
).observe((processing_start_time - processing_end_time) / 1000)

return 200, msgs


Expand Down