Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Faster joins: filter out non local events when a room doesn't have it…
Browse files Browse the repository at this point in the history
…s full state (#14404)


Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
  • Loading branch information
Mathieu Velten committed Nov 21, 2022
1 parent 640cb3c commit 1526ff3
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 13 deletions.
1 change: 1 addition & 0 deletions changelog.d/14404.misc
@@ -0,0 +1 @@
Faster joins: filter out non local events when a room doesn't have its full state.
1 change: 1 addition & 0 deletions synapse/federation/sender/per_destination_queue.py
Expand Up @@ -505,6 +505,7 @@ async def _catch_up_transmission_loop(self) -> None:
new_pdus = await filter_events_for_server(
self._storage_controllers,
self._destination,
self._server_name,
new_pdus,
redact=False,
)
Expand Down
15 changes: 10 additions & 5 deletions synapse/handlers/federation.py
Expand Up @@ -379,6 +379,7 @@ async def _maybe_backfill_inner(
filtered_extremities = await filter_events_for_server(
self._storage_controllers,
self.server_name,
self.server_name,
events_to_check,
redact=False,
check_history_visibility_only=True,
Expand Down Expand Up @@ -1231,7 +1232,9 @@ async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]:
async def on_backfill_request(
self, origin: str, room_id: str, pdu_list: List[str], limit: int
) -> List[EventBase]:
await self._event_auth_handler.assert_host_in_room(room_id, origin)
# We allow partially joined rooms since in this case we are filtering out
# non-local events in `filter_events_for_server`.
await self._event_auth_handler.assert_host_in_room(room_id, origin, True)

# Synapse asks for 100 events per backfill request. Do not allow more.
limit = min(limit, 100)
Expand All @@ -1252,7 +1255,7 @@ async def on_backfill_request(
)

events = await filter_events_for_server(
self._storage_controllers, origin, events
self._storage_controllers, origin, self.server_name, events
)

return events
Expand Down Expand Up @@ -1283,7 +1286,7 @@ async def get_persisted_pdu(
await self._event_auth_handler.assert_host_in_room(event.room_id, origin)

events = await filter_events_for_server(
self._storage_controllers, origin, [event]
self._storage_controllers, origin, self.server_name, [event]
)
event = events[0]
return event
Expand All @@ -1296,7 +1299,9 @@ async def on_get_missing_events(
latest_events: List[str],
limit: int,
) -> List[EventBase]:
await self._event_auth_handler.assert_host_in_room(room_id, origin)
# We allow partially joined rooms since in this case we are filtering out
# non-local events in `filter_events_for_server`.
await self._event_auth_handler.assert_host_in_room(room_id, origin, True)

# Only allow up to 20 events to be retrieved per request.
limit = min(limit, 20)
Expand All @@ -1309,7 +1314,7 @@ async def on_get_missing_events(
)

missing_events = await filter_events_for_server(
self._storage_controllers, origin, missing_events
self._storage_controllers, origin, self.server_name, missing_events
)

return missing_events
Expand Down
29 changes: 26 additions & 3 deletions synapse/visibility.py
Expand Up @@ -563,7 +563,8 @@ def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str:

async def filter_events_for_server(
storage: StorageControllers,
server_name: str,
target_server_name: str,
local_server_name: str,
events: List[EventBase],
redact: bool = True,
check_history_visibility_only: bool = False,
Expand Down Expand Up @@ -603,7 +604,7 @@ def check_event_is_visible(
# if the server is either in the room or has been invited
# into the room.
for ev in memberships.values():
assert get_domain_from_id(ev.state_key) == server_name
assert get_domain_from_id(ev.state_key) == target_server_name

memtype = ev.membership
if memtype == Membership.JOIN:
Expand All @@ -622,6 +623,24 @@ def check_event_is_visible(
# to no users having been erased.
erased_senders = {}

# Filter out non-local events when we are in the middle of a partial join, since our servers
# list can be out of date and we could leak events to servers not in the room anymore.
# This can also be true for local events but we consider it to be an acceptable risk.

# We do this check as a first step and before retrieving membership events because
# otherwise a room could be fully joined after we retrieve those, which would then bypass
# this check but would base the filtering on an outdated view of the membership events.

partial_state_invisible_events = set()
if not check_history_visibility_only:

This comment has been minimized.

Copy link
@DMRobertson

DMRobertson Mar 8, 2023

Contributor

I'm very confused here. Why do we only ignore partial state events if we're not checking history visibility?

This comment has been minimized.

Copy link
@MatMaul

MatMaul Mar 9, 2023

Contributor

It is probably related to the Complement test failures we are seeing on your WIP PR, I think I had the same ones without it.
The only place where check_history_visibility_only would be True and skip this bloc is in the backfill code here.
I am trying to remember the rational but I am failing for now, I may have try that to see if tests were happy, then completely forgot to rationalize it afterwards :/ .

This comment has been minimized.

Copy link
@MatMaul

MatMaul Mar 9, 2023

Contributor

Right so the important commit is here and was using a dedicated boolean instead.
bd7ba64 is probably a mistake TBH... It blurs the concerns just for the sake of having one boolean.

for e in events:
sender_domain = get_domain_from_id(e.sender)
if (
sender_domain != local_server_name
and await storage.main.is_partial_state_room(e.room_id)
):
partial_state_invisible_events.add(e)

# Let's check to see if all the events have a history visibility
# of "shared" or "world_readable". If that's the case then we don't
# need to check membership (as we know the server is in the room).
Expand All @@ -636,7 +655,7 @@ def check_event_is_visible(
if event_to_history_vis[e.event_id]
not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
],
server_name,
target_server_name,
)

to_return = []
Expand All @@ -645,6 +664,10 @@ def check_event_is_visible(
visible = check_event_is_visible(
event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {})
)

if e in partial_state_invisible_events:
visible = False

if visible and not erased:
to_return.append(e)
elif redact:
Expand Down
10 changes: 5 additions & 5 deletions tests/test_visibility.py
Expand Up @@ -61,7 +61,7 @@ def test_filtering(self) -> None:

filtered = self.get_success(
filter_events_for_server(
self._storage_controllers, "test_server", events_to_filter
self._storage_controllers, "test_server", "hs", events_to_filter
)
)

Expand All @@ -83,7 +83,7 @@ def test_filter_outlier(self) -> None:
self.assertEqual(
self.get_success(
filter_events_for_server(
self._storage_controllers, "remote_hs", [outlier]
self._storage_controllers, "remote_hs", "hs", [outlier]
)
),
[outlier],
Expand All @@ -94,7 +94,7 @@ def test_filter_outlier(self) -> None:

filtered = self.get_success(
filter_events_for_server(
self._storage_controllers, "remote_hs", [outlier, evt]
self._storage_controllers, "remote_hs", "local_hs", [outlier, evt]
)
)
self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}")
Expand All @@ -106,7 +106,7 @@ def test_filter_outlier(self) -> None:
# be redacted)
filtered = self.get_success(
filter_events_for_server(
self._storage_controllers, "other_server", [outlier, evt]
self._storage_controllers, "other_server", "local_hs", [outlier, evt]
)
)
self.assertEqual(filtered[0], outlier)
Expand Down Expand Up @@ -141,7 +141,7 @@ def test_erased_user(self) -> None:
# ... and the filtering happens.
filtered = self.get_success(
filter_events_for_server(
self._storage_controllers, "test_server", events_to_filter
self._storage_controllers, "test_server", "local_hs", events_to_filter
)
)

Expand Down

0 comments on commit 1526ff3

Please sign in to comment.