This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Optimize backfill receiving to have less missing prev_event thrashing (scratch) #13864

Closed
Changes from all commits
36 commits
a847a35
Fix `have_seen_event` cache not being invalidated
MadLittleMods Sep 21, 2022
f6393db
Add changelog
MadLittleMods Sep 21, 2022
d07947d
Fix `have_seen_event` cache not being invalidated
MadLittleMods Sep 21, 2022
f2a5c70
Assert is not None
MadLittleMods Sep 22, 2022
b23b3e4
Calculate the stream_ordering from newest -> oldest (in the correct o…
MadLittleMods Sep 22, 2022
a25821d
Try sort backfill points by tie breaking on stream_ordering
MadLittleMods Sep 22, 2022
5a5c324
Scratch try different orders just to see how the tests pass differently
MadLittleMods Sep 22, 2022
1054f91
Merge branch 'develop' into madlittlemods/13856-fix-have-seen-events-…
MadLittleMods Sep 22, 2022
2162ab5
Invalidate cache like #13796
MadLittleMods Sep 22, 2022
0cdc7bf
Fix `@cachedList` on `_have_seen_events_dict`
MadLittleMods Sep 22, 2022
5b9b645
Add test description
MadLittleMods Sep 23, 2022
9fb750d
Better changelog
MadLittleMods Sep 23, 2022
4fa8f05
Add test to make sure we can actually clear entries just by room_id
MadLittleMods Sep 23, 2022
b9be6c5
Raise exception so we don't run into this arg mismatch again
MadLittleMods Sep 23, 2022
f8dc17b
Add test to ensure the safety works
MadLittleMods Sep 23, 2022
44e9746
Merge branch 'madlittlemods/13856-fix-have-seen-events-not-being-inva…
MadLittleMods Sep 23, 2022
33d12a5
Debugging
MadLittleMods Sep 24, 2022
78b4434
More debugging
MadLittleMods Sep 24, 2022
85451b9
Merge branch 'madlittlemods/13356-messages-investigation-scratch-v1' …
MadLittleMods Sep 26, 2022
31e2c10
Non-working test
MadLittleMods Sep 27, 2022
7a3ded0
Test running but no expected results yet
MadLittleMods Sep 27, 2022
6423938
Better assertion message
MadLittleMods Sep 27, 2022
20f4d1c
Use event_id to compare
MadLittleMods Sep 27, 2022
62f35ea
Show extra unexpected events
MadLittleMods Sep 28, 2022
4dcb2f6
Align more to Complement test which does pass
MadLittleMods Sep 29, 2022
50b11cb
I think working same as Complement reverse_chronological, only the in…
MadLittleMods Sep 29, 2022
cfa5e57
Try chronological which rejects the historical
MadLittleMods Sep 29, 2022
aaa9679
Simplify case
MadLittleMods Sep 29, 2022
1ed0276
Simplify case more (no more alice)
MadLittleMods Sep 29, 2022
61c1296
Working once you connect the floating insertion event
MadLittleMods Sep 29, 2022
5faebbd
Seems to work with Maria
MadLittleMods Sep 29, 2022
8f8c1a0
WIP: Connect state_chain to prev_event and the batch to the state_cha…
MadLittleMods Sep 29, 2022
3f8fef2
Explain why auth/state necessary here
MadLittleMods Sep 30, 2022
8a69706
Add stream_ordering to debug string
MadLittleMods Oct 13, 2022
0b104e3
Ordering off because not setting stream_ordering
MadLittleMods Oct 13, 2022
c5b1ce8
Log maria membership event
MadLittleMods Oct 13, 2022
1 change: 1 addition & 0 deletions changelog.d/13863.bugfix
@@ -0,0 +1 @@
+Fix `have_seen_event` cache not being invalidated after we persist an event which causes inefficiency effects like extra `/state` federation calls.
2 changes: 1 addition & 1 deletion docker/complement/conf/workers-shared-extra.yaml.j2
@@ -31,7 +31,7 @@ federation_ip_range_blacklist: []
 # Disable server rate-limiting
 rc_federation:
   window_size: 1000
-  sleep_limit: 10
+  sleep_limit: 99999
   sleep_delay: 500
   reject_limit: 99999
   concurrent: 3
5 changes: 5 additions & 0 deletions synapse/events/builder.py
@@ -132,6 +132,11 @@ async def build(
             auth_event_ids = self._event_auth_handler.compute_auth_events(
                 self, state_ids
             )
+            logger.info(
+                "builder.build state_ids=%s auth_event_ids=%s",
+                state_ids,
+                auth_event_ids,
+            )
 
         format_version = self.room_version.event_format
         # The types of auth/prev events changes between event versions.
112 changes: 51 additions & 61 deletions synapse/handlers/federation.py
@@ -38,7 +38,7 @@
 from unpaddedbase64 import decode_base64
 
 from synapse import event_auth
-from synapse.api.constants import EventContentFields, EventTypes, Membership
+from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
 from synapse.api.errors import (
     AuthError,
     CodeMessageException,
@@ -60,13 +60,7 @@
 from synapse.federation.federation_client import InvalidResponseError
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import nested_logging_context
-from synapse.logging.tracing import (
-    SynapseTags,
-    set_attribute,
-    start_active_span,
-    tag_args,
-    trace,
-)
+from synapse.logging.tracing import SynapseTags, set_attribute, tag_args, trace
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.module_api import NOT_SPAM
 from synapse.replication.http.federation import (
@@ -125,6 +119,7 @@ class _BackfillPoint:
 
     event_id: str
     depth: int
+    stream_ordering: int
     type: _BackfillPointType
 
 
@@ -231,16 +226,24 @@ async def _maybe_backfill_inner(
                 processing. Only used for timing.
         """
         backwards_extremities = [
-            _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY)
-            for event_id, depth in await self.store.get_backfill_points_in_room(room_id)
+            _BackfillPoint(
+                event_id, depth, stream_ordering, _BackfillPointType.BACKWARDS_EXTREMITY
+            )
+            for event_id, depth, stream_ordering in await self.store.get_backfill_points_in_room(
+                room_id=room_id,
+                current_depth=current_depth,
+            )
         ]
 
         insertion_events_to_be_backfilled: List[_BackfillPoint] = []
         if self.hs.config.experimental.msc2716_enabled:
             insertion_events_to_be_backfilled = [
-                _BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT)
-                for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room(
-                    room_id
+                _BackfillPoint(
+                    event_id, depth, stream_ordering, _BackfillPointType.INSERTION_PONT
+                )
+                for event_id, depth, stream_ordering in await self.store.get_insertion_event_backward_extremities_in_room(
+                    room_id=room_id,
+                    current_depth=current_depth,
                 )
             ]
         logger.debug(
@@ -249,18 +252,14 @@ async def _maybe_backfill_inner(
             insertion_events_to_be_backfilled,
         )
 
-        if not backwards_extremities and not insertion_events_to_be_backfilled:
-            logger.debug("Not backfilling as no extremeties found.")
-            return False
-
         # we now have a list of potential places to backpaginate from. We prefer to
         # start with the most recent (ie, max depth), so let's sort the list.
         sorted_backfill_points: List[_BackfillPoint] = sorted(
             itertools.chain(
                 backwards_extremities,
                 insertion_events_to_be_backfilled,
            ),
-            key=lambda e: -int(e.depth),
+            key=lambda e: (-e.depth, -e.stream_ordering, e.event_id),
         )
 
         logger.debug(
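The interesting change in this hunk is the composite sort key: backfill points are still visited deepest-first, but ties on depth are now broken by most-recent stream_ordering, with event_id as a final deterministic tie-breaker. A minimal runnable sketch of how that ordering behaves, using hypothetical sample points rather than Synapse code:

```python
from dataclasses import dataclass

@dataclass
class Point:
    event_id: str
    depth: int
    stream_ordering: int

# Hypothetical backfill points; "$a" and "$b" tie on depth.
points = [
    Point("$b", depth=10, stream_ordering=3),
    Point("$a", depth=10, stream_ordering=7),
    Point("$c", depth=12, stream_ordering=1),
]

# Same key as the diff: deepest first, then most recent stream_ordering,
# then event_id so equal points sort deterministically.
points.sort(key=lambda e: (-e.depth, -e.stream_ordering, e.event_id))
print([p.event_id for p in points])  # ['$c', '$a', '$b']
```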
@@ -273,6 +272,29 @@ async def _maybe_backfill_inner(
             sorted_backfill_points,
         )
 
+        # If we have no backfill points lower than the `current_depth` then
+        # either we can a) bail or b) still attempt to backfill. We opt to try
+        # backfilling anyway just in case we do get relevant events.
+        if not sorted_backfill_points and current_depth != MAX_DEPTH:
+            logger.debug(
+                "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway."
+            )
+            return await self._maybe_backfill_inner(
+                room_id=room_id,
+                # We use `MAX_DEPTH` so that we find all backfill points next
+                # time (all events are below the `MAX_DEPTH`)
+                current_depth=MAX_DEPTH,
+                limit=limit,
+                processing_start_time=processing_start_time,
+            )
+        elif not sorted_backfill_points and current_depth == MAX_DEPTH:
+            # Even after trying again with `MAX_DEPTH`, we didn't find any
+            # backward extremities to backfill from.
+            logger.debug(
+                "_maybe_backfill_inner: Not backfilling as no backward extremeties found."
+            )
+            return False
+
         # If we're approaching an extremity we trigger a backfill, otherwise we
         # no-op.
         #
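This new branch retries the whole calculation once with `MAX_DEPTH` before giving up, so the recursion is bounded: on the second pass every backfill point is at or below `current_depth`, and the `elif` arm catches the truly-empty case. A self-contained sketch of that shape, with hypothetical stand-ins rather than the actual handler:

```python
MAX_DEPTH = 2**63 - 1  # stand-in sentinel for synapse.api.constants.MAX_DEPTH

def maybe_backfill(point_depths: list, current_depth: int) -> bool:
    visible = [d for d in point_depths if d <= current_depth]
    if not visible and current_depth != MAX_DEPTH:
        # Retry once with the sentinel so every backfill point is considered.
        return maybe_backfill(point_depths, MAX_DEPTH)
    if not visible:
        return False  # nothing to backfill from, even with MAX_DEPTH
    return True  # proceed with the backfill attempt

assert maybe_backfill([50], current_depth=10) is True  # found on the retry
assert maybe_backfill([], current_depth=10) is False   # no points at all
```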
@@ -286,43 +308,16 @@ async def _maybe_backfill_inner(
         # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't
         # care about events that have happened after our current position.
         #
-        max_depth = sorted_backfill_points[0].depth
-        if current_depth - 2 * limit > max_depth:
+        max_depth_of_backfill_points = sorted_backfill_points[0].depth
+        if current_depth - 2 * limit > max_depth_of_backfill_points:
             logger.debug(
                 "Not backfilling as we don't need to. %d < %d - 2 * %d",
-                max_depth,
+                max_depth_of_backfill_points,
                 current_depth,
                 limit,
             )
             return False
 
-        # We ignore extremities that have a greater depth than our current depth
-        # as:
-        #   1. we don't really care about getting events that have happened
-        #      after our current position; and
-        #   2. we have likely previously tried and failed to backfill from that
-        #      extremity, so to avoid getting "stuck" requesting the same
-        #      backfill repeatedly we drop those extremities.
-        #
-        # However, we need to check that the filtered extremities are non-empty.
-        # If they are empty then either we can a) bail or b) still attempt to
-        # backfill. We opt to try backfilling anyway just in case we do get
-        # relevant events.
-        #
-        filtered_sorted_backfill_points = [
-            t for t in sorted_backfill_points if t.depth <= current_depth
-        ]
-        if filtered_sorted_backfill_points:
-            logger.debug(
-                "_maybe_backfill_inner: backfill points before current depth: %s",
-                filtered_sorted_backfill_points,
-            )
-            sorted_backfill_points = filtered_sorted_backfill_points
-        else:
-            logger.debug(
-                "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway."
-            )
-
         # For performance's sake, we only want to paginate from a particular extremity
         # if we can actually see the events we'll get. Otherwise, we'd just spend a lot
         # of resources to get redacted events. We check each extremity in turn and
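A quick worked example of the proximity heuristic the renamed variable feeds into, with hypothetical numbers: backfill only triggers when our current position is within `2 * limit` of the deepest backfill point.

```python
current_depth, limit = 500, 100          # hypothetical values
max_depth_of_backfill_points = 250

# 500 - 2 * 100 = 300, which is greater than 250, so the deepest point is
# still "far away" and the handler returns False (no backfill needed yet).
print(current_depth - 2 * limit > max_depth_of_backfill_points)  # True
```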
@@ -344,14 +339,10 @@ async def _maybe_backfill_inner(
         # attempting to paginate before backfill reached the visible history.
 
         extremities_to_request: List[str] = []
-        for i, bp in enumerate(sorted_backfill_points):
+        for bp in sorted_backfill_points:
             if len(extremities_to_request) >= 5:
                 break
 
-            set_attribute(
-                SynapseTags.RESULT_PREFIX + "backfill_point" + str(i), str(bp)
-            )
-
             # For regular backwards extremities, we don't have the extremity events
             # themselves, so we need to actually check the events that reference them -
             # their "successor" events.
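With the per-point `set_attribute` tracing dropped, the `enumerate` index is no longer needed: the loop just walks the already-sorted points and stops once five request-worthy extremities are collected. A sketch of that shape with hypothetical event IDs; the per-point vetting the real loop performs is elided:

```python
sorted_backfill_points = [f"$event{i}" for i in range(10)]  # hypothetical IDs

extremities_to_request = []
for bp in sorted_backfill_points:
    if len(extremities_to_request) >= 5:
        break
    # The real loop vets each point (e.g. via its "successor" events)
    # before requesting it; assume every point passes here.
    extremities_to_request.append(bp)

print(extremities_to_request)  # ['$event0', '$event1', ..., '$event4']
```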
@@ -396,7 +387,7 @@ async def _maybe_backfill_inner(
             )
             return False
 
-        logger.debug(
+        logger.info(
             "_maybe_backfill_inner: extremities_to_request %s", extremities_to_request
         )
         set_attribute(
@@ -408,13 +399,12 @@ async def _maybe_backfill_inner(
             str(len(extremities_to_request)),
         )
 
-        with start_active_span("getting likely_domains"):
-            # Now we need to decide which hosts to hit first.
-            # First we try hosts that are already in the room.
-            # TODO: HEURISTIC ALERT.
-            likely_domains = (
-                await self._storage_controllers.state.get_current_hosts_in_room(room_id)
-            )
+        # Now we need to decide which hosts to hit first.
+        # First we try hosts that are already in the room.
+        # TODO: HEURISTIC ALERT.
+        likely_domains = (
+            await self._storage_controllers.state.get_current_hosts_in_room(room_id)
+        )
 
         async def try_backfill(domains: Collection[str]) -> bool:
             # TODO: Should we try multiple of these at a time?
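The `likely_domains` heuristic feeds `try_backfill`, which in the handler attempts each candidate host in turn until one backfill succeeds (the `TODO` in the context hints the attempts are currently sequential). A self-contained sketch of that retry-over-hosts shape, with hypothetical helpers rather than Synapse's implementation:

```python
import asyncio
from typing import Awaitable, Callable, Collection

async def try_backfill(
    domains: Collection[str],
    backfill_from: Callable[[str], Awaitable[None]],
) -> bool:
    for domain in domains:
        try:
            await backfill_from(domain)  # one federation backfill attempt
            return True
        except Exception:
            continue  # this host failed; fall through to the next candidate
    return False

async def main() -> None:
    async def fake_backfill(domain: str) -> None:
        if domain != "good.example":
            raise RuntimeError("unreachable host")

    # Succeeds on the second candidate host.
    print(await try_backfill(["bad.example", "good.example"], fake_backfill))

asyncio.run(main())
```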