From ffe3016a2673f44ba22465146cf4ee87187d6109 Mon Sep 17 00:00:00 2001 From: Karl Nilsson Date: Thu, 6 Nov 2025 13:54:20 +0000 Subject: [PATCH] Fix bug where the persisted last applied could be higher than last written. This could happen as some members can apply indexes even if they haven't yet been locally written fully as long as the commit index is higher. In this case it would persist the last applied index even if the entry had not been persisted by the WAL. If the WAL then crashed and the node then restarted the member would fail at recovery as it would try to recover up to a last applied index that does not exist. --- src/ra_server.erl | 19 ++++++++++++++++--- test/ra_server_SUITE.erl | 28 +++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/ra_server.erl b/src/ra_server.erl index 0cc2a55f..61cc1694 100644 --- a/src/ra_server.erl +++ b/src/ra_server.erl @@ -2218,10 +2218,23 @@ persist_last_applied(#{persisted_last_applied := PLA, last_applied := LA} = State) when LA =< PLA -> % if last applied is less than PL for some reason do nothing State; -persist_last_applied(#{last_applied := LastApplied, +persist_last_applied(#{last_applied := LastApplied0, + log := Log, cfg := #cfg{uid = UId} = Cfg} = State) -> - ok = ra_log_meta:store(meta_name(Cfg), UId, last_applied, LastApplied), - State#{persisted_last_applied => LastApplied}. + {LastWrittenIdx, _} = ra_log:last_written(Log), + LastApplied = min(LastApplied0, LastWrittenIdx), + + PersistedLastApplied = maps:get(persisted_last_applied, State, 0), + %% only persist the last applied if the last written has also caught up, + %% else it is possible that on recovery the last applied index refers to + %% indexes that never were durably written + if LastApplied > PersistedLastApplied -> + ok = ra_log_meta:store(meta_name(Cfg), UId, last_applied, LastApplied), + State#{persisted_last_applied => LastApplied}; + true -> + State + end. + -spec update_peer(ra_server_id(), diff --git a/test/ra_server_SUITE.erl b/test/ra_server_SUITE.erl index 63d97b79..6228d1e6 100644 --- a/test/ra_server_SUITE.erl +++ b/test/ra_server_SUITE.erl @@ -101,7 +101,8 @@ all() -> receive_snapshot_heartbeat_dropped, receive_snapshot_heartbeat_reply_dropped, - handle_down + handle_down, + persist_last_applied_with_unwritten ]. -define(MACFUN, fun (E, _) -> E end). @@ -1752,6 +1753,7 @@ follower_install_snapshot_machine_version(_Config) -> effective_machine_module = MacMod1, effective_machine_version = 1}, last_applied := 4, + persisted_last_applied := 4, cluster_index_term := {4, 5}, machine_state := SnapData, %% old machine state commit_index := 4}, @@ -3125,6 +3127,30 @@ handle_down(_config) -> ra_server:handle_down(follower, machine, self(), noproc, State), ok. +persist_last_applied_with_unwritten(_Config) -> + %% only actually persist the last applied index _if_ it is also confirmed + %% to be written to disk + N1 = ?N1, + Init = empty_state(3, n1), + AER1 = #append_entries_rpc{term = 1, leader_id = N1, prev_log_index = 0, + prev_log_term = 0, leader_commit = 1, + entries = [entry(1, 1, one)]}, + {follower, #{leader_id := N1, + current_term := 1, + commit_index := 1, + log := Log0, + persisted_last_applied := 0, + last_applied := 1} = State0, _} = + ra_server:handle_follower(AER1, Init), + ?assertMatch({0,_}, ra_log:last_written(Log0)), + #{persisted_last_applied := 0} = ra_server:persist_last_applied(State0), + {Log, _} = ra_log:handle_event({written, 1, {1, 1}}, Log0), + #{persisted_last_applied := 1} = + ra_server:persist_last_applied(State0#{log => Log}), + + ok. + + set_peer_query_index(State, PeerId, QueryIndex) -> #{cluster := Cluster} = State, #{PeerId := Peer} = Cluster,