Skip to content

Commit

Permalink
vsr: prevent state sync from breaking hash chain
Browse files Browse the repository at this point in the history
The added test is a minimization of #1532.

If replica state-syncs into a newer view, its log may be disconnected
from a checkpoint. To fix this, require that replica jumps view before
jumping sync target.

The effect here is that, after state sync, there's _still_ a break
between the checkpoint and the log, but, as replica is in a view_change
state, it doesn't assume valid hash chain and waits until an SV to
correct the log.

SEED: 17270152460592452540
Closes: #1532
  • Loading branch information
matklad committed Feb 27, 2024
1 parent 5ae6bcd commit cf25219
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/vsr/replica.zig
Original file line number Diff line number Diff line change
Expand Up @@ -8368,6 +8368,7 @@ pub fn ReplicaType(
inline .commit, .ping => |h| .{
.checkpoint_id = h.checkpoint_id,
.checkpoint_op = h.checkpoint_op,
.view = h.view,
},
else => return,
};
Expand All @@ -8388,6 +8389,17 @@ pub fn ReplicaType(
@panic("checkpoint diverged");
}

if (candidate.view > self.view) {
log.debug("{}: on_{s}: jump_sync_target: ignoring, newer view" ++
" (view={} candidate.view={})", .{
self.replica,
@tagName(header.command),
self.view,
candidate.view,
});
return;
}

// Don't sync backwards, or to our current checkpoint.
if (candidate.checkpoint_op <= self.op_checkpoint()) return;

Expand Down
78 changes: 78 additions & 0 deletions src/vsr/replica_test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const assert = std.debug.assert;
const maybe = stdx.maybe;
const log = std.log.scoped(.test_replica);
const expectEqual = std.testing.expectEqual;
const expect = std.testing.expectEqual;
const allocator = std.testing.allocator;

const stdx = @import("../stdx.zig");
Expand Down Expand Up @@ -1138,6 +1139,83 @@ test "Cluster: sync: slightly lagging replica" {
try expectEqual(t.replica(.R_).commit(), checkpoint_1_trigger + 3);
}

test "Cluster: sync: checkpoint from a newer view" {
// B1 appends (but does not commit) prepares across a checkpoint boundary.
// Then the cluster truncates those prepares and commits past checkpoint the trigger.
// When B1 subsequently joins, it should state sync and truncate the log. Immediately
// after state sync, the log doesn't connect to B1's new checkpoint.
const t = try TestContext.init(.{ .replica_count = 6 });
defer t.deinit();

var c = t.clients(0, t.cluster.clients.len);
try c.request(checkpoint_1 - 1, checkpoint_1 - 1);
try expectEqual(t.replica(.R_).commit(), checkpoint_1 - 1);

var a0 = t.replica(.A0);
var b1 = t.replica(.B1);

{
// Prevent A0 from committing, prevent any other replica from becoming a primary, and
// only allow B1 to learn about A0 prepares.
t.replica(.R_).drop(.R_, .incoming, .prepare);
t.replica(.R_).drop(.R_, .incoming, .prepare_ok);
t.replica(.R_).drop(.R_, .incoming, .start_view_change);
b1.pass(.A0, .incoming, .prepare);
b1.filter(.A0, .incoming, struct {
// Force b1 to sync, rather than repair.
fn drop_message(message: *Message) bool {
switch (message.into_any()) {
.prepare => |prepare| {
return (prepare.header.op == checkpoint_1);
},
else => return false,
}
}
}.drop_message);
try c.request(checkpoint_1_trigger + 1, checkpoint_1 - 1);
try expectEqual(a0.op_head(), checkpoint_1_trigger - 1);
try expectEqual(b1.op_head(), checkpoint_1_trigger - 1);
try expectEqual(a0.commit(), checkpoint_1 - 1);
try expectEqual(b1.commit(), checkpoint_1 - 1);
}

{
// Make the rest of cluster prepare and commit a different sequence of prepares.
t.replica(.R_).pass(.R_, .incoming, .prepare);
t.replica(.R_).pass(.R_, .incoming, .prepare_ok);
t.replica(.R_).pass(.R_, .incoming, .start_view_change);

a0.drop_all(.R_, .bidirectional);
b1.drop_all(.R_, .bidirectional);
try c.request(checkpoint_2, checkpoint_2);
}

{
// Let B1 rejoin, but prevent it from jumping into view change.
b1.pass_all(.R_, .bidirectional);
b1.drop(.R_, .bidirectional, .start_view);
b1.drop(.R_, .incoming, .ping);
b1.drop(.R_, .incoming, .pong);

// TODO: Explicit coverage marks: This should hit the
// "jump_sync_target: ignoring, newer view" log line.
const b1_view_before = b1.view();
try c.request(checkpoint_2_trigger - 1, checkpoint_2_trigger - 1);
try expectEqual(b1_view_before, b1.view());
try expectEqual(b1.op_checkpoint(), 0); // B1 ignores new checkpoint.

b1.stop(); // It should be ignored even after restart.
try b1.open();
t.run();
try expectEqual(b1_view_before, b1.view());
try expectEqual(b1.op_checkpoint(), 0);
}

t.replica(.R_).pass_all(.R_, .bidirectional);
t.run();
try expectEqual(t.replica(.R_).commit(), checkpoint_2_trigger - 1);
}

test "Cluster: prepare beyond checkpoint trigger" {
const t = try TestContext.init(.{ .replica_count = 3 });
defer t.deinit();
Expand Down

0 comments on commit cf25219

Please sign in to comment.