Skip to content

Commit

Permalink
Merge pull request #1239 from tigerbeetle/cb22/update-upsert
Browse files Browse the repository at this point in the history
LSM: Revert groove to use update + insert and vendor hasUniqueRepresentation
  • Loading branch information
cb22 committed Oct 24, 2023
2 parents 317a338 + e81a087 commit f821719
Show file tree
Hide file tree
Showing 6 changed files with 231 additions and 38 deletions.
15 changes: 10 additions & 5 deletions src/lsm/forest_fuzz.zig
Original file line number Diff line number Diff line change
Expand Up @@ -284,13 +284,17 @@ const Environment = struct {
}
}

fn put_account(env: *Environment, a: *const Account) void {
env.forest.grooves.accounts.upsert(a);
fn put_account(env: *Environment, a: *const Account, maybe_old: ?*const Account) void {
if (maybe_old) |old| {
env.forest.grooves.accounts.update(.{ .old = old, .new = a });
} else {
env.forest.grooves.accounts.insert(a);
}
}

fn get_account(env: *Environment, id: u128) ?Account {
fn get_account(env: *Environment, id: u128) ?*const Account {
const account = env.forest.grooves.accounts.get(id) orelse return null;
return account.*;
return account;
}

// The forest should behave like a simple key-value data-structure.
Expand Down Expand Up @@ -463,7 +467,8 @@ const Environment = struct {
.put_account => |put| {
// The forest requires prefetch before put.
try env.prefetch_account(put.account.id);
env.put_account(&put.account);
const lsm_account = env.get_account(put.account.id);
env.put_account(&put.account, lsm_account);
try model.put_account(&put.account, put.op);
},
.get_account => |id| {
Expand Down
66 changes: 46 additions & 20 deletions src/lsm/groove.zig
Original file line number Diff line number Diff line change
Expand Up @@ -863,10 +863,12 @@ pub fn GrooveType(
}
};

/// Insert the value into the objects tree and associated index trees, asserting that it doesn't
/// already exist.
/// Insert the value into the objects tree and associated index trees. It's up to the
/// caller to ensure it doesn't already exist.
pub fn insert(groove: *Groove, object: *const Object) void {
assert(!groove.objects_cache.has(@field(object, primary_field)));
if (constants.verify) {
assert(!groove.objects_cache.has(@field(object, primary_field)));
}

groove.objects_cache.upsert(object);

Expand All @@ -886,37 +888,54 @@ pub fn GrooveType(
}
}

/// Insert the value (or update it, if it exists).
/// Update the value. Requires the old object to be provided.
/// Update the object and index trees by diff'ing the old and new values.
pub fn upsert(groove: *Groove, new: *const Object) void {
// Explicit stack copy needed - otherwise old will == new!
const old: Object = (groove.objects_cache.get(@field(new, primary_field)) orelse {
return groove.insert(new);
}).*;
pub fn update(
groove: *Groove,
values: struct { old: *const Object, new: *const Object },
) void {
const old = values.old;
const new = values.new;

assert(@field(old, primary_field) == @field(new, primary_field));
assert(old.timestamp == new.timestamp);
if (constants.verify) {
const old_from_cache = groove.objects_cache.get(@field(old, primary_field)).?;

groove.objects_cache.upsert(new);
// While all that's actually required is that the _contents_ of the old_from_cache
// and old objects are identical, in current usage they're always the same piece of
// memory. We'll assert that for now, and this can be weakened in future if
// required.
assert(old_from_cache == old);
}

// Sanity check to ensure the caller didn't accidentally pass in an alias.
assert(new != old);

// The ID can't change, so no need to update the ID tree.
if (has_id) assert(old.id == new.id);
assert(old.timestamp == new.timestamp);

// Update the object tree entry if any of the fields (even ignored) are different.
if (!std.mem.eql(u8, std.mem.asBytes(&old), std.mem.asBytes(new))) {
// Unlike the index trees, the new and old values in the object tree share the
// same key. Therefore put() is sufficient to overwrite the old value.
groove.objects.put(new);
// The ID can't change, so no need to update the ID tree. Update the object tree entry
// if any of the fields (even ignored) are different. We assume the caller will pass in
// an object that has changes.
// Unlike the index trees, the new and old values in the object tree share the same
// key. Therefore put() is sufficient to overwrite the old value.
if (constants.verify) {
const tombstone = ObjectTreeHelpers(Object).tombstone;
const key_from_value = ObjectTreeHelpers(Object).key_from_value;

assert(!stdx.equal_bytes(Object, old, new));
assert(key_from_value(old) == key_from_value(new));
assert(!tombstone(old) and !tombstone(new));
}

inline for (std.meta.fields(IndexTrees)) |field| {
const Helper = IndexTreeFieldHelperType(field.name);
const old_index = Helper.derive_index(&old);
const old_index = Helper.derive_index(old);
const new_index = Helper.derive_index(new);

// Only update the indexes that change.
if (!std.meta.eql(old_index, new_index)) {
if (old_index) |index| {
const old_index_value = Helper.derive_value(&old, index);
const old_index_value = Helper.derive_value(old, index);
@field(groove.indexes, field.name).remove(&old_index_value);
}

Expand All @@ -926,6 +945,13 @@ pub fn GrooveType(
}
}
}

// Putting the objects_cache upsert after the index tree updates is critical:
// We diff the old and new objects, but the old object will be a pointer into the
// objects_cache. If we upsert first, there's a high chance old.* == new.* (always,
// unless old comes from the stash) and no secondary indexes will be updated!
groove.objects_cache.upsert(new);
groove.objects.put(new);
}

/// Asserts that the object with the given PrimaryKey exists.
Expand Down
27 changes: 17 additions & 10 deletions src/state_machine.zig
Original file line number Diff line number Diff line change
Expand Up @@ -876,8 +876,8 @@ pub fn StateMachineType(
dr_account_new.debits_posted += amount;
cr_account_new.credits_posted += amount;
}
self.forest.grooves.accounts.upsert(&dr_account_new);
self.forest.grooves.accounts.upsert(&cr_account_new);
self.forest.grooves.accounts.update(.{ .old = dr_account, .new = &dr_account_new });
self.forest.grooves.accounts.update(.{ .old = cr_account, .new = &cr_account_new });

self.commit_timestamp = t.timestamp;
return .ok;
Expand Down Expand Up @@ -1006,8 +1006,8 @@ pub fn StateMachineType(
cr_account_new.credits_posted += amount;
}

self.forest.grooves.accounts.upsert(&dr_account_new);
self.forest.grooves.accounts.upsert(&cr_account_new);
self.forest.grooves.accounts.update(.{ .old = dr_account, .new = &dr_account_new });
self.forest.grooves.accounts.update(.{ .old = cr_account, .new = &cr_account_new });

self.commit_timestamp = t.timestamp;
return .ok;
Expand Down Expand Up @@ -1398,12 +1398,19 @@ fn check(test_table: []const u8) !void {
.setup => |b| {
assert(operation == null);

var account = context.state_machine.forest.grooves.accounts.get(b.account).?.*;
account.debits_pending = b.debits_pending;
account.debits_posted = b.debits_posted;
account.credits_pending = b.credits_pending;
account.credits_posted = b.credits_posted;
context.state_machine.forest.grooves.accounts.upsert(&account);
var account = context.state_machine.forest.grooves.accounts.get(b.account).?;
var account_new = account.*;

account_new.debits_pending = b.debits_pending;
account_new.debits_posted = b.debits_posted;
account_new.credits_pending = b.credits_pending;
account_new.credits_posted = b.credits_posted;
if (!stdx.equal_bytes(Account, &account_new, account)) {
context.state_machine.forest.grooves.accounts.update(.{
.old = account,
.new = &account_new,
});
}
},

.tick => |ticks| {
Expand Down
154 changes: 152 additions & 2 deletions src/stdx.zig
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ else
/// - `T` doesn't have any non-deterministic padding,
/// - `T` doesn't embed any pointers.
pub fn equal_bytes(comptime T: type, a: *const T, b: *const T) bool {
comptime assert(std.meta.trait.hasUniqueRepresentation(T));
comptime assert(has_unique_representation(T));
comptime assert(!has_pointers(T));
comptime assert(@sizeOf(T) * 8 == @bitSizeOf(T));

Expand Down Expand Up @@ -339,7 +339,7 @@ test no_padding {
pub inline fn hash_inline(value: anytype) u64 {
comptime {
assert(no_padding(@TypeOf(value)));
assert(std.meta.trait.hasUniqueRepresentation(@TypeOf(value)));
assert(has_unique_representation(@TypeOf(value)));
}
return low_level_hash(0, switch (@typeInfo(@TypeOf(value))) {
.Struct, .Int => std.mem.asBytes(&value),
Expand Down Expand Up @@ -520,3 +520,153 @@ pub fn fstatfs(fd: i32, statfs_buf: *StatFs) usize {
@intFromPtr(statfs_buf),
);
}

// TODO(Zig): https://github.com/ziglang/zig/issues/17592.
/// True if every value of the type `T` has a unique bit pattern representing it.
/// In other words, `T` has no unused bits and no padding.
pub fn has_unique_representation(comptime T: type) bool {
switch (@typeInfo(T)) {
else => return false, // TODO can we know if it's true for some of these types ?

.AnyFrame,
.Enum,
.ErrorSet,
.Fn,
=> return true,

.Bool => return false,

.Int => |info| return @sizeOf(T) * 8 == info.bits,

.Pointer => |info| return info.size != .Slice,

.Array => |info| return comptime has_unique_representation(info.child),

.Struct => |info| {
// Only consider packed structs unique if they are byte aligned.
if (info.backing_integer) |backing_integer| {
return @sizeOf(T) * 8 == @bitSizeOf(backing_integer);
}

var sum_size = @as(usize, 0);

inline for (info.fields) |field| {
const FieldType = field.type;
if (comptime !has_unique_representation(FieldType)) return false;
sum_size += @sizeOf(FieldType);
}

return @sizeOf(T) == sum_size;
},

.Vector => |info| return comptime has_unique_representation(info.child) and
@sizeOf(T) == @sizeOf(info.child) * info.len,
}
}

// Test vectors mostly from upstream, with some added to test the packed struct case.
test "has_unique_representation" {
const TestStruct1 = struct {
a: u32,
b: u32,
};

try std.testing.expect(has_unique_representation(TestStruct1));

const TestStruct2 = struct {
a: u32,
b: u16,
};

try std.testing.expect(!has_unique_representation(TestStruct2));

const TestStruct3 = struct {
a: u32,
b: u32,
};

try std.testing.expect(has_unique_representation(TestStruct3));

const TestStruct4 = struct { a: []const u8 };

try std.testing.expect(!has_unique_representation(TestStruct4));

const TestStruct5 = struct { a: TestStruct4 };

try std.testing.expect(!has_unique_representation(TestStruct5));

const TestStruct6 = packed struct {
a: u32,
b: u31,
};

try std.testing.expect(!has_unique_representation(TestStruct6));

const TestStruct7 = struct {
a: u64,
b: TestStruct6,
};

try std.testing.expect(!has_unique_representation(TestStruct7));

const TestStruct8 = packed struct {
a: u32,
b: u32,
};

try std.testing.expect(has_unique_representation(TestStruct8));

const TestStruct9 = struct {
a: u64,
b: TestStruct8,
};

try std.testing.expect(has_unique_representation(TestStruct9));

const TestStruct10 = packed struct {
a: TestStruct8,
b: TestStruct8,
};

try std.testing.expect(has_unique_representation(TestStruct10));

const TestUnion1 = packed union {
a: u32,
b: u16,
};

try std.testing.expect(!has_unique_representation(TestUnion1));

const TestUnion2 = extern union {
a: u32,
b: u16,
};

try std.testing.expect(!has_unique_representation(TestUnion2));

const TestUnion3 = union {
a: u32,
b: u16,
};

try std.testing.expect(!has_unique_representation(TestUnion3));

const TestUnion4 = union(enum) {
a: u32,
b: u16,
};

try std.testing.expect(!has_unique_representation(TestUnion4));

inline for ([_]type{ i0, u8, i16, u32, i64 }) |T| {
try std.testing.expect(has_unique_representation(T));
}
inline for ([_]type{ i1, u9, i17, u33, i24 }) |T| {
try std.testing.expect(!has_unique_representation(T));
}

try std.testing.expect(!has_unique_representation([]u8));
try std.testing.expect(!has_unique_representation([]const u8));

try std.testing.expect(has_unique_representation(@Vector(4, u16)));
}
2 changes: 1 addition & 1 deletion src/testing/state_machine.zig
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ pub fn StateMachineType(
const thing = state_machine.forest.grooves.things.get(op);
assert(thing == null);

state_machine.forest.grooves.things.upsert(&.{
state_machine.forest.grooves.things.insert(&.{
.timestamp = timestamp,
.id = op,
.value = @as(u64, @truncate(vsr.checksum(input))),
Expand Down
5 changes: 5 additions & 0 deletions src/tidy.zig
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ fn banned(source: []const u8) ?[]const u8 {
if (std.mem.indexOf(u8, source, "std." ++ "BoundedArray") != null) {
return "use stdx." ++ "BoundedArray instead of std version";
}

if (std.mem.indexOf(u8, source, "trait." ++ "hasUniqueRepresentation") != null) {
return "use stdx." ++ "has_unique_representation instead of std version";
}

return null;
}

Expand Down

0 comments on commit f821719

Please sign in to comment.