diff --git a/src/lsm/forest_fuzz.zig b/src/lsm/forest_fuzz.zig index 385aa3cec9..98f1a19358 100644 --- a/src/lsm/forest_fuzz.zig +++ b/src/lsm/forest_fuzz.zig @@ -283,13 +283,17 @@ const Environment = struct { } } - fn put_account(env: *Environment, a: *const Account) void { - env.forest.grooves.accounts.upsert(a); + fn put_account(env: *Environment, a: *const Account, maybe_old: ?*const Account) void { + if (maybe_old) |old| { + env.forest.grooves.accounts.update(.{ .old = old, .new = a }); + } else { + env.forest.grooves.accounts.insert(a); + } } - fn get_account(env: *Environment, id: u128) ?Account { + fn get_account(env: *Environment, id: u128) ?*const Account { const account = env.forest.grooves.accounts.get(id) orelse return null; - return account.*; + return account; } // The forest should behave like a simple key-value data-structure. @@ -441,8 +445,7 @@ const Environment = struct { env.forest.grooves.accounts.objects_cache.options.map_value_count_max; if (log_index % groove_map_value_count_max == 0) { - env.forest.grooves.accounts_immutable.objects_cache.compact(); - env.forest.grooves.accounts_mutable.objects_cache.compact(); + env.forest.grooves.accounts.objects_cache.compact(); } } } @@ -463,7 +466,8 @@ const Environment = struct { .put_account => |put| { // The forest requires prefetch before put. try env.prefetch_account(put.account.id); - env.put_account(&put.account); + const lsm_account = env.get_account(put.account.id); + env.put_account(&put.account, lsm_account); try model.put_account(&put.account, put.op); }, .get_account => |id| { diff --git a/src/lsm/groove.zig b/src/lsm/groove.zig index 959c20fb3d..46d2ca7b72 100644 --- a/src/lsm/groove.zig +++ b/src/lsm/groove.zig @@ -863,10 +863,12 @@ pub fn GrooveType( } }; - /// Insert the value into the objects tree and associated index trees, asserting that it doesn't - /// already exist. + /// Insert the value into the objects tree and associated index trees. It's up to the + /// caller to ensure it doesn't already exist. pub fn insert(groove: *Groove, object: *const Object) void { - assert(!groove.objects_cache.has(@field(object, primary_field))); + if (constants.verify) { + assert(!groove.objects_cache.has(@field(object, primary_field))); + } groove.objects_cache.upsert(object); @@ -886,37 +888,54 @@ pub fn GrooveType( } } - /// Insert the value (or update it, if it exists). + /// Update the value. Requires the old object to be provided. /// Update the object and index trees by diff'ing the old and new values. - pub fn upsert(groove: *Groove, new: *const Object) void { - // Explicit stack copy needed - otherwise old will == new! - const old: Object = (groove.objects_cache.get(@field(new, primary_field)) orelse { - return groove.insert(new); - }).*; + pub fn update( + groove: *Groove, + values: struct { old: *const Object, new: *const Object }, + ) void { + const old = values.old; + const new = values.new; - assert(@field(old, primary_field) == @field(new, primary_field)); - assert(old.timestamp == new.timestamp); + if (constants.verify) { + const old_from_cache = groove.objects_cache.get(@field(old, primary_field)).?; - groove.objects_cache.upsert(new); + // While all that's actually required is that the _contents_ of the old_from_cache + // and old objects are identical, in current usage they're always the same piece of + // memory. We'll assert that for now, and this can be weakened in future if + // required. + assert(old_from_cache == old); + } + + // Sanity check to ensure the caller didn't accidentally pass in an alias. + assert(new != old); - // The ID can't change, so no need to update the ID tree. + if (has_id) assert(old.id == new.id); + assert(old.timestamp == new.timestamp); - // Update the object tree entry if any of the fields (even ignored) are different. - if (!std.mem.eql(u8, std.mem.asBytes(&old), std.mem.asBytes(new))) { - // Unlike the index trees, the new and old values in the object tree share the - // same key. Therefore put() is sufficient to overwrite the old value. - groove.objects.put(new); + // The ID can't change, so no need to update the ID tree. Update the object tree entry + // if any of the fields (even ignored) are different. We assume the caller will pass in + // an object that has changes. + // Unlike the index trees, the new and old values in the object tree share the same + // key. Therefore put() is sufficient to overwrite the old value. + if (constants.verify) { + const tombstone = ObjectTreeHelpers(Object).tombstone; + const key_from_value = ObjectTreeHelpers(Object).key_from_value; + + assert(!stdx.equal_bytes(Object, old, new)); + assert(key_from_value(old) == key_from_value(new)); + assert(!tombstone(old) and !tombstone(new)); } inline for (std.meta.fields(IndexTrees)) |field| { const Helper = IndexTreeFieldHelperType(field.name); - const old_index = Helper.derive_index(&old); + const old_index = Helper.derive_index(old); const new_index = Helper.derive_index(new); // Only update the indexes that change. if (!std.meta.eql(old_index, new_index)) { if (old_index) |index| { - const old_index_value = Helper.derive_value(&old, index); + const old_index_value = Helper.derive_value(old, index); @field(groove.indexes, field.name).remove(&old_index_value); } @@ -926,6 +945,13 @@ pub fn GrooveType( } } } + + // Putting the objects_cache upsert after the index tree updates is critical: + // We diff the old and new objects, but the old object will be a pointer into the + // objects_cache. If we upsert first, there's a high chance old.* == new.* (always, + // unless old comes from the stash) and no secondary indexes will be updated! + groove.objects_cache.upsert(new); + groove.objects.put(new); } /// Asserts that the object with the given PrimaryKey exists. diff --git a/src/state_machine.zig b/src/state_machine.zig index 282ae4486d..70ffffc4fa 100644 --- a/src/state_machine.zig +++ b/src/state_machine.zig @@ -876,8 +876,8 @@ pub fn StateMachineType( dr_account_new.debits_posted += amount; cr_account_new.credits_posted += amount; } - self.forest.grooves.accounts.upsert(&dr_account_new); - self.forest.grooves.accounts.upsert(&cr_account_new); + self.forest.grooves.accounts.update(.{ .old = dr_account, .new = &dr_account_new }); + self.forest.grooves.accounts.update(.{ .old = cr_account, .new = &cr_account_new }); self.commit_timestamp = t.timestamp; return .ok; @@ -1006,8 +1006,8 @@ pub fn StateMachineType( cr_account_new.credits_posted += amount; } - self.forest.grooves.accounts.upsert(&dr_account_new); - self.forest.grooves.accounts.upsert(&cr_account_new); + self.forest.grooves.accounts.update(.{ .old = dr_account, .new = &dr_account_new }); + self.forest.grooves.accounts.update(.{ .old = cr_account, .new = &cr_account_new }); self.commit_timestamp = t.timestamp; return .ok; @@ -1398,12 +1398,19 @@ fn check(test_table: []const u8) !void { .setup => |b| { assert(operation == null); - var account = context.state_machine.forest.grooves.accounts.get(b.account).?.*; - account.debits_pending = b.debits_pending; - account.debits_posted = b.debits_posted; - account.credits_pending = b.credits_pending; - account.credits_posted = b.credits_posted; - context.state_machine.forest.grooves.accounts.upsert(&account); + var account = context.state_machine.forest.grooves.accounts.get(b.account).?; + var account_new = account.*; + + account_new.debits_pending = b.debits_pending; + account_new.debits_posted = b.debits_posted; + account_new.credits_pending = b.credits_pending; + account_new.credits_posted = b.credits_posted; + if (!stdx.equal_bytes(Account, &account_new, account)) { + context.state_machine.forest.grooves.accounts.update(.{ + .old = account, + .new = &account_new, + }); + } }, .tick => |ticks| { diff --git a/src/stdx.zig b/src/stdx.zig index c9b7c7b3c6..e7ae2ec155 100644 --- a/src/stdx.zig +++ b/src/stdx.zig @@ -242,7 +242,7 @@ else /// - `T` doesn't have any non-deterministic padding, /// - `T` doesn't embed any pointers. pub fn equal_bytes(comptime T: type, a: *const T, b: *const T) bool { - comptime assert(std.meta.trait.hasUniqueRepresentation(T)); + comptime assert(has_unique_representation(T)); comptime assert(!has_pointers(T)); comptime assert(@sizeOf(T) * 8 == @bitSizeOf(T)); @@ -339,7 +339,7 @@ test no_padding { pub inline fn hash_inline(value: anytype) u64 { comptime { assert(no_padding(@TypeOf(value))); - assert(std.meta.trait.hasUniqueRepresentation(@TypeOf(value))); + assert(has_unique_representation(@TypeOf(value))); } return low_level_hash(0, switch (@typeInfo(@TypeOf(value))) { .Struct, .Int => std.mem.asBytes(&value), @@ -520,3 +520,153 @@ pub fn fstatfs(fd: i32, statfs_buf: *StatFs) usize { @intFromPtr(statfs_buf), ); } + +// TODO(Zig): https://github.com/ziglang/zig/issues/17592. +/// True if every value of the type `T` has a unique bit pattern representing it. +/// In other words, `T` has no unused bits and no padding. +pub fn has_unique_representation(comptime T: type) bool { + switch (@typeInfo(T)) { + else => return false, // TODO can we know if it's true for some of these types ? + + .AnyFrame, + .Enum, + .ErrorSet, + .Fn, + => return true, + + .Bool => return false, + + .Int => |info| return @sizeOf(T) * 8 == info.bits, + + .Pointer => |info| return info.size != .Slice, + + .Array => |info| return comptime has_unique_representation(info.child), + + .Struct => |info| { + // Only consider packed structs unique if they are byte aligned. + if (info.backing_integer) |backing_integer| { + return @sizeOf(T) * 8 == @bitSizeOf(backing_integer); + } + + var sum_size = @as(usize, 0); + + inline for (info.fields) |field| { + const FieldType = field.type; + if (comptime !has_unique_representation(FieldType)) return false; + sum_size += @sizeOf(FieldType); + } + + return @sizeOf(T) == sum_size; + }, + + .Vector => |info| return comptime has_unique_representation(info.child) and + @sizeOf(T) == @sizeOf(info.child) * info.len, + } +} + +// Test vectors mostly from upstream, with some added to test the packed struct case. +test "has_unique_representation" { + const TestStruct1 = struct { + a: u32, + b: u32, + }; + + try std.testing.expect(has_unique_representation(TestStruct1)); + + const TestStruct2 = struct { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestStruct2)); + + const TestStruct3 = struct { + a: u32, + b: u32, + }; + + try std.testing.expect(has_unique_representation(TestStruct3)); + + const TestStruct4 = struct { a: []const u8 }; + + try std.testing.expect(!has_unique_representation(TestStruct4)); + + const TestStruct5 = struct { a: TestStruct4 }; + + try std.testing.expect(!has_unique_representation(TestStruct5)); + + const TestStruct6 = packed struct { + a: u32, + b: u31, + }; + + try std.testing.expect(!has_unique_representation(TestStruct6)); + + const TestStruct7 = struct { + a: u64, + b: TestStruct6, + }; + + try std.testing.expect(!has_unique_representation(TestStruct7)); + + const TestStruct8 = packed struct { + a: u32, + b: u32, + }; + + try std.testing.expect(has_unique_representation(TestStruct8)); + + const TestStruct9 = struct { + a: u64, + b: TestStruct8, + }; + + try std.testing.expect(has_unique_representation(TestStruct9)); + + const TestStruct10 = packed struct { + a: TestStruct8, + b: TestStruct8, + }; + + try std.testing.expect(has_unique_representation(TestStruct10)); + + const TestUnion1 = packed union { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestUnion1)); + + const TestUnion2 = extern union { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestUnion2)); + + const TestUnion3 = union { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestUnion3)); + + const TestUnion4 = union(enum) { + a: u32, + b: u16, + }; + + try std.testing.expect(!has_unique_representation(TestUnion4)); + + inline for ([_]type{ i0, u8, i16, u32, i64 }) |T| { + try std.testing.expect(has_unique_representation(T)); + } + inline for ([_]type{ i1, u9, i17, u33, i24 }) |T| { + try std.testing.expect(!has_unique_representation(T)); + } + + try std.testing.expect(!has_unique_representation([]u8)); + try std.testing.expect(!has_unique_representation([]const u8)); + + try std.testing.expect(has_unique_representation(@Vector(4, u16))); +} diff --git a/src/testing/state_machine.zig b/src/testing/state_machine.zig index 05bc040134..bede504870 100644 --- a/src/testing/state_machine.zig +++ b/src/testing/state_machine.zig @@ -167,7 +167,7 @@ pub fn StateMachineType( const thing = state_machine.forest.grooves.things.get(op); assert(thing == null); - state_machine.forest.grooves.things.upsert(&.{ + state_machine.forest.grooves.things.insert(&.{ .timestamp = timestamp, .id = op, .value = @as(u64, @truncate(vsr.checksum(input))), diff --git a/src/tidy.zig b/src/tidy.zig index 75e2f0d976..99ae89d0df 100644 --- a/src/tidy.zig +++ b/src/tidy.zig @@ -64,6 +64,11 @@ fn banned(source: []const u8) ?[]const u8 { if (std.mem.indexOf(u8, source, "std." ++ "BoundedArray") != null) { return "use stdx." ++ "BoundedArray instead of std version"; } + + if (std.mem.indexOf(u8, source, "trait." ++ "hasUniqueRepresentation") != null) { + return "use stdx." ++ "has_unique_representation instead of std version"; + } + return null; }