-
Notifications
You must be signed in to change notification settings - Fork 376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
LSM: Revert groove to use update + insert and vendor hasUniqueRepresentation #1239
Changes from 5 commits
9848228
66fab94
65f21d6
06bb128
c367d1d
0733fb7
e81a087
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -863,10 +863,12 @@ pub fn GrooveType( | |||||
} | ||||||
}; | ||||||
|
||||||
/// Insert the value into the objects tree and associated index trees, asserting that it doesn't | ||||||
/// already exist. | ||||||
/// Insert the value into the objects tree and associated index trees. It's up to the | ||||||
/// caller to ensure it doesn't already exist. | ||||||
pub fn insert(groove: *Groove, object: *const Object) void { | ||||||
assert(!groove.objects_cache.has(@field(object, primary_field))); | ||||||
if (constants.verify) { | ||||||
assert(!groove.objects_cache.has(@field(object, primary_field))); | ||||||
} | ||||||
|
||||||
groove.objects_cache.upsert(object); | ||||||
|
||||||
|
@@ -886,37 +888,55 @@ pub fn GrooveType( | |||||
} | ||||||
} | ||||||
|
||||||
/// Insert the value (or update it, if it exists). | ||||||
/// Update the value. Requires the old object to be provided. | ||||||
/// Update the object and index trees by diff'ing the old and new values. | ||||||
pub fn upsert(groove: *Groove, new: *const Object) void { | ||||||
// Explicit stack copy needed - otherwise old will == new! | ||||||
const old: Object = (groove.objects_cache.get(@field(new, primary_field)) orelse { | ||||||
return groove.insert(new); | ||||||
}).*; | ||||||
pub fn update( | ||||||
groove: *Groove, | ||||||
values: struct { old: *const Object, new: *const Object }, | ||||||
) void { | ||||||
const old = values.old; | ||||||
const new = values.new; | ||||||
|
||||||
assert(@field(old, primary_field) == @field(new, primary_field)); | ||||||
assert(old.timestamp == new.timestamp); | ||||||
if (constants.verify) { | ||||||
const old_from_cache = groove.objects_cache.get(@field(old, primary_field)).?; | ||||||
assert(@field(old_from_cache, primary_field) == @field(old, primary_field)); | ||||||
assert(old_from_cache.timestamp == old.timestamp); | ||||||
} | ||||||
|
||||||
groove.objects_cache.upsert(new); | ||||||
// Sanity check to ensure the caller didn't accidentally pass in an alias. | ||||||
assert(new != old); | ||||||
|
||||||
// The ID can't change, so no need to update the ID tree. | ||||||
if (has_id) { | ||||||
assert(old.id == new.id); | ||||||
} | ||||||
assert(old.timestamp == new.timestamp); | ||||||
|
||||||
// Update the object tree entry if any of the fields (even ignored) are different. | ||||||
if (!std.mem.eql(u8, std.mem.asBytes(&old), std.mem.asBytes(new))) { | ||||||
// Unlike the index trees, the new and old values in the object tree share the | ||||||
// same key. Therefore put() is sufficient to overwrite the old value. | ||||||
groove.objects.put(new); | ||||||
// The ID can't change, so no need to update the ID tree. Update the object tree entry | ||||||
// if any of the fields (even ignored) are different. We assume the caller will pass in | ||||||
// an object that has changes. | ||||||
// Unlike the index trees, the new and old values in the object tree share the same | ||||||
// key. Therefore put() is sufficient to overwrite the old value. | ||||||
if (constants.verify) { | ||||||
const tombstone = ObjectTreeHelpers(Object).tombstone; | ||||||
const key_from_value = ObjectTreeHelpers(Object).key_from_value; | ||||||
|
||||||
assert(!stdx.equal_bytes(Object, old, new)); | ||||||
assert(key_from_value(old) == key_from_value(new)); | ||||||
assert(!tombstone(old) and !tombstone(new)); | ||||||
} | ||||||
|
||||||
groove.objects.put(new); | ||||||
groove.objects_cache.upsert(new); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This makes my my Rust eye twitch a bit. Isn't this shared mutable aliasing? I think the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have an explicit guard against this above; since I've been bitten by this before here 😄:
I think that's enough to prevent it from happening? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, I think that's different. The tigerbeetle/src/state_machine.zig Line 870 in c367d1d
the tigerbeetle/src/state_machine.zig Line 813 in c367d1d
So However, when we do Not 100% sure that this could happen, but I think it does? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahhh yes, that is the case. Good catch! In the past, I took an explicit stack copy of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that's ok, as long as we document that with a I wonder if we even can require that this is the object from the cache? It must be, if we are updating, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am also reeeeeally wondering why CI is green here 🤔 in But in debug, I expect the overwrite to be reflected robustly. Could you also take a look what's up with tests passing here? Either my theory about the overwrite is wrong (good case), or we somehow don't test this case at all (which would be pretty scary). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
As far as I can tell, we don't have any tests (unit or fuzz) that actually check the result of the secondary index trees, and in this case, if Also just noticed the forest fuzz (which I forgot to run / update in this PR) has also been broken for a few weeks. I need to look at proper infra for fuzzing ASAP, but maybe as a workaround for now we can just run each fuzz for a fixed time on CI? We previously did run some more fuzzes in CI, but with a fixed seed for a single iteration. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It could in theory be a stack copy, but we don't have an explicit use case for that yet so I've made the check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think that is fixed as part of #1234, which merged this morning. |
||||||
|
||||||
inline for (std.meta.fields(IndexTrees)) |field| { | ||||||
const Helper = IndexTreeFieldHelperType(field.name); | ||||||
const old_index = Helper.derive_index(&old); | ||||||
const old_index = Helper.derive_index(old); | ||||||
const new_index = Helper.derive_index(new); | ||||||
|
||||||
// Only update the indexes that change. | ||||||
if (!std.meta.eql(old_index, new_index)) { | ||||||
cb22 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
if (old_index) |index| { | ||||||
const old_index_value = Helper.derive_value(&old, index); | ||||||
const old_index_value = Helper.derive_value(old, index); | ||||||
@field(groove.indexes, field.name).remove(&old_index_value); | ||||||
} | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -242,7 +242,7 @@ else | |
/// - `T` doesn't have any non-deterministic padding, | ||
/// - `T` doesn't embed any pointers. | ||
pub fn equal_bytes(comptime T: type, a: *const T, b: *const T) bool { | ||
comptime assert(std.meta.trait.hasUniqueRepresentation(T)); | ||
comptime assert(hasUniqueRepresentation(T)); | ||
comptime assert(!has_pointers(T)); | ||
comptime assert(@sizeOf(T) * 8 == @bitSizeOf(T)); | ||
|
||
|
@@ -339,7 +339,7 @@ test no_padding { | |
pub inline fn hash_inline(value: anytype) u64 { | ||
comptime { | ||
assert(no_padding(@TypeOf(value))); | ||
assert(std.meta.trait.hasUniqueRepresentation(@TypeOf(value))); | ||
assert(hasUniqueRepresentation(@TypeOf(value))); | ||
} | ||
return low_level_hash(0, switch (@typeInfo(@TypeOf(value))) { | ||
.Struct, .Int => std.mem.asBytes(&value), | ||
|
@@ -520,3 +520,46 @@ pub fn fstatfs(fd: i32, statfs_buf: *StatFs) usize { | |
@intFromPtr(statfs_buf), | ||
); | ||
} | ||
|
||
// TODO(Zig): https://github.com/ziglang/zig/issues/17592. | ||
/// True if every value of the type `T` has a unique bit pattern representing it. | ||
/// In other words, `T` has no unused bits and no padding. | ||
pub fn hasUniqueRepresentation(comptime T: type) bool { | ||
cb22 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
switch (@typeInfo(T)) { | ||
else => return false, // TODO can we know if it's true for some of these types ? | ||
|
||
.AnyFrame, | ||
.Enum, | ||
.ErrorSet, | ||
.Fn, | ||
=> return true, | ||
|
||
.Bool => return false, | ||
|
||
.Int => |info| return @sizeOf(T) * 8 == info.bits, | ||
|
||
.Pointer => |info| return info.size != .Slice, | ||
|
||
.Array => |info| return comptime hasUniqueRepresentation(info.child), | ||
|
||
.Struct => |info| { | ||
// Packed structs are always unique. | ||
if (info.backing_integer != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If a struct is packed, we don't need to recursively check it like the unpacked case below, since Zig ensures packed structs can only contain other packed structsa. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add a couple of tests here though, just to sanity-check things. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, for a good measure, let's assert that the bitsize of all fields adds up to the sizeof the whole struct (that is, that we don't have padding bits). I am thinking about cases like const S = packed struct {
x: u24,
y: u3,
}; I think the language may work both as "the spare bits after |
||
return true; | ||
} | ||
|
||
var sum_size = @as(usize, 0); | ||
|
||
inline for (info.fields) |field| { | ||
const FieldType = field.type; | ||
if (comptime !hasUniqueRepresentation(FieldType)) return false; | ||
sum_size += @sizeOf(FieldType); | ||
} | ||
|
||
return @sizeOf(T) == sum_size; | ||
}, | ||
|
||
.Vector => |info| return comptime hasUniqueRepresentation(info.child) and | ||
@sizeOf(T) == @sizeOf(info.child) * info.len, | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you assert that
stdx.bytes_equal(Object, old, old_from_cache);
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm now aserting they're actually one and the same: #1239 (comment)