From 2266b60b81783e3d70a0ef30de76b2ca35f29528 Mon Sep 17 00:00:00 2001 From: JianHong Zhao Date: Mon, 10 Nov 2025 20:30:42 +0800 Subject: [PATCH 1/2] feat: tuple decoding with components (#826) - Solve #822 --------- Co-authored-by: automergerpr-permission-manager[bot] <190534181+automergerpr-permission-manager[bot]@users.noreply.github.com> --- kmir/src/kmir/decoding.py | 19 ++++- kmir/src/kmir/kast.py | 2 +- kmir/src/kmir/kdist/mir-semantics/kmir.md | 4 +- kmir/src/kmir/kdist/mir-semantics/rt/data.md | 2 +- .../kmir/kdist/mir-semantics/rt/decoding.md | 45 +++++++----- kmir/src/kmir/kdist/mir-semantics/rt/types.md | 2 +- kmir/src/kmir/kdist/mir-semantics/ty.md | 3 +- kmir/src/kmir/ty.py | 30 +++++++- .../data/decode-value/tuple-bool-u32.expected | 2 + .../data/decode-value/tuple-bool-u32.json | 64 +++++++++++++++++ .../data/decode-value/tuple-empty.expected | 1 + .../data/decode-value/tuple-empty.json | 30 ++++++++ .../decode-value/tuple-i16-bool-u8.expected | 3 + .../data/decode-value/tuple-i16-bool-u8.json | 72 +++++++++++++++++++ .../data/decode-value/tuple-u8-i32.expected | 2 + .../data/decode-value/tuple-u8-i32.json | 66 +++++++++++++++++ .../integration/data/decode-value/type-table | 3 +- .../show/arith.smir.cli-info.expected | 2 +- 18 files changed, 323 insertions(+), 29 deletions(-) create mode 100644 kmir/src/tests/integration/data/decode-value/tuple-bool-u32.expected create mode 100644 kmir/src/tests/integration/data/decode-value/tuple-bool-u32.json create mode 100644 kmir/src/tests/integration/data/decode-value/tuple-empty.expected create mode 100644 kmir/src/tests/integration/data/decode-value/tuple-empty.json create mode 100644 kmir/src/tests/integration/data/decode-value/tuple-i16-bool-u8.expected create mode 100644 kmir/src/tests/integration/data/decode-value/tuple-i16-bool-u8.json create mode 100644 kmir/src/tests/integration/data/decode-value/tuple-u8-i32.expected create mode 100644 kmir/src/tests/integration/data/decode-value/tuple-u8-i32.json diff --git a/kmir/src/kmir/decoding.py b/kmir/src/kmir/decoding.py index 8171e0ea9..4d0dc95fb 100644 --- a/kmir/src/kmir/decoding.py +++ b/kmir/src/kmir/decoding.py @@ -191,8 +191,8 @@ def decode_value(data: bytes, type_info: TypeMetadata, types: Mapping[Ty, TypeMe return _decode_array(data, elem_ty, length, types) case StructT(fields=fields, layout=layout): return _decode_struct(data=data, fields=fields, layout=layout, types=types) - case TupleT(components=components): - return _decode_tuple(data=data, component_tys=components, types=types) + case TupleT(components=components, layout=layout): + return _decode_tuple(data=data, component_tys=components, layout=layout, types=types) case EnumT( discriminants=discriminants, fields=fields, @@ -289,6 +289,7 @@ def _decode_tuple( *, data: bytes, component_tys: list[Ty], + layout: LayoutShape | None, types: Mapping[Ty, TypeMetadata], ) -> Value: if not component_tys: @@ -296,7 +297,19 @@ def _decode_tuple( raise ValueError(f'Zero-sized tuple expected empty data, got: {data!r}') return AggregateValue(0, []) - raise ValueError('Tuple decoding with components is not implemented yet') + if not layout: + raise ValueError('Tuple layout not provided') + + offsets = _extract_offsets(layout.fields) + + match layout.variants: + case Single(index=0): + pass + case _: + raise ValueError(f'Unexpected layout variants in tuple: {layout.variants}') + + field_values = _decode_fields(data=data, tys=component_tys, offsets=offsets, types=types) + return AggregateValue(0, field_values) def _decode_enum( diff --git a/kmir/src/kmir/kast.py b/kmir/src/kmir/kast.py index ed6b128f0..7268cf755 100644 --- a/kmir/src/kmir/kast.py +++ b/kmir/src/kmir/kast.py @@ -428,7 +428,7 @@ def _symbolic_value(self, ty: Ty, mutable: bool) -> tuple[KInner, Iterable[KInne ), ) - case TupleT(components): + case TupleT(components=components): elem_vars = [] elem_constraints = [] for _ty in components: diff --git a/kmir/src/kmir/kdist/mir-semantics/kmir.md b/kmir/src/kmir/kdist/mir-semantics/kmir.md index 991777e2d..cedc1382d 100644 --- a/kmir/src/kmir/kdist/mir-semantics/kmir.md +++ b/kmir/src/kmir/kdist/mir-semantics/kmir.md @@ -489,8 +489,8 @@ Therefore a heuristics is used here: syntax Bool ::= isTupleType ( TypeInfo ) [function, total] | isRefType ( TypeInfo ) [function, total] // ------------------------------------------------------- - rule isTupleType(typeInfoTupleType(_)) => true - rule isTupleType( _ ) => false [owise] + rule isTupleType(typeInfoTupleType(_, _)) => true + rule isTupleType( _ ) => false [owise] rule isRefType(typeInfoRefType(_)) => true rule isRefType( _ ) => false [owise] diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/data.md b/kmir/src/kmir/kdist/mir-semantics/rt/data.md index 5b1e489fd..a3d30ac5d 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/data.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/data.md @@ -1554,7 +1554,7 @@ Zero-sized types can be decoded trivially into their respective representation. rule #decodeConstant(constantKindZeroSized, _TY, typeInfoStructType(_, _, _, _)) => Aggregate(variantIdx(0), .List) ... // zero-sized tuple - rule #decodeConstant(constantKindZeroSized, _TY, typeInfoTupleType(_)) + rule #decodeConstant(constantKindZeroSized, _TY, typeInfoTupleType(_, _)) => Aggregate(variantIdx(0), .List) ... // zero-sized array rule #decodeConstant(constantKindZeroSized, _TY, typeInfoArrayType(_, _)) diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/decoding.md b/kmir/src/kmir/kdist/mir-semantics/rt/decoding.md index 77cc5bbd5..522df0846 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/decoding.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/decoding.md @@ -86,10 +86,21 @@ When using layout offsets we always return fields in declaration order within th // --------------------------------------------------------------------------- // Use the offsets when they are provided and the input length is sufficient. rule #decodeValue(BYTES, typeInfoStructType(_, _, TYS, LAYOUT)) - => Aggregate(variantIdx(0), #decodeStructFieldsWithOffsets(BYTES, TYS, #structOffsets(LAYOUT))) - requires #structOffsets(LAYOUT) =/=K .MachineSizes - andBool 0 <=Int #neededBytesForOffsets(TYS, #structOffsets(LAYOUT)) - andBool lengthBytes(BYTES) >=Int #neededBytesForOffsets(TYS, #structOffsets(LAYOUT)) + => Aggregate(variantIdx(0), #decodeFieldsWithOffsets(BYTES, TYS, #layoutOffsets(LAYOUT))) + requires #layoutOffsets(LAYOUT) =/=K .MachineSizes + andBool 0 <=Int #neededBytesForOffsets(TYS, #layoutOffsets(LAYOUT)) + andBool lengthBytes(BYTES) >=Int #neededBytesForOffsets(TYS, #layoutOffsets(LAYOUT)) + [preserves-definedness] + +rule #decodeValue(BYTES, typeInfoTupleType(.Tys, _)) + => Aggregate(variantIdx(0), .List) + requires lengthBytes(BYTES) ==Int 0 + +rule #decodeValue(BYTES, typeInfoTupleType(TYS, LAYOUT)) + => Aggregate(variantIdx(0), #decodeFieldsWithOffsets(BYTES, TYS, #layoutOffsets(LAYOUT))) + requires #layoutOffsets(LAYOUT) =/=K .MachineSizes + andBool 0 <=Int #neededBytesForOffsets(TYS, #layoutOffsets(LAYOUT)) + andBool lengthBytes(BYTES) >=Int #neededBytesForOffsets(TYS, #layoutOffsets(LAYOUT)) [preserves-definedness] // --------------------------------------------------------------------------- @@ -101,10 +112,10 @@ rule #msBytes(machineSize(mirInt(NBITS))) => NBITS /Int 8 [preserves-definedness rule #msBytes(machineSize(NBITS)) => NBITS /Int 8 [owise, preserves-definedness] // Extract field offsets from the struct layout when available (Arbitrary only). -syntax MachineSizes ::= #structOffsets ( MaybeLayoutShape ) [function, total] -rule #structOffsets(someLayoutShape(layoutShape(fieldsShapeArbitrary(mk(OFFSETS)), _, _, _, _))) => OFFSETS -rule #structOffsets(noLayoutShape) => .MachineSizes -rule #structOffsets(_) => .MachineSizes [owise] +syntax MachineSizes ::= #layoutOffsets ( MaybeLayoutShape ) [function, total] +rule #layoutOffsets(someLayoutShape(layoutShape(fieldsShapeArbitrary(mk(OFFSETS)), _, _, _, _))) => OFFSETS +rule #layoutOffsets(noLayoutShape) => .MachineSizes +rule #layoutOffsets(_) => .MachineSizes [owise] // Minimum number of input bytes required to decode all fields by the chosen offsets. // Uses builtin maxInt to compute max(offset + size). The lists of types and @@ -118,17 +129,17 @@ rule #neededBytesForOffsets(TY TYS, OFFSET OFFSETS) rule #neededBytesForOffsets(_, _) => -1 [owise] // Decode each field at its byte offset and return values in declaration order. -syntax List ::= #decodeStructFieldsWithOffsets ( Bytes , Tys , MachineSizes ) [function, total] -rule #decodeStructFieldsWithOffsets(_, .Tys, _OFFSETS) => .List -rule #decodeStructFieldsWithOffsets(_, _TYS, .MachineSizes) => .List [owise] -rule #decodeStructFieldsWithOffsets(BYTES, TY TYS, OFFSET OFFSETS) +syntax List ::= #decodeFieldsWithOffsets ( Bytes , Tys , MachineSizes ) [function, total] +rule #decodeFieldsWithOffsets(_, .Tys, _OFFSETS) => .List +rule #decodeFieldsWithOffsets(_, _TYS, .MachineSizes) => .List [owise] +rule #decodeFieldsWithOffsets(BYTES, TY TYS, OFFSET OFFSETS) => ListItem( #decodeValue( substrBytes(BYTES, #msBytes(OFFSET), #msBytes(OFFSET) +Int #elemSize(lookupTy(TY))), lookupTy(TY) ) ) - #decodeStructFieldsWithOffsets(BYTES, TYS, OFFSETS) + #decodeFieldsWithOffsets(BYTES, TYS, OFFSETS) requires lengthBytes(BYTES) >=Int (#msBytes(OFFSET) +Int #elemSize(lookupTy(TY))) [preserves-definedness] ``` @@ -182,10 +193,10 @@ Known element sizes for common types: [owise] // ---- Tuples ---- - // Without layout, approximate as sum of element sizes (ignores padding). - rule #elemSize(typeInfoTupleType(.Tys)) => 0 - rule #elemSize(typeInfoTupleType(TY TYS)) - => #elemSize(lookupTy(TY)) +Int #elemSize(typeInfoTupleType(TYS)) + rule #elemSize(typeInfoTupleType(_TYS, someLayoutShape(layoutShape(_, _, _, _, SIZE)))) => #msBytes(SIZE) + rule #elemSize(typeInfoTupleType(.Tys, noLayoutShape)) => 0 + rule #elemSize(typeInfoTupleType(TY TYS, noLayoutShape)) + => #elemSize(lookupTy(TY)) +Int #elemSize(typeInfoTupleType(TYS, noLayoutShape)) // ---- Structs and Enums with layout ---- rule #elemSize(typeInfoStructType(_, _, _, someLayoutShape(layoutShape(_, _, _, _, SIZE)))) => #msBytes(SIZE) diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/types.md b/kmir/src/kmir/kdist/mir-semantics/rt/types.md index 5f6e92561..f0a7c333e 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/types.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/types.md @@ -148,7 +148,7 @@ Slices, `str`s and dynamic types require it, and any `Ty` that `is_sized` does ```k syntax Bool ::= #zeroSizedType ( TypeInfo ) [function, total] - rule #zeroSizedType(typeInfoTupleType(.Tys)) => true + rule #zeroSizedType(typeInfoTupleType(.Tys, _)) => true rule #zeroSizedType(typeInfoStructType(_, _, .Tys, _)) => true rule #zeroSizedType(typeInfoVoidType) => true // FIXME: Only unit tuples, empty structs, and void are recognized here; other diff --git a/kmir/src/kmir/kdist/mir-semantics/ty.md b/kmir/src/kmir/kdist/mir-semantics/ty.md index 9defdbb6e..ec3dc1411 100644 --- a/kmir/src/kmir/kdist/mir-semantics/ty.md +++ b/kmir/src/kmir/kdist/mir-semantics/ty.md @@ -274,7 +274,8 @@ syntax ExistentialPredicateBinders ::= List {ExistentialPredicateBinder, ""} | typeInfoArrayType(Ty, MaybeTyConst) [symbol(TypeInfo::ArrayType) , group(mir-enum---elem-type--size)] | typeInfoPtrType(Ty) [symbol(TypeInfo::PtrType) , group(mir-enum---pointee-type)] | typeInfoRefType(Ty) [symbol(TypeInfo::RefType) , group(mir-enum---pointee-type)] - | typeInfoTupleType(Tys) [symbol(TypeInfo::TupleType) , group(mir-enum---types)] + | typeInfoTupleType(types: Tys, layout: MaybeLayoutShape) + [symbol(TypeInfo::TupleType) , group(mir-enum---types--layout)] | typeInfoFunType(MIRString) [symbol(TypeInfo::FunType) , group(mir-enum)] | "typeInfoVoidType" [symbol(TypeInfo::VoidType) , group(mir-enum)] diff --git a/kmir/src/kmir/ty.py b/kmir/src/kmir/ty.py index b8978db82..338d34753 100644 --- a/kmir/src/kmir/ty.py +++ b/kmir/src/kmir/ty.py @@ -620,19 +620,47 @@ def from_raw(data: Any) -> RefT: @dataclass class TupleT(TypeMetadata): components: list[Ty] + layout: LayoutShape | None @staticmethod def from_raw(data: Any) -> TupleT: match data: + case { + 'TupleType': { + 'types': types, + 'layout': layout, + } + }: + return TupleT( + components=list(types), + layout=LayoutShape.from_raw(layout) if layout is not None else None, + ) case { 'TupleType': { 'types': types, } }: - return TupleT(list(types)) + return TupleT( + components=list(types), + layout=None, + ) case _: raise _cannot_parse_as('TupleT', data) + def nbytes(self, types: Mapping[Ty, TypeMetadata]) -> int: + match self.layout: + case None: + total = 0 + for component in self.components: + try: + component_info = types[component] + except KeyError as err: + raise ValueError(f'Unknown tuple component type: {component}') from err + total += component_info.nbytes(types) + return total + case LayoutShape(size=size): + return size.in_bytes + @dataclass class FunT(TypeMetadata): diff --git a/kmir/src/tests/integration/data/decode-value/tuple-bool-u32.expected b/kmir/src/tests/integration/data/decode-value/tuple-bool-u32.expected new file mode 100644 index 000000000..93dcdd749 --- /dev/null +++ b/kmir/src/tests/integration/data/decode-value/tuple-bool-u32.expected @@ -0,0 +1,2 @@ +Aggregate ( variantIdx ( 0 ) , ListItem ( BoolVal ( true ) ) +ListItem ( Integer ( 305419896 , 32 , false ) ) ) diff --git a/kmir/src/tests/integration/data/decode-value/tuple-bool-u32.json b/kmir/src/tests/integration/data/decode-value/tuple-bool-u32.json new file mode 100644 index 000000000..85653d655 --- /dev/null +++ b/kmir/src/tests/integration/data/decode-value/tuple-bool-u32.json @@ -0,0 +1,64 @@ +{ + "bytes": [ + 1, + 0, + 0, + 0, + 120, + 86, + 52, + 18 + ], + "types": [ + [ + 1, + { + "PrimitiveType": "Bool" + } + ], + [ + 4, + { + "PrimitiveType": { + "Uint": "U32" + } + } + ] + ], + "typeInfo": { + "TupleType": { + "types": [ + 1, + 4 + ], + "layout": { + "fields": { + "Arbitrary": { + "offsets": [ + { + "num_bits": 0 + }, + { + "num_bits": 32 + } + ] + } + }, + "variants": { + "Single": { + "index": 0 + } + }, + "abi": { + "Aggregate": { + "sized": true + } + }, + "abi_align": 4, + "size": { + "num_bits": 64 + } + } + } + } +} diff --git a/kmir/src/tests/integration/data/decode-value/tuple-empty.expected b/kmir/src/tests/integration/data/decode-value/tuple-empty.expected new file mode 100644 index 000000000..56b70ccf6 --- /dev/null +++ b/kmir/src/tests/integration/data/decode-value/tuple-empty.expected @@ -0,0 +1 @@ +Aggregate ( variantIdx ( 0 ) , .List ) diff --git a/kmir/src/tests/integration/data/decode-value/tuple-empty.json b/kmir/src/tests/integration/data/decode-value/tuple-empty.json new file mode 100644 index 000000000..5bd56c711 --- /dev/null +++ b/kmir/src/tests/integration/data/decode-value/tuple-empty.json @@ -0,0 +1,30 @@ +{ + "bytes": [], + "types": [], + "typeInfo": { + "TupleType": { + "types": [], + "layout": { + "fields": { + "Arbitrary": { + "offsets": [] + } + }, + "variants": { + "Single": { + "index": 0 + } + }, + "abi": { + "Aggregate": { + "sized": true + } + }, + "abi_align": 1, + "size": { + "num_bits": 0 + } + } + } + } +} diff --git a/kmir/src/tests/integration/data/decode-value/tuple-i16-bool-u8.expected b/kmir/src/tests/integration/data/decode-value/tuple-i16-bool-u8.expected new file mode 100644 index 000000000..e7a24beb9 --- /dev/null +++ b/kmir/src/tests/integration/data/decode-value/tuple-i16-bool-u8.expected @@ -0,0 +1,3 @@ +Aggregate ( variantIdx ( 0 ) , ListItem ( Integer ( 4660 , 16 , true ) ) +ListItem ( BoolVal ( true ) ) +ListItem ( Integer ( 254 , 8 , false ) ) ) diff --git a/kmir/src/tests/integration/data/decode-value/tuple-i16-bool-u8.json b/kmir/src/tests/integration/data/decode-value/tuple-i16-bool-u8.json new file mode 100644 index 000000000..121fbf5b9 --- /dev/null +++ b/kmir/src/tests/integration/data/decode-value/tuple-i16-bool-u8.json @@ -0,0 +1,72 @@ +{ + "bytes": [ + 52, + 18, + 1, + 254 + ], + "types": [ + [ + 0, + { + "PrimitiveType": { + "Uint": "U8" + } + } + ], + [ + 1, + { + "PrimitiveType": "Bool" + } + ], + [ + 2, + { + "PrimitiveType": { + "Int": "I16" + } + } + ] + ], + "typeInfo": { + "TupleType": { + "types": [ + 2, + 1, + 0 + ], + "layout": { + "fields": { + "Arbitrary": { + "offsets": [ + { + "num_bits": 0 + }, + { + "num_bits": 16 + }, + { + "num_bits": 24 + } + ] + } + }, + "variants": { + "Single": { + "index": 0 + } + }, + "abi": { + "Aggregate": { + "sized": true + } + }, + "abi_align": 1, + "size": { + "num_bits": 32 + } + } + } + } +} diff --git a/kmir/src/tests/integration/data/decode-value/tuple-u8-i32.expected b/kmir/src/tests/integration/data/decode-value/tuple-u8-i32.expected new file mode 100644 index 000000000..17e65ddac --- /dev/null +++ b/kmir/src/tests/integration/data/decode-value/tuple-u8-i32.expected @@ -0,0 +1,2 @@ +Aggregate ( variantIdx ( 0 ) , ListItem ( Integer ( 171 , 8 , false ) ) +ListItem ( Integer ( 305419896 , 32 , true ) ) ) diff --git a/kmir/src/tests/integration/data/decode-value/tuple-u8-i32.json b/kmir/src/tests/integration/data/decode-value/tuple-u8-i32.json new file mode 100644 index 000000000..2b4499a1f --- /dev/null +++ b/kmir/src/tests/integration/data/decode-value/tuple-u8-i32.json @@ -0,0 +1,66 @@ +{ + "bytes": [ + 171, + 0, + 0, + 0, + 120, + 86, + 52, + 18 + ], + "types": [ + [ + 0, + { + "PrimitiveType": { + "Uint": "U8" + } + } + ], + [ + 3, + { + "PrimitiveType": { + "Int": "I32" + } + } + ] + ], + "typeInfo": { + "TupleType": { + "types": [ + 0, + 3 + ], + "layout": { + "fields": { + "Arbitrary": { + "offsets": [ + { + "num_bits": 0 + }, + { + "num_bits": 32 + } + ] + } + }, + "variants": { + "Single": { + "index": 0 + } + }, + "abi": { + "Aggregate": { + "sized": true + } + }, + "abi_align": 4, + "size": { + "num_bits": 64 + } + } + } + } +} diff --git a/kmir/src/tests/integration/data/decode-value/type-table b/kmir/src/tests/integration/data/decode-value/type-table index 40db1261c..01240bed6 100644 --- a/kmir/src/tests/integration/data/decode-value/type-table +++ b/kmir/src/tests/integration/data/decode-value/type-table @@ -2,5 +2,6 @@ [0, { "PrimitiveType": { "Uint": "U8"}}], [1, { "PrimitiveType": "Bool"}], [2, { "PrimitiveType": { "Int": "I16"}}], - [3, { "PrimitiveType": { "Int": "I32"}}] + [3, { "PrimitiveType": { "Int": "I32"}}], + [4, { "PrimitiveType": { "Uint": "U32"}}] ] diff --git a/kmir/src/tests/integration/data/prove-rs/show/arith.smir.cli-info.expected b/kmir/src/tests/integration/data/prove-rs/show/arith.smir.cli-info.expected index 7435e195a..58b249cf0 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/arith.smir.cli-info.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/arith.smir.cli-info.expected @@ -1,5 +1,5 @@ Types requested: (1, 5, 6) -Type 1: TupleT(components=[]) +Type 1: TupleT(components=[], layout=LayoutShape(fields=ArbitraryFields(offsets=[]), variants=Single(index=0), abi=ValueAbi(), abi_align=1, size=MachineSize(num_bits=0))) Type 5: RefT(pointee_type=32) Type 6: IntT(info=) \ No newline at end of file From 8ef12df346de8840aa1fbca711bd85e9f6a2ba03 Mon Sep 17 00:00:00 2001 From: Daniel Cumming <124537596+dkcumming@users.noreply.github.com> Date: Tue, 11 Nov 2025 12:36:27 +1000 Subject: [PATCH 2/2] NO OP rules for compiler hint intrinsics (#828) Some instrinsics do not affect the semantics of the compiled program but are hints to the compiler. In our semantics these intrinsics are NO OPs where the call is ignored and current continuation (`` cell) moves onto the next term in the sequence. In particular there are rules implemented for these intrinsics: - `cold_path` - `prefetch_read_data` - `prefetch_write_data` - `prefetch_read_instruction` - `prefetch_write_instruction` By handling `cold_path` we also support `likely` and `unlikely` which are `MonoItemFn` that call the `cold_path` instrinic. This PR also refactors the intrinsics into their only file `intrinsics.md`. closes #802 --------- Co-authored-by: Jost Berthold --- .../kmir/kdist/mir-semantics/intrinsics.md | 107 ++++++++++++++++++ kmir/src/kmir/kdist/mir-semantics/kmir.md | 71 +----------- ...ithmetic-fail.rs => checked_arithmetic.rs} | 0 .../integration/data/prove-rs/intrinsics.rs | 29 +++++ .../data/prove-rs/raw-ptr-cast-fail.rs | 10 ++ ...d_arithmetic-fail.checked_add_i32.expected | 39 ------- .../show/raw-ptr-cast-fail.main.expected | 40 +++++++ .../src/tests/integration/test_integration.py | 1 + 8 files changed, 189 insertions(+), 108 deletions(-) create mode 100644 kmir/src/kmir/kdist/mir-semantics/intrinsics.md rename kmir/src/tests/integration/data/prove-rs/{checked_arithmetic-fail.rs => checked_arithmetic.rs} (100%) create mode 100644 kmir/src/tests/integration/data/prove-rs/intrinsics.rs create mode 100644 kmir/src/tests/integration/data/prove-rs/raw-ptr-cast-fail.rs delete mode 100644 kmir/src/tests/integration/data/prove-rs/show/checked_arithmetic-fail.checked_add_i32.expected create mode 100644 kmir/src/tests/integration/data/prove-rs/show/raw-ptr-cast-fail.main.expected diff --git a/kmir/src/kmir/kdist/mir-semantics/intrinsics.md b/kmir/src/kmir/kdist/mir-semantics/intrinsics.md new file mode 100644 index 000000000..f95c90d6a --- /dev/null +++ b/kmir/src/kmir/kdist/mir-semantics/intrinsics.md @@ -0,0 +1,107 @@ +# Rust Intrinsic Functions in K + +```k +// This looks like a circular import but only module KMIR in kmir.md imports KMIR-INTRINSICS +requires "kmir.md" + +module KMIR-INTRINSICS + imports KMIR-CONTROL-FLOW +``` + +### Intrinsic Functions + +Intrinsic functions are built-in functions provided by the compiler that don't have regular MIR bodies. +They are handled specially in the execution semantics through the `#execIntrinsic` mechanism. +When an intrinsic function is called, the execution bypasses the normal function call setup and directly +executes the intrinsic-specific logic. + +#### Black Box (`std::hint::black_box`) + +The `black_box` intrinsic serves as an optimization barrier, preventing the compiler from making assumptions +about the value passed through it. In the semantics, it acts as an identity function that simply passes +its argument to the destination without modification. + +```k + // Black box intrinsic implementation - identity function + rule #execIntrinsic(IntrinsicFunction(symbol("black_box")), ARG:Operand .Operands, DEST) + => #setLocalValue(DEST, ARG) + ... +``` + +#### Cold Path (`std::hint::cold_path`) + +The `cold_path` intrinsic is a compiler hint indicating that the current execution path is unlikely to be taken. +It provides metadata for the optimiser and code generator to improve layout and branch predicition but is +a NO OP for program semantics. `std::intrinsics::likely` and `std::intrinsics::unlikely` are +"normal" `MonoItemFn`s that call the `cold_path` intrinsic. + +```k + rule #execIntrinsic(IntrinsicFunction(symbol("cold_path")), .Operands, _DEST) => .K ... +``` + +#### Prefetch (`std::intrinsics::prefetch_*`) + +The `prefetch_read_data`, `prefetch_write_data`, `prefetch_read_instruction`, and `prefetch_write_instruction` +intrinsics in Rust are performance hints that request the CPU to load or prepare a memory address in cache +before it's used. They have no effect on program semantics, and are implemented as a NO OP in this semantics. + +```k + rule #execIntrinsic(IntrinsicFunction(symbol("prefetch_read_data")), _ARG1:Operand _ARG2:Operand .Operands, _DEST) => .K ... + rule #execIntrinsic(IntrinsicFunction(symbol("prefetch_write_data")), _ARG1:Operand _ARG2:Operand .Operands, _DEST) => .K ... + + rule #execIntrinsic(IntrinsicFunction(symbol("prefetch_read_instruction")), _ARG1:Operand _ARG2:Operand .Operands, _DEST) => .K ... + rule #execIntrinsic(IntrinsicFunction(symbol("prefetch_write_instruction")), _ARG1:Operand _ARG2:Operand .Operands, _DEST) => .K ... +``` + +#### Raw Eq (`std::intrinsics::raw_eq`) + +The `raw_eq` intrinsic performs byte-by-byte equality comparison of the memory contents pointed to by two references. +It returns a boolean value indicating whether the referenced values are equal. The implementation dereferences the +provided references to access the underlying values, then compares them using K's built-in equality operator. + +**Type Safety:** +The implementation requires operands to have identical types (`TY1 ==K TY2`) before performing the comparison. +Execution gets stuck (no matching rule) when operands have different types or unknown type information. + +```k + // Raw eq: dereference operands, extract types, and delegate to typed comparison + rule #execIntrinsic(IntrinsicFunction(symbol("raw_eq")), ARG1:Operand ARG2:Operand .Operands, PLACE) + => #execRawEqTyped(PLACE, #withDeref(ARG1), #extractOperandType(#withDeref(ARG1), LOCALS), + #withDeref(ARG2), #extractOperandType(#withDeref(ARG2), LOCALS)) + ... + LOCALS + + // Compare values only if types are identical + syntax KItem ::= #execRawEqTyped(Place, Evaluation, MaybeTy, Evaluation, MaybeTy) [seqstrict(2,4)] + rule #execRawEqTyped(DEST, VAL1:Value, TY1:Ty, VAL2:Value, TY2:Ty) + => #setLocalValue(DEST, BoolVal(VAL1 ==K VAL2)) + ... + requires TY1 ==K TY2 + [preserves-definedness] + + // Add deref projection to operands + syntax Operand ::= #withDeref(Operand) [function, total] + rule #withDeref(operandCopy(place(LOCAL, PROJ))) + => operandCopy(place(LOCAL, appendP(PROJ, projectionElemDeref .ProjectionElems))) + rule #withDeref(operandMove(place(LOCAL, PROJ))) + => operandCopy(place(LOCAL, appendP(PROJ, projectionElemDeref .ProjectionElems))) + // must not overwrite the value, just the reference is moved! + rule #withDeref(OP) => OP [owise] + + // Extract type from operands (locals with projections, constants, fallback to unknown) + syntax MaybeTy ::= #extractOperandType(Operand, List) [function, total] + rule #extractOperandType(operandCopy(place(local(I), PROJS)), LOCALS) + => getTyOf(tyOfLocal({LOCALS[I]}:>TypedLocal), PROJS) + requires 0 <=Int I andBool I getTyOf(tyOfLocal({LOCALS[I]}:>TypedLocal), PROJS) + requires 0 <=Int I andBool I TY + rule #extractOperandType(_, _) => TyUnknown [owise] +``` + +```k +endmodule +``` diff --git a/kmir/src/kmir/kdist/mir-semantics/kmir.md b/kmir/src/kmir/kdist/mir-semantics/kmir.md index cedc1382d..8e805eb05 100644 --- a/kmir/src/kmir/kdist/mir-semantics/kmir.md +++ b/kmir/src/kmir/kdist/mir-semantics/kmir.md @@ -6,6 +6,7 @@ requires "rt/data.md" requires "rt/configuration.md" requires "lemmas/kmir-lemmas.md" requires "cheatcodes.md" +requires "intrinsics.md" ``` ## Syntax of MIR in K @@ -560,75 +561,6 @@ Drops are elaborated to Noops but still define the continuing control flow. Unre ``` -### Intrinsic Functions - -Intrinsic functions are built-in functions provided by the compiler that don't have regular MIR bodies. -They are handled specially in the execution semantics through the `#execIntrinsic` mechanism. -When an intrinsic function is called, the execution bypasses the normal function call setup and directly -executes the intrinsic-specific logic. - -#### Black Box (`std::hint::black_box`) - -The `black_box` intrinsic serves as an optimization barrier, preventing the compiler from making assumptions -about the value passed through it. In the semantics, it acts as an identity function that simply passes -its argument to the destination without modification. - -```k - // Black box intrinsic implementation - identity function - rule #execIntrinsic(IntrinsicFunction(symbol("black_box")), ARG:Operand .Operands, DEST) - => #setLocalValue(DEST, ARG) - ... -``` - -#### Raw Eq (`std::intrinsics::raw_eq`) - -The `raw_eq` intrinsic performs byte-by-byte equality comparison of the memory contents pointed to by two references. -It returns a boolean value indicating whether the referenced values are equal. The implementation dereferences the -provided references to access the underlying values, then compares them using K's built-in equality operator. - -**Type Safety:** -The implementation requires operands to have identical types (`TY1 ==K TY2`) before performing the comparison. -Execution gets stuck (no matching rule) when operands have different types or unknown type information. - -```k - // Raw eq: dereference operands, extract types, and delegate to typed comparison - rule #execIntrinsic(IntrinsicFunction(symbol("raw_eq")), ARG1:Operand ARG2:Operand .Operands, PLACE) - => #execRawEqTyped(PLACE, #withDeref(ARG1), #extractOperandType(#withDeref(ARG1), LOCALS), - #withDeref(ARG2), #extractOperandType(#withDeref(ARG2), LOCALS)) - ... - LOCALS - - // Compare values only if types are identical - syntax KItem ::= #execRawEqTyped(Place, Evaluation, MaybeTy, Evaluation, MaybeTy) [seqstrict(2,4)] - rule #execRawEqTyped(DEST, VAL1:Value, TY1:Ty, VAL2:Value, TY2:Ty) - => #setLocalValue(DEST, BoolVal(VAL1 ==K VAL2)) - ... - requires TY1 ==K TY2 - [preserves-definedness] - - // Add deref projection to operands - syntax Operand ::= #withDeref(Operand) [function, total] - rule #withDeref(operandCopy(place(LOCAL, PROJ))) - => operandCopy(place(LOCAL, appendP(PROJ, projectionElemDeref .ProjectionElems))) - rule #withDeref(operandMove(place(LOCAL, PROJ))) - => operandCopy(place(LOCAL, appendP(PROJ, projectionElemDeref .ProjectionElems))) - // must not overwrite the value, just the reference is moved! - rule #withDeref(OP) => OP [owise] - - // Extract type from operands (locals with projections, constants, fallback to unknown) - syntax MaybeTy ::= #extractOperandType(Operand, List) [function, total] - rule #extractOperandType(operandCopy(place(local(I), PROJS)), LOCALS) - => getTyOf(tyOfLocal({LOCALS[I]}:>TypedLocal), PROJS) - requires 0 <=Int I andBool I getTyOf(tyOfLocal({LOCALS[I]}:>TypedLocal), PROJS) - requires 0 <=Int I andBool I TY - rule #extractOperandType(_, _) => TyUnknown [owise] -``` - ### Stopping on Program Errors The semantics has a dedicated error sort to stop execution when flawed input or undefined behaviour is detected. @@ -656,5 +588,6 @@ module KMIR imports KMIR-AST // Necessary for the external Python parser imports KMIR-CONTROL-FLOW imports KMIR-CHEATCODES + imports KMIR-INTRINSICS imports KMIR-LEMMAS endmodule diff --git a/kmir/src/tests/integration/data/prove-rs/checked_arithmetic-fail.rs b/kmir/src/tests/integration/data/prove-rs/checked_arithmetic.rs similarity index 100% rename from kmir/src/tests/integration/data/prove-rs/checked_arithmetic-fail.rs rename to kmir/src/tests/integration/data/prove-rs/checked_arithmetic.rs diff --git a/kmir/src/tests/integration/data/prove-rs/intrinsics.rs b/kmir/src/tests/integration/data/prove-rs/intrinsics.rs new file mode 100644 index 000000000..fb8a2d0c1 --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/intrinsics.rs @@ -0,0 +1,29 @@ +#![feature(core_intrinsics)] +use std::intrinsics; + +fn main() { + intrinsics::cold_path(); + let b = true; + intrinsics::likely(b); + intrinsics::unlikely(b); + prefetch(); +} + +fn prefetch() { + let mut data = 11; + let ptr = &data as *const i32; + + unsafe { + intrinsics::prefetch_read_instruction::(ptr, 3); + intrinsics::prefetch_read_data::(ptr, 3); + assert_eq!(11, *ptr); + } + + let ptr_mut = &mut data as *mut i32; + unsafe { + intrinsics::prefetch_write_instruction::(ptr_mut, 3); + intrinsics::prefetch_write_data::(ptr_mut, 3); + // (*ptr_mut) = 44; + // assert_eq!(44, *ptr); // FIXME: fails due to thunks on casts + } +} diff --git a/kmir/src/tests/integration/data/prove-rs/raw-ptr-cast-fail.rs b/kmir/src/tests/integration/data/prove-rs/raw-ptr-cast-fail.rs new file mode 100644 index 000000000..cf098c9c3 --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/raw-ptr-cast-fail.rs @@ -0,0 +1,10 @@ +fn main() { + let mut data = 11; + let ptr = &data as *const i32; + + let ptr_mut = &mut data as *mut i32; + unsafe { + (*ptr_mut) = 44; + assert_eq!(44, *ptr); // FIXME: fails due to thunks on casts + } +} diff --git a/kmir/src/tests/integration/data/prove-rs/show/checked_arithmetic-fail.checked_add_i32.expected b/kmir/src/tests/integration/data/prove-rs/show/checked_arithmetic-fail.checked_add_i32.expected deleted file mode 100644 index 39a16b517..000000000 --- a/kmir/src/tests/integration/data/prove-rs/show/checked_arithmetic-fail.checked_add_i32.expected +++ /dev/null @@ -1,39 +0,0 @@ - -┌─ 1 (root, init) -│ #execTerminator ( terminator ( ... kind: terminatorKindCall ( ... func: operandC -│ span: 0 -│ -│ (64 steps) -├─ 3 (split) -│ #selectBlock ( switchTargets ( ... branches: branch ( 0 , basicBlockIdx ( 4 ) ) -┃ -┃ (branch) -┣━━┓ subst: .Subst -┃ ┃ constraint: -┃ ┃ ARG_INT1:Int +Int ARG_INT2:Int ==K truncate ( ARG_INT1:Int +Int ARG_INT2:Int , 32 , Signed ) -┃ │ -┃ ├─ 4 -┃ │ #selectBlock ( switchTargets ( ... branches: branch ( 0 , basicBlockIdx ( 4 ) ) -┃ │ -┃ │ (250 steps) -┃ ├─ 6 (terminal) -┃ │ #EndProgram ~> .K -┃ │ -┃ ┊ constraint: true -┃ ┊ subst: ... -┃ └─ 2 (leaf, target, terminal) -┃ #EndProgram ~> .K -┃ -┗━━┓ subst: .Subst - ┃ constraint: - ┃ notBool truncate ( ARG_INT1:Int +Int ARG_INT2:Int , 32 , Signed ) ==K ARG_INT1:Int +Int ARG_INT2:Int - │ - ├─ 5 - │ #selectBlock ( switchTargets ( ... branches: branch ( 0 , basicBlockIdx ( 4 ) ) - │ - │ (6 steps) - └─ 7 (stuck, leaf) - #execIntrinsic ( IntrinsicFunction ( symbol ( "cold_path" ) ) , .Operands , plac - - - diff --git a/kmir/src/tests/integration/data/prove-rs/show/raw-ptr-cast-fail.main.expected b/kmir/src/tests/integration/data/prove-rs/show/raw-ptr-cast-fail.main.expected new file mode 100644 index 000000000..f7d8e97bb --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/show/raw-ptr-cast-fail.main.expected @@ -0,0 +1,40 @@ + +┌─ 1 (root, init) +│ #execTerminator ( terminator ( ... kind: terminatorKindCall ( ... func: operandC +│ span: 0 +│ +│ (119 steps) +├─ 3 +│ #expect ( thunk ( #applyBinOp ( binOpEq , thunk ( #applyBinOp ( binOpBitAnd , th +│ function: main +┃ +┃ (1 step) +┣━━┓ +┃ │ +┃ ├─ 4 +┃ │ AssertError ( assertMessageMisalignedPointerDereference ( ... required: operandC +┃ │ function: main +┃ │ +┃ │ (1 step) +┃ └─ 6 (stuck, leaf) +┃ #ProgramError ( AssertError ( assertMessageMisalignedPointerDereference ( ... re +┃ function: main +┃ +┗━━┓ + │ + ├─ 5 + │ #execBlockIdx ( basicBlockIdx ( 3 ) ) ~> .K + │ function: main + │ + │ (124 steps) + ├─ 7 (terminal) + │ #EndProgram ~> .K + │ function: main + │ + ┊ constraint: true + ┊ subst: ... + └─ 2 (leaf, target, terminal) + #EndProgram ~> .K + + + diff --git a/kmir/src/tests/integration/test_integration.py b/kmir/src/tests/integration/test_integration.py index ae02d189d..4bdd55b39 100644 --- a/kmir/src/tests/integration/test_integration.py +++ b/kmir/src/tests/integration/test_integration.py @@ -52,6 +52,7 @@ 'pointer-cast-length-test-fail', 'niche-enum', 'assume-cheatcode-conflict-fail', + 'raw-ptr-cast-fail', ]