Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions kmir/src/kmir/decoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ def decode_value(data: bytes, type_info: TypeMetadata, types: Mapping[Ty, TypeMe
return _decode_array(data, elem_ty, length, types)
case StructT(fields=fields, layout=layout):
return _decode_struct(data=data, fields=fields, layout=layout, types=types)
case TupleT(components=components):
return _decode_tuple(data=data, component_tys=components, types=types)
case TupleT(components=components, layout=layout):
return _decode_tuple(data=data, component_tys=components, layout=layout, types=types)
case EnumT(
discriminants=discriminants,
fields=fields,
Expand Down Expand Up @@ -289,14 +289,27 @@ def _decode_tuple(
*,
data: bytes,
component_tys: list[Ty],
layout: LayoutShape | None,
types: Mapping[Ty, TypeMetadata],
) -> Value:
if not component_tys:
if data:
raise ValueError(f'Zero-sized tuple expected empty data, got: {data!r}')
return AggregateValue(0, [])

raise ValueError('Tuple decoding with components is not implemented yet')
if not layout:
raise ValueError('Tuple layout not provided')

offsets = _extract_offsets(layout.fields)

match layout.variants:
case Single(index=0):
pass
case _:
raise ValueError(f'Unexpected layout variants in tuple: {layout.variants}')

field_values = _decode_fields(data=data, tys=component_tys, offsets=offsets, types=types)
return AggregateValue(0, field_values)


def _decode_enum(
Expand Down
2 changes: 1 addition & 1 deletion kmir/src/kmir/kast.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def _symbolic_value(self, ty: Ty, mutable: bool) -> tuple[KInner, Iterable[KInne
),
)

case TupleT(components):
case TupleT(components=components):
elem_vars = []
elem_constraints = []
for _ty in components:
Expand Down
107 changes: 107 additions & 0 deletions kmir/src/kmir/kdist/mir-semantics/intrinsics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Rust Intrinsic Functions in K

```k
// This looks like a circular import but only module KMIR in kmir.md imports KMIR-INTRINSICS
requires "kmir.md"

module KMIR-INTRINSICS
imports KMIR-CONTROL-FLOW
```

### Intrinsic Functions

Intrinsic functions are built-in functions provided by the compiler that don't have regular MIR bodies.
They are handled specially in the execution semantics through the `#execIntrinsic` mechanism.
When an intrinsic function is called, the execution bypasses the normal function call setup and directly
executes the intrinsic-specific logic.

#### Black Box (`std::hint::black_box`)

The `black_box` intrinsic serves as an optimization barrier, preventing the compiler from making assumptions
about the value passed through it. In the semantics, it acts as an identity function that simply passes
its argument to the destination without modification.

```k
// Black box intrinsic implementation - identity function
rule <k> #execIntrinsic(IntrinsicFunction(symbol("black_box")), ARG:Operand .Operands, DEST)
=> #setLocalValue(DEST, ARG)
... </k>
```

#### Cold Path (`std::hint::cold_path`)

The `cold_path` intrinsic is a compiler hint indicating that the current execution path is unlikely to be taken.
It provides metadata for the optimiser and code generator to improve layout and branch predicition but is
a NO OP for program semantics. `std::intrinsics::likely` and `std::intrinsics::unlikely` are
"normal" `MonoItemFn`s that call the `cold_path` intrinsic.

```k
rule <k> #execIntrinsic(IntrinsicFunction(symbol("cold_path")), .Operands, _DEST) => .K ... </k>
```

#### Prefetch (`std::intrinsics::prefetch_*`)

The `prefetch_read_data`, `prefetch_write_data`, `prefetch_read_instruction`, and `prefetch_write_instruction`
intrinsics in Rust are performance hints that request the CPU to load or prepare a memory address in cache
before it's used. They have no effect on program semantics, and are implemented as a NO OP in this semantics.

```k
rule <k> #execIntrinsic(IntrinsicFunction(symbol("prefetch_read_data")), _ARG1:Operand _ARG2:Operand .Operands, _DEST) => .K ... </k>
rule <k> #execIntrinsic(IntrinsicFunction(symbol("prefetch_write_data")), _ARG1:Operand _ARG2:Operand .Operands, _DEST) => .K ... </k>

rule <k> #execIntrinsic(IntrinsicFunction(symbol("prefetch_read_instruction")), _ARG1:Operand _ARG2:Operand .Operands, _DEST) => .K ... </k>
rule <k> #execIntrinsic(IntrinsicFunction(symbol("prefetch_write_instruction")), _ARG1:Operand _ARG2:Operand .Operands, _DEST) => .K ... </k>
```

#### Raw Eq (`std::intrinsics::raw_eq`)

The `raw_eq` intrinsic performs byte-by-byte equality comparison of the memory contents pointed to by two references.
It returns a boolean value indicating whether the referenced values are equal. The implementation dereferences the
provided references to access the underlying values, then compares them using K's built-in equality operator.

**Type Safety:**
The implementation requires operands to have identical types (`TY1 ==K TY2`) before performing the comparison.
Execution gets stuck (no matching rule) when operands have different types or unknown type information.

```k
// Raw eq: dereference operands, extract types, and delegate to typed comparison
rule <k> #execIntrinsic(IntrinsicFunction(symbol("raw_eq")), ARG1:Operand ARG2:Operand .Operands, PLACE)
=> #execRawEqTyped(PLACE, #withDeref(ARG1), #extractOperandType(#withDeref(ARG1), LOCALS),
#withDeref(ARG2), #extractOperandType(#withDeref(ARG2), LOCALS))
... </k>
<locals> LOCALS </locals>

// Compare values only if types are identical
syntax KItem ::= #execRawEqTyped(Place, Evaluation, MaybeTy, Evaluation, MaybeTy) [seqstrict(2,4)]
rule <k> #execRawEqTyped(DEST, VAL1:Value, TY1:Ty, VAL2:Value, TY2:Ty)
=> #setLocalValue(DEST, BoolVal(VAL1 ==K VAL2))
... </k>
requires TY1 ==K TY2
[preserves-definedness]

// Add deref projection to operands
syntax Operand ::= #withDeref(Operand) [function, total]
rule #withDeref(operandCopy(place(LOCAL, PROJ)))
=> operandCopy(place(LOCAL, appendP(PROJ, projectionElemDeref .ProjectionElems)))
rule #withDeref(operandMove(place(LOCAL, PROJ)))
=> operandCopy(place(LOCAL, appendP(PROJ, projectionElemDeref .ProjectionElems)))
// must not overwrite the value, just the reference is moved!
rule #withDeref(OP) => OP [owise]

// Extract type from operands (locals with projections, constants, fallback to unknown)
syntax MaybeTy ::= #extractOperandType(Operand, List) [function, total]
rule #extractOperandType(operandCopy(place(local(I), PROJS)), LOCALS)
=> getTyOf(tyOfLocal({LOCALS[I]}:>TypedLocal), PROJS)
requires 0 <=Int I andBool I <Int size(LOCALS) andBool isTypedLocal(LOCALS[I])
[preserves-definedness]
rule #extractOperandType(operandMove(place(local(I), PROJS)), LOCALS)
=> getTyOf(tyOfLocal({LOCALS[I]}:>TypedLocal), PROJS)
requires 0 <=Int I andBool I <Int size(LOCALS) andBool isTypedLocal(LOCALS[I])
[preserves-definedness]
rule #extractOperandType(operandConstant(constOperand(_, _, mirConst(_, TY, _))), _) => TY
rule #extractOperandType(_, _) => TyUnknown [owise]
```

```k
endmodule
```
75 changes: 4 additions & 71 deletions kmir/src/kmir/kdist/mir-semantics/kmir.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ requires "rt/data.md"
requires "rt/configuration.md"
requires "lemmas/kmir-lemmas.md"
requires "cheatcodes.md"
requires "intrinsics.md"
```

## Syntax of MIR in K
Expand Down Expand Up @@ -489,8 +490,8 @@ Therefore a heuristics is used here:
syntax Bool ::= isTupleType ( TypeInfo ) [function, total]
| isRefType ( TypeInfo ) [function, total]
// -------------------------------------------------------
rule isTupleType(typeInfoTupleType(_)) => true
rule isTupleType( _ ) => false [owise]
rule isTupleType(typeInfoTupleType(_, _)) => true
rule isTupleType( _ ) => false [owise]
rule isRefType(typeInfoRefType(_)) => true
rule isRefType( _ ) => false [owise]

Expand Down Expand Up @@ -560,75 +561,6 @@ Drops are elaborated to Noops but still define the continuing control flow. Unre
</k>
```

### Intrinsic Functions

Intrinsic functions are built-in functions provided by the compiler that don't have regular MIR bodies.
They are handled specially in the execution semantics through the `#execIntrinsic` mechanism.
When an intrinsic function is called, the execution bypasses the normal function call setup and directly
executes the intrinsic-specific logic.

#### Black Box (`std::hint::black_box`)

The `black_box` intrinsic serves as an optimization barrier, preventing the compiler from making assumptions
about the value passed through it. In the semantics, it acts as an identity function that simply passes
its argument to the destination without modification.

```k
// Black box intrinsic implementation - identity function
rule <k> #execIntrinsic(IntrinsicFunction(symbol("black_box")), ARG:Operand .Operands, DEST)
=> #setLocalValue(DEST, ARG)
... </k>
```

#### Raw Eq (`std::intrinsics::raw_eq`)

The `raw_eq` intrinsic performs byte-by-byte equality comparison of the memory contents pointed to by two references.
It returns a boolean value indicating whether the referenced values are equal. The implementation dereferences the
provided references to access the underlying values, then compares them using K's built-in equality operator.

**Type Safety:**
The implementation requires operands to have identical types (`TY1 ==K TY2`) before performing the comparison.
Execution gets stuck (no matching rule) when operands have different types or unknown type information.

```k
// Raw eq: dereference operands, extract types, and delegate to typed comparison
rule <k> #execIntrinsic(IntrinsicFunction(symbol("raw_eq")), ARG1:Operand ARG2:Operand .Operands, PLACE)
=> #execRawEqTyped(PLACE, #withDeref(ARG1), #extractOperandType(#withDeref(ARG1), LOCALS),
#withDeref(ARG2), #extractOperandType(#withDeref(ARG2), LOCALS))
... </k>
<locals> LOCALS </locals>

// Compare values only if types are identical
syntax KItem ::= #execRawEqTyped(Place, Evaluation, MaybeTy, Evaluation, MaybeTy) [seqstrict(2,4)]
rule <k> #execRawEqTyped(DEST, VAL1:Value, TY1:Ty, VAL2:Value, TY2:Ty)
=> #setLocalValue(DEST, BoolVal(VAL1 ==K VAL2))
... </k>
requires TY1 ==K TY2
[preserves-definedness]

// Add deref projection to operands
syntax Operand ::= #withDeref(Operand) [function, total]
rule #withDeref(operandCopy(place(LOCAL, PROJ)))
=> operandCopy(place(LOCAL, appendP(PROJ, projectionElemDeref .ProjectionElems)))
rule #withDeref(operandMove(place(LOCAL, PROJ)))
=> operandCopy(place(LOCAL, appendP(PROJ, projectionElemDeref .ProjectionElems)))
// must not overwrite the value, just the reference is moved!
rule #withDeref(OP) => OP [owise]

// Extract type from operands (locals with projections, constants, fallback to unknown)
syntax MaybeTy ::= #extractOperandType(Operand, List) [function, total]
rule #extractOperandType(operandCopy(place(local(I), PROJS)), LOCALS)
=> getTyOf(tyOfLocal({LOCALS[I]}:>TypedLocal), PROJS)
requires 0 <=Int I andBool I <Int size(LOCALS) andBool isTypedLocal(LOCALS[I])
[preserves-definedness]
rule #extractOperandType(operandMove(place(local(I), PROJS)), LOCALS)
=> getTyOf(tyOfLocal({LOCALS[I]}:>TypedLocal), PROJS)
requires 0 <=Int I andBool I <Int size(LOCALS) andBool isTypedLocal(LOCALS[I])
[preserves-definedness]
rule #extractOperandType(operandConstant(constOperand(_, _, mirConst(_, TY, _))), _) => TY
rule #extractOperandType(_, _) => TyUnknown [owise]
```

### Stopping on Program Errors

The semantics has a dedicated error sort to stop execution when flawed input or undefined behaviour is detected.
Expand Down Expand Up @@ -656,5 +588,6 @@ module KMIR
imports KMIR-AST // Necessary for the external Python parser
imports KMIR-CONTROL-FLOW
imports KMIR-CHEATCODES
imports KMIR-INTRINSICS
imports KMIR-LEMMAS
endmodule
2 changes: 1 addition & 1 deletion kmir/src/kmir/kdist/mir-semantics/rt/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -1554,7 +1554,7 @@ Zero-sized types can be decoded trivially into their respective representation.
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoStructType(_, _, _, _))
=> Aggregate(variantIdx(0), .List) ... </k>
// zero-sized tuple
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoTupleType(_))
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoTupleType(_, _))
=> Aggregate(variantIdx(0), .List) ... </k>
// zero-sized array
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoArrayType(_, _))
Expand Down
45 changes: 28 additions & 17 deletions kmir/src/kmir/kdist/mir-semantics/rt/decoding.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,21 @@ When using layout offsets we always return fields in declaration order within th
// ---------------------------------------------------------------------------
// Use the offsets when they are provided and the input length is sufficient.
rule #decodeValue(BYTES, typeInfoStructType(_, _, TYS, LAYOUT))
=> Aggregate(variantIdx(0), #decodeStructFieldsWithOffsets(BYTES, TYS, #structOffsets(LAYOUT)))
requires #structOffsets(LAYOUT) =/=K .MachineSizes
andBool 0 <=Int #neededBytesForOffsets(TYS, #structOffsets(LAYOUT))
andBool lengthBytes(BYTES) >=Int #neededBytesForOffsets(TYS, #structOffsets(LAYOUT))
=> Aggregate(variantIdx(0), #decodeFieldsWithOffsets(BYTES, TYS, #layoutOffsets(LAYOUT)))
requires #layoutOffsets(LAYOUT) =/=K .MachineSizes
andBool 0 <=Int #neededBytesForOffsets(TYS, #layoutOffsets(LAYOUT))
andBool lengthBytes(BYTES) >=Int #neededBytesForOffsets(TYS, #layoutOffsets(LAYOUT))
[preserves-definedness]

rule #decodeValue(BYTES, typeInfoTupleType(.Tys, _))
=> Aggregate(variantIdx(0), .List)
requires lengthBytes(BYTES) ==Int 0

rule #decodeValue(BYTES, typeInfoTupleType(TYS, LAYOUT))
=> Aggregate(variantIdx(0), #decodeFieldsWithOffsets(BYTES, TYS, #layoutOffsets(LAYOUT)))
requires #layoutOffsets(LAYOUT) =/=K .MachineSizes
andBool 0 <=Int #neededBytesForOffsets(TYS, #layoutOffsets(LAYOUT))
andBool lengthBytes(BYTES) >=Int #neededBytesForOffsets(TYS, #layoutOffsets(LAYOUT))
[preserves-definedness]

// ---------------------------------------------------------------------------
Expand All @@ -101,10 +112,10 @@ rule #msBytes(machineSize(mirInt(NBITS))) => NBITS /Int 8 [preserves-definedness
rule #msBytes(machineSize(NBITS)) => NBITS /Int 8 [owise, preserves-definedness]

// Extract field offsets from the struct layout when available (Arbitrary only).
syntax MachineSizes ::= #structOffsets ( MaybeLayoutShape ) [function, total]
rule #structOffsets(someLayoutShape(layoutShape(fieldsShapeArbitrary(mk(OFFSETS)), _, _, _, _))) => OFFSETS
rule #structOffsets(noLayoutShape) => .MachineSizes
rule #structOffsets(_) => .MachineSizes [owise]
syntax MachineSizes ::= #layoutOffsets ( MaybeLayoutShape ) [function, total]
rule #layoutOffsets(someLayoutShape(layoutShape(fieldsShapeArbitrary(mk(OFFSETS)), _, _, _, _))) => OFFSETS
rule #layoutOffsets(noLayoutShape) => .MachineSizes
rule #layoutOffsets(_) => .MachineSizes [owise]

// Minimum number of input bytes required to decode all fields by the chosen offsets.
// Uses builtin maxInt to compute max(offset + size). The lists of types and
Expand All @@ -118,17 +129,17 @@ rule #neededBytesForOffsets(TY TYS, OFFSET OFFSETS)
rule #neededBytesForOffsets(_, _) => -1 [owise]

// Decode each field at its byte offset and return values in declaration order.
syntax List ::= #decodeStructFieldsWithOffsets ( Bytes , Tys , MachineSizes ) [function, total]
rule #decodeStructFieldsWithOffsets(_, .Tys, _OFFSETS) => .List
rule #decodeStructFieldsWithOffsets(_, _TYS, .MachineSizes) => .List [owise]
rule #decodeStructFieldsWithOffsets(BYTES, TY TYS, OFFSET OFFSETS)
syntax List ::= #decodeFieldsWithOffsets ( Bytes , Tys , MachineSizes ) [function, total]
rule #decodeFieldsWithOffsets(_, .Tys, _OFFSETS) => .List
rule #decodeFieldsWithOffsets(_, _TYS, .MachineSizes) => .List [owise]
rule #decodeFieldsWithOffsets(BYTES, TY TYS, OFFSET OFFSETS)
=> ListItem(
#decodeValue(
substrBytes(BYTES, #msBytes(OFFSET), #msBytes(OFFSET) +Int #elemSize(lookupTy(TY))),
lookupTy(TY)
)
)
#decodeStructFieldsWithOffsets(BYTES, TYS, OFFSETS)
#decodeFieldsWithOffsets(BYTES, TYS, OFFSETS)
requires lengthBytes(BYTES) >=Int (#msBytes(OFFSET) +Int #elemSize(lookupTy(TY)))
[preserves-definedness]
```
Expand Down Expand Up @@ -182,10 +193,10 @@ Known element sizes for common types:
[owise]

// ---- Tuples ----
// Without layout, approximate as sum of element sizes (ignores padding).
rule #elemSize(typeInfoTupleType(.Tys)) => 0
rule #elemSize(typeInfoTupleType(TY TYS))
=> #elemSize(lookupTy(TY)) +Int #elemSize(typeInfoTupleType(TYS))
rule #elemSize(typeInfoTupleType(_TYS, someLayoutShape(layoutShape(_, _, _, _, SIZE)))) => #msBytes(SIZE)
rule #elemSize(typeInfoTupleType(.Tys, noLayoutShape)) => 0
rule #elemSize(typeInfoTupleType(TY TYS, noLayoutShape))
=> #elemSize(lookupTy(TY)) +Int #elemSize(typeInfoTupleType(TYS, noLayoutShape))

// ---- Structs and Enums with layout ----
rule #elemSize(typeInfoStructType(_, _, _, someLayoutShape(layoutShape(_, _, _, _, SIZE)))) => #msBytes(SIZE)
Expand Down
2 changes: 1 addition & 1 deletion kmir/src/kmir/kdist/mir-semantics/rt/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ Slices, `str`s and dynamic types require it, and any `Ty` that `is_sized` does
```k
syntax Bool ::= #zeroSizedType ( TypeInfo ) [function, total]

rule #zeroSizedType(typeInfoTupleType(.Tys)) => true
rule #zeroSizedType(typeInfoTupleType(.Tys, _)) => true
rule #zeroSizedType(typeInfoStructType(_, _, .Tys, _)) => true
rule #zeroSizedType(typeInfoVoidType) => true
// FIXME: Only unit tuples, empty structs, and void are recognized here; other
Expand Down
3 changes: 2 additions & 1 deletion kmir/src/kmir/kdist/mir-semantics/ty.md
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,8 @@ syntax ExistentialPredicateBinders ::= List {ExistentialPredicateBinder, ""}
| typeInfoArrayType(Ty, MaybeTyConst) [symbol(TypeInfo::ArrayType) , group(mir-enum---elem-type--size)]
| typeInfoPtrType(Ty) [symbol(TypeInfo::PtrType) , group(mir-enum---pointee-type)]
| typeInfoRefType(Ty) [symbol(TypeInfo::RefType) , group(mir-enum---pointee-type)]
| typeInfoTupleType(Tys) [symbol(TypeInfo::TupleType) , group(mir-enum---types)]
| typeInfoTupleType(types: Tys, layout: MaybeLayoutShape)
[symbol(TypeInfo::TupleType) , group(mir-enum---types--layout)]
| typeInfoFunType(MIRString) [symbol(TypeInfo::FunType) , group(mir-enum)]
| "typeInfoVoidType" [symbol(TypeInfo::VoidType) , group(mir-enum)]

Expand Down
Loading