Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion kmir/src/kmir/kdist/mir-semantics/rt/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -1410,7 +1410,7 @@ Zero-sized types can be decoded trivially into their respective representation.

```k
// zero-sized struct
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoStructType(_, _, _))
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoStructType(_, _, _, _))
=> Aggregate(variantIdx(0), .List) ... </k>
// zero-sized tuple
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoTupleType(_))
Expand Down
98 changes: 93 additions & 5 deletions kmir/src/kmir/kdist/mir-semantics/rt/decoding.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ requires "value.md"
requires "numbers.md"

module RT-DECODING
imports INT
imports BOOL
imports MAP

Expand Down Expand Up @@ -72,6 +73,66 @@ rule #decodeValue(BYTES, typeInfoArrayType(ELEMTY, noTyConst))
=> #decodeSliceAllocation(BYTES, lookupTy(ELEMTY))
```

### Struct decoding

Structs can have non-trivial layouts: field offsets may not match declaration order, and there may be padding.
Decoding is only performed when explicit layout offsets are available; otherwise we leave the term as `UnableToDecode` via the default rule. This avoids incorrectly decoding data when field reordering or padding is present.

When using layout offsets we always return fields in declaration order within the `Aggregate` (variant 0), regardless of the physical order in memory.

```k
// ---------------------------------------------------------------------------
// Struct decoding rules (top level)
// ---------------------------------------------------------------------------
// Use the offsets when they are provided and the input length is sufficient.
rule #decodeValue(BYTES, typeInfoStructType(_, _, TYS, LAYOUT))
=> Aggregate(variantIdx(0), #decodeStructFieldsWithOffsets(BYTES, TYS, #structOffsets(LAYOUT)))
requires #structOffsets(LAYOUT) =/=K .MachineSizes
andBool 0 <=Int #neededBytesForOffsets(TYS, #structOffsets(LAYOUT))
andBool lengthBytes(BYTES) >=Int #neededBytesForOffsets(TYS, #structOffsets(LAYOUT))
[preserves-definedness]

// ---------------------------------------------------------------------------
// Offsets and helpers (used by the rules above)
// ---------------------------------------------------------------------------
// MachineSize is in bits in the ABI; convert to bytes for slicing.
syntax Int ::= #msBytes ( MachineSize ) [function, total]
rule #msBytes(machineSize(mirInt(NBITS))) => NBITS /Int 8 [preserves-definedness]
rule #msBytes(machineSize(NBITS)) => NBITS /Int 8 [owise, preserves-definedness]

// Extract field offsets from the struct layout when available (Arbitrary only).
syntax MachineSizes ::= #structOffsets ( MaybeLayoutShape ) [function, total]
rule #structOffsets(someLayoutShape(layoutShape(fieldsShapeArbitrary(mk(OFFSETS)), _, _, _, _))) => OFFSETS
rule #structOffsets(noLayoutShape) => .MachineSizes
rule #structOffsets(_) => .MachineSizes [owise]

// Minimum number of input bytes required to decode all fields by the chosen offsets.
// Uses builtin maxInt to compute max(offset + size). The lists of types and
// offsets must have the same length; if not, this function returns -1 to signal
// an invalid layout for decoding.
syntax Int ::= #neededBytesForOffsets ( Tys , MachineSizes ) [function, total]
rule #neededBytesForOffsets(.Tys, .MachineSizes) => 0
rule #neededBytesForOffsets(TY TYS, OFFSET OFFSETS)
=> maxInt(#msBytes(OFFSET) +Int #elemSize(lookupTy(TY)), #neededBytesForOffsets(TYS, OFFSETS))
// Any remaining pattern indicates a length mismatch between types and offsets.
rule #neededBytesForOffsets(_, _) => -1 [owise]

// Decode each field at its byte offset and return values in declaration order.
syntax List ::= #decodeStructFieldsWithOffsets ( Bytes , Tys , MachineSizes ) [function, total]
rule #decodeStructFieldsWithOffsets(_, .Tys, _OFFSETS) => .List
rule #decodeStructFieldsWithOffsets(_, _TYS, .MachineSizes) => .List [owise]
rule #decodeStructFieldsWithOffsets(BYTES, TY TYS, OFFSET OFFSETS)
=> ListItem(
#decodeValue(
substrBytes(BYTES, #msBytes(OFFSET), #msBytes(OFFSET) +Int #elemSize(lookupTy(TY))),
lookupTy(TY)
)
)
#decodeStructFieldsWithOffsets(BYTES, TYS, OFFSETS)
requires lengthBytes(BYTES) >=Int (#msBytes(OFFSET) +Int #elemSize(lookupTy(TY)))
[preserves-definedness]
```

### Error marker (becomes thunk) for other (unimplemented) cases

All unimplemented cases will become thunks by way of this default rule:
Expand All @@ -84,20 +145,33 @@ All unimplemented cases will become thunks by way of this default rule:

```k
// TODO: this function should go into the rt/types.md module
syntax Int ::= #elemSize ( TypeInfo ) [function]
syntax Int ::= #elemSize ( TypeInfo ) [function, total]
```

Known element sizes for common types:

```k
// ---- Primitive types ----
rule #elemSize(typeInfoPrimitiveType(primTypeBool)) => 1
// Rust `char` is a 32-bit Unicode scalar value
rule #elemSize(typeInfoPrimitiveType(primTypeChar)) => 4
rule #elemSize(TYPEINFO) => #bitWidth(#intTypeOf(TYPEINFO)) /Int 8
requires #isIntType(TYPEINFO)

// Floating-point sizes
rule #elemSize(typeInfoPrimitiveType(primTypeFloat(floatTyF16))) => 2
rule #elemSize(typeInfoPrimitiveType(primTypeFloat(floatTyF32))) => 4
rule #elemSize(typeInfoPrimitiveType(primTypeFloat(floatTyF64))) => 8
rule #elemSize(typeInfoPrimitiveType(primTypeFloat(floatTyF128))) => 16
// `str` is unsized; size only known with metadata (e.g., in fat pointers)
rule #elemSize(typeInfoPrimitiveType(primTypeStr)) => 0

// ---- Arrays and slices ----
rule #elemSize(typeInfoArrayType(ELEM_TY, someTyConst(tyConst(LEN, _))))
=> #elemSize(lookupTy(ELEM_TY)) *Int readTyConstInt(LEN)
// Slice `[T]` has dynamic size; plain value is unsized
rule #elemSize(typeInfoArrayType(_ELEM_TY, noTyConst)) => 0

// thin and fat pointers
// ---- thin and fat pointers ----
rule #elemSize(typeInfoRefType(TY)) => #elemSize(typeInfoPrimitiveType(primTypeUint(uintTyUsize)))
requires dynamicSize(1) ==K #metadataSize(TY)
rule #elemSize(typeInfoRefType(_)) => 2 *Int #elemSize(typeInfoPrimitiveType(primTypeUint(uintTyUsize)))
Expand All @@ -107,10 +181,24 @@ Known element sizes for common types:
rule #elemSize(typeInfoPtrType(_)) => 2 *Int #elemSize(typeInfoPrimitiveType(primTypeUint(uintTyUsize)))
[owise]

// ---- Tuples ----
// Without layout, approximate as sum of element sizes (ignores padding).
rule #elemSize(typeInfoTupleType(.Tys)) => 0
rule #elemSize(typeInfoTupleType(TY TYS))
=> #elemSize(lookupTy(TY)) +Int #elemSize(typeInfoTupleType(TYS))

// ---- Structs and Enums with layout ----
rule #elemSize(typeInfoStructType(_, _, _, someLayoutShape(layoutShape(_, _, _, _, SIZE)))) => #msBytes(SIZE)
rule #elemSize(typeInfoEnumType(_, _, _, _, someLayoutShape(layoutShape(_, _, _, _, SIZE)))) => #msBytes(SIZE)

// ---- Function item types ----
// Function items are zero-sized; function pointers are handled by PtrType
rule #elemSize(typeInfoFunType(_)) => 0

rule #elemSize(typeInfoVoidType) => 0

// FIXME can only use size from layout here. Requires adding layout first.
// Enum, Struct, Tuple,
// Fallback to keep the function total for any remaining cases
rule #elemSize(_) => 0 [owise]

rule 0 <=Int #elemSize(_) => true [simplification, preserves-definedness]
```
Expand Down
6 changes: 5 additions & 1 deletion kmir/src/kmir/kdist/mir-semantics/ty.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,11 @@ syntax ExistentialPredicateBinders ::= List {ExistentialPredicateBinder, ""}
, discriminants: Discriminants
, fields: Tyss
, layout: MaybeLayoutShape) [symbol(TypeInfo::EnumType) , group(mir-enum---name--adt-def--discriminants--fields--layout)]
| typeInfoStructType(MIRString, AdtDef, Tys) [symbol(TypeInfo::StructType) , group(mir-enum---name--adt-def--fields)]
| typeInfoStructType(
name: MIRString
, adtDef: AdtDef
, fields: Tys
, layout: MaybeLayoutShape) [symbol(TypeInfo::StructType) , group(mir-enum---name--adt-def--fields--layout)]
| typeInfoUnionType(MIRString, AdtDef) [symbol(TypeInfo::UnionType) , group(mir-enum---name--adt-def)]
| typeInfoArrayType(Ty, MaybeTyConst) [symbol(TypeInfo::ArrayType) , group(mir-enum---elem-type--size)]
| typeInfoPtrType(Ty) [symbol(TypeInfo::PtrType) , group(mir-enum---pointee-type)]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#![allow(dead_code)]

#[derive(Copy, Clone)]
pub struct Pubkey(pub [u8; 32]);

const KEY: Pubkey = Pubkey([
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
]);

#[inline(never)]
fn use_inner(arr: &[u8; 32]) -> usize {
// Touch the array so MIR keeps the projection
if arr[0] == 0 { 32 } else { 0 }
}

fn main() {
let pk = KEY; // forces constant decoding of StructType(newtype)
let len = use_inner(&pk.0); // field projection .0; needs StructType decode to Aggregate
core::hint::black_box(len);
}

Loading