diff --git a/kani-compiler/src/args.rs b/kani-compiler/src/args.rs index b4d4eb3718d8..e4b7a4435b0f 100644 --- a/kani-compiler/src/args.rs +++ b/kani-compiler/src/args.rs @@ -88,4 +88,6 @@ pub enum ExtraChecks { /// Check pointer validity when casting pointers to references. /// See https://github.com/model-checking/kani/issues/2975. PtrToRefCast, + /// Check for using uninitialized memory. + Uninit, } diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/contract.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/contract.rs index 0f27d3f119f7..d35015aa040d 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/codegen/contract.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/contract.rs @@ -118,6 +118,27 @@ impl<'tcx> GotocCtx<'tcx> { .typ .clone(); + let shadow_memory_assign = self + .tcx + .all_diagnostic_items(()) + .name_to_id + .get(&rustc_span::symbol::Symbol::intern("KaniMemInitShadowMem")) + .map(|attr_id| { + self.tcx + .symbol_name(rustc_middle::ty::Instance::mono(self.tcx, *attr_id)) + .name + .to_string() + }) + .and_then(|shadow_memory_table| self.symbol_table.lookup(&shadow_memory_table).cloned()) + .map(|shadow_memory_symbol| { + vec![Lambda::as_contract_for( + &goto_annotated_fn_typ, + None, + shadow_memory_symbol.to_expr(), + )] + }) + .unwrap_or_default(); + let assigns = modified_places .into_iter() .map(|local| { @@ -127,6 +148,7 @@ impl<'tcx> GotocCtx<'tcx> { self.codegen_place_stable(&local.into(), loc).unwrap().goto_expr.dereference(), ) }) + .chain(shadow_memory_assign) .collect(); FunctionContract::new(assigns) diff --git a/kani-compiler/src/kani_middle/transform/body.rs b/kani-compiler/src/kani_middle/transform/body.rs index 2bf424a1332d..22895bd8d20d 100644 --- a/kani-compiler/src/kani_middle/transform/body.rs +++ b/kani-compiler/src/kani_middle/transform/body.rs @@ -37,6 +37,13 @@ pub struct MutableBody { span: Span, } +/// Denotes whether instrumentation should be inserted before or after the statement. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InsertPosition { + Before, + After, +} + impl MutableBody { /// Get the basic blocks of this builder. pub fn blocks(&self) -> &[BasicBlock] { @@ -95,12 +102,13 @@ impl MutableBody { from: Operand, pointee_ty: Ty, mutability: Mutability, - before: &mut SourceInstruction, + source: &mut SourceInstruction, + position: InsertPosition, ) -> Local { assert!(from.ty(self.locals()).unwrap().kind().is_raw_ptr()); let target_ty = Ty::new_ptr(pointee_ty, mutability); let rvalue = Rvalue::Cast(CastKind::PtrToPtr, from, target_ty); - self.new_assignment(rvalue, before) + self.new_assignment(rvalue, source, position) } /// Add a new assignment for the given binary operation. @@ -111,21 +119,27 @@ impl MutableBody { bin_op: BinOp, lhs: Operand, rhs: Operand, - before: &mut SourceInstruction, + source: &mut SourceInstruction, + position: InsertPosition, ) -> Local { let rvalue = Rvalue::BinaryOp(bin_op, lhs, rhs); - self.new_assignment(rvalue, before) + self.new_assignment(rvalue, source, position) } /// Add a new assignment. /// /// Return local where the result is saved. - pub fn new_assignment(&mut self, rvalue: Rvalue, before: &mut SourceInstruction) -> Local { - let span = before.span(&self.blocks); + pub fn new_assignment( + &mut self, + rvalue: Rvalue, + source: &mut SourceInstruction, + position: InsertPosition, + ) -> Local { + let span = source.span(&self.blocks); let ret_ty = rvalue.ty(&self.locals).unwrap(); let result = self.new_local(ret_ty, span, Mutability::Not); let stmt = Statement { kind: StatementKind::Assign(Place::from(result), rvalue), span }; - self.insert_stmt(stmt, before); + self.insert_stmt(stmt, source, position); result } @@ -139,6 +153,7 @@ impl MutableBody { tcx: TyCtxt, check_type: &CheckType, source: &mut SourceInstruction, + position: InsertPosition, value: Local, msg: &str, ) { @@ -168,7 +183,7 @@ impl MutableBody { unwind: UnwindAction::Terminate, }; let terminator = Terminator { kind, span }; - self.split_bb(source, terminator); + self.split_bb(source, position, terminator); } CheckType::Panic | CheckType::NoCore => { tcx.sess @@ -182,11 +197,55 @@ impl MutableBody { } } - /// Split a basic block right before the source location and use the new terminator - /// in the basic block that was split. + /// Add a new call to the basic block indicated by the given index. + /// + /// The new call will have the same span as the source instruction, and the basic block + /// will be split. The source instruction will be adjusted to point to the first instruction in + /// the new basic block. + pub fn add_call( + &mut self, + callee: &Instance, + source: &mut SourceInstruction, + position: InsertPosition, + args: Vec, + destination: Place, + ) { + let new_bb = self.blocks.len(); + let span = source.span(&self.blocks); + let callee_op = + Operand::Copy(Place::from(self.new_local(callee.ty(), span, Mutability::Not))); + let kind = TerminatorKind::Call { + func: callee_op, + args, + destination, + target: Some(new_bb), + unwind: UnwindAction::Terminate, + }; + let terminator = Terminator { kind, span }; + self.split_bb(source, position, terminator); + } + + /// Split a basic block and use the new terminator in the basic block that was split. /// /// The source is updated to point to the same instruction which is now in the new basic block. - pub fn split_bb(&mut self, source: &mut SourceInstruction, new_term: Terminator) { + pub fn split_bb( + &mut self, + source: &mut SourceInstruction, + position: InsertPosition, + new_term: Terminator, + ) { + match position { + InsertPosition::Before => { + self.split_bb_before(source, new_term); + } + InsertPosition::After => { + self.split_bb_after(source, new_term); + } + } + } + + /// Split a basic block right before the source location. + fn split_bb_before(&mut self, source: &mut SourceInstruction, new_term: Terminator) { let new_bb_idx = self.blocks.len(); let (idx, bb) = match source { SourceInstruction::Statement { idx, bb } => { @@ -196,9 +255,9 @@ impl MutableBody { (orig_idx, orig_bb) } SourceInstruction::Terminator { bb } => { - let orig_bb = *bb; + let (orig_idx, orig_bb) = (self.blocks[*bb].statements.len(), *bb); *bb = new_bb_idx; - (self.blocks[orig_bb].statements.len(), orig_bb) + (orig_idx, orig_bb) } }; let old_term = mem::replace(&mut self.blocks[bb].terminator, new_term); @@ -208,16 +267,95 @@ impl MutableBody { self.blocks.push(new_bb); } - /// Insert statement before the source instruction and update the source as needed. - pub fn insert_stmt(&mut self, new_stmt: Statement, before: &mut SourceInstruction) { - match before { + /// Split a basic block right after the source location. + fn split_bb_after(&mut self, source: &mut SourceInstruction, mut new_term: Terminator) { + let new_bb_idx = self.blocks.len(); + match source { + // Split the current block after the statement located at `source` + // and move the remaining statements into the new one. SourceInstruction::Statement { idx, bb } => { - self.blocks[*bb].statements.insert(*idx, new_stmt); - *idx += 1; + let (orig_idx, orig_bb) = (*idx, *bb); + *idx = 0; + *bb = new_bb_idx; + let old_term = mem::replace(&mut self.blocks[orig_bb].terminator, new_term); + let bb_stmts = &mut self.blocks[orig_bb].statements; + let remaining = bb_stmts.split_off(orig_idx + 1); + let new_bb = BasicBlock { statements: remaining, terminator: old_term }; + self.blocks.push(new_bb); } + // Make the terminator at `source` point at the new block, + // the terminator of which is a simple Goto instruction. SourceInstruction::Terminator { bb } => { - // Append statements at the end of the basic block. - self.blocks[*bb].statements.push(new_stmt); + let current_terminator = &mut self.blocks.get_mut(*bb).unwrap().terminator; + // Kani can only instrument function calls like this. + match (&mut current_terminator.kind, &mut new_term.kind) { + ( + TerminatorKind::Call { target: Some(target_bb), .. }, + TerminatorKind::Call { target: Some(new_target_bb), .. }, + ) => { + // Set the new terminator to point where previous terminator pointed. + *new_target_bb = *target_bb; + // Point the current terminator to the new terminator's basic block. + *target_bb = new_bb_idx; + // Update the current poisition. + *bb = new_bb_idx; + self.blocks.push(BasicBlock { statements: vec![], terminator: new_term }); + } + _ => unimplemented!("Kani can only split blocks after calls."), + }; + } + }; + } + + /// Insert statement before or after the source instruction and update the source as needed. + pub fn insert_stmt( + &mut self, + new_stmt: Statement, + source: &mut SourceInstruction, + position: InsertPosition, + ) { + match position { + InsertPosition::Before => { + match source { + SourceInstruction::Statement { idx, bb } => { + self.blocks[*bb].statements.insert(*idx, new_stmt); + *idx += 1; + } + SourceInstruction::Terminator { bb } => { + // Append statements at the end of the basic block. + self.blocks[*bb].statements.push(new_stmt); + } + } + } + InsertPosition::After => { + let new_bb_idx = self.blocks.len(); + let span = source.span(&self.blocks); + match source { + SourceInstruction::Statement { idx, bb } => { + self.blocks[*bb].statements.insert(*idx + 1, new_stmt); + *idx += 1; + } + SourceInstruction::Terminator { bb } => { + // Create a new basic block, as we need to append a statement after the terminator. + let current_terminator = &mut self.blocks.get_mut(*bb).unwrap().terminator; + // Kani can only instrument function calls in this way. + match &mut current_terminator.kind { + TerminatorKind::Call { target: Some(target_bb), .. } => { + *source = SourceInstruction::Statement { idx: 0, bb: new_bb_idx }; + let new_bb = BasicBlock { + statements: vec![new_stmt], + terminator: Terminator { + kind: TerminatorKind::Goto { target: *target_bb }, + span, + }, + }; + *target_bb = new_bb_idx; + self.blocks.push(new_bb); + } + _ => unimplemented!("Kani can only insert statements after calls."), + }; + } + } } } } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs new file mode 100644 index 000000000000..6665ab697287 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs @@ -0,0 +1,428 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Implement a transformation pass that instruments the code to detect possible UB due to +//! the accesses to uninitialized memory. + +use crate::args::ExtraChecks; +use crate::kani_middle::find_fn_def; +use crate::kani_middle::transform::body::{ + CheckType, InsertPosition, MutableBody, SourceInstruction, +}; +use crate::kani_middle::transform::{TransformPass, TransformationType}; +use crate::kani_queries::QueryDb; +use rustc_middle::ty::TyCtxt; +use rustc_smir::rustc_internal; +use stable_mir::mir::mono::Instance; +use stable_mir::mir::{AggregateKind, Body, ConstOperand, Mutability, Operand, Place, Rvalue}; +use stable_mir::ty::{ + FnDef, GenericArgKind, GenericArgs, MirConst, RigidTy, Ty, TyConst, TyKind, UintTy, +}; +use stable_mir::CrateDef; +use std::collections::{HashMap, HashSet}; +use std::fmt::Debug; +use tracing::{debug, trace}; + +mod ty_layout; +mod uninit_visitor; + +pub use ty_layout::{PointeeInfo, PointeeLayout}; +use uninit_visitor::{CheckUninitVisitor, InitRelevantInstruction, MemoryInitOp}; + +const SKIPPED_DIAGNOSTIC_ITEMS: &[&str] = + &["KaniIsUnitPtrInitialized", "KaniSetUnitPtrInitialized"]; + +/// Instrument the code with checks for uninitialized memory. +#[derive(Debug)] +pub struct UninitPass { + pub check_type: CheckType, + /// Used to cache FnDef lookups of injected memory initialization functions. + pub mem_init_fn_cache: HashMap<&'static str, FnDef>, +} + +impl TransformPass for UninitPass { + fn transformation_type() -> TransformationType + where + Self: Sized, + { + TransformationType::Instrumentation + } + + fn is_enabled(&self, query_db: &QueryDb) -> bool + where + Self: Sized, + { + let args = query_db.args(); + args.ub_check.contains(&ExtraChecks::Uninit) + } + + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + trace!(function=?instance.name(), "transform"); + + // Need to break infinite recursion when shadow memory checks are inserted, + // so the internal function responsible for shadow memory checks are skipped. + if tcx + .get_diagnostic_name(rustc_internal::internal(tcx, instance.def.def_id())) + .map(|diagnostic_name| { + SKIPPED_DIAGNOSTIC_ITEMS.contains(&diagnostic_name.to_ident_string().as_str()) + }) + .unwrap_or(false) + { + return (false, body); + } + + let mut new_body = MutableBody::from(body); + let orig_len = new_body.blocks().len(); + + // Set of basic block indices for which analyzing first statement should be skipped. + // + // This is necessary because some checks are inserted before the source instruction, which, in + // turn, gets moved to the next basic block. Hence, we would not need to look at the + // instruction again as a part of new basic block. However, if the check is inserted after the + // source instruction, we still need to look at the first statement of the new basic block, so + // we need to keep track of which basic blocks were created as a part of injecting checks after + // the source instruction. + let mut skip_first = HashSet::new(); + + // Do not cache body.blocks().len() since it will change as we add new checks. + let mut bb_idx = 0; + while bb_idx < new_body.blocks().len() { + if let Some(candidate) = + CheckUninitVisitor::find_next(&new_body, bb_idx, skip_first.contains(&bb_idx)) + { + self.build_check_for_instruction(tcx, &mut new_body, candidate, &mut skip_first); + bb_idx += 1 + } else { + bb_idx += 1; + }; + } + (orig_len != new_body.blocks().len(), new_body.into()) + } +} + +impl UninitPass { + /// Inject memory initialization checks for each operation in an instruction. + fn build_check_for_instruction( + &mut self, + tcx: TyCtxt, + body: &mut MutableBody, + instruction: InitRelevantInstruction, + skip_first: &mut HashSet, + ) { + debug!(?instruction, "build_check"); + let mut source = instruction.source; + for operation in instruction.before_instruction { + self.build_check_for_operation(tcx, body, &mut source, operation, skip_first); + } + for operation in instruction.after_instruction { + self.build_check_for_operation(tcx, body, &mut source, operation, skip_first); + } + } + + /// Inject memory initialization check for an operation. + fn build_check_for_operation( + &mut self, + tcx: TyCtxt, + body: &mut MutableBody, + source: &mut SourceInstruction, + operation: MemoryInitOp, + skip_first: &mut HashSet, + ) { + if let MemoryInitOp::Unsupported { reason } = &operation { + collect_skipped(&operation, body, skip_first); + self.unsupported_check(tcx, body, source, operation.position(), reason); + return; + }; + + let pointee_ty_info = { + let ptr_operand = operation.mk_operand(body, source); + let ptr_operand_ty = ptr_operand.ty(body.locals()).unwrap(); + let pointee_ty = match ptr_operand_ty.kind() { + TyKind::RigidTy(RigidTy::RawPtr(pointee_ty, _)) => pointee_ty, + _ => { + unreachable!( + "Should only build checks for raw pointers, `{ptr_operand_ty}` encountered." + ) + } + }; + match PointeeInfo::from_ty(pointee_ty) { + Ok(type_info) => type_info, + Err(_) => { + let reason = format!( + "Kani currently doesn't support checking memory initialization for pointers to `{pointee_ty}.", + ); + collect_skipped(&operation, body, skip_first); + self.unsupported_check(tcx, body, source, operation.position(), &reason); + return; + } + } + }; + + match operation { + MemoryInitOp::Check { .. } => { + self.build_get_and_check(tcx, body, source, operation, pointee_ty_info, skip_first) + } + MemoryInitOp::Set { .. } | MemoryInitOp::SetRef { .. } => { + self.build_set(tcx, body, source, operation, pointee_ty_info, skip_first) + } + MemoryInitOp::Unsupported { .. } => { + unreachable!() + } + } + } + + /// Inject a load from shadow memory tracking memory initialization and an assertion that all + /// non-padding bytes are initialized. + fn build_get_and_check( + &mut self, + tcx: TyCtxt, + body: &mut MutableBody, + source: &mut SourceInstruction, + operation: MemoryInitOp, + pointee_info: PointeeInfo, + skip_first: &mut HashSet, + ) { + let ret_place = Place { + local: body.new_local(Ty::bool_ty(), source.span(body.blocks()), Mutability::Not), + projection: vec![], + }; + let ptr_operand = operation.mk_operand(body, source); + match pointee_info.layout() { + PointeeLayout::Sized { layout } => { + let is_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, "KaniIsPtrInitialized", &mut self.mem_init_fn_cache), + layout.len(), + *pointee_info.ty(), + ); + let layout_operand = mk_layout_operand(body, source, operation.position(), &layout); + collect_skipped(&operation, body, skip_first); + body.add_call( + &is_ptr_initialized_instance, + source, + operation.position(), + vec![ptr_operand.clone(), layout_operand, operation.expect_count()], + ret_place.clone(), + ); + } + PointeeLayout::Slice { element_layout } => { + // Since `str`` is a separate type, need to differentiate between [T] and str. + let (slicee_ty, diagnostic) = match pointee_info.ty().kind() { + TyKind::RigidTy(RigidTy::Slice(slicee_ty)) => { + (slicee_ty, "KaniIsSlicePtrInitialized") + } + TyKind::RigidTy(RigidTy::Str) => { + (Ty::unsigned_ty(UintTy::U8), "KaniIsStrPtrInitialized") + } + _ => unreachable!(), + }; + let is_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, diagnostic, &mut self.mem_init_fn_cache), + element_layout.len(), + slicee_ty, + ); + let layout_operand = + mk_layout_operand(body, source, operation.position(), &element_layout); + collect_skipped(&operation, body, skip_first); + body.add_call( + &is_ptr_initialized_instance, + source, + operation.position(), + vec![ptr_operand.clone(), layout_operand], + ret_place.clone(), + ); + } + PointeeLayout::TraitObject => { + collect_skipped(&operation, body, skip_first); + let reason = "Kani does not support reasoning about memory initialization of pointers to trait objects."; + self.unsupported_check(tcx, body, source, operation.position(), reason); + return; + } + }; + + // Make sure all non-padding bytes are initialized. + collect_skipped(&operation, body, skip_first); + let ptr_operand_ty = ptr_operand.ty(body.locals()).unwrap(); + body.add_check( + tcx, + &self.check_type, + source, + operation.position(), + ret_place.local, + &format!("Undefined Behavior: Reading from an uninitialized pointer of type `{ptr_operand_ty}`"), + ) + } + + /// Inject a store into shadow memory tracking memory initialization to initialize or + /// deinitialize all non-padding bytes. + fn build_set( + &mut self, + tcx: TyCtxt, + body: &mut MutableBody, + source: &mut SourceInstruction, + operation: MemoryInitOp, + pointee_info: PointeeInfo, + skip_first: &mut HashSet, + ) { + let ret_place = Place { + local: body.new_local(Ty::new_tuple(&[]), source.span(body.blocks()), Mutability::Not), + projection: vec![], + }; + let ptr_operand = operation.mk_operand(body, source); + let value = operation.expect_value(); + + match pointee_info.layout() { + PointeeLayout::Sized { layout } => { + let set_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, "KaniSetPtrInitialized", &mut self.mem_init_fn_cache), + layout.len(), + *pointee_info.ty(), + ); + let layout_operand = mk_layout_operand(body, source, operation.position(), &layout); + collect_skipped(&operation, body, skip_first); + body.add_call( + &set_ptr_initialized_instance, + source, + operation.position(), + vec![ + ptr_operand, + layout_operand, + operation.expect_count(), + Operand::Constant(ConstOperand { + span: source.span(body.blocks()), + user_ty: None, + const_: MirConst::from_bool(value), + }), + ], + ret_place, + ); + } + PointeeLayout::Slice { element_layout } => { + // Since `str`` is a separate type, need to differentiate between [T] and str. + let (slicee_ty, diagnostic) = match pointee_info.ty().kind() { + TyKind::RigidTy(RigidTy::Slice(slicee_ty)) => { + (slicee_ty, "KaniSetSlicePtrInitialized") + } + TyKind::RigidTy(RigidTy::Str) => { + (Ty::unsigned_ty(UintTy::U8), "KaniSetStrPtrInitialized") + } + _ => unreachable!(), + }; + let set_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, diagnostic, &mut self.mem_init_fn_cache), + element_layout.len(), + slicee_ty, + ); + let layout_operand = + mk_layout_operand(body, source, operation.position(), &element_layout); + collect_skipped(&operation, body, skip_first); + body.add_call( + &set_ptr_initialized_instance, + source, + operation.position(), + vec![ + ptr_operand, + layout_operand, + Operand::Constant(ConstOperand { + span: source.span(body.blocks()), + user_ty: None, + const_: MirConst::from_bool(value), + }), + ], + ret_place, + ); + } + PointeeLayout::TraitObject => { + unreachable!("Cannot change the initialization state of a trait object directly."); + } + }; + } + + fn unsupported_check( + &self, + tcx: TyCtxt, + body: &mut MutableBody, + source: &mut SourceInstruction, + position: InsertPosition, + reason: &str, + ) { + let span = source.span(body.blocks()); + let rvalue = Rvalue::Use(Operand::Constant(ConstOperand { + const_: MirConst::from_bool(false), + span, + user_ty: None, + })); + let result = body.new_assignment(rvalue, source, position); + body.add_check(tcx, &self.check_type, source, position, result, reason); + } +} + +/// Create an operand from a bit array that represents a byte mask for a type layout where padding +/// bytes are marked as `false` and data bytes are marked as `true`. +/// +/// For example, the layout for: +/// ``` +/// [repr(C)] +/// struct { +/// a: u16, +/// b: u8 +/// } +/// ``` +/// will have the following byte mask `[true, true, true, false]`. +pub fn mk_layout_operand( + body: &mut MutableBody, + source: &mut SourceInstruction, + position: InsertPosition, + layout_byte_mask: &[bool], +) -> Operand { + Operand::Move(Place { + local: body.new_assignment( + Rvalue::Aggregate( + AggregateKind::Array(Ty::bool_ty()), + layout_byte_mask + .iter() + .map(|byte| { + Operand::Constant(ConstOperand { + span: source.span(body.blocks()), + user_ty: None, + const_: MirConst::from_bool(*byte), + }) + }) + .collect(), + ), + source, + position, + ), + projection: vec![], + }) +} + +/// If injecting a new call to the function before the current statement, need to skip the original +/// statement when analyzing it as a part of the new basic block. +fn collect_skipped(operation: &MemoryInitOp, body: &MutableBody, skip_first: &mut HashSet) { + if operation.position() == InsertPosition::Before { + let new_bb_idx = body.blocks().len(); + skip_first.insert(new_bb_idx); + } +} + +/// Retrieve a function definition by diagnostic string, caching the result. +pub fn get_mem_init_fn_def( + tcx: TyCtxt, + diagnostic: &'static str, + cache: &mut HashMap<&'static str, FnDef>, +) -> FnDef { + let entry = cache.entry(diagnostic).or_insert_with(|| find_fn_def(tcx, diagnostic).unwrap()); + *entry +} + +/// Resolves a given memory initialization function with passed type parameters. +pub fn resolve_mem_init_fn(fn_def: FnDef, layout_size: usize, associated_type: Ty) -> Instance { + Instance::resolve( + fn_def, + &GenericArgs(vec![ + GenericArgKind::Const(TyConst::try_from_target_usize(layout_size as u64).unwrap()), + GenericArgKind::Type(associated_type), + ]), + ) + .unwrap() +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs new file mode 100644 index 000000000000..09116230af80 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs @@ -0,0 +1,334 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Utility functions that help calculate type layout. + +use stable_mir::abi::{FieldsShape, Scalar, TagEncoding, ValueAbi, VariantsShape}; +use stable_mir::target::{MachineInfo, MachineSize}; +use stable_mir::ty::{AdtKind, IndexedVal, RigidTy, Ty, TyKind, UintTy}; +use stable_mir::CrateDef; + +/// Represents a chunk of data bytes in a data structure. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +struct DataBytes { + /// Offset in bytes. + offset: usize, + /// Size of this data chunk. + size: MachineSize, +} + +/// Bytewise mask, representing which bytes of a type are data and which are padding. Here, `false` +/// represents padding bytes and `true` represents data bytes. +type Layout = Vec; + +/// Create a byte-wise mask from known chunks of data bytes. +fn generate_byte_mask(size_in_bytes: usize, data_chunks: Vec) -> Vec { + let mut layout_mask = vec![false; size_in_bytes]; + for data_bytes in data_chunks.iter() { + for layout_item in + layout_mask.iter_mut().skip(data_bytes.offset).take(data_bytes.size.bytes()) + { + *layout_item = true; + } + } + layout_mask +} + +// Depending on whether the type is statically or dynamically sized, +// the layout of the element or the layout of the actual type is returned. +pub enum PointeeLayout { + /// Layout of sized objects. + Sized { layout: Layout }, + /// Layout of slices, *const/mut str is included in this case and treated as *const/mut [u8]. + Slice { element_layout: Layout }, + /// Trait objects have an arbitrary layout. + TraitObject, +} + +pub struct PointeeInfo { + pointee_ty: Ty, + layout: PointeeLayout, +} + +impl PointeeInfo { + pub fn from_ty(ty: Ty) -> Result { + match ty.kind() { + TyKind::RigidTy(rigid_ty) => match rigid_ty { + RigidTy::Str => { + let slicee_ty = Ty::unsigned_ty(UintTy::U8); + let size_in_bytes = slicee_ty.layout().unwrap().shape().size.bytes(); + let data_chunks = data_bytes_for_ty(&MachineInfo::target(), slicee_ty, 0)?; + let layout = PointeeLayout::Slice { + element_layout: generate_byte_mask(size_in_bytes, data_chunks), + }; + Ok(PointeeInfo { pointee_ty: ty, layout }) + } + RigidTy::Slice(slicee_ty) => { + let size_in_bytes = slicee_ty.layout().unwrap().shape().size.bytes(); + let data_chunks = data_bytes_for_ty(&MachineInfo::target(), slicee_ty, 0)?; + let layout = PointeeLayout::Slice { + element_layout: generate_byte_mask(size_in_bytes, data_chunks), + }; + Ok(PointeeInfo { pointee_ty: ty, layout }) + } + RigidTy::Dynamic(..) => { + Ok(PointeeInfo { pointee_ty: ty, layout: PointeeLayout::TraitObject }) + } + _ => { + if ty.layout().unwrap().shape().is_sized() { + let size_in_bytes = ty.layout().unwrap().shape().size.bytes(); + let data_chunks = data_bytes_for_ty(&MachineInfo::target(), ty, 0)?; + let layout = PointeeLayout::Sized { + layout: generate_byte_mask(size_in_bytes, data_chunks), + }; + Ok(PointeeInfo { pointee_ty: ty, layout }) + } else { + Err(format!("Cannot determine type layout for type `{ty}`")) + } + } + }, + TyKind::Alias(..) | TyKind::Param(..) | TyKind::Bound(..) => { + unreachable!("Should only encounter monomorphized types at this point.") + } + } + } + + pub fn ty(&self) -> &Ty { + &self.pointee_ty + } + + pub fn layout(&self) -> &PointeeLayout { + &self.layout + } +} + +/// Retrieve a set of data bytes with offsets for a type. +fn data_bytes_for_ty( + machine_info: &MachineInfo, + ty: Ty, + current_offset: usize, +) -> Result, String> { + let layout = ty.layout().unwrap().shape(); + + match layout.fields { + FieldsShape::Primitive => Ok(vec![match layout.abi { + ValueAbi::Scalar(Scalar::Initialized { value, .. }) => { + DataBytes { offset: current_offset, size: value.size(machine_info) } + } + _ => unreachable!("FieldsShape::Primitive with a different ABI than ValueAbi::Scalar"), + }]), + FieldsShape::Array { stride, count } if count > 0 => { + let TyKind::RigidTy(RigidTy::Array(elem_ty, _)) = ty.kind() else { unreachable!() }; + let elem_data_bytes = data_bytes_for_ty(machine_info, elem_ty, current_offset)?; + let mut result = vec![]; + if !elem_data_bytes.is_empty() { + for idx in 0..count { + let idx: usize = idx.try_into().unwrap(); + let elem_offset = idx * stride.bytes(); + let mut next_data_bytes = elem_data_bytes + .iter() + .cloned() + .map(|mut req| { + req.offset += elem_offset; + req + }) + .collect::>(); + result.append(&mut next_data_bytes) + } + } + Ok(result) + } + FieldsShape::Arbitrary { ref offsets } => { + match ty.kind().rigid().expect(&format!("unexpected type: {ty:?}")) { + RigidTy::Adt(def, args) => { + match def.kind() { + AdtKind::Enum => { + // Support basic enumeration forms + let ty_variants = def.variants(); + match layout.variants { + VariantsShape::Single { index } => { + // Only one variant is reachable. This behaves like a struct. + let fields = ty_variants[index.to_index()].fields(); + let mut fields_data_bytes = vec![]; + for idx in layout.fields.fields_by_offset_order() { + let field_offset = offsets[idx].bytes(); + let field_ty = fields[idx].ty_with_args(&args); + fields_data_bytes.append(&mut data_bytes_for_ty( + machine_info, + field_ty, + field_offset + current_offset, + )?); + } + Ok(fields_data_bytes) + } + VariantsShape::Multiple { + tag_encoding: TagEncoding::Niche { .. }, + .. + } => { + Err(format!("Unsupported Enum `{}` check", def.trimmed_name()))? + } + VariantsShape::Multiple { variants, tag, .. } => { + // Retrieve data bytes for the tag. + let tag_size = match tag { + Scalar::Initialized { value, .. } => { + value.size(&machine_info) + } + Scalar::Union { .. } => { + unreachable!("Enum tag should not be a union.") + } + }; + // For enums, tag is the only field and should have offset of 0. + assert!(offsets.len() == 1 && offsets[0].bytes() == 0); + let tag_data_bytes = + vec![DataBytes { offset: current_offset, size: tag_size }]; + + // Retrieve data bytes for the fields. + let mut fields_data_bytes = vec![]; + // Iterate over all variants for the enum. + for (index, variant) in variants.iter().enumerate() { + let mut field_data_bytes_for_variant = vec![]; + let fields = ty_variants[index].fields(); + // Get offsets of all fields in a variant. + let FieldsShape::Arbitrary { offsets: field_offsets } = + variant.fields.clone() + else { + unreachable!() + }; + for field_idx in variant.fields.fields_by_offset_order() { + let field_offset = field_offsets[field_idx].bytes(); + let field_ty = fields[field_idx].ty_with_args(&args); + field_data_bytes_for_variant.append( + &mut data_bytes_for_ty( + machine_info, + field_ty, + field_offset + current_offset, + )?, + ); + } + fields_data_bytes.push(field_data_bytes_for_variant); + } + + if fields_data_bytes.is_empty() { + // If there are no fields, return the tag data bytes. + Ok(tag_data_bytes) + } else if fields_data_bytes.iter().all( + |data_bytes_for_variant| { + // Byte layout for variant N. + let byte_mask_for_variant = generate_byte_mask( + layout.size.bytes(), + data_bytes_for_variant.clone(), + ); + // Byte layout for variant 0. + let byte_mask_for_first = generate_byte_mask( + layout.size.bytes(), + fields_data_bytes.first().unwrap().clone(), + ); + byte_mask_for_variant == byte_mask_for_first + }, + ) { + // If all fields have the same layout, return fields data + // bytes. + let mut total_data_bytes = tag_data_bytes; + let mut field_data_bytes = + fields_data_bytes.first().unwrap().clone(); + total_data_bytes.append(&mut field_data_bytes); + Ok(total_data_bytes) + } else { + // Struct has multiple padding variants, Kani cannot + // differentiate between them. + Err(format!( + "Unsupported Enum `{}` check", + def.trimmed_name() + )) + } + } + } + } + AdtKind::Union => unreachable!(), + AdtKind::Struct => { + let mut struct_data_bytes = vec![]; + let fields = def.variants_iter().next().unwrap().fields(); + for idx in layout.fields.fields_by_offset_order() { + let field_offset = offsets[idx].bytes(); + let field_ty = fields[idx].ty_with_args(&args); + struct_data_bytes.append(&mut data_bytes_for_ty( + machine_info, + field_ty, + field_offset + current_offset, + )?); + } + Ok(struct_data_bytes) + } + } + } + RigidTy::Pat(base_ty, ..) => { + // This is similar to a structure with one field and with niche defined. + let mut pat_data_bytes = vec![]; + pat_data_bytes.append(&mut data_bytes_for_ty(machine_info, *base_ty, 0)?); + Ok(pat_data_bytes) + } + RigidTy::Tuple(tys) => { + let mut tuple_data_bytes = vec![]; + for idx in layout.fields.fields_by_offset_order() { + let field_offset = offsets[idx].bytes(); + let field_ty = tys[idx]; + tuple_data_bytes.append(&mut data_bytes_for_ty( + machine_info, + field_ty, + field_offset + current_offset, + )?); + } + Ok(tuple_data_bytes) + } + RigidTy::Bool + | RigidTy::Char + | RigidTy::Int(_) + | RigidTy::Uint(_) + | RigidTy::Float(_) + | RigidTy::Never => { + unreachable!("Expected primitive layout for {ty:?}") + } + RigidTy::Str | RigidTy::Slice(_) | RigidTy::Array(_, _) => { + unreachable!("Expected array layout for {ty:?}") + } + RigidTy::RawPtr(_, _) | RigidTy::Ref(_, _, _) => Ok(match layout.abi { + ValueAbi::Scalar(Scalar::Initialized { value, .. }) => { + // Thin pointer, ABI is a single scalar. + vec![DataBytes { offset: current_offset, size: value.size(machine_info) }] + } + ValueAbi::ScalarPair( + Scalar::Initialized { value: value_first, .. }, + Scalar::Initialized { value: value_second, .. }, + ) => { + // Fat pointer, ABI is a scalar pair. + let FieldsShape::Arbitrary { offsets } = layout.fields else { + unreachable!() + }; + // Since this is a scalar pair, only 2 elements are in the offsets vec. + assert!(offsets.len() == 2); + vec![ + DataBytes { + offset: current_offset + offsets[0].bytes(), + size: value_first.size(machine_info), + }, + DataBytes { + offset: current_offset + offsets[1].bytes(), + size: value_second.size(machine_info), + }, + ] + } + _ => unreachable!("RigidTy::RawPtr | RigidTy::Ref with a non-scalar ABI."), + }), + RigidTy::FnDef(_, _) + | RigidTy::FnPtr(_) + | RigidTy::Closure(_, _) + | RigidTy::Coroutine(_, _, _) + | RigidTy::CoroutineWitness(_, _) + | RigidTy::Foreign(_) + | RigidTy::Dynamic(_, _, _) => Err(format!("Unsupported {ty:?}")), + } + } + FieldsShape::Union(_) => Err(format!("Unsupported {ty:?}")), + FieldsShape::Array { .. } => Ok(vec![]), + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs new file mode 100644 index 000000000000..19b13c6ab8b6 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs @@ -0,0 +1,713 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Visitor that collects all instructions relevant to uninitialized memory access. + +use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use stable_mir::mir::alloc::GlobalAlloc; +use stable_mir::mir::mono::{Instance, InstanceKind}; +use stable_mir::mir::visit::{Location, PlaceContext}; +use stable_mir::mir::{ + BasicBlockIdx, CastKind, ConstOperand, LocalDecl, MirVisitor, Mutability, + NonDivergingIntrinsic, Operand, Place, PointerCoercion, ProjectionElem, Rvalue, Statement, + StatementKind, Terminator, TerminatorKind, +}; +use stable_mir::ty::{ConstantKind, MirConst, RigidTy, Span, TyKind, UintTy}; +use strum_macros::AsRefStr; + +/// Memory initialization operations: set or get memory initialization state for a given pointer. +#[derive(AsRefStr, Clone, Debug)] +pub enum MemoryInitOp { + /// Check memory initialization of data bytes in a memory region starting from the pointer + /// `operand` and of length `count * sizeof(operand)` bytes. + Check { operand: Operand, count: Operand }, + /// Set memory initialization state of data bytes in a memory region starting from the pointer + /// `operand` and of length `count * sizeof(operand)` bytes. + Set { operand: Operand, count: Operand, value: bool, position: InsertPosition }, + /// Set memory initialization of data bytes in a memory region starting from the reference to + /// `operand` and of length `count * sizeof(operand)` bytes. + SetRef { operand: Operand, count: Operand, value: bool, position: InsertPosition }, + /// Unsupported memory initialization operation. + Unsupported { reason: String }, +} + +impl MemoryInitOp { + /// Produce an operand for the relevant memory initialization related operation. This is mostly + /// required so that the analysis can create a new local to take a reference in + /// `MemoryInitOp::SetRef`. + pub fn mk_operand(&self, body: &mut MutableBody, source: &mut SourceInstruction) -> Operand { + match self { + MemoryInitOp::Check { operand, .. } | MemoryInitOp::Set { operand, .. } => { + operand.clone() + } + MemoryInitOp::SetRef { operand, .. } => Operand::Copy(Place { + local: { + let place = match operand { + Operand::Copy(place) | Operand::Move(place) => place, + Operand::Constant(_) => unreachable!(), + }; + body.new_assignment( + Rvalue::AddressOf(Mutability::Not, place.clone()), + source, + self.position(), + ) + }, + projection: vec![], + }), + MemoryInitOp::Unsupported { .. } => unreachable!(), + } + } + + pub fn expect_count(&self) -> Operand { + match self { + MemoryInitOp::Check { count, .. } + | MemoryInitOp::Set { count, .. } + | MemoryInitOp::SetRef { count, .. } => count.clone(), + MemoryInitOp::Unsupported { .. } => unreachable!(), + } + } + + pub fn expect_value(&self) -> bool { + match self { + MemoryInitOp::Set { value, .. } | MemoryInitOp::SetRef { value, .. } => *value, + MemoryInitOp::Check { .. } | MemoryInitOp::Unsupported { .. } => unreachable!(), + } + } + + pub fn position(&self) -> InsertPosition { + match self { + MemoryInitOp::Set { position, .. } | MemoryInitOp::SetRef { position, .. } => *position, + MemoryInitOp::Check { .. } | MemoryInitOp::Unsupported { .. } => InsertPosition::Before, + } + } +} + +/// Represents an instruction in the source code together with all memory initialization checks/sets +/// that are connected to the memory used in this instruction and whether they should be inserted +/// before or after the instruction. +#[derive(Clone, Debug)] +pub struct InitRelevantInstruction { + /// The instruction that affects the state of the memory. + pub source: SourceInstruction, + /// All memory-related operations that should happen after the instruction. + pub before_instruction: Vec, + /// All memory-related operations that should happen after the instruction. + pub after_instruction: Vec, +} + +impl InitRelevantInstruction { + pub fn push_operation(&mut self, source_op: MemoryInitOp) { + match source_op.position() { + InsertPosition::Before => self.before_instruction.push(source_op), + InsertPosition::After => self.after_instruction.push(source_op), + } + } +} + +pub struct CheckUninitVisitor<'a> { + locals: &'a [LocalDecl], + /// Whether we should skip the next instruction, since it might've been instrumented already. + /// When we instrument an instruction, we partition the basic block, and the instruction that + /// may trigger UB becomes the first instruction of the basic block, which we need to skip + /// later. + skip_next: bool, + /// The instruction being visited at a given point. + current: SourceInstruction, + /// The target instruction that should be verified. + pub target: Option, + /// The basic block being visited. + bb: BasicBlockIdx, +} + +impl<'a> CheckUninitVisitor<'a> { + pub fn find_next( + body: &'a MutableBody, + bb: BasicBlockIdx, + skip_first: bool, + ) -> Option { + let mut visitor = CheckUninitVisitor { + locals: body.locals(), + skip_next: skip_first, + current: SourceInstruction::Statement { idx: 0, bb }, + target: None, + bb, + }; + visitor.visit_basic_block(&body.blocks()[bb]); + visitor.target + } + + fn push_target(&mut self, source_op: MemoryInitOp) { + let target = self.target.get_or_insert_with(|| InitRelevantInstruction { + source: self.current, + after_instruction: vec![], + before_instruction: vec![], + }); + target.push_operation(source_op); + } +} + +impl<'a> MirVisitor for CheckUninitVisitor<'a> { + fn visit_statement(&mut self, stmt: &Statement, location: Location) { + if self.skip_next { + self.skip_next = false; + } else if self.target.is_none() { + // Leave it as an exhaustive match to be notified when a new kind is added. + match &stmt.kind { + StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(copy)) => { + self.super_statement(stmt, location); + // Source is a *const T and it must be initialized. + self.push_target(MemoryInitOp::Check { + operand: copy.src.clone(), + count: copy.count.clone(), + }); + // Destimation is a *mut T so it gets initialized. + self.push_target(MemoryInitOp::Set { + operand: copy.dst.clone(), + count: copy.count.clone(), + value: true, + position: InsertPosition::After, + }); + } + StatementKind::Assign(place, rvalue) => { + // First check rvalue. + self.visit_rvalue(rvalue, location); + // Check whether we are assigning into a dereference (*ptr = _). + if let Some(place_without_deref) = try_remove_topmost_deref(place) { + // First, check that we are not dereferencing extra pointers along the way + // (e.g., **ptr = _). If yes, check whether these pointers are initialized. + let mut place_to_add_projections = + Place { local: place_without_deref.local, projection: vec![] }; + for projection_elem in place_without_deref.projection.iter() { + // If the projection is Deref and the current type is raw pointer, check + // if it points to initialized memory. + if *projection_elem == ProjectionElem::Deref { + if let TyKind::RigidTy(RigidTy::RawPtr(..)) = + place_to_add_projections.ty(&self.locals).unwrap().kind() + { + self.push_target(MemoryInitOp::Check { + operand: Operand::Copy(place_to_add_projections.clone()), + count: mk_const_operand(1, location.span()), + }); + }; + } + place_to_add_projections.projection.push(projection_elem.clone()); + } + if place_without_deref.ty(&self.locals).unwrap().kind().is_raw_ptr() { + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(place_without_deref), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::After, + }); + } + } + // Check whether Rvalue creates a new initialized pointer previously not captured inside shadow memory. + if place.ty(&self.locals).unwrap().kind().is_raw_ptr() { + if let Rvalue::AddressOf(..) = rvalue { + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(place.clone()), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::After, + }); + } + } + } + StatementKind::Deinit(place) => { + self.super_statement(stmt, location); + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(place.clone()), + count: mk_const_operand(1, location.span()), + value: false, + position: InsertPosition::After, + }); + } + StatementKind::FakeRead(_, _) + | StatementKind::SetDiscriminant { .. } + | StatementKind::StorageLive(_) + | StatementKind::StorageDead(_) + | StatementKind::Retag(_, _) + | StatementKind::PlaceMention(_) + | StatementKind::AscribeUserType { .. } + | StatementKind::Coverage(_) + | StatementKind::ConstEvalCounter + | StatementKind::Intrinsic(NonDivergingIntrinsic::Assume(_)) + | StatementKind::Nop => self.super_statement(stmt, location), + } + } + let SourceInstruction::Statement { idx, bb } = self.current else { unreachable!() }; + self.current = SourceInstruction::Statement { idx: idx + 1, bb }; + } + + fn visit_terminator(&mut self, term: &Terminator, location: Location) { + if !(self.skip_next || self.target.is_some()) { + self.current = SourceInstruction::Terminator { bb: self.bb }; + // Leave it as an exhaustive match to be notified when a new kind is added. + match &term.kind { + TerminatorKind::Call { func, args, destination, .. } => { + self.super_terminator(term, location); + let instance = match try_resolve_instance(self.locals, func) { + Ok(instance) => instance, + Err(reason) => { + self.super_terminator(term, location); + self.push_target(MemoryInitOp::Unsupported { reason }); + return; + } + }; + match instance.kind { + InstanceKind::Intrinsic => { + match instance.intrinsic_name().unwrap().as_str() { + intrinsic_name if can_skip_intrinsic(intrinsic_name) => { + /* Intrinsics that can be safely skipped */ + } + name if name.starts_with("atomic") => { + let num_args = + if name.starts_with("atomic_cxchg") { 3 } else { 2 }; + assert_eq!( + args.len(), + num_args, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(..)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + }); + } + "compare_bytes" => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `compare_bytes`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + assert!(matches!( + args[1].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: args[2].clone(), + }); + self.push_target(MemoryInitOp::Check { + operand: args[1].clone(), + count: args[2].clone(), + }); + } + "copy" + | "volatile_copy_memory" + | "volatile_copy_nonoverlapping_memory" => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `copy`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + assert!(matches!( + args[1].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: args[2].clone(), + }); + self.push_target(MemoryInitOp::Set { + operand: args[1].clone(), + count: args[2].clone(), + value: true, + position: InsertPosition::After, + }); + } + "typed_swap" => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `typed_swap`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + assert!(matches!( + args[1].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + }); + self.push_target(MemoryInitOp::Check { + operand: args[1].clone(), + count: mk_const_operand(1, location.span()), + }); + } + "unaligned_volatile_load" => { + assert_eq!( + args.len(), + 1, + "Unexpected number of arguments for `unaligned_volatile_load`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + }); + } + "volatile_load" => { + assert_eq!( + args.len(), + 1, + "Unexpected number of arguments for `volatile_load`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + }); + } + "volatile_store" => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `volatile_store`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + self.push_target(MemoryInitOp::Set { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::After, + }); + } + "write_bytes" => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `write_bytes`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + self.push_target(MemoryInitOp::Set { + operand: args[0].clone(), + count: args[2].clone(), + value: true, + position: InsertPosition::After, + }) + } + intrinsic => { + self.push_target(MemoryInitOp::Unsupported { + reason: format!("Kani does not support reasoning about memory initialization of intrinsic `{intrinsic}`."), + }); + } + } + } + InstanceKind::Item => { + if instance.is_foreign_item() { + match instance.name().as_str() { + "alloc::alloc::__rust_alloc" + | "alloc::alloc::__rust_realloc" => { + /* Memory is uninitialized, nothing to do here. */ + } + "alloc::alloc::__rust_alloc_zeroed" => { + /* Memory is initialized here, need to update shadow memory. */ + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(destination.clone()), + count: args[0].clone(), + value: true, + position: InsertPosition::After, + }); + } + "alloc::alloc::__rust_dealloc" => { + /* Memory is uninitialized here, need to update shadow memory. */ + self.push_target(MemoryInitOp::Set { + operand: args[0].clone(), + count: args[1].clone(), + value: false, + position: InsertPosition::After, + }); + } + _ => {} + } + } + } + _ => {} + } + } + TerminatorKind::Drop { place, .. } => { + self.super_terminator(term, location); + let place_ty = place.ty(&self.locals).unwrap(); + // When drop is codegen'ed, a reference is taken to the place which is later implicitly coerced to a pointer. + // Hence, we need to bless this pointer as initialized. + self.push_target(MemoryInitOp::SetRef { + operand: Operand::Copy(place.clone()), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::Before, + }); + if place_ty.kind().is_raw_ptr() { + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(place.clone()), + count: mk_const_operand(1, location.span()), + value: false, + position: InsertPosition::After, + }); + } + } + TerminatorKind::Goto { .. } + | TerminatorKind::SwitchInt { .. } + | TerminatorKind::Resume + | TerminatorKind::Abort + | TerminatorKind::Return + | TerminatorKind::Unreachable + | TerminatorKind::Assert { .. } + | TerminatorKind::InlineAsm { .. } => self.super_terminator(term, location), + } + } + } + + fn visit_place(&mut self, place: &Place, ptx: PlaceContext, location: Location) { + for (idx, elem) in place.projection.iter().enumerate() { + let intermediate_place = + Place { local: place.local, projection: place.projection[..idx].to_vec() }; + match elem { + ProjectionElem::Deref => { + let ptr_ty = intermediate_place.ty(self.locals).unwrap(); + if ptr_ty.kind().is_raw_ptr() { + self.push_target(MemoryInitOp::Check { + operand: Operand::Copy(intermediate_place.clone()), + count: mk_const_operand(1, location.span()), + }); + } + } + ProjectionElem::Field(idx, target_ty) => { + if target_ty.kind().is_union() + && (!ptx.is_mutating() || place.projection.len() > idx + 1) + { + self.push_target(MemoryInitOp::Unsupported { + reason: "Kani does not support reasoning about memory initialization of unions.".to_string(), + }); + } + } + ProjectionElem::Index(_) + | ProjectionElem::ConstantIndex { .. } + | ProjectionElem::Subslice { .. } => { + /* For a slice to be indexed, it should be valid first. */ + } + ProjectionElem::Downcast(_) => {} + ProjectionElem::OpaqueCast(_) => {} + ProjectionElem::Subtype(_) => {} + } + } + self.super_place(place, ptx, location) + } + + fn visit_operand(&mut self, operand: &Operand, location: Location) { + if let Operand::Constant(constant) = operand { + if let ConstantKind::Allocated(allocation) = constant.const_.kind() { + for (_, prov) in &allocation.provenance.ptrs { + if let GlobalAlloc::Static(_) = GlobalAlloc::from(prov.0) { + self.push_target(MemoryInitOp::Set { + operand: Operand::Constant(constant.clone()), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::Before, + }); + }; + } + } + } + self.super_operand(operand, location); + } + + fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { + if let Rvalue::Cast(CastKind::PointerCoercion(PointerCoercion::Unsize), _, ty) = rvalue { + if let TyKind::RigidTy(RigidTy::RawPtr(pointee_ty, _)) = ty.kind() { + if pointee_ty.kind().is_trait() { + self.push_target(MemoryInitOp::Unsupported { + reason: "Kani does not support reasoning about memory initialization of unsized pointers.".to_string(), + }); + } + } + }; + self.super_rvalue(rvalue, location); + } +} + +/// Determines if the intrinsic has no memory initialization related function and hence can be +/// safely skipped. +fn can_skip_intrinsic(intrinsic_name: &str) -> bool { + match intrinsic_name { + "add_with_overflow" + | "arith_offset" + | "assert_inhabited" + | "assert_mem_uninitialized_valid" + | "assert_zero_valid" + | "assume" + | "bitreverse" + | "black_box" + | "breakpoint" + | "bswap" + | "caller_location" + | "ceilf32" + | "ceilf64" + | "copysignf32" + | "copysignf64" + | "cosf32" + | "cosf64" + | "ctlz" + | "ctlz_nonzero" + | "ctpop" + | "cttz" + | "cttz_nonzero" + | "discriminant_value" + | "exact_div" + | "exp2f32" + | "exp2f64" + | "expf32" + | "expf64" + | "fabsf32" + | "fabsf64" + | "fadd_fast" + | "fdiv_fast" + | "floorf32" + | "floorf64" + | "fmaf32" + | "fmaf64" + | "fmul_fast" + | "forget" + | "fsub_fast" + | "is_val_statically_known" + | "likely" + | "log10f32" + | "log10f64" + | "log2f32" + | "log2f64" + | "logf32" + | "logf64" + | "maxnumf32" + | "maxnumf64" + | "min_align_of" + | "min_align_of_val" + | "minnumf32" + | "minnumf64" + | "mul_with_overflow" + | "nearbyintf32" + | "nearbyintf64" + | "needs_drop" + | "powf32" + | "powf64" + | "powif32" + | "powif64" + | "pref_align_of" + | "raw_eq" + | "rintf32" + | "rintf64" + | "rotate_left" + | "rotate_right" + | "roundf32" + | "roundf64" + | "saturating_add" + | "saturating_sub" + | "sinf32" + | "sinf64" + | "sqrtf32" + | "sqrtf64" + | "sub_with_overflow" + | "truncf32" + | "truncf64" + | "type_id" + | "type_name" + | "unchecked_div" + | "unchecked_rem" + | "unlikely" + | "vtable_size" + | "vtable_align" + | "wrapping_add" + | "wrapping_mul" + | "wrapping_sub" => { + /* Intrinsics that do not interact with memory initialization. */ + true + } + "ptr_guaranteed_cmp" | "ptr_offset_from" | "ptr_offset_from_unsigned" | "size_of_val" => { + /* AFAICS from the documentation, none of those require the pointer arguments to be actually initialized. */ + true + } + name if name.starts_with("simd") => { + /* SIMD operations */ + true + } + "copy_nonoverlapping" => unreachable!( + "Expected `core::intrinsics::unreachable` to be handled by `StatementKind::CopyNonOverlapping`" + ), + "offset" => unreachable!( + "Expected `core::intrinsics::unreachable` to be handled by `BinOp::OffSet`" + ), + "unreachable" => unreachable!( + "Expected `std::intrinsics::unreachable` to be handled by `TerminatorKind::Unreachable`" + ), + "transmute" | "transmute_copy" | "unchecked_add" | "unchecked_mul" | "unchecked_shl" + | "size_of" | "unchecked_shr" | "unchecked_sub" => { + unreachable!("Expected intrinsic to be lowered before codegen") + } + "catch_unwind" => { + unimplemented!("") + } + "retag_box_to_raw" => { + unreachable!("This was removed in the latest Rust version.") + } + _ => { + /* Everything else */ + false + } + } +} + +/// Create a constant operand with a given value and span. +fn mk_const_operand(value: usize, span: Span) -> Operand { + Operand::Constant(ConstOperand { + span, + user_ty: None, + const_: MirConst::try_from_uint(value as u128, UintTy::Usize).unwrap(), + }) +} + +/// Try removing a topmost deref projection from a place if it exists, returning a place without it. +fn try_remove_topmost_deref(place: &Place) -> Option { + let mut new_place = place.clone(); + if let Some(ProjectionElem::Deref) = new_place.projection.pop() { + Some(new_place) + } else { + None + } +} + +/// Try retrieving instance for the given function operand. +fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result { + let ty = func.ty(locals).unwrap(); + match ty.kind() { + TyKind::RigidTy(RigidTy::FnDef(def, args)) => Ok(Instance::resolve(def, &args).unwrap()), + _ => Err(format!( + "Kani does not support reasoning about memory initialization of arguments to `{ty:?}`." + )), + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_values.rs b/kani-compiler/src/kani_middle/transform/check_values.rs index 4ecb16fab023..a7d0f14d270f 100644 --- a/kani-compiler/src/kani_middle/transform/check_values.rs +++ b/kani-compiler/src/kani_middle/transform/check_values.rs @@ -14,7 +14,9 @@ //! 1. We could merge the invalid values by the offset. //! 2. We could avoid checking places that have been checked before. use crate::args::ExtraChecks; -use crate::kani_middle::transform::body::{CheckType, MutableBody, SourceInstruction}; +use crate::kani_middle::transform::body::{ + CheckType, InsertPosition, MutableBody, SourceInstruction, +}; use crate::kani_middle::transform::check_values::SourceOp::UnsupportedCheck; use crate::kani_middle::transform::{TransformPass, TransformationType}; use crate::kani_queries::QueryDb; @@ -59,7 +61,7 @@ impl TransformPass for ValidValuePass { /// Transform the function body by inserting checks one-by-one. /// For every unsafe dereference or a transmute operation, we check all values are valid. - fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "transform"); let mut new_body = MutableBody::from(body); let orig_len = new_body.blocks().len(); @@ -83,13 +85,20 @@ impl ValidValuePass { for operation in instruction.operations { match operation { SourceOp::BytesValidity { ranges, target_ty, rvalue } => { - let value = body.new_assignment(rvalue, &mut source); + let value = body.new_assignment(rvalue, &mut source, InsertPosition::Before); let rvalue_ptr = Rvalue::AddressOf(Mutability::Not, Place::from(value)); for range in ranges { let result = build_limits(body, &range, rvalue_ptr.clone(), &mut source); let msg = format!("Undefined Behavior: Invalid value of type `{target_ty}`",); - body.add_check(tcx, &self.check_type, &mut source, result, &msg); + body.add_check( + tcx, + &self.check_type, + &mut source, + InsertPosition::Before, + result, + &msg, + ); } } SourceOp::DerefValidity { pointee_ty, rvalue, ranges } => { @@ -97,7 +106,14 @@ impl ValidValuePass { let result = build_limits(body, &range, rvalue.clone(), &mut source); let msg = format!("Undefined Behavior: Invalid value of type `{pointee_ty}`",); - body.add_check(tcx, &self.check_type, &mut source, result, &msg); + body.add_check( + tcx, + &self.check_type, + &mut source, + InsertPosition::Before, + result, + &msg, + ); } } SourceOp::UnsupportedCheck { check, ty } => { @@ -123,8 +139,8 @@ impl ValidValuePass { span, user_ty: None, })); - let result = body.new_assignment(rvalue, source); - body.add_check(tcx, &self.check_type, source, result, reason); + let result = body.new_assignment(rvalue, source, InsertPosition::Before); + body.add_check(tcx, &self.check_type, source, InsertPosition::Before, result, reason); } } @@ -758,30 +774,60 @@ pub fn build_limits( let start_const = body.new_const_operand(req.valid_range.start, primitive_ty, span); let end_const = body.new_const_operand(req.valid_range.end, primitive_ty, span); let orig_ptr = if req.offset != 0 { - let start_ptr = move_local(body.new_assignment(rvalue_ptr, source)); + let start_ptr = move_local(body.new_assignment(rvalue_ptr, source, InsertPosition::Before)); let byte_ptr = move_local(body.new_cast_ptr( start_ptr, Ty::unsigned_ty(UintTy::U8), Mutability::Not, source, + InsertPosition::Before, )); let offset_const = body.new_const_operand(req.offset as _, UintTy::Usize, span); - let offset = move_local(body.new_assignment(Rvalue::Use(offset_const), source)); - move_local(body.new_binary_op(BinOp::Offset, byte_ptr, offset, source)) + let offset = move_local(body.new_assignment( + Rvalue::Use(offset_const), + source, + InsertPosition::Before, + )); + move_local(body.new_binary_op( + BinOp::Offset, + byte_ptr, + offset, + source, + InsertPosition::Before, + )) } else { - move_local(body.new_assignment(rvalue_ptr, source)) + move_local(body.new_assignment(rvalue_ptr, source, InsertPosition::Before)) }; - let value_ptr = - body.new_cast_ptr(orig_ptr, Ty::unsigned_ty(primitive_ty), Mutability::Not, source); + let value_ptr = body.new_cast_ptr( + orig_ptr, + Ty::unsigned_ty(primitive_ty), + Mutability::Not, + source, + InsertPosition::Before, + ); let value = Operand::Copy(Place { local: value_ptr, projection: vec![ProjectionElem::Deref] }); - let start_result = body.new_binary_op(BinOp::Ge, value.clone(), start_const, source); - let end_result = body.new_binary_op(BinOp::Le, value, end_const, source); + let start_result = + body.new_binary_op(BinOp::Ge, value.clone(), start_const, source, InsertPosition::Before); + let end_result = + body.new_binary_op(BinOp::Le, value, end_const, source, InsertPosition::Before); if req.valid_range.wraps_around() { // valid >= start || valid <= end - body.new_binary_op(BinOp::BitOr, move_local(start_result), move_local(end_result), source) + body.new_binary_op( + BinOp::BitOr, + move_local(start_result), + move_local(end_result), + source, + InsertPosition::Before, + ) } else { // valid >= start && valid <= end - body.new_binary_op(BinOp::BitAnd, move_local(start_result), move_local(end_result), source) + body.new_binary_op( + BinOp::BitAnd, + move_local(start_result), + move_local(end_result), + source, + InsertPosition::Before, + ) } } diff --git a/kani-compiler/src/kani_middle/transform/contracts.rs b/kani-compiler/src/kani_middle/transform/contracts.rs index f5760bd4d829..eb5266e0a0eb 100644 --- a/kani-compiler/src/kani_middle/transform/contracts.rs +++ b/kani-compiler/src/kani_middle/transform/contracts.rs @@ -47,7 +47,7 @@ impl TransformPass for AnyModifiesPass { } /// Transform the function body by replacing it with the stub body. - fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "AnyModifiesPass::transform"); if instance.def.def_id() == self.kani_any.unwrap().def_id() { diff --git a/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs b/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs index 53bbd52086ac..ea7bf8625228 100644 --- a/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs +++ b/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs @@ -7,8 +7,12 @@ //! information; thus, they are implemented as a transformation pass where their body get generated //! by the transformation. +use crate::args::{Arguments, ExtraChecks}; use crate::kani_middle::attributes::matches_diagnostic; -use crate::kani_middle::transform::body::{CheckType, MutableBody, SourceInstruction}; +use crate::kani_middle::transform::body::{ + CheckType, InsertPosition, MutableBody, SourceInstruction, +}; +use crate::kani_middle::transform::check_uninit::PointeeInfo; use crate::kani_middle::transform::check_values::{build_limits, ty_validity_per_offset}; use crate::kani_middle::transform::{TransformPass, TransformationType}; use crate::kani_queries::QueryDb; @@ -18,15 +22,24 @@ use stable_mir::mir::{ BinOp, Body, ConstOperand, Operand, Place, Rvalue, Statement, StatementKind, RETURN_LOCAL, }; use stable_mir::target::MachineInfo; -use stable_mir::ty::{MirConst, RigidTy, TyKind}; +use stable_mir::ty::{FnDef, MirConst, RigidTy, Ty, TyKind, UintTy}; +use std::collections::HashMap; use std::fmt::Debug; use strum_macros::AsRefStr; use tracing::trace; +use super::check_uninit::{ + get_mem_init_fn_def, mk_layout_operand, resolve_mem_init_fn, PointeeLayout, +}; + /// Generate the body for a few Kani intrinsics. #[derive(Debug)] pub struct IntrinsicGeneratorPass { pub check_type: CheckType, + /// Used to cache FnDef lookups of injected memory initialization functions. + pub mem_init_fn_cache: HashMap<&'static str, FnDef>, + /// Used to enable intrinsics depending on the flags passed. + pub arguments: Arguments, } impl TransformPass for IntrinsicGeneratorPass { @@ -46,10 +59,12 @@ impl TransformPass for IntrinsicGeneratorPass { /// Transform the function body by inserting checks one-by-one. /// For every unsafe dereference or a transmute operation, we check all values are valid. - fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "transform"); if matches_diagnostic(tcx, instance.def, Intrinsics::KaniValidValue.as_ref()) { (true, self.valid_value_body(tcx, body)) + } else if matches_diagnostic(tcx, instance.def, Intrinsics::KaniIsInitialized.as_ref()) { + (true, self.is_initialized_body(tcx, body)) } else { (false, body) } @@ -86,7 +101,7 @@ impl IntrinsicGeneratorPass { })), ); let stmt = Statement { kind: assign, span }; - new_body.insert_stmt(stmt, &mut terminator); + new_body.insert_stmt(stmt, &mut terminator, InsertPosition::Before); let machine_info = MachineInfo::target(); // The first and only argument type. @@ -110,7 +125,7 @@ impl IntrinsicGeneratorPass { ); let assign = StatementKind::Assign(Place::from(ret_var), rvalue); let stmt = Statement { kind: assign, span }; - new_body.insert_stmt(stmt, &mut terminator); + new_body.insert_stmt(stmt, &mut terminator, InsertPosition::Before); } } Err(msg) => { @@ -120,11 +135,169 @@ impl IntrinsicGeneratorPass { span, user_ty: None, })); - let result = new_body.new_assignment(rvalue, &mut terminator); + let result = + new_body.new_assignment(rvalue, &mut terminator, InsertPosition::Before); let reason = format!( "Kani currently doesn't support checking validity of `{target_ty}`. {msg}" ); - new_body.add_check(tcx, &self.check_type, &mut terminator, result, &reason); + new_body.add_check( + tcx, + &self.check_type, + &mut terminator, + InsertPosition::Before, + result, + &reason, + ); + } + } + new_body.into() + } + + /// Generate the body for `is_initialized`, which looks like the following + /// + /// ``` + /// pub fn is_initialized(ptr: *const T, len: usize) -> bool { + /// let layout = ... // Byte mask representing the layout of T. + /// __kani_mem_init_sm_get(ptr, layout, len) + /// } + /// ``` + fn is_initialized_body(&mut self, tcx: TyCtxt, body: Body) -> Body { + let mut new_body = MutableBody::from(body); + new_body.clear_body(); + + // Initialize return variable with True. + let ret_var = RETURN_LOCAL; + let mut terminator = SourceInstruction::Terminator { bb: 0 }; + let span = new_body.locals()[ret_var].span; + let assign = StatementKind::Assign( + Place::from(ret_var), + Rvalue::Use(Operand::Constant(ConstOperand { + span, + user_ty: None, + const_: MirConst::from_bool(true), + })), + ); + let stmt = Statement { kind: assign, span }; + new_body.insert_stmt(stmt, &mut terminator, InsertPosition::Before); + + if !self.arguments.ub_check.contains(&ExtraChecks::Uninit) { + // Short-circut if uninitialized memory checks are not enabled. + return new_body.into(); + } + + // The first argument type. + let arg_ty = new_body.locals()[1].ty; + let TyKind::RigidTy(RigidTy::RawPtr(target_ty, _)) = arg_ty.kind() else { unreachable!() }; + let pointee_info = PointeeInfo::from_ty(target_ty); + match pointee_info { + Ok(pointee_info) => { + match pointee_info.layout() { + PointeeLayout::Sized { layout } => { + if layout.is_empty() { + // Encountered a ZST, so we can short-circut here. + return new_body.into(); + } + let is_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def( + tcx, + "KaniIsPtrInitialized", + &mut self.mem_init_fn_cache, + ), + layout.len(), + *pointee_info.ty(), + ); + let layout_operand = mk_layout_operand( + &mut new_body, + &mut terminator, + InsertPosition::Before, + &layout, + ); + new_body.add_call( + &is_ptr_initialized_instance, + &mut terminator, + InsertPosition::Before, + vec![ + Operand::Copy(Place::from(1)), + layout_operand, + Operand::Copy(Place::from(2)), + ], + Place::from(ret_var), + ); + } + PointeeLayout::Slice { element_layout } => { + // Since `str`` is a separate type, need to differentiate between [T] and str. + let (slicee_ty, diagnostic) = match pointee_info.ty().kind() { + TyKind::RigidTy(RigidTy::Slice(slicee_ty)) => { + (slicee_ty, "KaniIsSlicePtrInitialized") + } + TyKind::RigidTy(RigidTy::Str) => { + (Ty::unsigned_ty(UintTy::U8), "KaniIsStrPtrInitialized") + } + _ => unreachable!(), + }; + let is_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, diagnostic, &mut self.mem_init_fn_cache), + element_layout.len(), + slicee_ty, + ); + let layout_operand = mk_layout_operand( + &mut new_body, + &mut terminator, + InsertPosition::Before, + &element_layout, + ); + new_body.add_call( + &is_ptr_initialized_instance, + &mut terminator, + InsertPosition::Before, + vec![Operand::Copy(Place::from(1)), layout_operand], + Place::from(ret_var), + ); + } + PointeeLayout::TraitObject => { + let rvalue = Rvalue::Use(Operand::Constant(ConstOperand { + const_: MirConst::from_bool(false), + span, + user_ty: None, + })); + let result = new_body.new_assignment( + rvalue, + &mut terminator, + InsertPosition::Before, + ); + let reason: &str = "Kani does not support reasoning about memory initialization of pointers to trait objects."; + + new_body.add_check( + tcx, + &self.check_type, + &mut terminator, + InsertPosition::Before, + result, + &reason, + ); + } + }; + } + Err(msg) => { + // We failed to retrieve the type layout. + let rvalue = Rvalue::Use(Operand::Constant(ConstOperand { + const_: MirConst::from_bool(false), + span, + user_ty: None, + })); + let result = + new_body.new_assignment(rvalue, &mut terminator, InsertPosition::Before); + let reason = format!( + "Kani currently doesn't support checking memory initialization of `{target_ty}`. {msg}" + ); + new_body.add_check( + tcx, + &self.check_type, + &mut terminator, + InsertPosition::Before, + result, + &reason, + ); } } new_body.into() @@ -135,4 +308,5 @@ impl IntrinsicGeneratorPass { #[strum(serialize_all = "PascalCase")] enum Intrinsics { KaniValidValue, + KaniIsInitialized, } diff --git a/kani-compiler/src/kani_middle/transform/mod.rs b/kani-compiler/src/kani_middle/transform/mod.rs index 8d5c61f55c92..56ab0be493c4 100644 --- a/kani-compiler/src/kani_middle/transform/mod.rs +++ b/kani-compiler/src/kani_middle/transform/mod.rs @@ -18,6 +18,7 @@ //! case is added. use crate::kani_middle::codegen_units::CodegenUnit; use crate::kani_middle::transform::body::CheckType; +use crate::kani_middle::transform::check_uninit::UninitPass; use crate::kani_middle::transform::check_values::ValidValuePass; use crate::kani_middle::transform::contracts::AnyModifiesPass; use crate::kani_middle::transform::kani_intrinsics::IntrinsicGeneratorPass; @@ -30,6 +31,7 @@ use std::collections::HashMap; use std::fmt::Debug; pub(crate) mod body; +mod check_uninit; mod check_values; mod contracts; mod kani_intrinsics; @@ -64,7 +66,23 @@ impl BodyTransformation { // This has to come after stubs since we want this to replace the stubbed body. transformer.add_pass(queries, AnyModifiesPass::new(tcx, &unit)); transformer.add_pass(queries, ValidValuePass { check_type: check_type.clone() }); - transformer.add_pass(queries, IntrinsicGeneratorPass { check_type }); + // Putting `UninitPass` after `ValidValuePass` makes sure that the code generated by + // `UninitPass` does not get unnecessarily instrumented by valid value checks. However, it + // would also make sense to check that the values are initialized before checking their + // validity. In the future, it would be nice to have a mechanism to skip automatically + // generated code for future instrumentation passes. + transformer.add_pass( + queries, + UninitPass { check_type: check_type.clone(), mem_init_fn_cache: HashMap::new() }, + ); + transformer.add_pass( + queries, + IntrinsicGeneratorPass { + check_type, + mem_init_fn_cache: HashMap::new(), + arguments: queries.args().clone(), + }, + ); transformer } @@ -79,7 +97,7 @@ impl BodyTransformation { None => { let mut body = instance.body().unwrap(); let mut modified = false; - for pass in self.stub_passes.iter().chain(self.inst_passes.iter()) { + for pass in self.stub_passes.iter_mut().chain(self.inst_passes.iter_mut()) { let result = pass.transform(tcx, body, instance); modified |= result.0; body = result.1; @@ -127,7 +145,7 @@ pub(crate) trait TransformPass: Debug { Self: Sized; /// Run a transformation pass in the function body. - fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body); + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body); } /// The transformation result. diff --git a/kani-compiler/src/kani_middle/transform/stubs.rs b/kani-compiler/src/kani_middle/transform/stubs.rs index 4f249afdd45a..3dbd667c3943 100644 --- a/kani-compiler/src/kani_middle/transform/stubs.rs +++ b/kani-compiler/src/kani_middle/transform/stubs.rs @@ -43,7 +43,7 @@ impl TransformPass for FnStubPass { } /// Transform the function body by replacing it with the stub body. - fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "transform"); let ty = instance.ty(); if let TyKind::RigidTy(RigidTy::FnDef(fn_def, mut args)) = ty.kind() { @@ -103,7 +103,7 @@ impl TransformPass for ExternFnStubPass { /// /// We need to find function calls and function pointers. /// We should replace this with a visitor once StableMIR includes a mutable one. - fn transform(&self, _tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, _tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "transform"); let mut new_body = MutableBody::from(body); let changed = false; diff --git a/kani-driver/src/call_single_file.rs b/kani-driver/src/call_single_file.rs index 18ca11b0a30f..7cbe150427e9 100644 --- a/kani-driver/src/call_single_file.rs +++ b/kani-driver/src/call_single_file.rs @@ -139,6 +139,10 @@ impl KaniSession { flags.push("--ub-check=ptr_to_ref_cast".into()) } + if self.args.common_args.unstable_features.contains(UnstableFeature::UninitChecks) { + flags.push("--ub-check=uninit".into()) + } + if self.args.ignore_locals_lifetime { flags.push("--ignore-storage-markers".into()) } diff --git a/kani_metadata/src/unstable.rs b/kani_metadata/src/unstable.rs index 9f82e4b02a64..68e4fba28819 100644 --- a/kani_metadata/src/unstable.rs +++ b/kani_metadata/src/unstable.rs @@ -91,6 +91,8 @@ pub enum UnstableFeature { GhostState, /// Automatically check that pointers are valid when casting them to references. PtrToRefCastChecks, + /// Automatically check that uninitialized memory is not used. + UninitChecks, /// Enable an unstable option or subcommand. UnstableOptions, } diff --git a/library/kani/src/lib.rs b/library/kani/src/lib.rs index 0a52a9516398..6eab2a331811 100644 --- a/library/kani/src/lib.rs +++ b/library/kani/src/lib.rs @@ -35,6 +35,7 @@ pub mod vec; #[doc(hidden)] pub mod internal; +mod mem_init; mod models; pub use arbitrary::Arbitrary; diff --git a/library/kani/src/mem.rs b/library/kani/src/mem.rs index c40a1aa696e3..0b390e74288d 100644 --- a/library/kani/src/mem.rs +++ b/library/kani/src/mem.rs @@ -120,6 +120,7 @@ where let (thin_ptr, metadata) = ptr.to_raw_parts(); metadata.is_ptr_aligned(thin_ptr, Internal) && is_inbounds(&metadata, thin_ptr) + && is_initialized(ptr, 1) && unsafe { has_valid_value(ptr) } } @@ -147,7 +148,7 @@ where ::Metadata: PtrProperties, { let (thin_ptr, metadata) = ptr.to_raw_parts(); - is_inbounds(&metadata, thin_ptr) && unsafe { has_valid_value(ptr) } + is_inbounds(&metadata, thin_ptr) && is_initialized(ptr, 1) && unsafe { has_valid_value(ptr) } } /// Checks that `data_ptr` points to an allocation that can hold data of size calculated from `T`. @@ -290,6 +291,13 @@ unsafe fn has_valid_value(_ptr: *const T) -> bool { kani_intrinsic() } +/// Check whether `len * size_of::()` bytes are initialized starting from `ptr`. +#[rustc_diagnostic_item = "KaniIsInitialized"] +#[inline(never)] +pub fn is_initialized(_ptr: *const T, _len: usize) -> bool { + kani_intrinsic() +} + /// Get the object ID of the given pointer. #[rustc_diagnostic_item = "KaniPointerObject"] #[inline(never)] diff --git a/library/kani/src/mem_init.rs b/library/kani/src/mem_init.rs new file mode 100644 index 000000000000..37832ea32604 --- /dev/null +++ b/library/kani/src/mem_init.rs @@ -0,0 +1,122 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! This module uses shadow memory API to track memory initialization of raw pointers. +//! +//! Currently, memory initialization is tracked on per-byte basis, so each byte of memory pointed to +//! by raw pointers could be either initialized or uninitialized. Compiler automatically inserts +//! calls to `is_xxx_initialized` and `set_xxx_initialized` at appropriate locations to get or set +//! the initialization status of the memory pointed to. Padding bytes are always considered +//! uninitialized: type layout is determined at compile time and statically injected into the +//! program (see `Layout`). + +// Definitions in this module are not meant to be visible to the end user, only the compiler. +#![allow(dead_code)] + +use crate::shadow::ShadowMem; + +/// Bytewise mask, representing which bytes of a type are data and which are padding. +/// For example, for a type like this: +/// ``` +/// #[repr(C)] +/// struct Foo { +/// a: u16, +/// b: u8, +/// } +/// ``` +/// the layout would be [true, true, true, false]; +type Layout = [bool; N]; + +/// Global shadow memory object for tracking memory initialization. +#[rustc_diagnostic_item = "KaniMemInitShadowMem"] +static mut MEM_INIT_SHADOW_MEM: ShadowMem = ShadowMem::new(false); + +/// Get initialization state of `len` items laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniIsUnitPtrInitialized"] +fn is_unit_ptr_initialized(ptr: *const (), layout: Layout, len: usize) -> bool { + let mut count: usize = 0; + while count < len { + let mut offset: usize = 0; + while offset < N { + unsafe { + if layout[offset] + && !MEM_INIT_SHADOW_MEM.get((ptr as *const u8).add(count * N + offset)) + { + return false; + } + offset += 1; + } + } + count += 1; + } + true +} + +/// Set initialization state to `value` for `len` items laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniSetUnitPtrInitialized"] +fn set_unit_ptr_initialized( + ptr: *const (), + layout: Layout, + len: usize, + value: bool, +) { + let mut count: usize = 0; + while count < len { + let mut offset: usize = 0; + while offset < N { + unsafe { + MEM_INIT_SHADOW_MEM + .set((ptr as *const u8).add(count * N + offset), value && layout[offset]); + } + offset += 1; + } + count += 1; + } +} + +/// Get initialization state of `len` items laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniIsPtrInitialized"] +fn is_ptr_initialized(ptr: *const T, layout: Layout, len: usize) -> bool { + let (ptr, _) = ptr.to_raw_parts(); + is_unit_ptr_initialized(ptr, layout, len) +} + +/// Set initialization state to `value` for `len` items laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniSetPtrInitialized"] +fn set_ptr_initialized( + ptr: *const T, + layout: Layout, + len: usize, + value: bool, +) { + let (ptr, _) = ptr.to_raw_parts(); + set_unit_ptr_initialized(ptr, layout, len, value); +} + +/// Get initialization state of the slice, items of which are laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniIsSlicePtrInitialized"] +fn is_slice_ptr_initialized(ptr: *const [T], layout: Layout) -> bool { + let (ptr, len) = ptr.to_raw_parts(); + is_unit_ptr_initialized(ptr, layout, len) +} + +/// Set initialization state of the slice, items of which are laid out according to the `layout` starting at address `ptr` to `value`. +#[rustc_diagnostic_item = "KaniSetSlicePtrInitialized"] +fn set_slice_ptr_initialized(ptr: *const [T], layout: Layout, value: bool) { + let (ptr, len) = ptr.to_raw_parts(); + set_unit_ptr_initialized(ptr, layout, len, value); +} + +/// Get initialization state of the string slice, items of which are laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniIsStrPtrInitialized"] +fn is_str_ptr_initialized(ptr: *const str, layout: Layout) -> bool { + let (ptr, len) = ptr.to_raw_parts(); + is_unit_ptr_initialized(ptr, layout, len) +} + +/// Set initialization state of the string slice, items of which are laid out according to the `layout` starting at address `ptr` to `value`. +#[rustc_diagnostic_item = "KaniSetStrPtrInitialized"] +fn set_str_ptr_initialized(ptr: *const str, layout: Layout, value: bool) { + let (ptr, len) = ptr.to_raw_parts(); + set_unit_ptr_initialized(ptr, layout, len, value); +} diff --git a/library/kani_core/src/mem.rs b/library/kani_core/src/mem.rs index 8dd4a27cb03a..3b10856765a5 100644 --- a/library/kani_core/src/mem.rs +++ b/library/kani_core/src/mem.rs @@ -125,6 +125,7 @@ macro_rules! kani_mem { let (thin_ptr, metadata) = ptr.to_raw_parts(); metadata.is_ptr_aligned(thin_ptr, Internal) && is_inbounds(&metadata, thin_ptr) + && is_initialized(ptr, 1) && unsafe { has_valid_value(ptr) } } @@ -153,7 +154,9 @@ macro_rules! kani_mem { ::Metadata: PtrProperties, { let (thin_ptr, metadata) = ptr.to_raw_parts(); - is_inbounds(&metadata, thin_ptr) && unsafe { has_valid_value(ptr) } + is_inbounds(&metadata, thin_ptr) + && is_initialized(ptr, 1) + && unsafe { has_valid_value(ptr) } } /// Checks that `data_ptr` points to an allocation that can hold data of size calculated from `T`. @@ -296,6 +299,13 @@ macro_rules! kani_mem { kani_intrinsic() } + /// Check whether `len * size_of::()` bytes are initialized starting from `ptr`. + #[rustc_diagnostic_item = "KaniIsInitialized"] + #[inline(never)] + pub fn is_initialized(_ptr: *const T, _len: usize) -> bool { + kani_intrinsic() + } + /// Get the object ID of the given pointer. #[rustc_diagnostic_item = "KaniPointerObject"] #[inline(never)] diff --git a/tests/expected/uninit/access-padding-uninit/access-padding-uninit.rs b/tests/expected/uninit/access-padding-uninit/access-padding-uninit.rs new file mode 100644 index 000000000000..d6e735e219ad --- /dev/null +++ b/tests/expected/uninit/access-padding-uninit/access-padding-uninit.rs @@ -0,0 +1,16 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr::addr_of; + +#[repr(C)] +struct S(u32, u8); + +/// Checks that Kani catches an attempt to access padding of a struct using raw pointers. +#[kani::proof] +fn check_uninit_padding() { + let s = S(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let padding = unsafe { *(ptr.add(5)) }; // ~ERROR: padding bytes are uninitialized, so reading them is UB. +} diff --git a/tests/expected/uninit/access-padding-uninit/expected b/tests/expected/uninit/access-padding-uninit/expected new file mode 100644 index 000000000000..da8d15b2dbb9 --- /dev/null +++ b/tests/expected/uninit/access-padding-uninit/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/access-padding-via-cast/access-padding-via-cast.rs b/tests/expected/uninit/access-padding-via-cast/access-padding-via-cast.rs new file mode 100644 index 000000000000..b73bebc827bc --- /dev/null +++ b/tests/expected/uninit/access-padding-via-cast/access-padding-via-cast.rs @@ -0,0 +1,20 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +#[repr(C)] +struct S(u8, u16); + +/// Checks that Kani catches an attempt to access padding of a struct using casting to different types. +#[kani::proof] +fn check_uninit_padding_after_cast() { + unsafe { + let mut s = S(0, 0); + let sptr = ptr::addr_of_mut!(s); + let sptr2 = sptr as *mut [u8; 4]; + *sptr2 = [0; 4]; + *sptr = S(0, 0); // should reset the padding + let val = *sptr2; // ~ERROR: encountered uninitialized memory + } +} diff --git a/tests/expected/uninit/access-padding-via-cast/expected b/tests/expected/uninit/access-padding-via-cast/expected new file mode 100644 index 000000000000..12c5c0a4a439 --- /dev/null +++ b/tests/expected/uninit/access-padding-via-cast/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut [u8; 4]` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/alloc-to-slice/alloc-to-slice.rs b/tests/expected/uninit/alloc-to-slice/alloc-to-slice.rs new file mode 100644 index 000000000000..3c4420f5791f --- /dev/null +++ b/tests/expected/uninit/alloc-to-slice/alloc-to-slice.rs @@ -0,0 +1,20 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::alloc::{alloc, dealloc, Layout}; +use std::slice::from_raw_parts; + +/// Checks that Kani catches an attempt to form a slice from uninitialized memory. +#[kani::proof] +fn check_uninit_slice() { + let layout = Layout::from_size_align(16, 8).unwrap(); + unsafe { + let ptr = alloc(layout); + *ptr = 0x41; + *ptr.add(1) = 0x42; + *ptr.add(2) = 0x43; + *ptr.add(3) = 0x44; + let uninit_slice = from_raw_parts(ptr, 16); // ~ERROR: forming a slice from unitialized memory is UB. + } +} diff --git a/tests/expected/uninit/alloc-to-slice/expected b/tests/expected/uninit/alloc-to-slice/expected new file mode 100644 index 000000000000..f8347a591edf --- /dev/null +++ b/tests/expected/uninit/alloc-to-slice/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const [u8]` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/vec-read-bad-len/expected b/tests/expected/uninit/vec-read-bad-len/expected new file mode 100644 index 000000000000..f8347a591edf --- /dev/null +++ b/tests/expected/uninit/vec-read-bad-len/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const [u8]` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/vec-read-bad-len/vec-read-bad-len.rs b/tests/expected/uninit/vec-read-bad-len/vec-read-bad-len.rs new file mode 100644 index 000000000000..9778bb11a277 --- /dev/null +++ b/tests/expected/uninit/vec-read-bad-len/vec-read-bad-len.rs @@ -0,0 +1,15 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ops::Index; + +/// Checks that Kani catches an attempt to read uninitialized memory from a vector with bad length. +#[kani::proof] +fn check_vec_read_bad_len() { + let mut v: Vec = Vec::with_capacity(10); + unsafe { + v.set_len(5); // even though length is now 5, vector is still uninitialized + } + let uninit = v.index(0); // ~ERROR: reading from unitialized memory is UB. +} diff --git a/tests/expected/uninit/vec-read-semi-init/expected b/tests/expected/uninit/vec-read-semi-init/expected new file mode 100644 index 000000000000..da8d15b2dbb9 --- /dev/null +++ b/tests/expected/uninit/vec-read-semi-init/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/vec-read-semi-init/vec-read-semi-init.rs b/tests/expected/uninit/vec-read-semi-init/vec-read-semi-init.rs new file mode 100644 index 000000000000..4330f5f53023 --- /dev/null +++ b/tests/expected/uninit/vec-read-semi-init/vec-read-semi-init.rs @@ -0,0 +1,11 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +/// Checks that Kani catches an attempt to read uninitialized memory from a semi-initialized vector. +#[kani::proof] +fn check_vec_read_semi_init() { + let mut v: Vec = Vec::with_capacity(10); + unsafe { *v.as_mut_ptr().add(4) = 0x42 }; + let uninit = unsafe { *v.as_ptr().add(5) }; // ~ERROR: reading from unitialized memory is UB. +} diff --git a/tests/expected/uninit/vec-read-uninit/expected b/tests/expected/uninit/vec-read-uninit/expected new file mode 100644 index 000000000000..da8d15b2dbb9 --- /dev/null +++ b/tests/expected/uninit/vec-read-uninit/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/vec-read-uninit/vec-read-uninit.rs b/tests/expected/uninit/vec-read-uninit/vec-read-uninit.rs new file mode 100644 index 000000000000..c322b4d33bb2 --- /dev/null +++ b/tests/expected/uninit/vec-read-uninit/vec-read-uninit.rs @@ -0,0 +1,10 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +/// Checks that Kani catches an attempt to read uninitialized memory from an uninitialized vector. +#[kani::proof] +fn check_vec_read_uninit() { + let v: Vec = Vec::with_capacity(10); + let uninit = unsafe { *v.as_ptr().add(5) }; // ~ERROR: reading from unitialized memory is UB. +} diff --git a/tests/kani/Uninit/access-padding-enum-diverging-variants.rs b/tests/kani/Uninit/access-padding-enum-diverging-variants.rs new file mode 100644 index 000000000000..7feb493a5b3f --- /dev/null +++ b/tests/kani/Uninit/access-padding-enum-diverging-variants.rs @@ -0,0 +1,34 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +use std::ptr::addr_of; + +/// The layout of this enum is variable, so Kani cannot check memory initialization statically. +#[repr(C)] +enum E1 { + A(u16, u8), + B(u16), +} + +/// The layout of this enum is variable, but both of the arms have the same padding, so Kani should +/// support that. +#[repr(C)] +enum E2 { + A(u16), + B(u8, u8), +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_unsupported() { + let s = E1::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; +} + +#[kani::proof] +fn access_padding_supported() { + let s = E2::A(0); + let ptr: *const u8 = addr_of!(s) as *const u8; +} diff --git a/tests/kani/Uninit/access-padding-enum-multiple-variants.rs b/tests/kani/Uninit/access-padding-enum-multiple-variants.rs new file mode 100644 index 000000000000..fb8fae06ca59 --- /dev/null +++ b/tests/kani/Uninit/access-padding-enum-multiple-variants.rs @@ -0,0 +1,49 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +use std::ptr::addr_of; + +/// The layout of this enum is the following (D = data, P = padding): +/// 0 1 2 3 4 5 6 7 +/// [D, D, D, D, D, D, D, P] +/// ---------- ------- +/// \_ tag (i32) \_ A|B(u16, u8) +#[repr(C)] +enum E { + A(u16, u8), + B(u16, u8), +} + +#[kani::proof] +fn access_padding_init_a() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_0 = unsafe { *(ptr.add(0)) }; + let at_4 = unsafe { *(ptr.add(4)) }; +} + +#[kani::proof] +fn access_padding_init_b() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_0 = unsafe { *(ptr.add(0)) }; + let at_4 = unsafe { *(ptr.add(4)) }; +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_uninit_a() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_7 = unsafe { *(ptr.add(7)) }; +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_uninit_b() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_7 = unsafe { *(ptr.add(7)) }; +} diff --git a/tests/kani/Uninit/access-padding-enum-single-field.rs b/tests/kani/Uninit/access-padding-enum-single-field.rs new file mode 100644 index 000000000000..c283b603f705 --- /dev/null +++ b/tests/kani/Uninit/access-padding-enum-single-field.rs @@ -0,0 +1,34 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +use std::ptr::addr_of; + +/// The layout of this enum is the following (D = data, P = padding): +/// 0 1 2 3 4 5 6 7 8 9 A B C D E F +/// [D, D, D, D, P, P, P, P, D, D, D, D, D, D, D, D] +/// ---------- ---------------------- +/// \_ tag (i32) \_ A(u64) +#[repr(C)] +enum E { + A(u64), +} + +#[kani::proof] +fn access_padding_init() { + let s = E::A(0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_0 = unsafe { *(ptr.add(0)) }; + let at_3 = unsafe { *(ptr.add(3)) }; + let at_9 = unsafe { *(ptr.add(9)) }; +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_uninit() { + let s = E::A(0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_4 = unsafe { *(ptr.add(4)) }; + let at_7 = unsafe { *(ptr.add(7)) }; +} diff --git a/tests/kani/Uninit/access-padding-enum-single-variant.rs b/tests/kani/Uninit/access-padding-enum-single-variant.rs new file mode 100644 index 000000000000..f33cfe7ce6fb --- /dev/null +++ b/tests/kani/Uninit/access-padding-enum-single-variant.rs @@ -0,0 +1,32 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +use std::ptr::addr_of; + +/// The layout of this enum is the following (D = data, P = padding): +/// 0 1 2 3 4 5 6 7 +/// [D, D, D, D, D, D, D, P] +/// ---------- ------- +/// \_ tag (i32) \_ A(u16, u8) +#[repr(C)] +enum E { + A(u16, u8), +} + +#[kani::proof] +fn access_padding_init() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_0 = unsafe { *(ptr.add(0)) }; + let at_4 = unsafe { *(ptr.add(4)) }; +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_uninit() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_7 = unsafe { *(ptr.add(7)) }; +} diff --git a/tests/perf/uninit/Cargo.toml b/tests/perf/uninit/Cargo.toml new file mode 100644 index 000000000000..9f44cb3fe103 --- /dev/null +++ b/tests/perf/uninit/Cargo.toml @@ -0,0 +1,14 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT + +[package] +name = "uninit" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[package.metadata.kani] +unstable = { ghost-state = true, uninit-checks = true } diff --git a/tests/perf/uninit/expected b/tests/perf/uninit/expected new file mode 100644 index 000000000000..f7b4fd303a77 --- /dev/null +++ b/tests/perf/uninit/expected @@ -0,0 +1 @@ +Complete - 5 successfully verified harnesses, 0 failures, 5 total. \ No newline at end of file diff --git a/tests/perf/uninit/src/lib.rs b/tests/perf/uninit/src/lib.rs new file mode 100644 index 000000000000..86b101c0e5d8 --- /dev/null +++ b/tests/perf/uninit/src/lib.rs @@ -0,0 +1,68 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::alloc::{alloc, alloc_zeroed, Layout}; +use std::ptr; +use std::ptr::addr_of; +use std::slice::from_raw_parts; + +#[repr(C)] +struct S(u32, u8); + +#[kani::proof] +fn access_padding_init() { + let s = S(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let data = unsafe { *(ptr.add(3)) }; // Accessing data bytes is valid. +} + +#[kani::proof] +fn alloc_to_slice() { + let layout = Layout::from_size_align(32, 8).unwrap(); + unsafe { + let ptr = alloc(layout); + *ptr = 0x41; + *ptr.add(1) = 0x42; + *ptr.add(2) = 0x43; + *ptr.add(3) = 0x44; + *ptr.add(16) = 0x00; + let val = *(ptr.add(2)); // Accessing previously initialized byte is valid. + } +} + +#[kani::proof] +fn alloc_zeroed_to_slice() { + let layout = Layout::from_size_align(32, 8).unwrap(); + unsafe { + // This returns initialized memory, so any further accesses are valid. + let ptr = alloc_zeroed(layout); + *ptr = 0x41; + *ptr.add(1) = 0x42; + *ptr.add(2) = 0x43; + *ptr.add(3) = 0x44; + *ptr.add(16) = 0x00; + let slice1 = from_raw_parts(ptr, 16); + let slice2 = from_raw_parts(ptr.add(16), 16); + } +} + +#[kani::proof] +fn struct_padding_and_arr_init() { + unsafe { + let mut s = S(0, 0); + let sptr = ptr::addr_of_mut!(s); + let sptr2 = sptr as *mut [u8; 4]; + *sptr2 = [0; 4]; + *sptr = S(0, 0); + // Both S(u32, u8) and [u8; 4] have the same layout, so the memory is initialized. + let val = *sptr2; + } +} + +#[kani::proof] +fn vec_read_init() { + let mut v: Vec = Vec::with_capacity(10); + unsafe { *v.as_mut_ptr().add(5) = 0x42 }; + let def = unsafe { *v.as_ptr().add(5) }; // Accessing previously initialized byte is valid. + let x = def + 1; +} diff --git a/tests/std-checks/core/mem.expected b/tests/std-checks/core/mem.expected index 8a3b89a8f66a..1484c83901fc 100644 --- a/tests/std-checks/core/mem.expected +++ b/tests/std-checks/core/mem.expected @@ -1,3 +1,7 @@ +Checking harness mem::verify::check_swap_unit... + +Failed Checks: ptr NULL or writable up to size + Summary: Verification failed for - mem::verify::check_swap_unit -Complete - 3 successfully verified harnesses, 1 failures, 4 total. +Complete - 6 successfully verified harnesses, 1 failures, 7 total. diff --git a/tests/std-checks/core/slice.expected b/tests/std-checks/core/slice.expected new file mode 100644 index 000000000000..01a90d50b557 --- /dev/null +++ b/tests/std-checks/core/slice.expected @@ -0,0 +1 @@ +Complete - 1 successfully verified harnesses, 0 failures, 1 total. diff --git a/tests/std-checks/core/src/lib.rs b/tests/std-checks/core/src/lib.rs index f0e5b480a2d2..b0a4fbc6154f 100644 --- a/tests/std-checks/core/src/lib.rs +++ b/tests/std-checks/core/src/lib.rs @@ -7,3 +7,4 @@ extern crate kani; pub mod mem; pub mod ptr; +pub mod slice; diff --git a/tests/std-checks/core/src/mem.rs b/tests/std-checks/core/src/mem.rs index 4f41d176a73a..b0400d0a75f5 100644 --- a/tests/std-checks/core/src/mem.rs +++ b/tests/std-checks/core/src/mem.rs @@ -18,6 +18,11 @@ pub mod contracts { pub fn swap(x: &mut T, y: &mut T) { std::mem::swap(x, y) } + + #[kani::modifies(dest)] + pub fn replace(dest: &mut T, src: T) -> T { + std::mem::replace(dest, src) + } } #[cfg(kani)] @@ -70,4 +75,34 @@ mod verify { std::mem::forget(x); std::mem::forget(y); } + + #[kani::proof_for_contract(contracts::replace)] + pub fn check_replace_primitive() { + let mut x: u8 = kani::any(); + let x_before = x; + + let y: u8 = kani::any(); + let x_returned = contracts::replace(&mut x, y); + + kani::assert(x_before == x_returned, "x_before == x_returned"); + } + + #[kani::proof_for_contract(contracts::replace)] + pub fn check_replace_adt_no_drop() { + let mut x: CannotDrop = kani::any(); + let y: CannotDrop = kani::any(); + let new_x = contracts::replace(&mut x, y); + std::mem::forget(x); + std::mem::forget(new_x); + } + + /// Memory replace logic is optimized according to the size and alignment of a type. + #[kani::proof_for_contract(contracts::replace)] + pub fn check_replace_large_adt_no_drop() { + let mut x: CannotDrop<[u128; 4]> = kani::any(); + let y: CannotDrop<[u128; 4]> = kani::any(); + let new_x = contracts::replace(&mut x, y); + std::mem::forget(x); + std::mem::forget(new_x); + } } diff --git a/tests/std-checks/core/src/slice.rs b/tests/std-checks/core/src/slice.rs new file mode 100644 index 000000000000..044f4bd38586 --- /dev/null +++ b/tests/std-checks/core/src/slice.rs @@ -0,0 +1,30 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +/// Create wrapper functions to standard library functions that contains their contract. +pub mod contracts { + use kani::{mem::*, requires}; + + #[requires(can_dereference(std::ptr::slice_from_raw_parts(data, len)))] + #[requires(is_initialized(data, len))] + pub unsafe fn from_raw_parts<'a, T>(data: *const T, len: usize) -> &'a [T] { + std::slice::from_raw_parts(data, len) + } +} + +#[cfg(kani)] +mod verify { + use super::*; + + const MAX_LEN: usize = 2; + + #[kani::proof_for_contract(contracts::from_raw_parts)] + #[kani::unwind(25)] + pub fn check_from_raw_parts_primitive() { + let len: usize = kani::any(); + kani::assume(len < MAX_LEN); + + let arr = vec![0u8; len]; + let _slice = unsafe { contracts::from_raw_parts(arr.as_ptr(), len) }; + } +}