diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs index fc7f1166af99a..aaefcb1b24c31 100644 --- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs +++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs @@ -25,6 +25,31 @@ use super::{ }; use crate::fluent_generated as fluent; +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum MulAddType { + /// Used with `fma` and `simd_fma`, always uses fused-multiply-add + Fused, + /// Used with `fmuladd` and `simd_relaxed_fma`, nondeterministically determines whether to use + /// fma or simple multiply-add + Nondeterministic, +} + +#[derive(Copy, Clone)] +pub(crate) enum MinMax { + /// The IEEE `Minimum` operation - see `f32::minimum` etc + /// In particular, `-0.0` is considered smaller than `+0.0`. + Minimum, + /// The IEEE `MinNum` operation - see `f32::min` etc + /// In particular, if the inputs are `-0.0` and `+0.0`, the result is non-deterministic. + MinNum, + /// The IEEE `Maximum` operation - see `f32::maximum` etc + /// In particular, `-0.0` is considered smaller than `+0.0`. + Maximum, + /// The IEEE `MaxNum` operation - see `f32::max` etc + /// In particular, if the inputs are `-0.0` and `+0.0`, the result is non-deterministic. + MaxNum, +} + /// Directly returns an `Allocation` containing an absolute path representation of the given type. 
pub(crate) fn alloc_type_name<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> (AllocId, u64) { let path = crate::util::type_name(tcx, ty); @@ -486,25 +511,33 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { self.write_scalar(Scalar::from_target_usize(align.bytes(), self), dest)?; } - sym::minnumf16 => self.float_min_intrinsic::(args, dest)?, - sym::minnumf32 => self.float_min_intrinsic::(args, dest)?, - sym::minnumf64 => self.float_min_intrinsic::(args, dest)?, - sym::minnumf128 => self.float_min_intrinsic::(args, dest)?, + sym::minnumf16 => self.float_minmax_intrinsic::(args, MinMax::MinNum, dest)?, + sym::minnumf32 => self.float_minmax_intrinsic::(args, MinMax::MinNum, dest)?, + sym::minnumf64 => self.float_minmax_intrinsic::(args, MinMax::MinNum, dest)?, + sym::minnumf128 => self.float_minmax_intrinsic::(args, MinMax::MinNum, dest)?, - sym::minimumf16 => self.float_minimum_intrinsic::(args, dest)?, - sym::minimumf32 => self.float_minimum_intrinsic::(args, dest)?, - sym::minimumf64 => self.float_minimum_intrinsic::(args, dest)?, - sym::minimumf128 => self.float_minimum_intrinsic::(args, dest)?, + sym::minimumf16 => self.float_minmax_intrinsic::(args, MinMax::Minimum, dest)?, + sym::minimumf32 => { + self.float_minmax_intrinsic::(args, MinMax::Minimum, dest)? + } + sym::minimumf64 => { + self.float_minmax_intrinsic::(args, MinMax::Minimum, dest)? 
+ } + sym::minimumf128 => self.float_minmax_intrinsic::(args, MinMax::Minimum, dest)?, - sym::maxnumf16 => self.float_max_intrinsic::(args, dest)?, - sym::maxnumf32 => self.float_max_intrinsic::(args, dest)?, - sym::maxnumf64 => self.float_max_intrinsic::(args, dest)?, - sym::maxnumf128 => self.float_max_intrinsic::(args, dest)?, + sym::maxnumf16 => self.float_minmax_intrinsic::(args, MinMax::MaxNum, dest)?, + sym::maxnumf32 => self.float_minmax_intrinsic::(args, MinMax::MaxNum, dest)?, + sym::maxnumf64 => self.float_minmax_intrinsic::(args, MinMax::MaxNum, dest)?, + sym::maxnumf128 => self.float_minmax_intrinsic::(args, MinMax::MaxNum, dest)?, - sym::maximumf16 => self.float_maximum_intrinsic::(args, dest)?, - sym::maximumf32 => self.float_maximum_intrinsic::(args, dest)?, - sym::maximumf64 => self.float_maximum_intrinsic::(args, dest)?, - sym::maximumf128 => self.float_maximum_intrinsic::(args, dest)?, + sym::maximumf16 => self.float_minmax_intrinsic::(args, MinMax::Maximum, dest)?, + sym::maximumf32 => { + self.float_minmax_intrinsic::(args, MinMax::Maximum, dest)? + } + sym::maximumf64 => { + self.float_minmax_intrinsic::(args, MinMax::Maximum, dest)? 
+ } + sym::maximumf128 => self.float_minmax_intrinsic::(args, MinMax::Maximum, dest)?, sym::copysignf16 => self.float_copysign_intrinsic::(args, dest)?, sym::copysignf32 => self.float_copysign_intrinsic::(args, dest)?, @@ -612,14 +645,22 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { dest, rustc_apfloat::Round::NearestTiesToEven, )?, - sym::fmaf16 => self.fma_intrinsic::(args, dest)?, - sym::fmaf32 => self.fma_intrinsic::(args, dest)?, - sym::fmaf64 => self.fma_intrinsic::(args, dest)?, - sym::fmaf128 => self.fma_intrinsic::(args, dest)?, - sym::fmuladdf16 => self.float_muladd_intrinsic::(args, dest)?, - sym::fmuladdf32 => self.float_muladd_intrinsic::(args, dest)?, - sym::fmuladdf64 => self.float_muladd_intrinsic::(args, dest)?, - sym::fmuladdf128 => self.float_muladd_intrinsic::(args, dest)?, + sym::fmaf16 => self.float_muladd_intrinsic::(args, dest, MulAddType::Fused)?, + sym::fmaf32 => self.float_muladd_intrinsic::(args, dest, MulAddType::Fused)?, + sym::fmaf64 => self.float_muladd_intrinsic::(args, dest, MulAddType::Fused)?, + sym::fmaf128 => self.float_muladd_intrinsic::(args, dest, MulAddType::Fused)?, + sym::fmuladdf16 => { + self.float_muladd_intrinsic::(args, dest, MulAddType::Nondeterministic)? + } + sym::fmuladdf32 => { + self.float_muladd_intrinsic::(args, dest, MulAddType::Nondeterministic)? + } + sym::fmuladdf64 => { + self.float_muladd_intrinsic::(args, dest, MulAddType::Nondeterministic)? + } + sym::fmuladdf128 => { + self.float_muladd_intrinsic::(args, dest, MulAddType::Nondeterministic)? + } // Unsupported intrinsic: skip the return_to_block below. 
_ => return interp_ok(false), @@ -901,76 +942,45 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { interp_ok(Scalar::from_bool(lhs_bytes == rhs_bytes)) } - fn float_min_intrinsic( - &mut self, - args: &[OpTy<'tcx, M::Provenance>], - dest: &PlaceTy<'tcx, M::Provenance>, - ) -> InterpResult<'tcx, ()> - where - F: rustc_apfloat::Float + rustc_apfloat::FloatConvert + Into>, - { - let a: F = self.read_scalar(&args[0])?.to_float()?; - let b: F = self.read_scalar(&args[1])?.to_float()?; - let res = if a == b { - // They are definitely not NaN (those are never equal), but they could be `+0` and `-0`. - // Let the machine decide which one to return. - M::equal_float_min_max(self, a, b) - } else { - self.adjust_nan(a.min(b), &[a, b]) - }; - self.write_scalar(res, dest)?; - interp_ok(()) - } - - fn float_max_intrinsic( - &mut self, - args: &[OpTy<'tcx, M::Provenance>], - dest: &PlaceTy<'tcx, M::Provenance>, - ) -> InterpResult<'tcx, ()> + fn float_minmax( + &self, + a: Scalar, + b: Scalar, + op: MinMax, + ) -> InterpResult<'tcx, Scalar> where F: rustc_apfloat::Float + rustc_apfloat::FloatConvert + Into>, { - let a: F = self.read_scalar(&args[0])?.to_float()?; - let b: F = self.read_scalar(&args[1])?.to_float()?; - let res = if a == b { + let a: F = a.to_float()?; + let b: F = b.to_float()?; + let res = if matches!(op, MinMax::MinNum | MinMax::MaxNum) && a == b { // They are definitely not NaN (those are never equal), but they could be `+0` and `-0`. // Let the machine decide which one to return. 
M::equal_float_min_max(self, a, b) } else { - self.adjust_nan(a.max(b), &[a, b]) + let result = match op { + MinMax::Minimum => a.minimum(b), + MinMax::MinNum => a.min(b), + MinMax::Maximum => a.maximum(b), + MinMax::MaxNum => a.max(b), + }; + self.adjust_nan(result, &[a, b]) }; - self.write_scalar(res, dest)?; - interp_ok(()) - } - fn float_minimum_intrinsic( - &mut self, - args: &[OpTy<'tcx, M::Provenance>], - dest: &PlaceTy<'tcx, M::Provenance>, - ) -> InterpResult<'tcx, ()> - where - F: rustc_apfloat::Float + rustc_apfloat::FloatConvert + Into>, - { - let a: F = self.read_scalar(&args[0])?.to_float()?; - let b: F = self.read_scalar(&args[1])?.to_float()?; - let res = a.minimum(b); - let res = self.adjust_nan(res, &[a, b]); - self.write_scalar(res, dest)?; - interp_ok(()) + interp_ok(res.into()) } - fn float_maximum_intrinsic( + fn float_minmax_intrinsic( &mut self, args: &[OpTy<'tcx, M::Provenance>], + op: MinMax, dest: &PlaceTy<'tcx, M::Provenance>, ) -> InterpResult<'tcx, ()> where F: rustc_apfloat::Float + rustc_apfloat::FloatConvert + Into>, { - let a: F = self.read_scalar(&args[0])?.to_float()?; - let b: F = self.read_scalar(&args[1])?.to_float()?; - let res = a.maximum(b); - let res = self.adjust_nan(res, &[a, b]); + let res = + self.float_minmax::(self.read_scalar(&args[0])?, self.read_scalar(&args[1])?, op)?; self.write_scalar(res, dest)?; interp_ok(()) } @@ -1004,56 +1014,69 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { interp_ok(()) } - fn float_round_intrinsic( + fn float_round( &mut self, - args: &[OpTy<'tcx, M::Provenance>], - dest: &PlaceTy<'tcx, M::Provenance>, + x: Scalar, mode: rustc_apfloat::Round, - ) -> InterpResult<'tcx, ()> + ) -> InterpResult<'tcx, Scalar> where F: rustc_apfloat::Float + rustc_apfloat::FloatConvert + Into>, { - let x: F = self.read_scalar(&args[0])?.to_float()?; + let x: F = x.to_float()?; let res = x.round_to_integral(mode).value; let res = self.adjust_nan(res, &[x]); - self.write_scalar(res, dest)?; - 
interp_ok(()) + interp_ok(res.into()) } - fn fma_intrinsic( + fn float_round_intrinsic( &mut self, args: &[OpTy<'tcx, M::Provenance>], dest: &PlaceTy<'tcx, M::Provenance>, + mode: rustc_apfloat::Round, ) -> InterpResult<'tcx, ()> where F: rustc_apfloat::Float + rustc_apfloat::FloatConvert + Into>, { - let a: F = self.read_scalar(&args[0])?.to_float()?; - let b: F = self.read_scalar(&args[1])?.to_float()?; - let c: F = self.read_scalar(&args[2])?.to_float()?; - - let res = a.mul_add(b, c).value; - let res = self.adjust_nan(res, &[a, b, c]); + let res = self.float_round::(self.read_scalar(&args[0])?, mode)?; self.write_scalar(res, dest)?; interp_ok(()) } + fn float_muladd( + &self, + a: Scalar, + b: Scalar, + c: Scalar, + typ: MulAddType, + ) -> InterpResult<'tcx, Scalar> + where + F: rustc_apfloat::Float + rustc_apfloat::FloatConvert + Into>, + { + let a: F = a.to_float()?; + let b: F = b.to_float()?; + let c: F = c.to_float()?; + + let fuse = typ == MulAddType::Fused || M::float_fuse_mul_add(self); + + let res = if fuse { a.mul_add(b, c).value } else { ((a * b).value + c).value }; + let res = self.adjust_nan(res, &[a, b, c]); + interp_ok(res.into()) + } + fn float_muladd_intrinsic( &mut self, args: &[OpTy<'tcx, M::Provenance>], dest: &PlaceTy<'tcx, M::Provenance>, + typ: MulAddType, ) -> InterpResult<'tcx, ()> where F: rustc_apfloat::Float + rustc_apfloat::FloatConvert + Into>, { - let a: F = self.read_scalar(&args[0])?.to_float()?; - let b: F = self.read_scalar(&args[1])?.to_float()?; - let c: F = self.read_scalar(&args[2])?.to_float()?; - - let fuse = M::float_fuse_mul_add(self); + let a = self.read_scalar(&args[0])?; + let b = self.read_scalar(&args[1])?; + let c = self.read_scalar(&args[2])?; - let res = if fuse { a.mul_add(b, c).value } else { ((a * b).value + c).value }; - let res = self.adjust_nan(res, &[a, b, c]); + let res = self.float_muladd::(a, b, c, typ)?; self.write_scalar(res, dest)?; interp_ok(()) } diff --git 
a/compiler/rustc_const_eval/src/interpret/intrinsics/simd.rs b/compiler/rustc_const_eval/src/interpret/intrinsics/simd.rs index 0dba66ae93721..d39005b98731c 100644 --- a/compiler/rustc_const_eval/src/interpret/intrinsics/simd.rs +++ b/compiler/rustc_const_eval/src/interpret/intrinsics/simd.rs @@ -1,5 +1,6 @@ use either::Either; use rustc_abi::Endian; +use rustc_apfloat::ieee::{Double, Half, Quad, Single}; use rustc_apfloat::{Float, Round}; use rustc_middle::mir::interpret::{InterpErrorKind, UndefinedBehaviorInfo}; use rustc_middle::ty::FloatTy; @@ -8,17 +9,11 @@ use rustc_span::{Symbol, sym}; use tracing::trace; use super::{ - ImmTy, InterpCx, InterpResult, Machine, OpTy, PlaceTy, Provenance, Scalar, Size, interp_ok, - throw_ub_format, + ImmTy, InterpCx, InterpResult, Machine, MinMax, MulAddType, OpTy, PlaceTy, Provenance, Scalar, + Size, interp_ok, throw_ub_format, }; use crate::interpret::Writeable; -#[derive(Copy, Clone)] -pub(crate) enum MinMax { - Min, - Max, -} - impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// Returns `true` if emulation happened. 
/// Here we implement the intrinsics that are common to all CTFE instances; individual machines can add their own @@ -125,10 +120,10 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { let op = op.to_scalar(); // "Bitwise" operation, no NaN adjustments match float_ty { - FloatTy::F16 => unimplemented!("f16_f128"), + FloatTy::F16 => Scalar::from_f16(op.to_f16()?.abs()), FloatTy::F32 => Scalar::from_f32(op.to_f32()?.abs()), FloatTy::F64 => Scalar::from_f64(op.to_f64()?.abs()), - FloatTy::F128 => unimplemented!("f16_f128"), + FloatTy::F128 => Scalar::from_f128(op.to_f128()?.abs()), } } Op::Round(rounding) => { @@ -139,21 +134,12 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { intrinsic_name ) }; + let op = op.to_scalar(); match float_ty { - FloatTy::F16 => unimplemented!("f16_f128"), - FloatTy::F32 => { - let f = op.to_scalar().to_f32()?; - let res = f.round_to_integral(rounding).value; - let res = self.adjust_nan(res, &[f]); - Scalar::from_f32(res) - } - FloatTy::F64 => { - let f = op.to_scalar().to_f64()?; - let res = f.round_to_integral(rounding).value; - let res = self.adjust_nan(res, &[f]); - Scalar::from_f64(res) - } - FloatTy::F128 => unimplemented!("f16_f128"), + FloatTy::F16 => self.float_round::(op, rounding)?, + FloatTy::F32 => self.float_round::(op, rounding)?, + FloatTy::F64 => self.float_round::(op, rounding)?, + FloatTy::F128 => self.float_round::(op, rounding)?, } } Op::Numeric(name) => { @@ -216,8 +202,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { sym::simd_le => Op::MirOp(BinOp::Le), sym::simd_gt => Op::MirOp(BinOp::Gt), sym::simd_ge => Op::MirOp(BinOp::Ge), - sym::simd_fmax => Op::FMinMax(MinMax::Max), - sym::simd_fmin => Op::FMinMax(MinMax::Min), + sym::simd_fmax => Op::FMinMax(MinMax::MaxNum), + sym::simd_fmin => Op::FMinMax(MinMax::MinNum), sym::simd_saturating_add => Op::SaturatingOp(BinOp::Add), sym::simd_saturating_sub => Op::SaturatingOp(BinOp::Sub), sym::simd_arith_offset => Op::WrappingOffset, @@ -309,8 +295,8 @@ impl<'tcx, M: 
Machine<'tcx>> InterpCx<'tcx, M> { sym::simd_reduce_xor => Op::MirOp(BinOp::BitXor), sym::simd_reduce_any => Op::MirOpBool(BinOp::BitOr), sym::simd_reduce_all => Op::MirOpBool(BinOp::BitAnd), - sym::simd_reduce_max => Op::MinMax(MinMax::Max), - sym::simd_reduce_min => Op::MinMax(MinMax::Min), + sym::simd_reduce_max => Op::MinMax(MinMax::MaxNum), + sym::simd_reduce_min => Op::MinMax(MinMax::MinNum), _ => unreachable!(), }; @@ -334,8 +320,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { } else { - // Just boring integers, so NaNs to worry about + // Just boring integers, so no NaNs to worry about let mirop = match mmop { - MinMax::Min => BinOp::Le, - MinMax::Max => BinOp::Ge, + MinMax::MinNum | MinMax::Minimum => BinOp::Le, + MinMax::MaxNum | MinMax::Maximum => BinOp::Ge, }; if self.binary_op(mirop, &res, &op)?.to_scalar().to_bool()? { res @@ -701,6 +687,43 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { }; } } + sym::simd_fma | sym::simd_relaxed_fma => { + // `simd_fma` should always deterministically use `mul_add`, whereas `relaxed_fma` + // is non-deterministic, and can use either `mul_add` or `a * b + c` + let typ = match intrinsic_name { + sym::simd_fma => MulAddType::Fused, + sym::simd_relaxed_fma => MulAddType::Nondeterministic, + _ => unreachable!(), + }; + + let (a, a_len) = self.project_to_simd(&args[0])?; + let (b, b_len) = self.project_to_simd(&args[1])?; + let (c, c_len) = self.project_to_simd(&args[2])?; + let (dest, dest_len) = self.project_to_simd(&dest)?; + + assert_eq!(dest_len, a_len); + assert_eq!(dest_len, b_len); + assert_eq!(dest_len, c_len); + + for i in 0..dest_len { + let a = self.read_scalar(&self.project_index(&a, i)?)?; + let b = self.read_scalar(&self.project_index(&b, i)?)?; + let c = self.read_scalar(&self.project_index(&c, i)?)?; + let dest = self.project_index(&dest, i)?; + + let ty::Float(float_ty) = dest.layout.ty.kind() else { + span_bug!(self.cur_span(), "{} operand is not a float", intrinsic_name) + }; + + let val = match float_ty { + FloatTy::F16 => self.float_muladd::(a, 
b, c, typ)?, + FloatTy::F32 => self.float_muladd::(a, b, c, typ)?, + FloatTy::F64 => self.float_muladd::(a, b, c, typ)?, + FloatTy::F128 => self.float_muladd::(a, b, c, typ)?, + }; + self.write_scalar(val, &dest)?; + } + } // Unsupported intrinsic: skip the return_to_block below. _ => return interp_ok(false), @@ -711,12 +734,12 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { interp_ok(true) } - fn fminmax_op( + fn fminmax_op( &self, op: MinMax, - left: &ImmTy<'tcx, Prov>, - right: &ImmTy<'tcx, Prov>, - ) -> InterpResult<'tcx, Scalar> { + left: &ImmTy<'tcx, M::Provenance>, + right: &ImmTy<'tcx, M::Provenance>, + ) -> InterpResult<'tcx, Scalar> { assert_eq!(left.layout.ty, right.layout.ty); let ty::Float(float_ty) = left.layout.ty.kind() else { bug!("fmax operand is not a float") @@ -724,28 +747,10 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { let left = left.to_scalar(); let right = right.to_scalar(); interp_ok(match float_ty { - FloatTy::F16 => unimplemented!("f16_f128"), - FloatTy::F32 => { - let left = left.to_f32()?; - let right = right.to_f32()?; - let res = match op { - MinMax::Min => left.min(right), - MinMax::Max => left.max(right), - }; - let res = self.adjust_nan(res, &[left, right]); - Scalar::from_f32(res) - } - FloatTy::F64 => { - let left = left.to_f64()?; - let right = right.to_f64()?; - let res = match op { - MinMax::Min => left.min(right), - MinMax::Max => left.max(right), - }; - let res = self.adjust_nan(res, &[left, right]); - Scalar::from_f64(res) - } - FloatTy::F128 => unimplemented!("f16_f128"), + FloatTy::F16 => self.float_minmax::(left, right, op)?, + FloatTy::F32 => self.float_minmax::(left, right, op)?, + FloatTy::F64 => self.float_minmax::(left, right, op)?, + FloatTy::F128 => self.float_minmax::(left, right, op)?, }) } } diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs index 1725635e0b479..236c35ec7b96a 100644 --- 
a/compiler/rustc_const_eval/src/interpret/machine.rs +++ b/compiler/rustc_const_eval/src/interpret/machine.rs @@ -290,7 +290,7 @@ pub trait Machine<'tcx>: Sized { } /// Determines whether the `fmuladd` intrinsics fuse the multiply-add or use separate operations. - fn float_fuse_mul_add(_ecx: &mut InterpCx<'tcx, Self>) -> bool; + fn float_fuse_mul_add(_ecx: &InterpCx<'tcx, Self>) -> bool; /// Called before a basic block terminator is executed. #[inline] @@ -676,7 +676,7 @@ pub macro compile_time_machine(<$tcx: lifetime>) { } #[inline(always)] - fn float_fuse_mul_add(_ecx: &mut InterpCx<$tcx, Self>) -> bool { + fn float_fuse_mul_add(_ecx: &InterpCx<$tcx, Self>) -> bool { true } diff --git a/src/tools/miri/src/intrinsics/simd.rs b/src/tools/miri/src/intrinsics/simd.rs index 5f75657e0a220..2246edb9a6df2 100644 --- a/src/tools/miri/src/intrinsics/simd.rs +++ b/src/tools/miri/src/intrinsics/simd.rs @@ -1,5 +1,3 @@ -use rand::Rng; -use rustc_apfloat::Float; use rustc_middle::ty::FloatTy; use rustc_middle::ty; @@ -83,62 +81,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this.write_scalar(val, &dest)?; } } - "fma" | "relaxed_fma" => { - let [a, b, c] = check_intrinsic_arg_count(args)?; - let (a, a_len) = this.project_to_simd(a)?; - let (b, b_len) = this.project_to_simd(b)?; - let (c, c_len) = this.project_to_simd(c)?; - let (dest, dest_len) = this.project_to_simd(dest)?; - - assert_eq!(dest_len, a_len); - assert_eq!(dest_len, b_len); - assert_eq!(dest_len, c_len); - - for i in 0..dest_len { - let a = this.read_scalar(&this.project_index(&a, i)?)?; - let b = this.read_scalar(&this.project_index(&b, i)?)?; - let c = this.read_scalar(&this.project_index(&c, i)?)?; - let dest = this.project_index(&dest, i)?; - - let fuse: bool = intrinsic_name == "fma" - || (this.machine.float_nondet && this.machine.rng.get_mut().random()); - - // Works for f32 and f64. - // FIXME: using host floats to work around https://github.com/rust-lang/miri/issues/2468. 
- let ty::Float(float_ty) = dest.layout.ty.kind() else { - span_bug!(this.cur_span(), "{} operand is not a float", intrinsic_name) - }; - let val = match float_ty { - FloatTy::F16 => unimplemented!("f16_f128"), - FloatTy::F32 => { - let a = a.to_f32()?; - let b = b.to_f32()?; - let c = c.to_f32()?; - let res = if fuse { - a.mul_add(b, c).value - } else { - ((a * b).value + c).value - }; - let res = this.adjust_nan(res, &[a, b, c]); - Scalar::from(res) - } - FloatTy::F64 => { - let a = a.to_f64()?; - let b = b.to_f64()?; - let c = c.to_f64()?; - let res = if fuse { - a.mul_add(b, c).value - } else { - ((a * b).value + c).value - }; - let res = this.adjust_nan(res, &[a, b, c]); - Scalar::from(res) - } - FloatTy::F128 => unimplemented!("f16_f128"), - }; - this.write_scalar(val, &dest)?; - } - } "expose_provenance" => { let [op] = check_intrinsic_arg_count(args)?; let (op, op_len) = this.project_to_simd(op)?; diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 412640a112c09..9e0b6f15acba5 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1294,8 +1294,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { } #[inline(always)] - fn float_fuse_mul_add(ecx: &mut InterpCx<'tcx, Self>) -> bool { - ecx.machine.float_nondet && ecx.machine.rng.get_mut().random() + fn float_fuse_mul_add(ecx: &InterpCx<'tcx, Self>) -> bool { + ecx.machine.float_nondet && ecx.machine.rng.borrow_mut().random() } #[inline(always)] diff --git a/src/tools/miri/tests/pass/intrinsics/portable-simd.rs b/src/tools/miri/tests/pass/intrinsics/portable-simd.rs index e2cd08733af1c..e5e84f0c5aeb3 100644 --- a/src/tools/miri/tests/pass/intrinsics/portable-simd.rs +++ b/src/tools/miri/tests/pass/intrinsics/portable-simd.rs @@ -6,18 +6,143 @@ rustc_attrs, intrinsics, core_intrinsics, - repr_simd + repr_simd, + f16, + f128 )] -#![allow(incomplete_features, internal_features)] +#![allow(incomplete_features, internal_features, non_camel_case_types)] +use 
std::fmt::{self, Debug, Formatter}; use std::intrinsics::simd as intrinsics; use std::ptr; use std::simd::StdFloat; use std::simd::prelude::*; +#[repr(simd, packed)] +#[derive(Copy)] +struct PackedSimd([T; N]); + +impl Clone for PackedSimd { + fn clone(&self) -> Self { + *self + } +} + +impl PartialEq for PackedSimd { + fn eq(&self, other: &Self) -> bool { + self.into_array() == other.into_array() + } +} + +impl Debug for PackedSimd { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Debug::fmt(&self.into_array(), f) + } +} + +type f16x2 = PackedSimd; +type f16x4 = PackedSimd; + +type f128x2 = PackedSimd; +type f128x4 = PackedSimd; + +impl PackedSimd { + fn splat(x: T) -> Self { + Self([x; N]) + } + fn from_array(a: [T; N]) -> Self { + Self(a) + } + fn into_array(self) -> [T; N] { + // as we have `repr(packed)`, there shouldn't be any padding bytes + unsafe { std::mem::transmute_copy(&self) } + } +} + #[rustc_intrinsic] #[rustc_nounwind] pub unsafe fn simd_shuffle_const_generic(x: T, y: T) -> U; +pub fn simd_ops_f16() { + use intrinsics::*; + + // small hack to make type inference better + macro_rules! 
assert_eq { + ($a:expr, $b:expr $(,$t:tt)*) => { + ::std::assert_eq!($b, $a $(,$t)*) + } + } + + let a = f16x4::splat(10.0); + let b = f16x4::from_array([1.0, 2.0, 3.0, -4.0]); + + unsafe { + assert_eq!(simd_neg(b), f16x4::from_array([-1.0, -2.0, -3.0, 4.0])); + assert_eq!(simd_add(a, b), f16x4::from_array([11.0, 12.0, 13.0, 6.0])); + assert_eq!(simd_sub(a, b), f16x4::from_array([9.0, 8.0, 7.0, 14.0])); + assert_eq!(simd_mul(a, b), f16x4::from_array([10.0, 20.0, 30.0, -40.0])); + assert_eq!(simd_div(b, a), f16x4::from_array([0.1, 0.2, 0.3, -0.4])); + assert_eq!(simd_div(a, f16x4::splat(2.0)), f16x4::splat(5.0)); + assert_eq!(simd_rem(a, b), f16x4::from_array([0.0, 0.0, 1.0, 2.0])); + assert_eq!(simd_fabs(b), f16x4::from_array([1.0, 2.0, 3.0, 4.0])); + assert_eq!( + simd_fmax(a, simd_mul(b, f16x4::splat(4.0))), + f16x4::from_array([10.0, 10.0, 12.0, 10.0]) + ); + assert_eq!( + simd_fmin(a, simd_mul(b, f16x4::splat(4.0))), + f16x4::from_array([4.0, 8.0, 10.0, -16.0]) + ); + + assert_eq!(simd_fma(a, b, a), simd_add(simd_mul(a, b), a)); + assert_eq!(simd_fma(b, b, a), simd_add(simd_mul(b, b), a)); + assert_eq!(simd_fma(a, b, b), simd_add(simd_mul(a, b), b)); + assert_eq!( + simd_fma(f16x4::splat(-3.2), b, f16x4::splat(f16::NEG_INFINITY)), + f16x4::splat(f16::NEG_INFINITY) + ); + + assert_eq!(simd_relaxed_fma(a, b, a), simd_add(simd_mul(a, b), a)); + assert_eq!(simd_relaxed_fma(b, b, a), simd_add(simd_mul(b, b), a)); + assert_eq!(simd_relaxed_fma(a, b, b), simd_add(simd_mul(a, b), b)); + assert_eq!( + simd_relaxed_fma(f16x4::splat(-3.2), b, f16x4::splat(f16::NEG_INFINITY)), + f16x4::splat(f16::NEG_INFINITY) + ); + + assert_eq!(simd_fsqrt(simd_mul(a, a)), a); + assert_eq!(simd_fsqrt(simd_mul(b, b)), simd_fabs(b)); + + assert_eq!(simd_eq(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([0, !0, 0, 0])); + assert_eq!(simd_ne(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, 0, !0, !0])); + assert_eq!(simd_le(a, simd_mul(f16x4::splat(5.0), b)), 
i32x4::from_array([0, !0, !0, 0])); + assert_eq!(simd_lt(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([0, 0, !0, 0])); + assert_eq!(simd_ge(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, !0, 0, !0])); + assert_eq!(simd_gt(a, simd_mul(f16x4::splat(5.0), b)), i32x4::from_array([!0, 0, 0, !0])); + + assert_eq!(simd_reduce_add_ordered(a, 0.0), 40.0f16); + assert_eq!(simd_reduce_add_ordered(b, 0.0), 2.0f16); + assert_eq!(simd_reduce_mul_ordered(a, 1.0), 10000.0f16); + assert_eq!(simd_reduce_mul_ordered(b, 1.0), -24.0f16); + assert_eq!(simd_reduce_max(a), 10.0f16); + assert_eq!(simd_reduce_max(b), 3.0f16); + assert_eq!(simd_reduce_min(a), 10.0f16); + assert_eq!(simd_reduce_min(b), -4.0f16); + + assert_eq!( + simd_fmax(f16x2::from_array([0.0, f16::NAN]), f16x2::from_array([f16::NAN, 0.0])), + f16x2::from_array([0.0, 0.0]) + ); + assert_eq!(simd_reduce_max(f16x2::from_array([0.0, f16::NAN])), 0.0f16); + assert_eq!(simd_reduce_max(f16x2::from_array([f16::NAN, 0.0])), 0.0f16); + assert_eq!( + simd_fmin(f16x2::from_array([0.0, f16::NAN]), f16x2::from_array([f16::NAN, 0.0])), + f16x2::from_array([0.0, 0.0]) + ); + assert_eq!(simd_reduce_min(f16x2::from_array([0.0, f16::NAN])), 0.0f16); + assert_eq!(simd_reduce_min(f16x2::from_array([f16::NAN, 0.0])), 0.0f16); + } +} + fn simd_ops_f32() { let a = f32x4::splat(10.0); let b = f32x4::from_array([1.0, 2.0, 3.0, -4.0]); @@ -148,6 +273,87 @@ fn simd_ops_f64() { assert_eq!(f64x2::from_array([f64::NAN, 0.0]).reduce_min(), 0.0); } +pub fn simd_ops_f128() { + use intrinsics::*; + + // small hack to make type inference better + macro_rules! 
assert_eq { + ($a:expr, $b:expr $(,$t:tt)*) => { + ::std::assert_eq!($b, $a $(,$t)*) + } + } + + let a = f128x4::splat(10.0); + let b = f128x4::from_array([1.0, 2.0, 3.0, -4.0]); + + unsafe { + assert_eq!(simd_neg(b), f128x4::from_array([-1.0, -2.0, -3.0, 4.0])); + assert_eq!(simd_add(a, b), f128x4::from_array([11.0, 12.0, 13.0, 6.0])); + assert_eq!(simd_sub(a, b), f128x4::from_array([9.0, 8.0, 7.0, 14.0])); + assert_eq!(simd_mul(a, b), f128x4::from_array([10.0, 20.0, 30.0, -40.0])); + assert_eq!(simd_div(b, a), f128x4::from_array([0.1, 0.2, 0.3, -0.4])); + assert_eq!(simd_div(a, f128x4::splat(2.0)), f128x4::splat(5.0)); + assert_eq!(simd_rem(a, b), f128x4::from_array([0.0, 0.0, 1.0, 2.0])); + assert_eq!(simd_fabs(b), f128x4::from_array([1.0, 2.0, 3.0, 4.0])); + assert_eq!( + simd_fmax(a, simd_mul(b, f128x4::splat(4.0))), + f128x4::from_array([10.0, 10.0, 12.0, 10.0]) + ); + assert_eq!( + simd_fmin(a, simd_mul(b, f128x4::splat(4.0))), + f128x4::from_array([4.0, 8.0, 10.0, -16.0]) + ); + + assert_eq!(simd_fma(a, b, a), simd_add(simd_mul(a, b), a)); + assert_eq!(simd_fma(b, b, a), simd_add(simd_mul(b, b), a)); + assert_eq!(simd_fma(a, b, b), simd_add(simd_mul(a, b), b)); + assert_eq!( + simd_fma(f128x4::splat(-3.2), b, f128x4::splat(f128::NEG_INFINITY)), + f128x4::splat(f128::NEG_INFINITY) + ); + + assert_eq!(simd_relaxed_fma(a, b, a), simd_add(simd_mul(a, b), a)); + assert_eq!(simd_relaxed_fma(b, b, a), simd_add(simd_mul(b, b), a)); + assert_eq!(simd_relaxed_fma(a, b, b), simd_add(simd_mul(a, b), b)); + assert_eq!( + simd_relaxed_fma(f128x4::splat(-3.2), b, f128x4::splat(f128::NEG_INFINITY)), + f128x4::splat(f128::NEG_INFINITY) + ); + + assert_eq!(simd_fsqrt(simd_mul(a, a)), a); + assert_eq!(simd_fsqrt(simd_mul(b, b)), simd_fabs(b)); + + assert_eq!(simd_eq(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, !0, 0, 0])); + assert_eq!(simd_ne(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, 0, !0, !0])); + assert_eq!(simd_le(a, 
simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, !0, !0, 0])); + assert_eq!(simd_lt(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([0, 0, !0, 0])); + assert_eq!(simd_ge(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, !0, 0, !0])); + assert_eq!(simd_gt(a, simd_mul(f128x4::splat(5.0), b)), i32x4::from_array([!0, 0, 0, !0])); + + assert_eq!(simd_reduce_add_ordered(a, 0.0), 40.0f128); + assert_eq!(simd_reduce_add_ordered(b, 0.0), 2.0f128); + assert_eq!(simd_reduce_mul_ordered(a, 1.0), 10000.0f128); + assert_eq!(simd_reduce_mul_ordered(b, 1.0), -24.0f128); + assert_eq!(simd_reduce_max(a), 10.0f128); + assert_eq!(simd_reduce_max(b), 3.0f128); + assert_eq!(simd_reduce_min(a), 10.0f128); + assert_eq!(simd_reduce_min(b), -4.0f128); + + assert_eq!( + simd_fmax(f128x2::from_array([0.0, f128::NAN]), f128x2::from_array([f128::NAN, 0.0])), + f128x2::from_array([0.0, 0.0]) + ); + assert_eq!(simd_reduce_max(f128x2::from_array([0.0, f128::NAN])), 0.0f128); + assert_eq!(simd_reduce_max(f128x2::from_array([f128::NAN, 0.0])), 0.0f128); + assert_eq!( + simd_fmin(f128x2::from_array([0.0, f128::NAN]), f128x2::from_array([f128::NAN, 0.0])), + f128x2::from_array([0.0, 0.0]) + ); + assert_eq!(simd_reduce_min(f128x2::from_array([0.0, f128::NAN])), 0.0f128); + assert_eq!(simd_reduce_min(f128x2::from_array([f128::NAN, 0.0])), 0.0f128); + } +} + fn simd_ops_i32() { let a = i32x4::splat(10); let b = i32x4::from_array([1, 2, 3, -4]); @@ -563,6 +769,31 @@ fn simd_gather_scatter() { } fn simd_round() { + unsafe { + use intrinsics::*; + + assert_eq!( + simd_ceil(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([1.0, 2.0, 2.0, -4.0]) + ); + assert_eq!( + simd_floor(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([0.0, 1.0, 2.0, -5.0]) + ); + assert_eq!( + simd_round(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([1.0, 1.0, 2.0, -5.0]) + ); + assert_eq!( + simd_round_ties_even(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + 
f16x4::from_array([1.0, 1.0, 2.0, -4.0]) + ); + assert_eq!( + simd_trunc(f16x4::from_array([0.9, 1.001, 2.0, -4.5])), + f16x4::from_array([0.0, 1.0, 2.0, -4.0]) + ); + } + assert_eq!( f32x4::from_array([0.9, 1.001, 2.0, -4.5]).ceil(), f32x4::from_array([1.0, 2.0, 2.0, -4.0]) @@ -604,6 +835,31 @@ fn simd_round() { f64x4::from_array([0.9, 1.001, 2.0, -4.5]).trunc(), f64x4::from_array([0.0, 1.0, 2.0, -4.0]) ); + + unsafe { + use intrinsics::*; + + assert_eq!( + simd_ceil(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([1.0, 2.0, 2.0, -4.0]) + ); + assert_eq!( + simd_floor(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([0.0, 1.0, 2.0, -5.0]) + ); + assert_eq!( + simd_round(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([1.0, 1.0, 2.0, -5.0]) + ); + assert_eq!( + simd_round_ties_even(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([1.0, 1.0, 2.0, -4.0]) + ); + assert_eq!( + simd_trunc(f128x4::from_array([0.9, 1.001, 2.0, -4.5])), + f128x4::from_array([0.0, 1.0, 2.0, -4.0]) + ); + } } fn simd_intrinsics() {