From 89f9da2056d88ab96faf95901687cb772d2dc10c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 10 Oct 2023 18:29:07 +0200 Subject: [PATCH 1/4] Bump stdarch submodule --- library/stdarch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/stdarch b/library/stdarch index 333e9e9977188..f4528dd6e85d9 160000 --- a/library/stdarch +++ b/library/stdarch @@ -1 +1 @@ -Subproject commit 333e9e9977188d0748327e9b5be0f3f412063174 +Subproject commit f4528dd6e85d97bb802240d7cd048b6e1bf72540 From b57a1570920623d57c9e47b668896fe288d4f6e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 10 Oct 2023 18:35:28 +0200 Subject: [PATCH 2/4] Remove from cranelift codegen LLVM intrinsics that are no longer needed --- .../src/intrinsics/llvm_x86.rs | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs index 0c9a94e1c231f..559c64bb13bdd 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs @@ -32,41 +32,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ret.write_cvalue(fx, CValue::by_val(res, fx.layout_of(fx.tcx.types.i64))); } - // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8` - "llvm.x86.sse2.pmovmskb.128" - | "llvm.x86.avx2.pmovmskb" - | "llvm.x86.sse.movmsk.ps" - | "llvm.x86.sse2.movmsk.pd" => { - intrinsic_args!(fx, args => (a); intrinsic); - - let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); - let lane_ty = fx.clif_type(lane_ty).unwrap(); - assert!(lane_count <= 32); - - let mut res = fx.bcx.ins().iconst(types::I32, 0); - - for lane in (0..lane_count).rev() { - let a_lane = a.value_lane(fx, lane).load_scalar(fx); - - // cast float to int - let a_lane = match lane_ty { - types::F32 => codegen_bitcast(fx, types::I32, a_lane), - types::F64 => codegen_bitcast(fx, types::I64, a_lane), - _ => a_lane, - }; - - // extract sign bit of an int - let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1)); - - // shift sign bit into result - let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false); - res = fx.bcx.ins().ishl_imm(res, 1); - res = fx.bcx.ins().bor(res, a_lane_sign); - } - - let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32)); - ret.write_cvalue(fx, res); - } "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => { let (x, y, kind) = match args { [x, y, kind] => (x, y, kind), From 337af7caf76e14051721943fb67484ad8a6cff35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 10 Oct 2023 18:43:42 +0200 Subject: [PATCH 3/4] Remove from miri LLVM intrinsics that are no longer needed --- src/tools/miri/src/shims/x86/sse.rs | 19 --- src/tools/miri/src/shims/x86/sse2.rs | 176 +-------------------------- src/tools/miri/src/shims/x86/sse3.rs | 26 ---- 3 files changed, 1 insertion(+), 220 deletions(-) diff --git a/src/tools/miri/src/shims/x86/sse.rs b/src/tools/miri/src/shims/x86/sse.rs index 6f0b76059f10d..831228b7a26cc 100644 --- a/src/tools/miri/src/shims/x86/sse.rs +++ b/src/tools/miri/src/shims/x86/sse.rs @@ -209,25 +209,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: )?; } } - // Used to implement the _mm_movemask_ps function. - // Returns a scalar integer where the i-th bit is the highest - // bit of the i-th component of `op`. - // https://www.felixcloutier.com/x86/movmskps - "movmsk.ps" => { - let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - let (op, op_len) = this.operand_to_simd(op)?; - - let mut res = 0; - for i in 0..op_len { - let op = this.read_scalar(&this.project_index(&op, i)?)?; - let op = op.to_u32()?; - - // Extract the highest bit of `op` and place it in the `i`-th bit of `res` - res |= (op >> 31) << i; - } - - this.write_scalar(Scalar::from_u32(res), dest)?; - } _ => return Ok(EmulateForeignItemResult::NotSupported), } Ok(EmulateForeignItemResult::NeedsJumping) diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs index c6a847b5cf824..3f2b9f5f0adfe 100644 --- a/src/tools/miri/src/shims/x86/sse2.rs +++ b/src/tools/miri/src/shims/x86/sse2.rs @@ -1,8 +1,4 @@ -use rustc_apfloat::{ - ieee::{Double, Single}, - Float as _, -}; -use rustc_middle::mir; +use rustc_apfloat::ieee::Double; use rustc_middle::ty::layout::LayoutOf as _; use rustc_middle::ty::Ty; use rustc_span::Symbol; @@ -39,49 +35,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: // Intrinsincs sufixed with "epiX" or "epuX" operate with X-bit signed or unsigned // vectors. match unprefixed_name { - // Used to implement the _mm_avg_epu8 and _mm_avg_epu16 functions. - // Averages packed unsigned 8/16-bit integers in `left` and `right`. - "pavg.b" | "pavg.w" => { - let [left, right] = - this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - - let (left, left_len) = this.operand_to_simd(left)?; - let (right, right_len) = this.operand_to_simd(right)?; - let (dest, dest_len) = this.place_to_simd(dest)?; - - assert_eq!(dest_len, left_len); - assert_eq!(dest_len, right_len); - - for i in 0..dest_len { - let left = this.read_immediate(&this.project_index(&left, i)?)?; - let right = this.read_immediate(&this.project_index(&right, i)?)?; - let dest = this.project_index(&dest, i)?; - - // Widen the operands to avoid overflow - let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?; - let left = this.int_to_int_or_float(&left, twice_wide)?; - let right = this.int_to_int_or_float(&right, twice_wide)?; - - // Calculate left + right + 1 - let added = this.wrapping_binary_op(mir::BinOp::Add, &left, &right)?; - let added = this.wrapping_binary_op( - mir::BinOp::Add, - &added, - &ImmTy::from_uint(1u32, twice_wide), - )?; - - // Calculate (left + right + 1) / 2 - let divided = this.wrapping_binary_op( - mir::BinOp::Div, - &added, - &ImmTy::from_uint(2u32, twice_wide), - )?; - - // Narrow back to the original type - let res = this.int_to_int_or_float(÷d, dest.layout)?; - this.write_immediate(*res, &dest)?; - } - } // Used to implement the _mm_madd_epi16 function. // Multiplies packed signed 16-bit integers in `left` and `right`, producing // intermediate signed 32-bit integers. Horizontally add adjacent pairs of @@ -118,70 +71,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this.write_scalar(Scalar::from_i32(res), &dest)?; } } - // Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions. - "pmulh.w" | "pmulhu.w" => { - let [left, right] = - this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - - let (left, left_len) = this.operand_to_simd(left)?; - let (right, right_len) = this.operand_to_simd(right)?; - let (dest, dest_len) = this.place_to_simd(dest)?; - - assert_eq!(dest_len, left_len); - assert_eq!(dest_len, right_len); - - for i in 0..dest_len { - let left = this.read_immediate(&this.project_index(&left, i)?)?; - let right = this.read_immediate(&this.project_index(&right, i)?)?; - let dest = this.project_index(&dest, i)?; - - // Widen the operands to avoid overflow - let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?; - let left = this.int_to_int_or_float(&left, twice_wide)?; - let right = this.int_to_int_or_float(&right, twice_wide)?; - - // Multiply - let multiplied = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?; - // Keep the high half - let high = this.wrapping_binary_op( - mir::BinOp::Shr, - &multiplied, - &ImmTy::from_uint(dest.layout.size.bits(), twice_wide), - )?; - - // Narrow back to the original type - let res = this.int_to_int_or_float(&high, dest.layout)?; - this.write_immediate(*res, &dest)?; - } - } - // Used to implement the _mm_mul_epu32 function. - // Multiplies the the low unsigned 32-bit integers from each packed - // 64-bit element and stores the result as 64-bit unsigned integers. - "pmulu.dq" => { - let [left, right] = - this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - - let (left, left_len) = this.operand_to_simd(left)?; - let (right, right_len) = this.operand_to_simd(right)?; - let (dest, dest_len) = this.place_to_simd(dest)?; - - // left and right are u32x4, dest is u64x2 - assert_eq!(left_len, 4); - assert_eq!(right_len, 4); - assert_eq!(dest_len, 2); - - for i in 0..dest_len { - let op_i = i.checked_mul(2).unwrap(); - let left = this.read_scalar(&this.project_index(&left, op_i)?)?.to_u32()?; - let right = this.read_scalar(&this.project_index(&right, op_i)?)?.to_u32()?; - let dest = this.project_index(&dest, i)?; - - // The multiplication will not overflow because stripping the - // operands are expanded from 32-bit to 64-bit. - let res = u64::from(left).checked_mul(u64::from(right)).unwrap(); - this.write_scalar(Scalar::from_u64(res), &dest)?; - } - } // Used to implement the _mm_sad_epu8 function. // Computes the absolute differences of packed unsigned 8-bit integers in `a` // and `b`, then horizontally sum each consecutive 8 differences to produce @@ -370,25 +259,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this.write_scalar(Scalar::from_u64(res), &dest)?; } } - // Used to implement the _mm_cvtepi32_ps function. - // Converts packed i32 to packed f32. - // FIXME: Can we get rid of this intrinsic and just use simd_as? - "cvtdq2ps" => { - let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - - let (op, op_len) = this.operand_to_simd(op)?; - let (dest, dest_len) = this.place_to_simd(dest)?; - - assert_eq!(dest_len, op_len); - - for i in 0..dest_len { - let op = this.read_scalar(&this.project_index(&op, i)?)?.to_i32()?; - let dest = this.project_index(&dest, i)?; - - let res = Scalar::from_f32(Single::from_i128(op.into()).value); - this.write_scalar(res, &dest)?; - } - } // Used to implement the _mm_cvtps_epi32 and _mm_cvttps_epi32 functions. // Converts packed f32 to packed i32. "cvtps2dq" | "cvttps2dq" => { @@ -652,31 +522,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: }; this.write_scalar(Scalar::from_i32(i32::from(res)), dest)?; } - // Used to implement the _mm_cvtpd_ps and _mm_cvtps_pd functions. - // Converts packed f32/f64 to packed f64/f32. - "cvtpd2ps" | "cvtps2pd" => { - let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - - let (op, op_len) = this.operand_to_simd(op)?; - let (dest, dest_len) = this.place_to_simd(dest)?; - - // For cvtpd2ps: op is f64x2, dest is f32x4 - // For cvtps2pd: op is f32x4, dest is f64x2 - // In either case, the two first values are converted - for i in 0..op_len.min(dest_len) { - let op = this.read_immediate(&this.project_index(&op, i)?)?; - let dest = this.project_index(&dest, i)?; - - let res = this.float_to_float_or_int(&op, dest.layout)?; - this.write_immediate(*res, &dest)?; - } - // For f32 -> f64, ignore the remaining - // For f64 -> f32, fill the remaining with zeros - for i in op_len..dest_len { - let dest = this.project_index(&dest, i)?; - this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?; - } - } // Used to implement the _mm_cvtpd_epi32 and _mm_cvttpd_epi32 functions. // Converts packed f64 to packed i32. "cvtpd2dq" | "cvttpd2dq" => { @@ -772,25 +617,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: )?; } } - // Used to implement the _mm_movemask_pd function. - // Returns a scalar integer where the i-th bit is the highest - // bit of the i-th component of `op`. - // https://www.felixcloutier.com/x86/movmskpd - "movmsk.pd" => { - let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - let (op, op_len) = this.operand_to_simd(op)?; - - let mut res = 0; - for i in 0..op_len { - let op = this.read_scalar(&this.project_index(&op, i)?)?; - let op = op.to_u64()?; - - // Extract the highest bit of `op` and place it in the `i`-th bit of `res` - res |= (op >> 63) << i; - } - - this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?; - } // Used to implement the `_mm_pause` function. // The intrinsic is used to hint the processor that the code is in a spin-loop. "pause" => { diff --git a/src/tools/miri/src/shims/x86/sse3.rs b/src/tools/miri/src/shims/x86/sse3.rs index a41de5dbf7ee5..20a4b560749ac 100644 --- a/src/tools/miri/src/shims/x86/sse3.rs +++ b/src/tools/miri/src/shims/x86/sse3.rs @@ -23,32 +23,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap(); match unprefixed_name { - // Used to implement the _mm_addsub_ps and _mm_addsub_pd functions. - // Alternatingly add and subtract floating point (f32 or f64) from - // `left` and `right` - "addsub.ps" | "addsub.pd" => { - let [left, right] = - this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - - let (left, left_len) = this.operand_to_simd(left)?; - let (right, right_len) = this.operand_to_simd(right)?; - let (dest, dest_len) = this.place_to_simd(dest)?; - - assert_eq!(dest_len, left_len); - assert_eq!(dest_len, right_len); - - for i in 0..dest_len { - let left = this.read_immediate(&this.project_index(&left, i)?)?; - let right = this.read_immediate(&this.project_index(&right, i)?)?; - let dest = this.project_index(&dest, i)?; - - // Even elements are subtracted and odd elements are added. - let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add }; - let res = this.wrapping_binary_op(op, &left, &right)?; - - this.write_immediate(*res, &dest)?; - } - } // Used to implement the _mm_h{add,sub}_p{s,d} functions. // Horizontally add/subtract adjacent floating point values // in `left` and `right`. From 35e2f4e0af4bfce8802239ac498255ee9b02fbb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 10 Oct 2023 18:44:53 +0200 Subject: [PATCH 4/4] Fix identation of a `rustfmt::skip`ed statement --- src/tools/miri/tests/pass/intrinsics-x86-sse2.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs index 2c7665bc73631..e636d6c8aaf8d 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs @@ -117,12 +117,12 @@ mod tests { #[target_feature(enable = "sse2")] unsafe fn test_mm_sad_epu8() { #[rustfmt::skip] - let a = _mm_setr_epi8( - 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, - 1, 2, 3, 4, - 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, - 1, 2, 3, 4, - ); + let a = _mm_setr_epi8( + 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, + 1, 2, 3, 4, + 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, + 1, 2, 3, 4, + ); let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); let r = _mm_sad_epu8(a, b); let e = _mm_setr_epi64x(1020, 614);