diff --git a/src/shims/x86/avx.rs b/src/shims/x86/avx.rs index 636d308d78..cda9dbde04 100644 --- a/src/shims/x86/avx.rs +++ b/src/shims/x86/avx.rs @@ -6,7 +6,7 @@ use rustc_target::callconv::FnAbi; use super::{ FloatBinOp, FloatUnaryOp, bin_op_simd_float_all, conditional_dot_product, convert_float_to_int, - mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps, + round_all, test_bits_masked, test_high_bits_masked, unary_op_ps, }; use crate::*; @@ -200,27 +200,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { )?; } } - // Used to implement the _mm_maskload_ps, _mm_maskload_pd, _mm256_maskload_ps - // and _mm256_maskload_pd functions. - // For the element `i`, if the high bit of the `i`-th element of `mask` - // is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is - // loaded. - "maskload.ps" | "maskload.pd" | "maskload.ps.256" | "maskload.pd.256" => { - let [ptr, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; - - mask_load(this, ptr, mask, dest)?; - } - // Used to implement the _mm_maskstore_ps, _mm_maskstore_pd, _mm256_maskstore_ps - // and _mm256_maskstore_pd functions. - // For the element `i`, if the high bit of the element `i`-th of `mask` - // is one, it is stored into `ptr.wapping_add(i)`. - // Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores. - "maskstore.ps" | "maskstore.pd" | "maskstore.ps.256" | "maskstore.pd.256" => { - let [ptr, mask, value] = - this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; - - mask_store(this, ptr, mask, value)?; - } // Used to implement the _mm256_lddqu_si256 function. // Reads a 256-bit vector from an unaligned pointer. This intrinsic // is expected to perform better than a regular unaligned read when diff --git a/src/shims/x86/avx2.rs b/src/shims/x86/avx2.rs index cf96a61ff0..8fe225c494 100644 --- a/src/shims/x86/avx2.rs +++ b/src/shims/x86/avx2.rs @@ -5,8 +5,8 @@ use rustc_span::Symbol; use rustc_target::callconv::FnAbi; use super::{ - ShiftOp, horizontal_bin_op, mask_load, mask_store, mpsadbw, packssdw, packsswb, packusdw, - packuswb, permute, pmaddbw, pmulhrsw, psadbw, psign, shift_simd_by_scalar, shift_simd_by_simd, + ShiftOp, horizontal_bin_op, mpsadbw, packssdw, packsswb, packusdw, packuswb, permute, pmaddbw, + pmulhrsw, psadbw, psign, shift_simd_by_scalar, }; use crate::*; @@ -108,27 +108,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { pmaddbw(this, left, right, dest)?; } - // Used to implement the _mm_maskload_epi32, _mm_maskload_epi64, - // _mm256_maskload_epi32 and _mm256_maskload_epi64 functions. - // For the element `i`, if the high bit of the `i`-th element of `mask` - // is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is - // loaded. - "maskload.d" | "maskload.q" | "maskload.d.256" | "maskload.q.256" => { - let [ptr, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; - - mask_load(this, ptr, mask, dest)?; - } - // Used to implement the _mm_maskstore_epi32, _mm_maskstore_epi64, - // _mm256_maskstore_epi32 and _mm256_maskstore_epi64 functions. - // For the element `i`, if the high bit of the element `i`-th of `mask` - // is one, it is stored into `ptr.wapping_add(i)`. - // Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores. - "maskstore.d" | "maskstore.q" | "maskstore.d.256" | "maskstore.q.256" => { - let [ptr, mask, value] = - this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; - - mask_store(this, ptr, mask, value)?; - } // Used to implement the _mm256_mpsadbw_epu8 function. // Compute the sum of absolute differences of quadruplets of unsigned // 8-bit integers in `left` and `right`, and store the 16-bit results @@ -266,22 +245,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { shift_simd_by_scalar(this, left, right, which, dest)?; } - // Used to implement the _mm{,256}_{sllv,srlv,srav}_epi{32,64} functions - // (except _mm{,256}_srav_epi64, which are not available in AVX2). - "psllv.d" | "psllv.d.256" | "psllv.q" | "psllv.q.256" | "psrlv.d" | "psrlv.d.256" - | "psrlv.q" | "psrlv.q.256" | "psrav.d" | "psrav.d.256" => { - let [left, right] = - this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; - - let which = match unprefixed_name { - "psllv.d" | "psllv.d.256" | "psllv.q" | "psllv.q.256" => ShiftOp::Left, - "psrlv.d" | "psrlv.d.256" | "psrlv.q" | "psrlv.q.256" => ShiftOp::RightLogic, - "psrav.d" | "psrav.d.256" => ShiftOp::RightArith, - _ => unreachable!(), - }; - - shift_simd_by_simd(this, left, right, which, dest)?; - } _ => return interp_ok(EmulateItemResult::NotSupported), } interp_ok(EmulateItemResult::NeedsReturn) diff --git a/src/shims/x86/mod.rs b/src/shims/x86/mod.rs index febfc5afa2..40dcd7ac1c 100644 --- a/src/shims/x86/mod.rs +++ b/src/shims/x86/mod.rs @@ -518,61 +518,6 @@ fn shift_simd_by_scalar<'tcx>( interp_ok(()) } -/// Shifts each element of `left` by the corresponding element of `right`. -/// -/// For logic shifts, when right is larger than BITS - 1, zero is produced. -/// For arithmetic right-shifts, when right is larger than BITS - 1, the sign -/// bit is copied to all bits. -fn shift_simd_by_simd<'tcx>( - ecx: &mut crate::MiriInterpCx<'tcx>, - left: &OpTy<'tcx>, - right: &OpTy<'tcx>, - which: ShiftOp, - dest: &MPlaceTy<'tcx>, -) -> InterpResult<'tcx, ()> { - let (left, left_len) = ecx.project_to_simd(left)?; - let (right, right_len) = ecx.project_to_simd(right)?; - let (dest, dest_len) = ecx.project_to_simd(dest)?; - - assert_eq!(dest_len, left_len); - assert_eq!(dest_len, right_len); - - for i in 0..dest_len { - let left = ecx.read_scalar(&ecx.project_index(&left, i)?)?; - let right = ecx.read_scalar(&ecx.project_index(&right, i)?)?; - let dest = ecx.project_index(&dest, i)?; - - // It is ok to saturate the value to u32::MAX because any value - // above BITS - 1 will produce the same result. - let shift = u32::try_from(right.to_uint(dest.layout.size)?).unwrap_or(u32::MAX); - - let res = match which { - ShiftOp::Left => { - let left = left.to_uint(dest.layout.size)?; - let res = left.checked_shl(shift).unwrap_or(0); - // `truncate` is needed as left-shift can make the absolute value larger. - Scalar::from_uint(dest.layout.size.truncate(res), dest.layout.size) - } - ShiftOp::RightLogic => { - let left = left.to_uint(dest.layout.size)?; - let res = left.checked_shr(shift).unwrap_or(0); - // No `truncate` needed as right-shift can only make the absolute value smaller. - Scalar::from_uint(res, dest.layout.size) - } - ShiftOp::RightArith => { - let left = left.to_int(dest.layout.size)?; - // On overflow, copy the sign bit to the remaining bits - let res = left.checked_shr(shift).unwrap_or(left >> 127); - // No `truncate` needed as right-shift can only make the absolute value smaller. - Scalar::from_int(res, dest.layout.size) - } - }; - ecx.write_scalar(res, &dest)?; - } - - interp_ok(()) -} - /// Takes a 128-bit vector, transmutes it to `[u64; 2]` and extracts /// the first value. fn extract_first_u64<'tcx>( @@ -912,73 +857,6 @@ fn test_high_bits_masked<'tcx>( interp_ok((direct, negated)) } -/// Conditionally loads from `ptr` according the high bit of each -/// element of `mask`. `ptr` does not need to be aligned. -fn mask_load<'tcx>( - ecx: &mut crate::MiriInterpCx<'tcx>, - ptr: &OpTy<'tcx>, - mask: &OpTy<'tcx>, - dest: &MPlaceTy<'tcx>, -) -> InterpResult<'tcx, ()> { - let (mask, mask_len) = ecx.project_to_simd(mask)?; - let (dest, dest_len) = ecx.project_to_simd(dest)?; - - assert_eq!(dest_len, mask_len); - - let mask_item_size = mask.layout.field(ecx, 0).size; - let high_bit_offset = mask_item_size.bits().strict_sub(1); - - let ptr = ecx.read_pointer(ptr)?; - for i in 0..dest_len { - let mask = ecx.project_index(&mask, i)?; - let dest = ecx.project_index(&dest, i)?; - - if ecx.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 { - let ptr = ptr.wrapping_offset(dest.layout.size * i, &ecx.tcx); - // Unaligned copy, which is what we want. - ecx.mem_copy(ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?; - } else { - ecx.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?; - } - } - - interp_ok(()) -} - -/// Conditionally stores into `ptr` according the high bit of each -/// element of `mask`. `ptr` does not need to be aligned. -fn mask_store<'tcx>( - ecx: &mut crate::MiriInterpCx<'tcx>, - ptr: &OpTy<'tcx>, - mask: &OpTy<'tcx>, - value: &OpTy<'tcx>, -) -> InterpResult<'tcx, ()> { - let (mask, mask_len) = ecx.project_to_simd(mask)?; - let (value, value_len) = ecx.project_to_simd(value)?; - - assert_eq!(value_len, mask_len); - - let mask_item_size = mask.layout.field(ecx, 0).size; - let high_bit_offset = mask_item_size.bits().strict_sub(1); - - let ptr = ecx.read_pointer(ptr)?; - for i in 0..value_len { - let mask = ecx.project_index(&mask, i)?; - let value = ecx.project_index(&value, i)?; - - if ecx.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 { - // *Non-inbounds* pointer arithmetic to compute the destination. - // (That's why we can't use a place projection.) - let ptr = ptr.wrapping_offset(value.layout.size * i, &ecx.tcx); - // Deref the pointer *unaligned*, and do the copy. - let dest = ecx.ptr_to_mplace_unaligned(ptr, value.layout); - ecx.copy_op(&value, &dest)?; - } - } - - interp_ok(()) -} - /// Compute the sum of absolute differences of quadruplets of unsigned /// 8-bit integers in `left` and `right`, and store the 16-bit results /// in `right`. Quadruplets are selected from `left` and `right` with