rust-lang · RalfJung · Nov 24, 2025 · Nov 24, 2025
diff --git a/src/shims/x86/avx.rs b/src/shims/x86/avx.rs
@@ -6,7 +6,7 @@ use rustc_target::callconv::FnAbi;
 
 use super::{
     FloatBinOp, FloatUnaryOp, bin_op_simd_float_all, conditional_dot_product, convert_float_to_int,
-    mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
+    round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
 };
 use crate::*;
 
@@ -200,27 +200,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     )?;
                 }
             }
-            // Used to implement the _mm_maskload_ps, _mm_maskload_pd, _mm256_maskload_ps
-            // and _mm256_maskload_pd functions.
-            // For the element `i`, if the high bit of the `i`-th element of `mask`
-            // is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is
-            // loaded.
-            "maskload.ps" | "maskload.pd" | "maskload.ps.256" | "maskload.pd.256" => {
-                let [ptr, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                mask_load(this, ptr, mask, dest)?;
-            }
-            // Used to implement the _mm_maskstore_ps, _mm_maskstore_pd, _mm256_maskstore_ps
-            // and _mm256_maskstore_pd functions.
-            // For the element `i`, if the high bit of the element `i`-th of `mask`
-            // is one, it is stored into `ptr.wapping_add(i)`.
-            // Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores.
-            "maskstore.ps" | "maskstore.pd" | "maskstore.ps.256" | "maskstore.pd.256" => {
-                let [ptr, mask, value] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                mask_store(this, ptr, mask, value)?;
-            }
             // Used to implement the _mm256_lddqu_si256 function.
             // Reads a 256-bit vector from an unaligned pointer. This intrinsic
             // is expected to perform better than a regular unaligned read when

diff --git a/src/shims/x86/avx2.rs b/src/shims/x86/avx2.rs
@@ -5,8 +5,8 @@ use rustc_span::Symbol;
 use rustc_target::callconv::FnAbi;
 
 use super::{
-    ShiftOp, horizontal_bin_op, mask_load, mask_store, mpsadbw, packssdw, packsswb, packusdw,
-    packuswb, permute, pmaddbw, pmulhrsw, psadbw, psign, shift_simd_by_scalar, shift_simd_by_simd,
+    ShiftOp, horizontal_bin_op, mpsadbw, packssdw, packsswb, packusdw, packuswb, permute, pmaddbw,
+    pmulhrsw, psadbw, psign, shift_simd_by_scalar,
 };
 use crate::*;
 
@@ -108,27 +108,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
                 pmaddbw(this, left, right, dest)?;
             }
-            // Used to implement the _mm_maskload_epi32, _mm_maskload_epi64,
-            // _mm256_maskload_epi32 and _mm256_maskload_epi64 functions.
-            // For the element `i`, if the high bit of the `i`-th element of `mask`
-            // is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is
-            // loaded.
-            "maskload.d" | "maskload.q" | "maskload.d.256" | "maskload.q.256" => {
-                let [ptr, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                mask_load(this, ptr, mask, dest)?;
-            }
-            // Used to implement the _mm_maskstore_epi32, _mm_maskstore_epi64,
-            // _mm256_maskstore_epi32 and _mm256_maskstore_epi64 functions.
-            // For the element `i`, if the high bit of the element `i`-th of `mask`
-            // is one, it is stored into `ptr.wapping_add(i)`.
-            // Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores.
-            "maskstore.d" | "maskstore.q" | "maskstore.d.256" | "maskstore.q.256" => {
-                let [ptr, mask, value] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                mask_store(this, ptr, mask, value)?;
-            }
             // Used to implement the _mm256_mpsadbw_epu8 function.
             // Compute the sum of absolute differences of quadruplets of unsigned
             // 8-bit integers in `left` and `right`, and store the 16-bit results
@@ -266,22 +245,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
                 shift_simd_by_scalar(this, left, right, which, dest)?;
             }
-            // Used to implement the _mm{,256}_{sllv,srlv,srav}_epi{32,64} functions
-            // (except _mm{,256}_srav_epi64, which are not available in AVX2).
-            "psllv.d" | "psllv.d.256" | "psllv.q" | "psllv.q.256" | "psrlv.d" | "psrlv.d.256"
-            | "psrlv.q" | "psrlv.q.256" | "psrav.d" | "psrav.d.256" => {
-                let [left, right] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                let which = match unprefixed_name {
-                    "psllv.d" | "psllv.d.256" | "psllv.q" | "psllv.q.256" => ShiftOp::Left,
-                    "psrlv.d" | "psrlv.d.256" | "psrlv.q" | "psrlv.q.256" => ShiftOp::RightLogic,
-                    "psrav.d" | "psrav.d.256" => ShiftOp::RightArith,
-                    _ => unreachable!(),
-                };
-
-                shift_simd_by_simd(this, left, right, which, dest)?;
-            }
             _ => return interp_ok(EmulateItemResult::NotSupported),
         }
         interp_ok(EmulateItemResult::NeedsReturn)

diff --git a/src/shims/x86/mod.rs b/src/shims/x86/mod.rs
@@ -518,61 +518,6 @@ fn shift_simd_by_scalar<'tcx>(
     interp_ok(())
 }
 
-/// Shifts each element of `left` by the corresponding element of `right`.
-///
-/// For logic shifts, when right is larger than BITS - 1, zero is produced.
-/// For arithmetic right-shifts, when right is larger than BITS - 1, the sign
-/// bit is copied to all bits.
-fn shift_simd_by_simd<'tcx>(
-    ecx: &mut crate::MiriInterpCx<'tcx>,
-    left: &OpTy<'tcx>,
-    right: &OpTy<'tcx>,
-    which: ShiftOp,
-    dest: &MPlaceTy<'tcx>,
-) -> InterpResult<'tcx, ()> {
-    let (left, left_len) = ecx.project_to_simd(left)?;
-    let (right, right_len) = ecx.project_to_simd(right)?;
-    let (dest, dest_len) = ecx.project_to_simd(dest)?;
-
-    assert_eq!(dest_len, left_len);
-    assert_eq!(dest_len, right_len);
-
-    for i in 0..dest_len {
-        let left = ecx.read_scalar(&ecx.project_index(&left, i)?)?;
-        let right = ecx.read_scalar(&ecx.project_index(&right, i)?)?;
-        let dest = ecx.project_index(&dest, i)?;
-
-        // It is ok to saturate the value to u32::MAX because any value
-        // above BITS - 1 will produce the same result.
-        let shift = u32::try_from(right.to_uint(dest.layout.size)?).unwrap_or(u32::MAX);
-
-        let res = match which {
-            ShiftOp::Left => {
-                let left = left.to_uint(dest.layout.size)?;
-                let res = left.checked_shl(shift).unwrap_or(0);
-                // `truncate` is needed as left-shift can make the absolute value larger.
-                Scalar::from_uint(dest.layout.size.truncate(res), dest.layout.size)
-            }
-            ShiftOp::RightLogic => {
-                let left = left.to_uint(dest.layout.size)?;
-                let res = left.checked_shr(shift).unwrap_or(0);
-                // No `truncate` needed as right-shift can only make the absolute value smaller.
-                Scalar::from_uint(res, dest.layout.size)
-            }
-            ShiftOp::RightArith => {
-                let left = left.to_int(dest.layout.size)?;
-                // On overflow, copy the sign bit to the remaining bits
-                let res = left.checked_shr(shift).unwrap_or(left >> 127);
-                // No `truncate` needed as right-shift can only make the absolute value smaller.
-                Scalar::from_int(res, dest.layout.size)
-            }
-        };
-        ecx.write_scalar(res, &dest)?;
-    }
-
-    interp_ok(())
-}
-
 /// Takes a 128-bit vector, transmutes it to `[u64; 2]` and extracts
 /// the first value.
 fn extract_first_u64<'tcx>(
@@ -912,73 +857,6 @@ fn test_high_bits_masked<'tcx>(
     interp_ok((direct, negated))
 }
 
-/// Conditionally loads from `ptr` according the high bit of each
-/// element of `mask`. `ptr` does not need to be aligned.
-fn mask_load<'tcx>(
-    ecx: &mut crate::MiriInterpCx<'tcx>,
-    ptr: &OpTy<'tcx>,
-    mask: &OpTy<'tcx>,
-    dest: &MPlaceTy<'tcx>,
-) -> InterpResult<'tcx, ()> {
-    let (mask, mask_len) = ecx.project_to_simd(mask)?;
-    let (dest, dest_len) = ecx.project_to_simd(dest)?;
-
-    assert_eq!(dest_len, mask_len);
-
-    let mask_item_size = mask.layout.field(ecx, 0).size;
-    let high_bit_offset = mask_item_size.bits().strict_sub(1);
-
-    let ptr = ecx.read_pointer(ptr)?;
-    for i in 0..dest_len {
-        let mask = ecx.project_index(&mask, i)?;
-        let dest = ecx.project_index(&dest, i)?;
-
-        if ecx.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
-            let ptr = ptr.wrapping_offset(dest.layout.size * i, &ecx.tcx);
-            // Unaligned copy, which is what we want.
-            ecx.mem_copy(ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
-        } else {
-            ecx.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
-        }
-    }
-
-    interp_ok(())
-}
-
-/// Conditionally stores into `ptr` according the high bit of each
-/// element of `mask`. `ptr` does not need to be aligned.
-fn mask_store<'tcx>(
-    ecx: &mut crate::MiriInterpCx<'tcx>,
-    ptr: &OpTy<'tcx>,
-    mask: &OpTy<'tcx>,
-    value: &OpTy<'tcx>,
-) -> InterpResult<'tcx, ()> {
-    let (mask, mask_len) = ecx.project_to_simd(mask)?;
-    let (value, value_len) = ecx.project_to_simd(value)?;
-
-    assert_eq!(value_len, mask_len);
-
-    let mask_item_size = mask.layout.field(ecx, 0).size;
-    let high_bit_offset = mask_item_size.bits().strict_sub(1);
-
-    let ptr = ecx.read_pointer(ptr)?;
-    for i in 0..value_len {
-        let mask = ecx.project_index(&mask, i)?;
-        let value = ecx.project_index(&value, i)?;
-
-        if ecx.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
-            // *Non-inbounds* pointer arithmetic to compute the destination.
-            // (That's why we can't use a place projection.)
-            let ptr = ptr.wrapping_offset(value.layout.size * i, &ecx.tcx);
-            // Deref the pointer *unaligned*, and do the copy.
-            let dest = ecx.ptr_to_mplace_unaligned(ptr, value.layout);
-            ecx.copy_op(&value, &dest)?;
-        }
-    }
-
-    interp_ok(())
-}
-
 /// Compute the sum of absolute differences of quadruplets of unsigned
 /// 8-bit integers in `left` and `right`, and store the 16-bit results
 /// in `right`. Quadruplets are selected from `left` and `right` with