Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 1 addition & 22 deletions src/shims/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use rustc_target::callconv::FnAbi;

use super::{
FloatBinOp, FloatUnaryOp, bin_op_simd_float_all, conditional_dot_product, convert_float_to_int,
mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
};
use crate::*;

Expand Down Expand Up @@ -200,27 +200,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
)?;
}
}
// Used to implement the _mm_maskload_ps, _mm_maskload_pd, _mm256_maskload_ps
// and _mm256_maskload_pd functions.
// For the element `i`, if the high bit of the `i`-th element of `mask`
// is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is
// loaded.
"maskload.ps" | "maskload.pd" | "maskload.ps.256" | "maskload.pd.256" => {
let [ptr, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

mask_load(this, ptr, mask, dest)?;
}
// Used to implement the _mm_maskstore_ps, _mm_maskstore_pd, _mm256_maskstore_ps
// and _mm256_maskstore_pd functions.
// For the element `i`, if the high bit of the element `i`-th of `mask`
// is one, it is stored into `ptr.wapping_add(i)`.
// Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores.
"maskstore.ps" | "maskstore.pd" | "maskstore.ps.256" | "maskstore.pd.256" => {
let [ptr, mask, value] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

mask_store(this, ptr, mask, value)?;
}
// Used to implement the _mm256_lddqu_si256 function.
// Reads a 256-bit vector from an unaligned pointer. This intrinsic
// is expected to perform better than a regular unaligned read when
Expand Down
41 changes: 2 additions & 39 deletions src/shims/x86/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use rustc_span::Symbol;
use rustc_target::callconv::FnAbi;

use super::{
ShiftOp, horizontal_bin_op, mask_load, mask_store, mpsadbw, packssdw, packsswb, packusdw,
packuswb, permute, pmaddbw, pmulhrsw, psadbw, psign, shift_simd_by_scalar, shift_simd_by_simd,
ShiftOp, horizontal_bin_op, mpsadbw, packssdw, packsswb, packusdw, packuswb, permute, pmaddbw,
pmulhrsw, psadbw, psign, shift_simd_by_scalar,
};
use crate::*;

Expand Down Expand Up @@ -108,27 +108,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

pmaddbw(this, left, right, dest)?;
}
// Used to implement the _mm_maskload_epi32, _mm_maskload_epi64,
// _mm256_maskload_epi32 and _mm256_maskload_epi64 functions.
// For the element `i`, if the high bit of the `i`-th element of `mask`
// is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is
// loaded.
"maskload.d" | "maskload.q" | "maskload.d.256" | "maskload.q.256" => {
let [ptr, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

mask_load(this, ptr, mask, dest)?;
}
// Used to implement the _mm_maskstore_epi32, _mm_maskstore_epi64,
// _mm256_maskstore_epi32 and _mm256_maskstore_epi64 functions.
// For the element `i`, if the high bit of the element `i`-th of `mask`
// is one, it is stored into `ptr.wapping_add(i)`.
// Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores.
"maskstore.d" | "maskstore.q" | "maskstore.d.256" | "maskstore.q.256" => {
let [ptr, mask, value] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

mask_store(this, ptr, mask, value)?;
}
// Used to implement the _mm256_mpsadbw_epu8 function.
// Compute the sum of absolute differences of quadruplets of unsigned
// 8-bit integers in `left` and `right`, and store the 16-bit results
Expand Down Expand Up @@ -266,22 +245,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

shift_simd_by_scalar(this, left, right, which, dest)?;
}
// Used to implement the _mm{,256}_{sllv,srlv,srav}_epi{32,64} functions
// (except _mm{,256}_srav_epi64, which are not available in AVX2).
"psllv.d" | "psllv.d.256" | "psllv.q" | "psllv.q.256" | "psrlv.d" | "psrlv.d.256"
| "psrlv.q" | "psrlv.q.256" | "psrav.d" | "psrav.d.256" => {
let [left, right] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

let which = match unprefixed_name {
"psllv.d" | "psllv.d.256" | "psllv.q" | "psllv.q.256" => ShiftOp::Left,
"psrlv.d" | "psrlv.d.256" | "psrlv.q" | "psrlv.q.256" => ShiftOp::RightLogic,
"psrav.d" | "psrav.d.256" => ShiftOp::RightArith,
_ => unreachable!(),
};

shift_simd_by_simd(this, left, right, which, dest)?;
}
_ => return interp_ok(EmulateItemResult::NotSupported),
}
interp_ok(EmulateItemResult::NeedsReturn)
Expand Down
122 changes: 0 additions & 122 deletions src/shims/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -518,61 +518,6 @@ fn shift_simd_by_scalar<'tcx>(
interp_ok(())
}

/// Shifts each element of `left` by the corresponding element of `right`.
///
/// For logic shifts, when right is larger than BITS - 1, zero is produced.
/// For arithmetic right-shifts, when right is larger than BITS - 1, the sign
/// bit is copied to all bits.
fn shift_simd_by_simd<'tcx>(
ecx: &mut crate::MiriInterpCx<'tcx>,
left: &OpTy<'tcx>,
right: &OpTy<'tcx>,
which: ShiftOp,
dest: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx, ()> {
let (left, left_len) = ecx.project_to_simd(left)?;
let (right, right_len) = ecx.project_to_simd(right)?;
let (dest, dest_len) = ecx.project_to_simd(dest)?;

assert_eq!(dest_len, left_len);
assert_eq!(dest_len, right_len);

for i in 0..dest_len {
let left = ecx.read_scalar(&ecx.project_index(&left, i)?)?;
let right = ecx.read_scalar(&ecx.project_index(&right, i)?)?;
let dest = ecx.project_index(&dest, i)?;

// It is ok to saturate the value to u32::MAX because any value
// above BITS - 1 will produce the same result.
let shift = u32::try_from(right.to_uint(dest.layout.size)?).unwrap_or(u32::MAX);

let res = match which {
ShiftOp::Left => {
let left = left.to_uint(dest.layout.size)?;
let res = left.checked_shl(shift).unwrap_or(0);
// `truncate` is needed as left-shift can make the absolute value larger.
Scalar::from_uint(dest.layout.size.truncate(res), dest.layout.size)
}
ShiftOp::RightLogic => {
let left = left.to_uint(dest.layout.size)?;
let res = left.checked_shr(shift).unwrap_or(0);
// No `truncate` needed as right-shift can only make the absolute value smaller.
Scalar::from_uint(res, dest.layout.size)
}
ShiftOp::RightArith => {
let left = left.to_int(dest.layout.size)?;
// On overflow, copy the sign bit to the remaining bits
let res = left.checked_shr(shift).unwrap_or(left >> 127);
// No `truncate` needed as right-shift can only make the absolute value smaller.
Scalar::from_int(res, dest.layout.size)
}
};
ecx.write_scalar(res, &dest)?;
}

interp_ok(())
}

/// Takes a 128-bit vector, transmutes it to `[u64; 2]` and extracts
/// the first value.
fn extract_first_u64<'tcx>(
Expand Down Expand Up @@ -912,73 +857,6 @@ fn test_high_bits_masked<'tcx>(
interp_ok((direct, negated))
}

/// Conditionally loads from `ptr` according the high bit of each
/// element of `mask`. `ptr` does not need to be aligned.
fn mask_load<'tcx>(
ecx: &mut crate::MiriInterpCx<'tcx>,
ptr: &OpTy<'tcx>,
mask: &OpTy<'tcx>,
dest: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx, ()> {
let (mask, mask_len) = ecx.project_to_simd(mask)?;
let (dest, dest_len) = ecx.project_to_simd(dest)?;

assert_eq!(dest_len, mask_len);

let mask_item_size = mask.layout.field(ecx, 0).size;
let high_bit_offset = mask_item_size.bits().strict_sub(1);

let ptr = ecx.read_pointer(ptr)?;
for i in 0..dest_len {
let mask = ecx.project_index(&mask, i)?;
let dest = ecx.project_index(&dest, i)?;

if ecx.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
let ptr = ptr.wrapping_offset(dest.layout.size * i, &ecx.tcx);
// Unaligned copy, which is what we want.
ecx.mem_copy(ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
} else {
ecx.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
}
}

interp_ok(())
}

/// Conditionally stores into `ptr` according the high bit of each
/// element of `mask`. `ptr` does not need to be aligned.
fn mask_store<'tcx>(
ecx: &mut crate::MiriInterpCx<'tcx>,
ptr: &OpTy<'tcx>,
mask: &OpTy<'tcx>,
value: &OpTy<'tcx>,
) -> InterpResult<'tcx, ()> {
let (mask, mask_len) = ecx.project_to_simd(mask)?;
let (value, value_len) = ecx.project_to_simd(value)?;

assert_eq!(value_len, mask_len);

let mask_item_size = mask.layout.field(ecx, 0).size;
let high_bit_offset = mask_item_size.bits().strict_sub(1);

let ptr = ecx.read_pointer(ptr)?;
for i in 0..value_len {
let mask = ecx.project_index(&mask, i)?;
let value = ecx.project_index(&value, i)?;

if ecx.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
// *Non-inbounds* pointer arithmetic to compute the destination.
// (That's why we can't use a place projection.)
let ptr = ptr.wrapping_offset(value.layout.size * i, &ecx.tcx);
// Deref the pointer *unaligned*, and do the copy.
let dest = ecx.ptr_to_mplace_unaligned(ptr, value.layout);
ecx.copy_op(&value, &dest)?;
}
}

interp_ok(())
}

/// Compute the sum of absolute differences of quadruplets of unsigned
/// 8-bit integers in `left` and `right`, and store the 16-bit results
/// in `right`. Quadruplets are selected from `left` and `right` with
Expand Down