Skip to content

Commit

Permalink
Auto merge of #116609 - eduardosm:bump-stdarch, r=workingjubilee
Browse files Browse the repository at this point in the history
Bump stdarch submodule and remove special handling for LLVM intrinsics that are no longer needed

Bumps stdarch to pull rust-lang/stdarch#1477, which reimplemented some functions with portable SIMD intrinsics instead of arch specific LLVM intrinsics.

Handling of those LLVM intrinsics is removed from cranelift codegen and miri.

cc `@RalfJung` `@bjorn3`
  • Loading branch information
bors committed Oct 28, 2023
2 parents 615d0f2 + 35e2f4e commit 3089c31
Show file tree
Hide file tree
Showing 6 changed files with 8 additions and 262 deletions.
35 changes: 0 additions & 35 deletions compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
Expand Up @@ -32,41 +32,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
ret.write_cvalue(fx, CValue::by_val(res, fx.layout_of(fx.tcx.types.i64)));
}

// Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
"llvm.x86.sse2.pmovmskb.128"
| "llvm.x86.avx2.pmovmskb"
| "llvm.x86.sse.movmsk.ps"
| "llvm.x86.sse2.movmsk.pd" => {
intrinsic_args!(fx, args => (a); intrinsic);

let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
let lane_ty = fx.clif_type(lane_ty).unwrap();
assert!(lane_count <= 32);

let mut res = fx.bcx.ins().iconst(types::I32, 0);

for lane in (0..lane_count).rev() {
let a_lane = a.value_lane(fx, lane).load_scalar(fx);

// cast float to int
let a_lane = match lane_ty {
types::F32 => codegen_bitcast(fx, types::I32, a_lane),
types::F64 => codegen_bitcast(fx, types::I64, a_lane),
_ => a_lane,
};

// extract sign bit of an int
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1));

// shift sign bit into result
let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false);
res = fx.bcx.ins().ishl_imm(res, 1);
res = fx.bcx.ins().bor(res, a_lane_sign);
}

let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32));
ret.write_cvalue(fx, res);
}
"llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
let (x, y, kind) = match args {
[x, y, kind] => (x, y, kind),
Expand Down
19 changes: 0 additions & 19 deletions src/tools/miri/src/shims/x86/sse.rs
Expand Up @@ -209,25 +209,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
)?;
}
}
// Used to implement the _mm_movemask_ps function.
// Returns a scalar integer where the i-th bit is the highest
// bit of the i-th component of `op`.
// https://www.felixcloutier.com/x86/movmskps
"movmsk.ps" => {
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
let (op, op_len) = this.operand_to_simd(op)?;

let mut res = 0;
for i in 0..op_len {
let op = this.read_scalar(&this.project_index(&op, i)?)?;
let op = op.to_u32()?;

// Extract the highest bit of `op` and place it in the `i`-th bit of `res`
res |= (op >> 31) << i;
}

this.write_scalar(Scalar::from_u32(res), dest)?;
}
_ => return Ok(EmulateForeignItemResult::NotSupported),
}
Ok(EmulateForeignItemResult::NeedsJumping)
Expand Down
176 changes: 1 addition & 175 deletions src/tools/miri/src/shims/x86/sse2.rs
@@ -1,8 +1,4 @@
use rustc_apfloat::{
ieee::{Double, Single},
Float as _,
};
use rustc_middle::mir;
use rustc_apfloat::ieee::Double;
use rustc_middle::ty::layout::LayoutOf as _;
use rustc_middle::ty::Ty;
use rustc_span::Symbol;
Expand Down Expand Up @@ -39,49 +35,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
// Intrinsincs sufixed with "epiX" or "epuX" operate with X-bit signed or unsigned
// vectors.
match unprefixed_name {
// Used to implement the _mm_avg_epu8 and _mm_avg_epu16 functions.
// Averages packed unsigned 8/16-bit integers in `left` and `right`.
"pavg.b" | "pavg.w" => {
let [left, right] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let (left, left_len) = this.operand_to_simd(left)?;
let (right, right_len) = this.operand_to_simd(right)?;
let (dest, dest_len) = this.place_to_simd(dest)?;

assert_eq!(dest_len, left_len);
assert_eq!(dest_len, right_len);

for i in 0..dest_len {
let left = this.read_immediate(&this.project_index(&left, i)?)?;
let right = this.read_immediate(&this.project_index(&right, i)?)?;
let dest = this.project_index(&dest, i)?;

// Widen the operands to avoid overflow
let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?;
let left = this.int_to_int_or_float(&left, twice_wide)?;
let right = this.int_to_int_or_float(&right, twice_wide)?;

// Calculate left + right + 1
let added = this.wrapping_binary_op(mir::BinOp::Add, &left, &right)?;
let added = this.wrapping_binary_op(
mir::BinOp::Add,
&added,
&ImmTy::from_uint(1u32, twice_wide),
)?;

// Calculate (left + right + 1) / 2
let divided = this.wrapping_binary_op(
mir::BinOp::Div,
&added,
&ImmTy::from_uint(2u32, twice_wide),
)?;

// Narrow back to the original type
let res = this.int_to_int_or_float(&divided, dest.layout)?;
this.write_immediate(*res, &dest)?;
}
}
// Used to implement the _mm_madd_epi16 function.
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
Expand Down Expand Up @@ -118,70 +71,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
this.write_scalar(Scalar::from_i32(res), &dest)?;
}
}
// Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions.
"pmulh.w" | "pmulhu.w" => {
let [left, right] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let (left, left_len) = this.operand_to_simd(left)?;
let (right, right_len) = this.operand_to_simd(right)?;
let (dest, dest_len) = this.place_to_simd(dest)?;

assert_eq!(dest_len, left_len);
assert_eq!(dest_len, right_len);

for i in 0..dest_len {
let left = this.read_immediate(&this.project_index(&left, i)?)?;
let right = this.read_immediate(&this.project_index(&right, i)?)?;
let dest = this.project_index(&dest, i)?;

// Widen the operands to avoid overflow
let twice_wide = this.layout_of(this.get_twice_wide_int_ty(left.layout.ty))?;
let left = this.int_to_int_or_float(&left, twice_wide)?;
let right = this.int_to_int_or_float(&right, twice_wide)?;

// Multiply
let multiplied = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
// Keep the high half
let high = this.wrapping_binary_op(
mir::BinOp::Shr,
&multiplied,
&ImmTy::from_uint(dest.layout.size.bits(), twice_wide),
)?;

// Narrow back to the original type
let res = this.int_to_int_or_float(&high, dest.layout)?;
this.write_immediate(*res, &dest)?;
}
}
// Used to implement the _mm_mul_epu32 function.
// Multiplies the the low unsigned 32-bit integers from each packed
// 64-bit element and stores the result as 64-bit unsigned integers.
"pmulu.dq" => {
let [left, right] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let (left, left_len) = this.operand_to_simd(left)?;
let (right, right_len) = this.operand_to_simd(right)?;
let (dest, dest_len) = this.place_to_simd(dest)?;

// left and right are u32x4, dest is u64x2
assert_eq!(left_len, 4);
assert_eq!(right_len, 4);
assert_eq!(dest_len, 2);

for i in 0..dest_len {
let op_i = i.checked_mul(2).unwrap();
let left = this.read_scalar(&this.project_index(&left, op_i)?)?.to_u32()?;
let right = this.read_scalar(&this.project_index(&right, op_i)?)?.to_u32()?;
let dest = this.project_index(&dest, i)?;

// The multiplication will not overflow because stripping the
// operands are expanded from 32-bit to 64-bit.
let res = u64::from(left).checked_mul(u64::from(right)).unwrap();
this.write_scalar(Scalar::from_u64(res), &dest)?;
}
}
// Used to implement the _mm_sad_epu8 function.
// Computes the absolute differences of packed unsigned 8-bit integers in `a`
// and `b`, then horizontally sum each consecutive 8 differences to produce
Expand Down Expand Up @@ -370,25 +259,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
this.write_scalar(Scalar::from_u64(res), &dest)?;
}
}
// Used to implement the _mm_cvtepi32_ps function.
// Converts packed i32 to packed f32.
// FIXME: Can we get rid of this intrinsic and just use simd_as?
"cvtdq2ps" => {
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let (op, op_len) = this.operand_to_simd(op)?;
let (dest, dest_len) = this.place_to_simd(dest)?;

assert_eq!(dest_len, op_len);

for i in 0..dest_len {
let op = this.read_scalar(&this.project_index(&op, i)?)?.to_i32()?;
let dest = this.project_index(&dest, i)?;

let res = Scalar::from_f32(Single::from_i128(op.into()).value);
this.write_scalar(res, &dest)?;
}
}
// Used to implement the _mm_cvtps_epi32 and _mm_cvttps_epi32 functions.
// Converts packed f32 to packed i32.
"cvtps2dq" | "cvttps2dq" => {
Expand Down Expand Up @@ -652,31 +522,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
};
this.write_scalar(Scalar::from_i32(i32::from(res)), dest)?;
}
// Used to implement the _mm_cvtpd_ps and _mm_cvtps_pd functions.
// Converts packed f32/f64 to packed f64/f32.
"cvtpd2ps" | "cvtps2pd" => {
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let (op, op_len) = this.operand_to_simd(op)?;
let (dest, dest_len) = this.place_to_simd(dest)?;

// For cvtpd2ps: op is f64x2, dest is f32x4
// For cvtps2pd: op is f32x4, dest is f64x2
// In either case, the two first values are converted
for i in 0..op_len.min(dest_len) {
let op = this.read_immediate(&this.project_index(&op, i)?)?;
let dest = this.project_index(&dest, i)?;

let res = this.float_to_float_or_int(&op, dest.layout)?;
this.write_immediate(*res, &dest)?;
}
// For f32 -> f64, ignore the remaining
// For f64 -> f32, fill the remaining with zeros
for i in op_len..dest_len {
let dest = this.project_index(&dest, i)?;
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
}
}
// Used to implement the _mm_cvtpd_epi32 and _mm_cvttpd_epi32 functions.
// Converts packed f64 to packed i32.
"cvtpd2dq" | "cvttpd2dq" => {
Expand Down Expand Up @@ -772,25 +617,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
)?;
}
}
// Used to implement the _mm_movemask_pd function.
// Returns a scalar integer where the i-th bit is the highest
// bit of the i-th component of `op`.
// https://www.felixcloutier.com/x86/movmskpd
"movmsk.pd" => {
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
let (op, op_len) = this.operand_to_simd(op)?;

let mut res = 0;
for i in 0..op_len {
let op = this.read_scalar(&this.project_index(&op, i)?)?;
let op = op.to_u64()?;

// Extract the highest bit of `op` and place it in the `i`-th bit of `res`
res |= (op >> 63) << i;
}

this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?;
}
// Used to implement the `_mm_pause` function.
// The intrinsic is used to hint the processor that the code is in a spin-loop.
"pause" => {
Expand Down
26 changes: 0 additions & 26 deletions src/tools/miri/src/shims/x86/sse3.rs
Expand Up @@ -22,32 +22,6 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap();

match unprefixed_name {
// Used to implement the _mm_addsub_ps and _mm_addsub_pd functions.
// Alternatingly add and subtract floating point (f32 or f64) from
// `left` and `right`
"addsub.ps" | "addsub.pd" => {
let [left, right] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let (left, left_len) = this.operand_to_simd(left)?;
let (right, right_len) = this.operand_to_simd(right)?;
let (dest, dest_len) = this.place_to_simd(dest)?;

assert_eq!(dest_len, left_len);
assert_eq!(dest_len, right_len);

for i in 0..dest_len {
let left = this.read_immediate(&this.project_index(&left, i)?)?;
let right = this.read_immediate(&this.project_index(&right, i)?)?;
let dest = this.project_index(&dest, i)?;

// Even elements are subtracted and odd elements are added.
let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add };
let res = this.wrapping_binary_op(op, &left, &right)?;

this.write_immediate(*res, &dest)?;
}
}
// Used to implement the _mm_h{add,sub}_p{s,d} functions.
// Horizontally add/subtract adjacent floating point values
// in `left` and `right`.
Expand Down
12 changes: 6 additions & 6 deletions src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
Expand Up @@ -117,12 +117,12 @@ mod tests {
#[target_feature(enable = "sse2")]
unsafe fn test_mm_sad_epu8() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
1, 2, 3, 4,
155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
1, 2, 3, 4,
);
let a = _mm_setr_epi8(
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
1, 2, 3, 4,
155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
1, 2, 3, 4,
);
let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
let r = _mm_sad_epu8(a, b);
let e = _mm_setr_epi64x(1020, 614);
Expand Down

0 comments on commit 3089c31

Please sign in to comment.