Another round of wasm SIMD updates
This round is dependent on
rust-lang/llvm-project#101 landing first in
rust-lang/rust and won't pass CI until that does. That PR, however, will
also break wasm CI because it's changing how the wasm target works. My
goal here is to open this early to get it out there so that when that PR
lands in rust-lang/rust and CI breaks in stdarch, this can be merged
to make CI green again.

The changes here are mostly around the codegen for various intrinsics.
Some wasm-specific intrinsics have been removed in favor of more general
LLVM intrinsics, and other intrinsics have been removed in favor of
pattern-matching codegen.
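
As a rough illustration (not part of this commit), here's a hypothetical
caller-side sketch of two intrinsics whose lowering changes: `f32x4_ceil`
now goes through the generic `llvm.ceil.v4f32` intrinsic instead of the
wasm-specific `llvm.wasm.ceil.v4f32`, and `i64x2_extend_low_i32x4` is now
open-coded as a low-lane shuffle plus a widening cast that LLVM
pattern-matches back into `i64x2.extend_low_i32x4_s`. It assumes a
wasm32-unknown-unknown build with the `simd128` target feature enabled.

```rust
// Hypothetical usage sketch; assumes a wasm32 target with simd128 enabled.
use core::arch::wasm32::*;

#[target_feature(enable = "simd128")]
unsafe fn round_and_widen(a: v128) -> (v128, v128) {
    // Lowered via the generic `llvm.ceil.v4f32` intrinsic after this change.
    let ceiled = f32x4_ceil(a);
    // Implemented as a low-lane shuffle + lane-widening cast, which LLVM
    // pattern-matches into the `i64x2.extend_low_i32x4_s` instruction.
    let widened = i64x2_extend_low_i32x4(a);
    (ceiled, widened)
}
```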

The only new instruction supported as part of this change is
`v128.any_true`. This leaves only one instruction unsupported in LLVM,
which is `i64x2.abs`. I think the codegen for that instruction is correct
in stdsimd, though, and LLVM just needs a pattern-match to actually emit
the opcode. That'll happen in a future LLVM update.
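
For example (a hypothetical usage sketch, not taken from this commit): with
the `v128.any_true` assertion now enabled, a call like the following should
compile down to a single `v128.any_true` opcode on a wasm32 target with
`simd128` enabled.

```rust
// Hypothetical usage sketch; assumes a wasm32 target with simd128 enabled.
use core::arch::wasm32::*;

#[target_feature(enable = "simd128")]
unsafe fn any_nonzero(a: v128) -> bool {
    // Returns true if any lane is nonzero, false if all lanes are zero.
    v128_any_true(a)
}
```
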
alexcrichton committed May 3, 2021
1 parent bc5c33c commit 50b6b7a
Showing 1 changed file with 53 additions and 35 deletions.
crates/core_arch/src/wasm32/simd128.rs
@@ -212,14 +212,6 @@ extern "C" {
     fn llvm_i64x2_all_true(x: simd::i64x2) -> i32;
     #[link_name = "llvm.wasm.bitmask.v2i64"]
     fn llvm_bitmask_i64x2(a: simd::i64x2) -> i32;
-    #[link_name = "llvm.wasm.extend.low.signed"]
-    fn llvm_i64x2_extend_low_i32x4_s(a: simd::i32x4) -> simd::i64x2;
-    #[link_name = "llvm.wasm.extend.high.signed"]
-    fn llvm_i64x2_extend_high_i32x4_s(a: simd::i32x4) -> simd::i64x2;
-    #[link_name = "llvm.wasm.extend.low.unsigned"]
-    fn llvm_i64x2_extend_low_i32x4_u(a: simd::i32x4) -> simd::i64x2;
-    #[link_name = "llvm.wasm.extend.high.unsigned"]
-    fn llvm_i64x2_extend_high_i32x4_u(a: simd::i32x4) -> simd::i64x2;
     #[link_name = "llvm.wasm.extmul.low.signed.v2i64"]
     fn llvm_i64x2_extmul_low_i32x4_s(a: simd::i32x4, b: simd::i32x4) -> simd::i64x2;
     #[link_name = "llvm.wasm.extmul.high.signed.v2i64"]
@@ -229,13 +221,13 @@ extern "C" {
     #[link_name = "llvm.wasm.extmul.high.unsigned.v2i64"]
     fn llvm_i64x2_extmul_high_i32x4_u(a: simd::i32x4, b: simd::i32x4) -> simd::i64x2;

-    #[link_name = "llvm.wasm.ceil.v4f32"]
+    #[link_name = "llvm.ceil.v4f32"]
     fn llvm_f32x4_ceil(x: simd::f32x4) -> simd::f32x4;
-    #[link_name = "llvm.wasm.floor.v4f32"]
+    #[link_name = "llvm.floor.v4f32"]
     fn llvm_f32x4_floor(x: simd::f32x4) -> simd::f32x4;
-    #[link_name = "llvm.wasm.trunc.v4f32"]
+    #[link_name = "llvm.trunc.v4f32"]
     fn llvm_f32x4_trunc(x: simd::f32x4) -> simd::f32x4;
-    #[link_name = "llvm.wasm.nearest.v4f32"]
+    #[link_name = "llvm.nearbyint.v4f32"]
     fn llvm_f32x4_nearest(x: simd::f32x4) -> simd::f32x4;
     #[link_name = "llvm.fabs.v4f32"]
     fn llvm_f32x4_abs(x: simd::f32x4) -> simd::f32x4;
@@ -250,13 +242,13 @@ extern "C" {
     #[link_name = "llvm.wasm.pmax.v4f32"]
     fn llvm_f32x4_pmax(x: simd::f32x4, y: simd::f32x4) -> simd::f32x4;

-    #[link_name = "llvm.wasm.ceil.v2f64"]
+    #[link_name = "llvm.ceil.v2f64"]
     fn llvm_f64x2_ceil(x: simd::f64x2) -> simd::f64x2;
-    #[link_name = "llvm.wasm.floor.v2f64"]
+    #[link_name = "llvm.floor.v2f64"]
     fn llvm_f64x2_floor(x: simd::f64x2) -> simd::f64x2;
-    #[link_name = "llvm.wasm.trunc.v2f64"]
+    #[link_name = "llvm.trunc.v2f64"]
     fn llvm_f64x2_trunc(x: simd::f64x2) -> simd::f64x2;
-    #[link_name = "llvm.wasm.nearest.v2f64"]
+    #[link_name = "llvm.nearbyint.v2f64"]
     fn llvm_f64x2_nearest(x: simd::f64x2) -> simd::f64x2;
     #[link_name = "llvm.fabs.v2f64"]
     fn llvm_f64x2_abs(x: simd::f64x2) -> simd::f64x2;
@@ -271,18 +263,14 @@ extern "C" {
     #[link_name = "llvm.wasm.pmax.v2f64"]
     fn llvm_f64x2_pmax(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2;

-    #[link_name = "llvm.wasm.trunc.saturate.signed.v4i32.v4f32"]
+    #[link_name = "llvm.fptosi.sat.v4i32.v4f32"]
     fn llvm_i32x4_trunc_sat_f32x4_s(x: simd::f32x4) -> simd::i32x4;
-    #[link_name = "llvm.wasm.trunc.saturate.unsigned.v4i32.v4f32"]
+    #[link_name = "llvm.fptoui.sat.v4i32.v4f32"]
     fn llvm_i32x4_trunc_sat_f32x4_u(x: simd::f32x4) -> simd::i32x4;
-    #[link_name = "llvm.wasm.convert.low.signed"]
-    fn llvm_f64x2_convert_low_i32x4_s(x: simd::i32x4) -> simd::f64x2;
-    #[link_name = "llvm.wasm.convert.low.unsigned"]
-    fn llvm_f64x2_convert_low_i32x4_u(x: simd::i32x4) -> simd::f64x2;
-    #[link_name = "llvm.wasm.trunc.sat.zero.signed"]
-    fn llvm_i32x4_trunc_sat_f64x2_s_zero(x: simd::f64x2) -> simd::i32x4;
-    #[link_name = "llvm.wasm.trunc.sat.zero.unsigned"]
-    fn llvm_i32x4_trunc_sat_f64x2_u_zero(x: simd::f64x2) -> simd::i32x4;
+    #[link_name = "llvm.fptosi.sat.v2i32.v2f64"]
+    fn llvm_i32x2_trunc_sat_f64x2_s(x: simd::f64x2) -> simd::i32x2;
+    #[link_name = "llvm.fptoui.sat.v2i32.v2f64"]
+    fn llvm_i32x2_trunc_sat_f64x2_u(x: simd::f64x2) -> simd::i32x2;
     #[link_name = "llvm.wasm.demote.zero"]
     fn llvm_f32x4_demote_f64x2_zero(x: simd::f64x2) -> simd::f32x4;
     #[link_name = "llvm.wasm.promote.low"]
@@ -1836,7 +1824,7 @@ pub unsafe fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 {

 /// Returns true if any lane is nonzero or false if all lanes are zero.
 #[inline]
-// #[cfg_attr(test, assert_instr(v128.any_true))] // FIXME llvm
+#[cfg_attr(test, assert_instr(v128.any_true))]
 #[target_feature(enable = "simd128")]
 pub unsafe fn v128_any_true(a: v128) -> bool {
     llvm_any_true_i8x16(a.as_i8x16()) != 0
@@ -2688,7 +2676,9 @@ pub unsafe fn i64x2_bitmask(a: v128) -> i32 {
 // #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_s))] // FIXME wasmtime
 #[target_feature(enable = "simd128")]
 pub unsafe fn i64x2_extend_low_i32x4(a: v128) -> v128 {
-    transmute(llvm_i64x2_extend_low_i32x4_s(a.as_i32x4()))
+    transmute(simd_cast::<_, simd::i64x2>(
+        simd_shuffle2::<_, simd::i32x2>(a.as_i32x4(), a.as_i32x4(), [0, 1]),
+    ))
 }

 /// Converts high half of the smaller lane vector to a larger lane
@@ -2697,7 +2687,9 @@ pub unsafe fn i64x2_extend_low_i32x4(a: v128) -> v128 {
 // #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_s))] // FIXME wasmtime
 #[target_feature(enable = "simd128")]
 pub unsafe fn i64x2_extend_high_i32x4(a: v128) -> v128 {
-    transmute(llvm_i64x2_extend_high_i32x4_s(a.as_i32x4()))
+    transmute(simd_cast::<_, simd::i64x2>(
+        simd_shuffle2::<_, simd::i32x2>(a.as_i32x4(), a.as_i32x4(), [2, 3]),
+    ))
 }

 /// Converts low half of the smaller lane vector to a larger lane
@@ -2706,7 +2698,9 @@ pub unsafe fn i64x2_extend_high_i32x4(a: v128) -> v128 {
 // #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_u))] // FIXME wasmtime
 #[target_feature(enable = "simd128")]
 pub unsafe fn i64x2_extend_low_u32x4(a: v128) -> v128 {
-    transmute(llvm_i64x2_extend_low_i32x4_u(a.as_i32x4()))
+    transmute(simd_cast::<_, simd::i64x2>(
+        simd_shuffle2::<_, simd::u32x2>(a.as_u32x4(), a.as_u32x4(), [0, 1]),
+    ))
 }

 /// Converts high half of the smaller lane vector to a larger lane
@@ -2715,7 +2709,9 @@ pub unsafe fn i64x2_extend_low_u32x4(a: v128) -> v128 {
 // #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_u))] // FIXME wasmtime
 #[target_feature(enable = "simd128")]
 pub unsafe fn i64x2_extend_high_u32x4(a: v128) -> v128 {
-    transmute(llvm_i64x2_extend_low_i32x4_u(a.as_i32x4()))
+    transmute(simd_cast::<_, simd::i64x2>(
+        simd_shuffle2::<_, simd::u32x2>(a.as_u32x4(), a.as_u32x4(), [2, 3]),
+    ))
 }

 /// Shifts each lane to the left by the specified number of bits.
@@ -3137,7 +3133,11 @@ pub unsafe fn f32x4_convert_u32x4(a: v128) -> v128 {
 // #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_s_zero))] // FIXME wasmtime
 #[target_feature(enable = "simd128")]
 pub unsafe fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
-    transmute(llvm_i32x4_trunc_sat_f64x2_s_zero(a.as_f64x2()))
+    transmute(simd_shuffle4::<simd::i32x2, simd::i32x4>(
+        llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()),
+        simd::i32x2::splat(0),
+        [0, 1, 2, 3],
+    ))
 }

 /// Saturating conversion of the two double-precision floating point lanes to
@@ -3152,23 +3152,41 @@ pub unsafe fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
 // #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_u_zero))] // FIXME wasmtime
 #[target_feature(enable = "simd128")]
 pub unsafe fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
-    transmute(llvm_i32x4_trunc_sat_f64x2_u_zero(a.as_f64x2()))
+    transmute(simd_shuffle4::<simd::i32x2, simd::i32x4>(
+        llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()),
+        simd::i32x2::splat(0),
+        [0, 1, 2, 3],
+    ))
 }

 /// Lane-wise conversion from integer to floating point.
 #[inline]
 #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_s))]
 #[target_feature(enable = "simd128")]
 pub unsafe fn f64x2_convert_low_i32x4(a: v128) -> v128 {
-    transmute(llvm_f64x2_convert_low_i32x4_s(a.as_i32x4()))
+    transmute(simd_cast::<_, simd::f64x2>(simd_shuffle2::<
+        simd::i32x4,
+        simd::i32x2,
+    >(
+        a.as_i32x4(),
+        a.as_i32x4(),
+        [0, 1],
+    )))
 }

 /// Lane-wise conversion from integer to floating point.
 #[inline]
 // #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_u))] // FIXME wasmtime
 #[target_feature(enable = "simd128")]
 pub unsafe fn f64x2_convert_low_u32x4(a: v128) -> v128 {
-    transmute(llvm_f64x2_convert_low_i32x4_u(a.as_i32x4()))
+    transmute(simd_cast::<_, simd::f64x2>(simd_shuffle2::<
+        simd::u32x4,
+        simd::u32x2,
+    >(
+        a.as_u32x4(),
+        a.as_u32x4(),
+        [0, 1],
+    )))
 }

 /// Conversion of the two double-precision floating point lanes to two lower