From 50b6b7ad62ddd094e45ac10d2631a0d625745d70 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 19 Apr 2021 10:00:59 -0700 Subject: [PATCH] Another round of wasm SIMD updates This round is dependent on https://github.com/rust-lang/llvm-project/pull/101 landing first in rust-lang/rust and won't pass CI until that does. That PR, however, will also break wasm CI because it's changing how the wasm target works. My goal here is to open this early to get it out there so that when that PR lands in rust-lang/rust and CI breaks in stdarch then this can be merged to make CI green again. The changes here are mostly around the codegen for various intrinsics. Some wasm-specific intrinsics have been removed in favor of more general LLVM intrinsics, and other intrinsics have been removed in favor of pattern-matching codegen. The only new instruction supported as part of this change is `v128.any_true`. This leaves only one instruction unsupported in LLVM which is `i64x2.abs`. I think the codegen for the instruction is correct in stdsimd, though, and LLVM just needs to update with a pattern-match to actually emit the opcode. That'll happen in a future LLVM update. 
--- crates/core_arch/src/wasm32/simd128.rs | 88 ++++++++++++++++---------- 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs index 13812fb4fc..e057e403a6 100644 --- a/crates/core_arch/src/wasm32/simd128.rs +++ b/crates/core_arch/src/wasm32/simd128.rs @@ -212,14 +212,6 @@ extern "C" { fn llvm_i64x2_all_true(x: simd::i64x2) -> i32; #[link_name = "llvm.wasm.bitmask.v2i64"] fn llvm_bitmask_i64x2(a: simd::i64x2) -> i32; - #[link_name = "llvm.wasm.extend.low.signed"] - fn llvm_i64x2_extend_low_i32x4_s(a: simd::i32x4) -> simd::i64x2; - #[link_name = "llvm.wasm.extend.high.signed"] - fn llvm_i64x2_extend_high_i32x4_s(a: simd::i32x4) -> simd::i64x2; - #[link_name = "llvm.wasm.extend.low.unsigned"] - fn llvm_i64x2_extend_low_i32x4_u(a: simd::i32x4) -> simd::i64x2; - #[link_name = "llvm.wasm.extend.high.unsigned"] - fn llvm_i64x2_extend_high_i32x4_u(a: simd::i32x4) -> simd::i64x2; #[link_name = "llvm.wasm.extmul.low.signed.v2i64"] fn llvm_i64x2_extmul_low_i32x4_s(a: simd::i32x4, b: simd::i32x4) -> simd::i64x2; #[link_name = "llvm.wasm.extmul.high.signed.v2i64"] @@ -229,13 +221,13 @@ extern "C" { #[link_name = "llvm.wasm.extmul.high.unsigned.v2i64"] fn llvm_i64x2_extmul_high_i32x4_u(a: simd::i32x4, b: simd::i32x4) -> simd::i64x2; - #[link_name = "llvm.wasm.ceil.v4f32"] + #[link_name = "llvm.ceil.v4f32"] fn llvm_f32x4_ceil(x: simd::f32x4) -> simd::f32x4; - #[link_name = "llvm.wasm.floor.v4f32"] + #[link_name = "llvm.floor.v4f32"] fn llvm_f32x4_floor(x: simd::f32x4) -> simd::f32x4; - #[link_name = "llvm.wasm.trunc.v4f32"] + #[link_name = "llvm.trunc.v4f32"] fn llvm_f32x4_trunc(x: simd::f32x4) -> simd::f32x4; - #[link_name = "llvm.wasm.nearest.v4f32"] + #[link_name = "llvm.nearbyint.v4f32"] fn llvm_f32x4_nearest(x: simd::f32x4) -> simd::f32x4; #[link_name = "llvm.fabs.v4f32"] fn llvm_f32x4_abs(x: simd::f32x4) -> simd::f32x4; @@ -250,13 +242,13 @@ extern "C" { #[link_name = 
"llvm.wasm.pmax.v4f32"] fn llvm_f32x4_pmax(x: simd::f32x4, y: simd::f32x4) -> simd::f32x4; - #[link_name = "llvm.wasm.ceil.v2f64"] + #[link_name = "llvm.ceil.v2f64"] fn llvm_f64x2_ceil(x: simd::f64x2) -> simd::f64x2; - #[link_name = "llvm.wasm.floor.v2f64"] + #[link_name = "llvm.floor.v2f64"] fn llvm_f64x2_floor(x: simd::f64x2) -> simd::f64x2; - #[link_name = "llvm.wasm.trunc.v2f64"] + #[link_name = "llvm.trunc.v2f64"] fn llvm_f64x2_trunc(x: simd::f64x2) -> simd::f64x2; - #[link_name = "llvm.wasm.nearest.v2f64"] + #[link_name = "llvm.nearbyint.v2f64"] fn llvm_f64x2_nearest(x: simd::f64x2) -> simd::f64x2; #[link_name = "llvm.fabs.v2f64"] fn llvm_f64x2_abs(x: simd::f64x2) -> simd::f64x2; @@ -271,18 +263,14 @@ extern "C" { #[link_name = "llvm.wasm.pmax.v2f64"] fn llvm_f64x2_pmax(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2; - #[link_name = "llvm.wasm.trunc.saturate.signed.v4i32.v4f32"] + #[link_name = "llvm.fptosi.sat.v4i32.v4f32"] fn llvm_i32x4_trunc_sat_f32x4_s(x: simd::f32x4) -> simd::i32x4; - #[link_name = "llvm.wasm.trunc.saturate.unsigned.v4i32.v4f32"] + #[link_name = "llvm.fptoui.sat.v4i32.v4f32"] fn llvm_i32x4_trunc_sat_f32x4_u(x: simd::f32x4) -> simd::i32x4; - #[link_name = "llvm.wasm.convert.low.signed"] - fn llvm_f64x2_convert_low_i32x4_s(x: simd::i32x4) -> simd::f64x2; - #[link_name = "llvm.wasm.convert.low.unsigned"] - fn llvm_f64x2_convert_low_i32x4_u(x: simd::i32x4) -> simd::f64x2; - #[link_name = "llvm.wasm.trunc.sat.zero.signed"] - fn llvm_i32x4_trunc_sat_f64x2_s_zero(x: simd::f64x2) -> simd::i32x4; - #[link_name = "llvm.wasm.trunc.sat.zero.unsigned"] - fn llvm_i32x4_trunc_sat_f64x2_u_zero(x: simd::f64x2) -> simd::i32x4; + #[link_name = "llvm.fptosi.sat.v2i32.v2f64"] + fn llvm_i32x2_trunc_sat_f64x2_s(x: simd::f64x2) -> simd::i32x2; + #[link_name = "llvm.fptoui.sat.v2i32.v2f64"] + fn llvm_i32x2_trunc_sat_f64x2_u(x: simd::f64x2) -> simd::i32x2; #[link_name = "llvm.wasm.demote.zero"] fn llvm_f32x4_demote_f64x2_zero(x: simd::f64x2) -> simd::f32x4; 
#[link_name = "llvm.wasm.promote.low"] @@ -1836,7 +1824,7 @@ pub unsafe fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 { /// Returns true if any lane is nonzero or false if all lanes are zero. #[inline] -// #[cfg_attr(test, assert_instr(v128.any_true))] // FIXME llvm +#[cfg_attr(test, assert_instr(v128.any_true))] #[target_feature(enable = "simd128")] pub unsafe fn v128_any_true(a: v128) -> bool { llvm_any_true_i8x16(a.as_i8x16()) != 0 @@ -2688,7 +2676,9 @@ pub unsafe fn i64x2_bitmask(a: v128) -> i32 { // #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extend_low_i32x4(a: v128) -> v128 { - transmute(llvm_i64x2_extend_low_i32x4_s(a.as_i32x4())) + transmute(simd_cast::<_, simd::i64x2>( + simd_shuffle2::<_, simd::i32x2>(a.as_i32x4(), a.as_i32x4(), [0, 1]), + )) } /// Converts high half of the smaller lane vector to a larger lane @@ -2697,7 +2687,9 @@ pub unsafe fn i64x2_extend_low_i32x4(a: v128) -> v128 { // #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extend_high_i32x4(a: v128) -> v128 { - transmute(llvm_i64x2_extend_high_i32x4_s(a.as_i32x4())) + transmute(simd_cast::<_, simd::i64x2>( + simd_shuffle2::<_, simd::i32x2>(a.as_i32x4(), a.as_i32x4(), [2, 3]), + )) } /// Converts low half of the smaller lane vector to a larger lane @@ -2706,7 +2698,9 @@ pub unsafe fn i64x2_extend_high_i32x4(a: v128) -> v128 { // #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extend_low_u32x4(a: v128) -> v128 { - transmute(llvm_i64x2_extend_low_i32x4_u(a.as_i32x4())) + transmute(simd_cast::<_, simd::i64x2>( + simd_shuffle2::<_, simd::u32x2>(a.as_u32x4(), a.as_u32x4(), [0, 1]), + )) } /// Converts high half of the smaller lane vector to a larger lane @@ -2715,7 +2709,9 @@ pub unsafe fn i64x2_extend_low_u32x4(a: v128) -> v128 
{ // #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extend_high_u32x4(a: v128) -> v128 { - transmute(llvm_i64x2_extend_low_i32x4_u(a.as_i32x4())) + transmute(simd_cast::<_, simd::i64x2>( + simd_shuffle2::<_, simd::u32x2>(a.as_u32x4(), a.as_u32x4(), [2, 3]), + )) } /// Shifts each lane to the left by the specified number of bits. @@ -3137,7 +3133,11 @@ pub unsafe fn f32x4_convert_u32x4(a: v128) -> v128 { // #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_s_zero))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 { - transmute(llvm_i32x4_trunc_sat_f64x2_s_zero(a.as_f64x2())) + transmute(simd_shuffle4::( + llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()), + simd::i32x2::splat(0), + [0, 1, 2, 3], + )) } /// Saturating conversion of the two double-precision floating point lanes to @@ -3152,7 +3152,11 @@ pub unsafe fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 { // #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_u_zero))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 { - transmute(llvm_i32x4_trunc_sat_f64x2_u_zero(a.as_f64x2())) + transmute(simd_shuffle4::( + llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()), + simd::i32x2::splat(0), + [0, 1, 2, 3], + )) } /// Lane-wise conversion from integer to floating point. @@ -3160,7 +3164,14 @@ pub unsafe fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 { #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_s))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_convert_low_i32x4(a: v128) -> v128 { - transmute(llvm_f64x2_convert_low_i32x4_s(a.as_i32x4())) + transmute(simd_cast::<_, simd::f64x2>(simd_shuffle2::< + simd::i32x4, + simd::i32x2, + >( + a.as_i32x4(), + a.as_i32x4(), + [0, 1], + ))) } /// Lane-wise conversion from integer to floating point. 
@@ -3168,7 +3179,14 @@ pub unsafe fn f64x2_convert_low_i32x4(a: v128) -> v128 { // #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn f64x2_convert_low_u32x4(a: v128) -> v128 { - transmute(llvm_f64x2_convert_low_i32x4_u(a.as_i32x4())) + transmute(simd_cast::<_, simd::f64x2>(simd_shuffle2::< + simd::u32x4, + simd::u32x2, + >( + a.as_u32x4(), + a.as_u32x4(), + [0, 1], + ))) } /// Conversion of the two double-precision floating point lanes to two lower