diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 14097d7b40a9c..0bdddcffd723d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1541,6 +1541,32 @@ def : Pat<(v4i32 (int_wasm_extadd_pairwise_signed (v8i16 V128:$in))), def : Pat<(v8i16 (int_wasm_extadd_pairwise_signed (v16i8 V128:$in))), (extadd_pairwise_s_I16x8 V128:$in)>; +multiclass ExtAddPairwiseShuffle { /* For each sign suffix (s, u): match an add of two extend_low_<sign> nodes whose operands are wasm_shuffle nodes reading the same vector $vec, with leading shuffle indices a0-a7 and b0-b7 respectively (the second shuffle input and the trailing eight indices are srcvalue), and select extadd_pairwise_<sign>_<suffix> on $vec. */ + foreach sign = ["s", "u"] in { + def : Pat<(to_ty (add + (!cast("extend_low_"#sign) (from_ty (wasm_shuffle (from_ty V128:$vec), (from_ty srcvalue), + (i32 a0), (i32 a1), (i32 a2), (i32 a3), + (i32 a4), (i32 a5), (i32 a6), (i32 a7), + (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), + (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), (i32 srcvalue)))), + (!cast("extend_low_"#sign) (from_ty (wasm_shuffle (from_ty V128:$vec), (from_ty srcvalue), + (i32 b0), (i32 b1), (i32 b2), (i32 b3), + (i32 b4), (i32 b5), (i32 b6), (i32 b7), + (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), + (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), (i32 srcvalue)))))), + (!cast("extadd_pairwise_"#sign#"_"#suffix) V128:$vec)>; + } +} + +defm : ExtAddPairwiseShuffle; +defm : ExtAddPairwiseShuffle; + // f64x2 <-> f32x4 conversions def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>; diff --git a/llvm/test/CodeGen/WebAssembly/simd-extadd.ll b/llvm/test/CodeGen/WebAssembly/simd-extadd.ll new file mode 100644 index 0000000000000..dfc47a6abf03a --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-extadd.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -O2 -mtriple=wasm32 -mattr=+simd128 | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +; Test that the sum of two extended shuffles of the
same input vector is selected as a single extadd_pairwise instruction + +define <8 x i16> @test_extadd_pairwise_i8x16_s(<16 x i8> %v) { +; CHECK-LABEL: test_extadd_pairwise_i8x16_s: +; CHECK: .functype test_extadd_pairwise_i8x16_s (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.extadd_pairwise_i8x16_s +; CHECK-NEXT: # fallthrough-return + %even = shufflevector <16 x i8> %v, <16 x i8> poison, <8 x i32> + %odd = shufflevector <16 x i8> %v, <16 x i8> poison, <8 x i32> + %even_ext = sext <8 x i8> %even to <8 x i16> + %odd_ext = sext <8 x i8> %odd to <8 x i16> + %result = add <8 x i16> %even_ext, %odd_ext + ret <8 x i16> %result +} + +define <8 x i16> @test_extadd_pairwise_i8x16_u(<16 x i8> %v) { +; CHECK-LABEL: test_extadd_pairwise_i8x16_u: +; CHECK: .functype test_extadd_pairwise_i8x16_u (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.extadd_pairwise_i8x16_u +; CHECK-NEXT: # fallthrough-return + %even = shufflevector <16 x i8> %v, <16 x i8> poison, <8 x i32> + %odd = shufflevector <16 x i8> %v, <16 x i8> poison, <8 x i32> + %even_ext = zext <8 x i8> %even to <8 x i16> + %odd_ext = zext <8 x i8> %odd to <8 x i16> + %result = add <8 x i16> %even_ext, %odd_ext + ret <8 x i16> %result +} + +define <4 x i32> @test_extadd_pairwise_i16x8_s(<8 x i16> %v) { +; CHECK-LABEL: test_extadd_pairwise_i16x8_s: +; CHECK: .functype test_extadd_pairwise_i16x8_s (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.extadd_pairwise_i16x8_s +; CHECK-NEXT: # fallthrough-return + %even = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> + %odd = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> + %even_ext = sext <4 x i16> %even to <4 x i32> + %odd_ext = sext <4 x i16> %odd to <4 x i32> + %result = add <4 x i32> %even_ext, %odd_ext + ret <4 x i32> %result +} + +define <4 x i32> @test_extadd_pairwise_i16x8_u(<8 x i16> %v) { +; CHECK-LABEL: test_extadd_pairwise_i16x8_u: +; 
CHECK: .functype test_extadd_pairwise_i16x8_u (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.extadd_pairwise_i16x8_u +; CHECK-NEXT: # fallthrough-return + %even = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> + %odd = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> + %even_ext = zext <4 x i16> %even to <4 x i32> + %odd_ext = zext <4 x i16> %odd to <4 x i32> + %result = add <4 x i32> %even_ext, %odd_ext + ret <4 x i32> %result +} + +; Negative test: the shuffle masks do not match the extadd_pairwise pattern, so the shuffles, extends, and add are emitted separately +define <4 x i32> @negative_test_extadd_pairwise_i16x8_u(<8 x i16> %v) { +; CHECK-LABEL: negative_test_extadd_pairwise_i16x8_u: +; CHECK: .functype negative_test_extadd_pairwise_i16x8_u (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i8x16.shuffle 0, 1, 6, 7, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK-NEXT: i32x4.extend_low_i16x8_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK-NEXT: i32x4.extend_low_i16x8_u +; CHECK-NEXT: i32x4.add +; CHECK-NEXT: # fallthrough-return + %even = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> + %odd = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> + %even_ext = zext <4 x i16> %even to <4 x i32> + %odd_ext = zext <4 x i16> %odd to <4 x i32> + %result = add <4 x i32> %even_ext, %odd_ext + ret <4 x i32> %result +}