Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -1541,6 +1541,32 @@ def : Pat<(v4i32 (int_wasm_extadd_pairwise_signed (v8i16 V128:$in))),
def : Pat<(v8i16 (int_wasm_extadd_pairwise_signed (v16i8 V128:$in))),
(extadd_pairwise_s_I16x8 V128:$in)>;

// Selects extadd_pairwise for the expanded form
//   add (extend_low (shuffle $vec, a-lanes)), (extend_low (shuffle $vec, b-lanes))
// where both shuffles read the same input vector $vec.
//
//   from_ty  - vector type of the shuffled input (and of each shuffle result).
//   to_ty    - vector type produced by the add.
//   suffix   - tail of the instruction record name, so the selected
//              instruction is extadd_pairwise_<sign>_<suffix>.
//   a0..a7   - first eight lane operands required of the first shuffle.
//   b0..b7   - first eight lane operands required of the second shuffle.
//
// The second shuffle input and the last eight lane operands are written as
// srcvalue placeholders, so the pattern places no constraint on them. One
// pattern is emitted per signedness ("s" and "u"), pairing extend_low_<sign>
// with the instruction of the same signedness.
multiclass ExtAddPairwiseShuffle<ValueType from_ty, ValueType to_ty, string suffix,
                                 int a0, int a1, int a2, int a3,
                                 int a4, int a5, int a6, int a7,
                                 int b0, int b1, int b2, int b3,
                                 int b4, int b5, int b6, int b7> {
  foreach sign = ["s", "u"] in {
    // Resolve the per-signedness extend node and target instruction once.
    defvar ext_node = !cast<SDNode>("extend_low_"#sign);
    defvar extadd_inst = !cast<Instruction>("extadd_pairwise_"#sign#"_"#suffix);

    def : Pat<
      (to_ty (add
        (ext_node (from_ty (wasm_shuffle
          (from_ty V128:$vec), (from_ty srcvalue),
          (i32 a0), (i32 a1), (i32 a2), (i32 a3),
          (i32 a4), (i32 a5), (i32 a6), (i32 a7),
          (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), (i32 srcvalue),
          (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), (i32 srcvalue)))),
        (ext_node (from_ty (wasm_shuffle
          (from_ty V128:$vec), (from_ty srcvalue),
          (i32 b0), (i32 b1), (i32 b2), (i32 b3),
          (i32 b4), (i32 b5), (i32 b6), (i32 b7),
          (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), (i32 srcvalue),
          (i32 srcvalue), (i32 srcvalue), (i32 srcvalue), (i32 srcvalue)))))),
      (extadd_inst V128:$vec)>;
  }
}

// The lane operands below are byte indices into the 16-byte input vector.
//
// v8i16 -> v4i32: each 16-bit lane spans two bytes, so the first row picks
// 16-bit lanes 0, 2, 4, 6 (bytes 0-1, 4-5, 8-9, 12-13) and the second row
// picks 16-bit lanes 1, 3, 5, 7 (bytes 2-3, 6-7, 10-11, 14-15).
defm : ExtAddPairwiseShuffle<v8i16, v4i32, "I32x4",
0, 1, 4, 5, 8, 9, 12, 13,
2, 3, 6, 7, 10, 11, 14, 15>;
// v16i8 -> v8i16: lanes are single bytes, so the first row is the even bytes
// and the second row is the odd bytes.
defm : ExtAddPairwiseShuffle<v16i8, v8i16, "I16x8",
0, 2, 4, 6, 8, 10, 12, 14,
1, 3, 5, 7, 9, 11, 13, 15>;

// f64x2 <-> f32x4 conversions
// Type profile with one result and one operand, both constrained to be
// vector types.
def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
// SelectionDAG node for WebAssemblyISD::DEMOTE_ZERO, typed by demote_t.
def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;
Expand Down
89 changes: 89 additions & 0 deletions llvm/test/CodeGen/WebAssembly/simd-extadd.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -O2 -mtriple=wasm32 -mattr=+simd128 | FileCheck %s

target triple = "wasm32-unknown-unknown"

; Test that adding the sign- or zero-extended even and odd lanes of the same vector (each selected by a shuffle) is lowered to a single extadd_pairwise instruction

; Signed i8 -> i16: sum of the sign-extended even and odd bytes of %v.
define <8 x i16> @test_extadd_pairwise_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: test_extadd_pairwise_i8x16_s:
; CHECK: .functype test_extadd_pairwise_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extadd_pairwise_i8x16_s
; CHECK-NEXT: # fallthrough-return
  ; Split %v into its even-indexed and odd-indexed bytes.
  %lanes.even = shufflevector <16 x i8> %v, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %lanes.odd = shufflevector <16 x i8> %v, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; Sign-extend both halves to 16 bits and add them lane by lane.
  %wide.even = sext <8 x i8> %lanes.even to <8 x i16>
  %wide.odd = sext <8 x i8> %lanes.odd to <8 x i16>
  %sum = add <8 x i16> %wide.even, %wide.odd
  ret <8 x i16> %sum
}

; Unsigned i8 -> i16: sum of the zero-extended even and odd bytes of %v.
define <8 x i16> @test_extadd_pairwise_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: test_extadd_pairwise_i8x16_u:
; CHECK: .functype test_extadd_pairwise_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extadd_pairwise_i8x16_u
; CHECK-NEXT: # fallthrough-return
  ; Split %v into its even-indexed and odd-indexed bytes.
  %lanes.even = shufflevector <16 x i8> %v, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %lanes.odd = shufflevector <16 x i8> %v, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; Zero-extend both halves to 16 bits and add them lane by lane.
  %wide.even = zext <8 x i8> %lanes.even to <8 x i16>
  %wide.odd = zext <8 x i8> %lanes.odd to <8 x i16>
  %sum = add <8 x i16> %wide.even, %wide.odd
  ret <8 x i16> %sum
}

; Signed i16 -> i32: sum of the sign-extended even and odd 16-bit lanes of %v.
define <4 x i32> @test_extadd_pairwise_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: test_extadd_pairwise_i16x8_s:
; CHECK: .functype test_extadd_pairwise_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extadd_pairwise_i16x8_s
; CHECK-NEXT: # fallthrough-return
  ; Split %v into its even-indexed and odd-indexed 16-bit lanes.
  %lanes.even = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %lanes.odd = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; Sign-extend both halves to 32 bits and add them lane by lane.
  %wide.even = sext <4 x i16> %lanes.even to <4 x i32>
  %wide.odd = sext <4 x i16> %lanes.odd to <4 x i32>
  %sum = add <4 x i32> %wide.even, %wide.odd
  ret <4 x i32> %sum
}

; Unsigned i16 -> i32: sum of the zero-extended even and odd 16-bit lanes of %v.
define <4 x i32> @test_extadd_pairwise_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: test_extadd_pairwise_i16x8_u:
; CHECK: .functype test_extadd_pairwise_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extadd_pairwise_i16x8_u
; CHECK-NEXT: # fallthrough-return
  ; Split %v into its even-indexed and odd-indexed 16-bit lanes.
  %lanes.even = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %lanes.odd = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; Zero-extend both halves to 32 bits and add them lane by lane.
  %wide.even = zext <4 x i16> %lanes.even to <4 x i32>
  %wide.odd = zext <4 x i16> %lanes.odd to <4 x i32>
  %sum = add <4 x i32> %wide.even, %wide.odd
  ret <4 x i32> %sum
}

; Negative test: the first shuffle mask (0, 3, 4, 6) does not select the even lanes, so the extadd_pairwise pattern must not match
define <4 x i32> @negative_test_extadd_pairwise_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: negative_test_extadd_pairwise_i16x8_u:
; CHECK: .functype negative_test_extadd_pairwise_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 6, 7, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: i32x4.extend_low_i16x8_u
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: i32x4.extend_low_i16x8_u
; CHECK-NEXT: i32x4.add
; CHECK-NEXT: # fallthrough-return
  ; The second mask element is lane 3 rather than lane 2, so this is not the
  ; even-lane selection.
  %lanes.mixed = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 0, i32 3, i32 4, i32 6>
  %lanes.odd = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; Zero-extend both halves to 32 bits and add them lane by lane.
  %wide.mixed = zext <4 x i16> %lanes.mixed to <4 x i32>
  %wide.odd = zext <4 x i16> %lanes.odd to <4 x i32>
  %sum = add <4 x i32> %wide.mixed, %wide.odd
  ret <4 x i32> %sum
}
Loading