-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[WebAssembly] [Codegen] Add patterns for relaxed dot #163266
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-webassembly Author: Jasmine Tang (badumbatish) ChangesThe pattern I added for For One current obstacles is I don't think there is any pattern to singly create a extmul pairwise from other instructions so the related to #55932 Full diff: https://github.com/llvm/llvm-project/pull/163266.diff 2 Files Affected:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 49af78bce68c3..34b8167ee9c07 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1711,6 +1711,26 @@ defm RELAXED_DOT :
"i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
"i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>;
+def : Pat<
+ (v8i16 (add
+ (wasm_shuffle
+ (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+ (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+ (i32 0), (i32 1), (i32 4), (i32 5),
+ (i32 8), (i32 9), (i32 12), (i32 13),
+ (i32 16), (i32 17), (i32 20), (i32 21),
+ (i32 24), (i32 25), (i32 28), (i32 29)),
+ (wasm_shuffle
+ (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+ (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+ (i32 2), (i32 3), (i32 6), (i32 7),
+ (i32 10), (i32 11), (i32 14), (i32 15),
+ (i32 18), (i32 19), (i32 22), (i32 23),
+ (i32 26), (i32 27), (i32 30), (i32 31)))
+ ),
+ (v8i16 (RELAXED_DOT v16i8:$lhs, v16i8:$rhs))
+>;
+
defm RELAXED_DOT_ADD :
RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc),
(outs), (ins),
@@ -1719,6 +1739,13 @@ defm RELAXED_DOT_ADD :
"i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
"i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>;
+def : Pat<
+ (v4i32 (add
+ (v4i32 (int_wasm_extadd_pairwise_signed
+ (v8i16 (int_wasm_relaxed_dot_i8x16_i7x16_signed v16i8:$lhs, v16i8:$rhs)))),
+ (v4i32 V128:$acc))),
+ (v4i32 (RELAXED_DOT_ADD v16i8:$lhs, v16i8:$rhs, (v4i32 V128:$acc)))
+ >;
//===----------------------------------------------------------------------===//
// Relaxed BFloat16 dot product
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll
new file mode 100644
index 0000000000000..9716cbe077080
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s
+
+target triple = "wasm32"
+; relaxed_dot stands for relaxed_dot_i8x16_i7x16_s, as in td
+; relaxed_dot_add stands for i32x4.relaxed_dot_i8x16_i7x16_add_s, as in td
+
+define <8 x i16> @relaxed_dot_sext_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_sext_1:
+; CHECK: .functype relaxed_dot_sext_1 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
+; CHECK-NEXT: return $pop0
+ %sext1 = sext <16 x i8> %a to <16 x i16>
+ %sext2 = sext <16 x i8> %b to <16 x i16>
+ %mul = mul <16 x i16> %sext1, %sext2
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %res = add <8 x i16> %shuffle1, %shuffle2
+ ret <8 x i16> %res
+}
+
+
+define <8 x i16> @relaxed_dot_sext_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_sext_2:
+; CHECK: .functype relaxed_dot_sext_2 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
+; CHECK-NEXT: return $pop0
+ %sext1 = sext <16 x i8> %a to <16 x i16>
+ %sext2 = sext <16 x i8> %b to <16 x i16>
+ %mul = mul <16 x i16> %sext1, %sext2
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %res = add <8 x i16> %shuffle2, %shuffle1
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @relaxed_dot_sext_self(<16 x i8> %v) {
+; CHECK-LABEL: relaxed_dot_sext_self:
+; CHECK: .functype relaxed_dot_sext_self (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $0
+; CHECK-NEXT: return $pop0
+ %sext = sext <16 x i8> %v to <16 x i16>
+ %mul = mul <16 x i16> %sext, %sext
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %res = add <8 x i16> %shuffle1, %shuffle2
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @relaxed_dot_add_from_relaxed_dot(<16 x i8> %a, <16 x i8> %b, <4 x i32> %c) {
+; CHECK-LABEL: relaxed_dot_add_from_relaxed_dot:
+; CHECK: .functype relaxed_dot_add_from_relaxed_dot (v128, v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32x4.relaxed_dot_i8x16_i7x16_add_s $push0=, $0, $1, $2
+; CHECK-NEXT: return $pop0
+ %relaxed_dot_call = call <8 x i16> @llvm.wasm.relaxed.dot.i8x16.i7x16.signed(<16 x i8> %a, <16 x i8> %b)
+ %sext = call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> %relaxed_dot_call)
+ %res = add <4 x i32> %sext, %c
+ ret <4 x i32> %res
+}
+
+; INFO: Negative test
+define <8 x i16> @relaxed_dot_zext(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_zext:
+; CHECK: .functype relaxed_dot_zext (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.extmul_low_i8x16_u $push6=, $0, $1
+; CHECK-NEXT: local.tee $push5=, $2=, $pop6
+; CHECK-NEXT: i16x8.extmul_high_i8x16_u $push4=, $0, $1
+; CHECK-NEXT: local.tee $push3=, $1=, $pop4
+; CHECK-NEXT: i8x16.shuffle $push1=, $pop5, $pop3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT: i8x16.shuffle $push0=, $2, $1, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
+; CHECK-NEXT: i16x8.add $push2=, $pop1, $pop0
+; CHECK-NEXT: return $pop2
+ %zext1 = zext <16 x i8> %a to <16 x i16>
+ %zext2 = zext <16 x i8> %b to <16 x i16>
+ %mul = mul <16 x i16> %zext1, %zext2
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %res = add <8 x i16> %shuffle1, %shuffle2
+ ret <8 x i16> %res
+
+}
+
+; INFO: Negative test
+define <8 x i16> @relaxed_dot_wrong_shuffle(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_wrong_shuffle:
+; CHECK: .functype relaxed_dot_wrong_shuffle (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.extmul_low_i8x16_s $push1=, $0, $1
+; CHECK-NEXT: i16x8.extmul_high_i8x16_s $push0=, $0, $1
+; CHECK-NEXT: i16x8.add $push2=, $pop1, $pop0
+; CHECK-NEXT: return $pop2
+ %sext1 = sext <16 x i8> %a to <16 x i16>
+ %sext2 = sext <16 x i8> %b to <16 x i16>
+ %mul = mul <16 x i16> %sext1, %sext2
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %res = add <8 x i16> %shuffle1, %shuffle2
+ ret <8 x i16> %res
+}
|
This reverts commit f0a0ade.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
thanks everyone, i've been waiting for buildkite/libcxx-ci for a while now, i'm not sure if there's a way to restart it or should i go ahead and bypass it for now? |
I don't think I'd wait for it. It doesn't cover running the tests on wasm, and in any case it's mostly focused on library compliance rather than codegen. (Which isn't to say that codegen changes can't break it, but the likelihood seems small enough that I don't think it's worth waiting arbitrary amounts of time for the job to start). |
The pattern I added for
relaxed dot
similar to normal dot @ #151775.For
relaxed dot add
, i noticed that in the proposal the portion of dot implementation is similar torelaxed dot
, so I think we can add a pattern where after we do relaxed dot and do extadd pairwise, we can dorelaxed dot add
.One current obstacles is I don't think there is any pattern to singly create a extadd pairwise from other instructions so the
relaxed dot add
pattern would not cover a wide range of instructions.related to #55932