-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[WebAssembly] extadd_pairwise for PartialReduce #157669
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WebAssembly] extadd_pairwise for PartialReduce #157669
Conversation
Avoid using extends, and adding the high and low half and use extadd_pairwise instead.
@llvm/pr-subscribers-backend-webassembly Author: Sam Parker (sparker-arm) ChangesAvoid using extends, and adding the high and low half and use extadd_pairwise instead. Full diff: https://github.com/llvm/llvm-project/pull/157669.diff 4 Files Affected:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index 378ef2c8f250e..1eae3586d16b8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -27,6 +27,7 @@ HANDLE_NODETYPE(WrapperREL)
HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
HANDLE_NODETYPE(DOT)
+HANDLE_NODETYPE(EXT_ADD_PAIRWISE_U)
HANDLE_NODETYPE(SHUFFLE)
HANDLE_NODETYPE(SWIZZLE)
HANDLE_NODETYPE(VEC_SHL)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 5a45134692865..7827d79697a94 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2183,13 +2183,8 @@ SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) {
SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
- SDValue LowLow = DAG.getNode(LowOpc, DL, MVT::v4i32, MulLow);
- SDValue LowHigh = DAG.getNode(LowOpc, DL, MVT::v4i32, MulHigh);
- SDValue HighLow = DAG.getNode(HighOpc, DL, MVT::v4i32, MulLow);
- SDValue HighHigh = DAG.getNode(HighOpc, DL, MVT::v4i32, MulHigh);
-
- SDValue AddLow = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowLow, HighLow);
- SDValue AddHigh = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowHigh, HighHigh);
+ SDValue AddLow = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, MVT::v4i32, MulLow);
+ SDValue AddHigh = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, MVT::v4i32, MulHigh);
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh);
return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index f06f8d5174e3e..3c26b453c4482 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1453,15 +1453,22 @@ if !ne(t1, t2) then
def : Pat<(t1.vt (bitconvert (t2.vt V128:$v))), (t1.vt V128:$v)>;
// Extended pairwise addition
+def extadd_pairwise_u : SDNode<"WebAssemblyISD::EXT_ADD_PAIRWISE_U", extend_t>;
+
defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed,
"extadd_pairwise_i8x16_s", 0x7c>;
-defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned,
+defm "" : SIMDConvert<I16x8, I8x16, extadd_pairwise_u,
"extadd_pairwise_i8x16_u", 0x7d>;
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
"extadd_pairwise_i16x8_s", 0x7e>;
-defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
+defm "" : SIMDConvert<I32x4, I16x8, extadd_pairwise_u,
"extadd_pairwise_i16x8_u", 0x7f>;
+def : Pat<(v4i32 (int_wasm_extadd_pairwise_unsigned (v8i16 V128:$in))),
+ (extadd_pairwise_u_I32x4 V128:$in)>;
+def : Pat<(v8i16 (int_wasm_extadd_pairwise_unsigned (v16i8 V128:$in))),
+ (extadd_pairwise_u_I16x8 V128:$in)>;
+
// f64x2 <-> f32x4 conversions
def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;
diff --git a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
index 0184e22a3b40d..04a2268db1755 100644
--- a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
+++ b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
@@ -191,13 +191,9 @@ define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture n
; MAX-BANDWIDTH: v128.load
; MAX-BANDWIDTH: v128.load
; MAX-BANDWIDTH: i16x8.extmul_low_i8x16_u
-; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
-; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u
-; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
; MAX-BANDWIDTH: i16x8.extmul_high_i8x16_u
-; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
-; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u
-; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
; MAX-BANDWIDTH: i32x4.add
; MAX-BANDWIDTH: i32x4.add
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/18833 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/23969 Here is the relevant piece of the build log for the reference
|
Avoid using extends, and adding the high and low half and use extadd_pairwise instead.