diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index 378ef2c8f250e..1eae3586d16b8 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -27,6 +27,7 @@ HANDLE_NODETYPE(WrapperREL) HANDLE_NODETYPE(BR_IF) HANDLE_NODETYPE(BR_TABLE) HANDLE_NODETYPE(DOT) +HANDLE_NODETYPE(EXT_ADD_PAIRWISE_U) HANDLE_NODETYPE(SHUFFLE) HANDLE_NODETYPE(SWIZZLE) HANDLE_NODETYPE(VEC_SHL) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 5a45134692865..fe100dab427ef 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -2183,13 +2183,10 @@ SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) { SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS); SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS); - SDValue LowLow = DAG.getNode(LowOpc, DL, MVT::v4i32, MulLow); - SDValue LowHigh = DAG.getNode(LowOpc, DL, MVT::v4i32, MulHigh); - SDValue HighLow = DAG.getNode(HighOpc, DL, MVT::v4i32, MulLow); - SDValue HighHigh = DAG.getNode(HighOpc, DL, MVT::v4i32, MulHigh); - - SDValue AddLow = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowLow, HighLow); - SDValue AddHigh = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowHigh, HighHigh); + SDValue AddLow = + DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, MVT::v4i32, MulLow); + SDValue AddHigh = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, + MVT::v4i32, MulHigh); SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh); return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index f06f8d5174e3e..3c26b453c4482 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1453,15 +1453,22 @@ if !ne(t1, t2) then def : Pat<(t1.vt (bitconvert (t2.vt V128:$v))), (t1.vt V128:$v)>; // Extended pairwise addition +def extadd_pairwise_u : SDNode<"WebAssemblyISD::EXT_ADD_PAIRWISE_U", extend_t>; + defm "" : SIMDConvert; -defm "" : SIMDConvert; defm "" : SIMDConvert; -defm "" : SIMDConvert; +def : Pat<(v4i32 (int_wasm_extadd_pairwise_unsigned (v8i16 V128:$in))), + (extadd_pairwise_u_I32x4 V128:$in)>; +def : Pat<(v8i16 (int_wasm_extadd_pairwise_unsigned (v16i8 V128:$in))), + (extadd_pairwise_u_I16x8 V128:$in)>; + // f64x2 <-> f32x4 conversions def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>; diff --git a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll index 0184e22a3b40d..04a2268db1755 100644 --- a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll +++ b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll @@ -191,13 +191,9 @@ define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture n ; MAX-BANDWIDTH: v128.load ; MAX-BANDWIDTH: v128.load ; MAX-BANDWIDTH: i16x8.extmul_low_i8x16_u -; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u -; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u -; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u ; MAX-BANDWIDTH: i16x8.extmul_high_i8x16_u -; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u -; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u -; MAX-BANDWIDTH: i32x4.add +; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u ; MAX-BANDWIDTH: i32x4.add ; MAX-BANDWIDTH: i32x4.add