Skip to content

Commit 6dacdc3

Browse files
authored
[WebAssembly] extadd_pairwise for PartialReduce (#157669)
Instead of extending the low and high halves of each product separately and then adding them, use extadd_pairwise.
1 parent 660441a commit 6dacdc3

File tree

4 files changed

+16
-15
lines changed

4 files changed

+16
-15
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISD.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ HANDLE_NODETYPE(WrapperREL)
2727
HANDLE_NODETYPE(BR_IF)
2828
HANDLE_NODETYPE(BR_TABLE)
2929
HANDLE_NODETYPE(DOT)
30+
HANDLE_NODETYPE(EXT_ADD_PAIRWISE_U)
3031
HANDLE_NODETYPE(SHUFFLE)
3132
HANDLE_NODETYPE(SWIZZLE)
3233
HANDLE_NODETYPE(VEC_SHL)

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2183,13 +2183,10 @@ SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) {
21832183
SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
21842184
SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
21852185

2186-
SDValue LowLow = DAG.getNode(LowOpc, DL, MVT::v4i32, MulLow);
2187-
SDValue LowHigh = DAG.getNode(LowOpc, DL, MVT::v4i32, MulHigh);
2188-
SDValue HighLow = DAG.getNode(HighOpc, DL, MVT::v4i32, MulLow);
2189-
SDValue HighHigh = DAG.getNode(HighOpc, DL, MVT::v4i32, MulHigh);
2190-
2191-
SDValue AddLow = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowLow, HighLow);
2192-
SDValue AddHigh = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowHigh, HighHigh);
2186+
SDValue AddLow =
2187+
DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL, MVT::v4i32, MulLow);
2188+
SDValue AddHigh = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL,
2189+
MVT::v4i32, MulHigh);
21932190
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh);
21942191
return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
21952192
}

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,15 +1453,22 @@ if !ne(t1, t2) then
14531453
def : Pat<(t1.vt (bitconvert (t2.vt V128:$v))), (t1.vt V128:$v)>;
14541454

14551455
// Extended pairwise addition
1456+
def extadd_pairwise_u : SDNode<"WebAssemblyISD::EXT_ADD_PAIRWISE_U", extend_t>;
1457+
14561458
defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed,
14571459
"extadd_pairwise_i8x16_s", 0x7c>;
1458-
defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned,
1460+
defm "" : SIMDConvert<I16x8, I8x16, extadd_pairwise_u,
14591461
"extadd_pairwise_i8x16_u", 0x7d>;
14601462
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
14611463
"extadd_pairwise_i16x8_s", 0x7e>;
1462-
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
1464+
defm "" : SIMDConvert<I32x4, I16x8, extadd_pairwise_u,
14631465
"extadd_pairwise_i16x8_u", 0x7f>;
14641466

1467+
def : Pat<(v4i32 (int_wasm_extadd_pairwise_unsigned (v8i16 V128:$in))),
1468+
(extadd_pairwise_u_I32x4 V128:$in)>;
1469+
def : Pat<(v8i16 (int_wasm_extadd_pairwise_unsigned (v16i8 V128:$in))),
1470+
(extadd_pairwise_u_I16x8 V128:$in)>;
1471+
14651472
// f64x2 <-> f32x4 conversions
14661473
def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
14671474
def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;

llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -191,13 +191,9 @@ define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture n
191191
; MAX-BANDWIDTH: v128.load
192192
; MAX-BANDWIDTH: v128.load
193193
; MAX-BANDWIDTH: i16x8.extmul_low_i8x16_u
194-
; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
195-
; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u
196-
; MAX-BANDWIDTH: i32x4.add
194+
; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
197195
; MAX-BANDWIDTH: i16x8.extmul_high_i8x16_u
198-
; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
199-
; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u
200-
; MAX-BANDWIDTH: i32x4.add
196+
; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
201197
; MAX-BANDWIDTH: i32x4.add
202198
; MAX-BANDWIDTH: i32x4.add
203199

0 commit comments

Comments
 (0)