Skip to content

Commit 15a1d7e

Browse files
committed
[ARM] Switch order of creating VADDV and VMLAV.
It can be beneficial to try the larger VMLAV patterns before VADDV, in case both could match the same code.
1 parent 85d6045 commit 15a1d7e

File tree

3 files changed

+40
-44
lines changed

3 files changed

+40
-44
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16285,38 +16285,8 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
1628516285
SDValue(Node.getNode(), 1));
1628616286
};
1628716287

16288-
if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
16289-
return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
16290-
if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
16291-
return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
16292-
if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
16293-
return Create64bitNode(ARMISD::VADDLVs, {A});
16294-
if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
16295-
return Create64bitNode(ARMISD::VADDLVu, {A});
16296-
if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
16297-
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
16298-
DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
16299-
if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
16300-
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
16301-
DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
16302-
16303-
SDValue Mask;
16304-
if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
16305-
return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
16306-
if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
16307-
return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
16308-
if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
16309-
return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
16310-
if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
16311-
return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
16312-
if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
16313-
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
16314-
DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
16315-
if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
16316-
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
16317-
DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
16318-
1631916288
SDValue A, B;
16289+
SDValue Mask;
1632016290
if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
1632116291
return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
1632216292
if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
@@ -16353,6 +16323,36 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
1635316323
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
1635416324
DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
1635516325

16326+
if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
16327+
return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
16328+
if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
16329+
return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
16330+
if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
16331+
return Create64bitNode(ARMISD::VADDLVs, {A});
16332+
if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
16333+
return Create64bitNode(ARMISD::VADDLVu, {A});
16334+
if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
16335+
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
16336+
DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
16337+
if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
16338+
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
16339+
DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
16340+
16341+
if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
16342+
return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
16343+
if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
16344+
return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
16345+
if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
16346+
return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
16347+
if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
16348+
return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
16349+
if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
16350+
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
16351+
DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
16352+
if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
16353+
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
16354+
DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
16355+
1635616356
// Some complications. We can get a case where the two inputs of the mul are
1635716357
// the same, then the output sext will have been helpfully converted to a
1635816358
// zext. Turn it back.

llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -776,8 +776,7 @@ define arm_aapcs_vfpcc i64 @add_v4i8i16_v4i32_v4i64_zext(<4 x i8> %x, <4 x i16>
776776
; CHECK-NEXT: vmov.i32 q2, #0xff
777777
; CHECK-NEXT: vmovlb.u16 q1, q1
778778
; CHECK-NEXT: vand q0, q0, q2
779-
; CHECK-NEXT: vmul.i32 q0, q0, q1
780-
; CHECK-NEXT: vaddlv.u32 r0, r1, q0
779+
; CHECK-NEXT: vmlalv.u32 r0, r1, q0, q1
781780
; CHECK-NEXT: bx lr
782781
entry:
783782
%xx = zext <4 x i8> %x to <4 x i32>
@@ -794,8 +793,7 @@ define arm_aapcs_vfpcc i64 @add_v4i8i16_v4i32_v4i64_sext(<4 x i8> %x, <4 x i16>
794793
; CHECK-NEXT: vmovlb.s8 q0, q0
795794
; CHECK-NEXT: vmovlb.s16 q1, q1
796795
; CHECK-NEXT: vmovlb.s16 q0, q0
797-
; CHECK-NEXT: vmul.i32 q0, q0, q1
798-
; CHECK-NEXT: vaddlv.s32 r0, r1, q0
796+
; CHECK-NEXT: vmlalv.s32 r0, r1, q0, q1
799797
; CHECK-NEXT: bx lr
800798
entry:
801799
%xx = sext <4 x i8> %x to <4 x i32>

llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,10 +1530,9 @@ define arm_aapcs_vfpcc i64 @add_v4i8i16_v4i32_v4i64_zext(<4 x i8> %x, <4 x i16>
15301530
; CHECK-NEXT: vmov.i32 q3, #0xff
15311531
; CHECK-NEXT: vmovlb.u16 q1, q1
15321532
; CHECK-NEXT: vand q0, q0, q3
1533-
; CHECK-NEXT: vmul.i32 q0, q0, q1
1534-
; CHECK-NEXT: vand q1, q2, q3
1535-
; CHECK-NEXT: vpt.i32 eq, q1, zr
1536-
; CHECK-NEXT: vaddlvt.u32 r0, r1, q0
1533+
; CHECK-NEXT: vand q2, q2, q3
1534+
; CHECK-NEXT: vpt.i32 eq, q2, zr
1535+
; CHECK-NEXT: vmlalvt.u32 r0, r1, q0, q1
15371536
; CHECK-NEXT: bx lr
15381537
entry:
15391538
%c = icmp eq <4 x i8> %b, zeroinitializer
@@ -1550,13 +1549,12 @@ define arm_aapcs_vfpcc i64 @add_v4i8i16_v4i32_v4i64_sext(<4 x i8> %x, <4 x i16>
15501549
; CHECK-LABEL: add_v4i8i16_v4i32_v4i64_sext:
15511550
; CHECK: @ %bb.0: @ %entry
15521551
; CHECK-NEXT: vmovlb.s8 q0, q0
1552+
; CHECK-NEXT: vmov.i32 q3, #0xff
1553+
; CHECK-NEXT: vand q2, q2, q3
15531554
; CHECK-NEXT: vmovlb.s16 q1, q1
15541555
; CHECK-NEXT: vmovlb.s16 q0, q0
1555-
; CHECK-NEXT: vmul.i32 q0, q0, q1
1556-
; CHECK-NEXT: vmov.i32 q1, #0xff
1557-
; CHECK-NEXT: vand q1, q2, q1
1558-
; CHECK-NEXT: vpt.i32 eq, q1, zr
1559-
; CHECK-NEXT: vaddlvt.s32 r0, r1, q0
1556+
; CHECK-NEXT: vpt.i32 eq, q2, zr
1557+
; CHECK-NEXT: vmlalvt.s32 r0, r1, q0, q1
15601558
; CHECK-NEXT: bx lr
15611559
entry:
15621560
%c = icmp eq <4 x i8> %b, zeroinitializer

0 commit comments

Comments
 (0)