Skip to content

Commit

Permalink
[DAGCombiner][AArch64][VE] Teach BuildUDIV/SDIV to use 2x mul when mu…
Browse files Browse the repository at this point in the history
…lh/mul_lohi are not available.

Correct the legality of i32 mul_lohi on AArch64.

Previously, AArch64 incorrectly reported i32 mul_lohi as Legal.
This allowed BuildUDIV/SDIV to use it. A later DAGCombine run would
replace it with MULHS/MULHU because only the high half was used.
This conversion does not check the legality of MULHS/MULHU, under
the assumption that LegalizeDAG can turn them back into MUL_LOHI later.

After they were converted to MULHS/MULHU, DAGCombine ran again and saw
that these operations aren't supported but an i64 MUL is, so they got
converted to that plus a shift. Without that combine, LegalizeDAG would
convert them back to MUL_LOHI and isel would fail to find a pattern.

This patch teaches BuildUDIV/SDIV to create the wide mul and shift
so that we can report the correct operation legality on AArch64. It
also enables div by constant folding for more cases on VE.

I don't know if VE wants this div by constant optimization or not. If they
don't want it, they can use the isIntDivCheap hook to disable it.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D150333
  • Loading branch information
topperc committed May 12, 2023
1 parent 2da2995 commit a983ef2
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 19 deletions.
26 changes: 26 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5990,6 +5990,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
// If MUL is legal on a type twice as wide, widen and use a mul plus a shift.
if (!VT.isVector()) {
unsigned Size = VT.getSizeInBits();
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
if (isOperationLegal(ISD::MUL, WideVT)) {
X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
DAG.getShiftAmountConstant(EltBits, WideVT, dl));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
}
}
return SDValue();
};

Expand Down Expand Up @@ -6163,6 +6176,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
// If MUL is legal on a type twice as wide, widen and use a mul plus a shift.
if (!VT.isVector()) {
unsigned Size = VT.getSizeInBits();
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
if (isOperationLegal(ISD::MUL, WideVT)) {
X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
DAG.getShiftAmountConstant(EltBits, WideVT, dl));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
}
}
return SDValue(); // No mulhu or equivalent
};

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::i32, Expand);

// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);

Expand Down
12 changes: 9 additions & 3 deletions llvm/test/CodeGen/VE/Scalar/div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,11 @@ define i64 @divi64ri(i64 %a, i64 %b) {
define signext i32 @divi32ri(i32 signext %a, i32 signext %b) {
; CHECK-LABEL: divi32ri:
; CHECK: # %bb.0:
; CHECK-NEXT: divs.w.sx %s0, %s0, (62)0
; CHECK-NEXT: lea %s1, 1431655766
; CHECK-NEXT: muls.l %s0, %s0, %s1
; CHECK-NEXT: srl %s1, %s0, 63
; CHECK-NEXT: srl %s0, %s0, 32
; CHECK-NEXT: adds.w.sx %s0, %s0, %s1
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: b.l.t (, %s10)
%r = sdiv i32 %a, 3
Expand Down Expand Up @@ -185,8 +189,10 @@ define i64 @divu64ri(i64 %a, i64 %b) {
define zeroext i32 @divu32ri(i32 zeroext %a, i32 zeroext %b) {
; CHECK-LABEL: divu32ri:
; CHECK: # %bb.0:
; CHECK-NEXT: divu.w %s0, %s0, (62)0
; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
; CHECK-NEXT: lea %s1, -1431655765
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: muls.l %s0, %s0, %s1
; CHECK-NEXT: srl %s0, %s0, 33
; CHECK-NEXT: b.l.t (, %s10)
%r = udiv i32 %a, 3
ret i32 %r
Expand Down
11 changes: 9 additions & 2 deletions llvm/test/CodeGen/VE/Scalar/rem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,11 @@ define i64 @remi64ri(i64 %a) {
define signext i32 @remi32ri(i32 signext %a) {
; CHECK-LABEL: remi32ri:
; CHECK: # %bb.0:
; CHECK-NEXT: divs.w.sx %s1, %s0, (62)0
; CHECK-NEXT: lea %s1, 1431655766
; CHECK-NEXT: muls.l %s1, %s0, %s1
; CHECK-NEXT: srl %s2, %s1, 63
; CHECK-NEXT: srl %s1, %s1, 32
; CHECK-NEXT: adds.w.sx %s1, %s1, %s2
; CHECK-NEXT: muls.w.sx %s1, 3, %s1
; CHECK-NEXT: subs.w.sx %s0, %s0, %s1
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
Expand Down Expand Up @@ -205,7 +209,10 @@ define i64 @remu64ri(i64 %a) {
define zeroext i32 @remu32ri(i32 zeroext %a) {
; CHECK-LABEL: remu32ri:
; CHECK: # %bb.0:
; CHECK-NEXT: divu.w %s1, %s0, (62)0
; CHECK-NEXT: lea %s1, -1431655765
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: muls.l %s1, %s0, %s1
; CHECK-NEXT: srl %s1, %s1, 33
; CHECK-NEXT: muls.w.sx %s1, 3, %s1
; CHECK-NEXT: subs.w.sx %s0, %s0, %s1
; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
Expand Down
38 changes: 24 additions & 14 deletions llvm/test/CodeGen/VE/Vector/vec_divrem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,18 @@ define <4 x i8> @udiv_by_minus_one(<4 x i8> %x) {
; CHECK-LABEL: udiv_by_minus_one:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (56)0
; CHECK-NEXT: lea %s4, 16843010
; CHECK-NEXT: muls.l %s0, %s0, %s4
; CHECK-NEXT: srl %s0, %s0, 32
; CHECK-NEXT: and %s1, %s1, (56)0
; CHECK-NEXT: muls.l %s1, %s1, %s4
; CHECK-NEXT: srl %s1, %s1, 32
; CHECK-NEXT: and %s2, %s2, (56)0
; CHECK-NEXT: muls.l %s2, %s2, %s4
; CHECK-NEXT: srl %s2, %s2, 32
; CHECK-NEXT: and %s3, %s3, (56)0
; CHECK-NEXT: divu.w %s3, %s3, (56)0
; CHECK-NEXT: divu.w %s2, %s2, (56)0
; CHECK-NEXT: divu.w %s1, %s1, (56)0
; CHECK-NEXT: divu.w %s0, %s0, (56)0
; CHECK-NEXT: muls.l %s3, %s3, %s4
; CHECK-NEXT: srl %s3, %s3, 32
; CHECK-NEXT: b.l.t (, %s10)
%r = udiv <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
ret <4 x i8> %r
Expand All @@ -27,16 +32,21 @@ define <4 x i8> @urem_by_minus_one(<4 x i8> %x) {
; CHECK-NEXT: and %s1, %s1, (56)0
; CHECK-NEXT: and %s2, %s2, (56)0
; CHECK-NEXT: and %s3, %s3, (56)0
; CHECK-NEXT: divu.w %s4, %s3, (56)0
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
; CHECK-NEXT: subs.w.sx %s3, %s3, %s4
; CHECK-NEXT: divu.w %s4, %s2, (56)0
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
; CHECK-NEXT: subs.w.sx %s2, %s2, %s4
; CHECK-NEXT: divu.w %s4, %s1, (56)0
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
; CHECK-NEXT: subs.w.sx %s1, %s1, %s4
; CHECK-NEXT: divu.w %s4, %s0, (56)0
; CHECK-NEXT: lea %s4, 16843010
; CHECK-NEXT: muls.l %s5, %s3, %s4
; CHECK-NEXT: srl %s5, %s5, 32
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
; CHECK-NEXT: subs.w.sx %s3, %s3, %s5
; CHECK-NEXT: muls.l %s5, %s2, %s4
; CHECK-NEXT: srl %s5, %s5, 32
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
; CHECK-NEXT: subs.w.sx %s2, %s2, %s5
; CHECK-NEXT: muls.l %s5, %s1, %s4
; CHECK-NEXT: srl %s5, %s5, 32
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
; CHECK-NEXT: subs.w.sx %s1, %s1, %s5
; CHECK-NEXT: muls.l %s4, %s0, %s4
; CHECK-NEXT: srl %s4, %s4, 32
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
; CHECK-NEXT: subs.w.sx %s0, %s0, %s4
; CHECK-NEXT: b.l.t (, %s10)
Expand Down

0 comments on commit a983ef2

Please sign in to comment.