-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CombinerHelper]: Use undef for handling divisors of one #91037
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel

Author: AtariDreams (AtariDreams)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/91037.diff

3 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 22eb4a3e0d7cb..644f56b6bf276 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5099,6 +5099,8 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
auto &MIB = Builder;
bool UseNPQ = false;
+ bool UsePreShift = false;
+ bool UsePostShift = false;
SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](const Constant *C) {
@@ -5111,27 +5113,30 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
// Magic algorithm doesn't work for division by 1. We need to emit a select
// at the end.
- // TODO: Use undef values for divisor of 1.
- if (!Divisor.isOne()) {
-
- // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
- // in the dividend exceeds the leading zeros for the divisor.
- UnsignedDivisionByConstantInfo magics =
- UnsignedDivisionByConstantInfo::get(
- Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
-
- Magic = std::move(magics.Magic);
-
- assert(magics.PreShift < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- assert(magics.PostShift < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
- PreShift = magics.PreShift;
- PostShift = magics.PostShift;
- SelNPQ = magics.IsAdd;
+ if (Divisor.isOne()) {
+ PreShifts.push_back(MIB.buildUndef(ScalarShiftAmtTy).getReg(0));
+ MagicFactors.push_back(MIB.buildUndef(ScalarTy).getReg(0));
+ NPQFactors.push_back(MIB.buildUndef(ScalarTy).getReg(0));
+ PostShifts.push_back(MIB.buildUndef(ScalarShiftAmtTy).getReg(0));
+ return true;
}
+ // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
+ // in the dividend exceeds the leading zeros for the divisor.
+ UnsignedDivisionByConstantInfo magics = UnsignedDivisionByConstantInfo::get(
+ Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
+
+ Magic = std::move(magics.Magic);
+
+ assert(magics.PreShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert(magics.PostShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
+ PreShift = magics.PreShift;
+ PostShift = magics.PostShift;
+ SelNPQ = magics.IsAdd;
+
PreShifts.push_back(
MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
@@ -5143,6 +5148,8 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
PostShifts.push_back(
MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
UseNPQ |= SelNPQ;
+ UsePreShift |= PreShift != 0;
+ UsePostShift |= magics.PostShift != 0;
return true;
};
@@ -5167,7 +5174,9 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
}
Register Q = LHS;
- Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+ if (UsePreShift)
+ Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
// Multiply the numerator (operand 0) by the magic value.
Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
@@ -5185,7 +5194,8 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
}
- Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+ if (UsePostShift)
+ Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
auto One = MIB.buildConstant(Ty, 1);
auto IsOne = MIB.buildICmp(
CmpInst::Predicate::ICMP_EQ,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index c97a00ccdd455..ab2f2cb5f12a9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -169,17 +169,17 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
;
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI4_2
-; GISEL-NEXT: adrp x9, .LCPI4_0
-; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
-; GISEL-NEXT: adrp x8, .LCPI4_1
-; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
-; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1]
+; GISEL-NEXT: mov w8, #171 // =0xab
+; GISEL-NEXT: fmov s1, w8
+; GISEL-NEXT: adrp x8, .LCPI4_0
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_0]
+; GISEL-NEXT: mov w8, #7 // =0x7
; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
+; GISEL-NEXT: shl v3.16b, v3.16b, #7
; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
-; GISEL-NEXT: neg v2.16b, v3.16b
-; GISEL-NEXT: shl v3.16b, v4.16b, #7
+; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: neg v2.16b, v2.16b
; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
; GISEL-NEXT: sshr v2.16b, v3.16b, #7
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
@@ -217,25 +217,66 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
;
; GISEL-LABEL: pr38477:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI5_3
-; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
-; GISEL-NEXT: adrp x8, .LCPI5_2
-; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
-; GISEL-NEXT: adrp x8, .LCPI5_0
-; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; GISEL-NEXT: mov w8, #4957 // =0x135d
+; GISEL-NEXT: mov w9, #16385 // =0x4001
+; GISEL-NEXT: fmov s1, w8
+; GISEL-NEXT: mov w8, #57457 // =0xe071
+; GISEL-NEXT: fmov s4, w9
+; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: mov w8, #4103 // =0x1007
+; GISEL-NEXT: mov w9, #35545 // =0x8ad9
+; GISEL-NEXT: fmov s5, w9
+; GISEL-NEXT: mov w9, #2048 // =0x800
+; GISEL-NEXT: mov v1.h[1], v1.h[0]
+; GISEL-NEXT: fmov s6, w9
+; GISEL-NEXT: adrp x9, .LCPI5_0
+; GISEL-NEXT: mov v1.h[2], v2.h[0]
+; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: mov w8, #32768 // =0x8000
+; GISEL-NEXT: fmov s3, w8
+; GISEL-NEXT: mov w8, #0 // =0x0
+; GISEL-NEXT: mov v1.h[3], v2.h[0]
+; GISEL-NEXT: mov v2.h[1], v3.h[0]
+; GISEL-NEXT: mov v1.h[4], v4.h[0]
+; GISEL-NEXT: fmov s4, w8
+; GISEL-NEXT: mov w8, #6 // =0x6
+; GISEL-NEXT: mov v2.h[2], v4.h[0]
+; GISEL-NEXT: mov v1.h[5], v5.h[0]
+; GISEL-NEXT: fmov s5, w8
+; GISEL-NEXT: mov w8, #2115 // =0x843
+; GISEL-NEXT: mov v2.h[3], v4.h[0]
+; GISEL-NEXT: mov v7.h[1], v5.h[0]
+; GISEL-NEXT: mov v1.h[6], v6.h[0]
+; GISEL-NEXT: fmov s6, w8
+; GISEL-NEXT: mov w8, #12 // =0xc
+; GISEL-NEXT: mov v2.h[4], v4.h[0]
+; GISEL-NEXT: mov v7.h[2], v5.h[0]
+; GISEL-NEXT: mov v1.h[7], v6.h[0]
+; GISEL-NEXT: fmov s6, w8
+; GISEL-NEXT: mov w8, #14 // =0xe
+; GISEL-NEXT: fmov s16, w8
+; GISEL-NEXT: mov w8, #4 // =0x4
+; GISEL-NEXT: mov v2.h[5], v4.h[0]
+; GISEL-NEXT: mov v7.h[3], v6.h[0]
+; GISEL-NEXT: umull2 v6.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
-; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
-; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
-; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
-; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
-; GISEL-NEXT: ldr d3, [x8, :lo12:.LCPI5_0]
-; GISEL-NEXT: adrp x8, .LCPI5_1
-; GISEL-NEXT: ushll v3.8h, v3.8b, #0
+; GISEL-NEXT: mov v2.h[6], v4.h[0]
+; GISEL-NEXT: mov v7.h[4], v16.h[0]
+; GISEL-NEXT: uzp2 v1.8h, v1.8h, v6.8h
+; GISEL-NEXT: mov v2.h[7], v3.h[0]
+; GISEL-NEXT: mov v7.h[5], v5.h[0]
+; GISEL-NEXT: ldr d5, [x9, :lo12:.LCPI5_0]
+; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
+; GISEL-NEXT: mov v7.h[6], v4.h[0]
+; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
+; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
+; GISEL-NEXT: fmov s3, w8
+; GISEL-NEXT: mov v7.h[7], v3.h[0]
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1]
+; GISEL-NEXT: ushll v3.8h, v5.8b, #0
; GISEL-NEXT: shl v3.8h, v3.8h, #15
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
-; GISEL-NEXT: neg v2.8h, v4.8h
+; GISEL-NEXT: neg v2.8h, v7.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
; GISEL-NEXT: sshr v2.8h, v3.8h, #15
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
@@ -257,10 +298,10 @@ define i32 @udiv_div_by_180(i32 %x)
;
; GISEL-LABEL: udiv_div_by_180:
; GISEL: // %bb.0:
-; GISEL-NEXT: uxtb w8, w0
-; GISEL-NEXT: mov w9, #5826 // =0x16c2
-; GISEL-NEXT: movk w9, #364, lsl #16
-; GISEL-NEXT: umull x8, w8, w9
+; GISEL-NEXT: mov w8, #5826 // =0x16c2
+; GISEL-NEXT: and w9, w0, #0xff
+; GISEL-NEXT: movk w8, #364, lsl #16
+; GISEL-NEXT: umull x8, w9, w8
; GISEL-NEXT: lsr x0, x8, #32
; GISEL-NEXT: // kill: def $w0 killed $w0 killed $x0
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
index 02233b9f498bd..11203925d1fe0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
@@ -228,16 +228,16 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 -85
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -85
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<16 x s8>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<16 x s8>) = G_LSHR [[UMULH]], [[BUILD_VECTOR1]](<16 x s8>)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s1>) = G_BUILD_VECTOR [[C3]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1), [[C4]](s1)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s1>) = G_BUILD_VECTOR [[C2]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<16 x s8>) = G_SELECT [[BUILD_VECTOR2]](<16 x s1>), [[COPY]], [[LSHR]]
; CHECK-NEXT: $q0 = COPY [[SELECT]](<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
@@ -264,6 +264,7 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4957
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
@@ -277,9 +278,9 @@ body: |
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2048
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C4]](s16), [[C5]](s16), [[C7]](s16), [[C9]](s16), [[C10]](s16), [[C11]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C2]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C2]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C3]](s16), [[C3]](s16), [[C6]](s16), [[C8]](s16), [[C3]](s16), [[C]](s16), [[C12]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[C1]](s16), [[C4]](s16), [[C5]](s16), [[C7]](s16), [[C9]](s16), [[C10]](s16), [[C11]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[C2]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C2]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[C3]](s16), [[C3]](s16), [[C6]](s16), [[C8]](s16), [[C3]](s16), [[C]](s16), [[C12]](s16)
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR1]]
PreShifts.push_back(MIB.buildUndef(ScalarShiftAmtTy).getReg(0));
MagicFactors.push_back(MIB.buildUndef(ScalarTy).getReg(0));
NPQFactors.push_back(MIB.buildUndef(ScalarTy).getReg(0));
PostShifts.push_back(MIB.buildUndef(ScalarShiftAmtTy).getReg(0));
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Call buildUndef once per type and reuse the result.
; GISEL-NEXT: fmov s5, w9
; GISEL-NEXT: mov w9, #2048 // =0x800
; GISEL-NEXT: mov v1.h[1], v1.h[0]
; GISEL-NEXT: fmov s6, w9
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This generated code looks way worse than before.
No description provided.