-
Notifications
You must be signed in to change notification settings - Fork 15.2k
InstCombine: Stop transforming EQ/NE of SHR to 0 to ULT/UGT if >1 use #168007
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
InstCombine: Stop transforming EQ/NE of SHR to 0 to ULT/UGT if >1 use #168007
Conversation
Created using spr 1.3.6-beta.1
|
@llvm/pr-subscribers-llvm-analysis Author: Peter Collingbourne (pcc) Changes: This is a small code size optimization that lets us avoid both shifting and comparing to a constant if we need the shifted value anyway. Full diff: https://github.com/llvm/llvm-project/pull/168007.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index fba1ccf2c8c9b..a50a44280e8a7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2638,17 +2638,17 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
if (Shr->isExact())
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
- if (C.isZero()) {
- // == 0 is u< 1.
- if (Pred == CmpInst::ICMP_EQ)
- return new ICmpInst(CmpInst::ICMP_ULT, X,
- ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
- else
- return new ICmpInst(CmpInst::ICMP_UGT, X,
- ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
- }
-
if (Shr->hasOneUse()) {
+ if (0) {
+ // == 0 is u< 1.
+ if (Pred == CmpInst::ICMP_EQ)
+ return new ICmpInst(CmpInst::ICMP_ULT, X,
+ ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
+ else
+ return new ICmpInst(CmpInst::ICMP_UGT, X,
+ ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
+ }
+
// Canonicalize the shift into an 'and':
// icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll
index 55c3e7779478e..afabf6ce0fdf2 100644
--- a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll
+++ b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll
@@ -228,7 +228,7 @@ define i64 @known_power_of_two_urem_loop_lshr(i64 %size, i64 %a) {
; CHECK-NEXT: [[UREM:%.*]] = and i64 [[SIZE:%.*]], [[TMP0]]
; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]]
; CHECK-NEXT: [[I]] = lshr i64 [[PHI]], 1
-; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 2
+; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0
; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret i64 [[SUM]]
@@ -328,7 +328,7 @@ define i64 @known_power_of_two_urem_loop_ashr_negative_2(i64 %size, i64 %a) {
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]]
; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[UREM]]
; CHECK-NEXT: [[I]] = ashr i64 [[PHI]], 2
-; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 4
+; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0
; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret i64 [[SUM]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll
index 8aceba04e0aeb..532e8b014b328 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -579,7 +579,7 @@ define i1 @ashr_ugt_0(i4 %x) {
define i1 @ashr_ugt_0_multiuse(i4 %x, ptr %p) {
; CHECK-LABEL: @ashr_ugt_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ne i4 [[S]], 0
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
; CHECK-NEXT: ret i1 [[R]]
;
@@ -934,7 +934,7 @@ define i1 @lshr_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = lshr i8 %x, 2
@@ -947,7 +947,7 @@ define i1 @lshr_ne_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_ne_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = lshr i8 %x, 2
@@ -960,7 +960,7 @@ define i1 @ashr_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @ashr_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i8 %x, 2
@@ -973,7 +973,7 @@ define i1 @ashr_ne_0_multiuse(i8 %x) {
; CHECK-LABEL: @ashr_ne_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i8 %x, 2
@@ -982,6 +982,46 @@ define i1 @ashr_ne_0_multiuse(i8 %x) {
ret i1 %c
}
+define i1 @lshr_eq_0(i8 %x) {
+; CHECK-LABEL: @lshr_eq_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i8 %x, 2
+ %c = icmp eq i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshr_ne_0(i8 %x) {
+; CHECK-LABEL: @lshr_ne_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i8 %x, 2
+ %c = icmp ne i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashr_eq_0(i8 %x) {
+; CHECK-LABEL: @ashr_eq_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i8 %x, 2
+ %c = icmp eq i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashr_ne_0(i8 %x) {
+; CHECK-LABEL: @ashr_ne_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i8 %x, 2
+ %c = icmp ne i8 %s, 0
+ ret i1 %c
+}
+
define i1 @lshr_exact_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_exact_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr exact i8 [[X:%.*]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 66e4de5da7955..ff4b1bbd38794 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -914,7 +914,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
; IND-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; IND-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 9
+; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IND: vector.ph:
; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902
diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
index f8ddd344f5587..28736c89f6867 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
@@ -65,7 +65,7 @@ define void @scalar_store(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
@@ -125,7 +125,7 @@ define void @expansion(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
index 4274719f2efd3..9ff9f92c4edca 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
@@ -9,13 +9,11 @@ target triple = "thumbv6m-none-none-eabi"
define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef %pResult) #0 {
; CHECK-LABEL: @arm_mean_q7(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 16
-; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK: while.body.preheader:
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE]], 4
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE:%.*]], 4
+; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp eq i32 [[SHR]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
; CHECK: while.body:
-; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER:%.*]] ]
; CHECK-NEXT: [[PSRC_ADDR_012:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRC:%.*]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BLKCNT_011:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[SHR]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[PSRC_ADDR_012]], align 1
@@ -30,8 +28,8 @@ define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
-; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[ENTRY:%.*]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_BODY_PREHEADER]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15
; CHECK-NEXT: [[CMP2_NOT15:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll b/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
index eb5e279947ecb..3585fe9f757d3 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
@@ -32,7 +32,7 @@ define i32 @ctlz_loop_with_abs(i32 %n) {
; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP1]] = lshr i32 [[N_ADDR_03]], 1
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_02]], 1
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp samesign ult i32 [[N_ADDR_03]], 2
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
; CHECK: while.end:
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC]], [[WHILE_BODY]] ]
|
|
@llvm/pr-subscribers-llvm-transforms Author: Peter Collingbourne (pcc) Changes: This is a small code size optimization that lets us avoid both shifting and comparing to a constant if we need the shifted value anyway. Full diff: https://github.com/llvm/llvm-project/pull/168007.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index fba1ccf2c8c9b..a50a44280e8a7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2638,17 +2638,17 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
if (Shr->isExact())
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
- if (C.isZero()) {
- // == 0 is u< 1.
- if (Pred == CmpInst::ICMP_EQ)
- return new ICmpInst(CmpInst::ICMP_ULT, X,
- ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
- else
- return new ICmpInst(CmpInst::ICMP_UGT, X,
- ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
- }
-
if (Shr->hasOneUse()) {
+ if (0) {
+ // == 0 is u< 1.
+ if (Pred == CmpInst::ICMP_EQ)
+ return new ICmpInst(CmpInst::ICMP_ULT, X,
+ ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
+ else
+ return new ICmpInst(CmpInst::ICMP_UGT, X,
+ ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
+ }
+
// Canonicalize the shift into an 'and':
// icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll
index 55c3e7779478e..afabf6ce0fdf2 100644
--- a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll
+++ b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll
@@ -228,7 +228,7 @@ define i64 @known_power_of_two_urem_loop_lshr(i64 %size, i64 %a) {
; CHECK-NEXT: [[UREM:%.*]] = and i64 [[SIZE:%.*]], [[TMP0]]
; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]]
; CHECK-NEXT: [[I]] = lshr i64 [[PHI]], 1
-; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 2
+; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0
; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret i64 [[SUM]]
@@ -328,7 +328,7 @@ define i64 @known_power_of_two_urem_loop_ashr_negative_2(i64 %size, i64 %a) {
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]]
; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[UREM]]
; CHECK-NEXT: [[I]] = ashr i64 [[PHI]], 2
-; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 4
+; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0
; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret i64 [[SUM]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll
index 8aceba04e0aeb..532e8b014b328 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -579,7 +579,7 @@ define i1 @ashr_ugt_0(i4 %x) {
define i1 @ashr_ugt_0_multiuse(i4 %x, ptr %p) {
; CHECK-LABEL: @ashr_ugt_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ne i4 [[S]], 0
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
; CHECK-NEXT: ret i1 [[R]]
;
@@ -934,7 +934,7 @@ define i1 @lshr_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = lshr i8 %x, 2
@@ -947,7 +947,7 @@ define i1 @lshr_ne_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_ne_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = lshr i8 %x, 2
@@ -960,7 +960,7 @@ define i1 @ashr_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @ashr_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i8 %x, 2
@@ -973,7 +973,7 @@ define i1 @ashr_ne_0_multiuse(i8 %x) {
; CHECK-LABEL: @ashr_ne_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2
; CHECK-NEXT: call void @use(i8 [[S]])
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i8 %x, 2
@@ -982,6 +982,46 @@ define i1 @ashr_ne_0_multiuse(i8 %x) {
ret i1 %c
}
+define i1 @lshr_eq_0(i8 %x) {
+; CHECK-LABEL: @lshr_eq_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i8 %x, 2
+ %c = icmp eq i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshr_ne_0(i8 %x) {
+; CHECK-LABEL: @lshr_ne_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i8 %x, 2
+ %c = icmp ne i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashr_eq_0(i8 %x) {
+; CHECK-LABEL: @ashr_eq_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i8 %x, 2
+ %c = icmp eq i8 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashr_ne_0(i8 %x) {
+; CHECK-LABEL: @ashr_ne_0(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i8 %x, 2
+ %c = icmp ne i8 %s, 0
+ ret i1 %c
+}
+
define i1 @lshr_exact_eq_0_multiuse(i8 %x) {
; CHECK-LABEL: @lshr_exact_eq_0_multiuse(
; CHECK-NEXT: [[S:%.*]] = lshr exact i8 [[X:%.*]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 66e4de5da7955..ff4b1bbd38794 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -914,7 +914,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
; IND-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; IND-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 9
+; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IND: vector.ph:
; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902
diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
index f8ddd344f5587..28736c89f6867 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
@@ -65,7 +65,7 @@ define void @scalar_store(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
@@ -125,7 +125,7 @@ define void @expansion(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
index 4274719f2efd3..9ff9f92c4edca 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll
@@ -9,13 +9,11 @@ target triple = "thumbv6m-none-none-eabi"
define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef %pResult) #0 {
; CHECK-LABEL: @arm_mean_q7(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 16
-; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK: while.body.preheader:
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE]], 4
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE:%.*]], 4
+; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp eq i32 [[SHR]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
; CHECK: while.body:
-; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER:%.*]] ]
; CHECK-NEXT: [[PSRC_ADDR_012:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRC:%.*]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BLKCNT_011:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[SHR]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[PSRC_ADDR_012]], align 1
@@ -30,8 +28,8 @@ define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
-; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[ENTRY:%.*]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_BODY_PREHEADER]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15
; CHECK-NEXT: [[CMP2_NOT15:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll b/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
index eb5e279947ecb..3585fe9f757d3 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
@@ -32,7 +32,7 @@ define i32 @ctlz_loop_with_abs(i32 %n) {
; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP1]] = lshr i32 [[N_ADDR_03]], 1
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_02]], 1
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp samesign ult i32 [[N_ADDR_03]], 2
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
; CHECK: while.end:
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC]], [[WHILE_BODY]] ]
|
| } | ||
|
|
||
if (Shr->hasOneUse()) {
  if (0) {
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't match the PR title.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I temporarily disabled this transform to check whether we still had test coverage for the single-use case. That revealed that this code is no longer necessary (because other transformations implement the same thing when there is a single use), but I didn't realize this at the time, so I accidentally uploaded the change with dead code here. I've now removed this code entirely and added an explanation to the commit message.
Created using spr 1.3.6-beta.1
dtcxzyw
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense to me. Although we have made massive efforts to recover the pattern in the backend, it is still not enough, since some information gets lost during transformation.
The overall IR diff in dtcxzyw/llvm-opt-benchmark#3070 looks positive.
My remaining concern is that removing the canonicalization (in multi-use cases) may lead to imprecise analysis results. As a follow-up, the following code needs to be updated to handle the pattern (X >> C) eq/ne 0:
llvm-project/llvm/lib/Analysis/ValueTracking.cpp
Lines 916 to 922 in 22f550b
case ICmpInst::ICMP_NE: {
  // assume (V & B != 0) where B is a power of 2
  const APInt *BPow2;
  if (C->isZero() && match(LHS, m_And(m_V, m_Power2(BPow2))))
    Known.One |= *BPow2;
  break;
}
llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
Lines 1333 to 1334 in 22f550b
std::optional<ValueLatticeElement> LazyValueInfoImpl::getValueFromICmpCondition(
    Value *Val, ICmpInst *ICI, bool isTrueDest, bool UseBlockValue) {
InstCombine: Stop transforming EQ/NE of SHR to 0 to ULT/UGT if >1 use

This is a small code size optimization that lets us avoid both shifting and comparing to a constant if we need the shifted value anyway. On most architectures the zero comparison is cheaper than a constant comparison (or free if the shift sets flags).

Although this change appears to remove the optimization entirely, we continue to do this transform if there is one use because of the code below the removed code that transforms the shift into an and, followed by the PR10267 case in InstCombinerImpl::foldICmpAndConstConst that transforms the and into a ult/ugt. Added a test case to verify this explicitly.

Per [1] reduces clang .text size by 0.09% and dynamic instruction count by 0.01%.

[1] https://llvm-compile-time-tracker.com/compare.php?from=1f38d49ebe96417e368a567efa4d650b8a9ac30f&to=0873787a12b8f2eab019d8211ace4bccc1807343&stat=size-text

Reviewers: nikic, dtcxzyw
Reviewed By: dtcxzyw
Pull Request: llvm/llvm-project#168007
This is a small code size optimization that lets us avoid both shifting
and comparing to a constant if we need the shifted value anyway. On most
architectures the zero comparison is cheaper than a constant comparison
(or free if the shift sets flags).
Although this change appears to remove the optimization entirely, we
continue to do this transform if there is one use because of the code
below the removed code that transforms the shift into an and, followed
by the PR10267 case in InstCombinerImpl::foldICmpAndConstConst that
transforms the and into a ult/ugt. Added a test case to verify this
explicitly.
Per [1] reduces clang .text size by 0.09% and dynamic instruction count
by 0.01%.
[1] https://llvm-compile-time-tracker.com/compare.php?from=1f38d49ebe96417e368a567efa4d650b8a9ac30f&to=0873787a12b8f2eab019d8211ace4bccc1807343&stat=size-text