-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Don't require GEP in indexed compare fold #81614
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Nikita Popov (nikic) Changes: The indexed compare fold folds comparisons like p+a == p+b to a == b, even in cases where the a/b are complex (e.g. via multiple geps, or phis). Currently, it requires that the LHS is actually a GEP, but this requirement isn't really necessary: We can handle the pattern p == p+b as well. This patch removes the GEP requirement, allowing additional comparisons to be optimized away. Full diff: https://github.com/llvm/llvm-project/pull/81614.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 280c4d77b6dfca..cdc1b7668fe746 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -619,27 +619,27 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
return NewInsts[Start];
}
-/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
+/// Converts (CMP LHS, RHS) if this change would make RHS a constant.
/// We can look through PHIs, GEPs and casts in order to determine a common base
-/// between GEPLHS and RHS.
-static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
+/// between LHS and RHS.
+static Instruction *transformToIndexedCompare(Value *LHS, Value *RHS,
ICmpInst::Predicate Cond,
const DataLayout &DL,
InstCombiner &IC) {
- // FIXME: Support vector of pointers.
- if (GEPLHS->getType()->isVectorTy())
+ if (ICmpInst::isSigned(Cond))
return nullptr;
- if (!GEPLHS->hasAllConstantIndices())
+ // FIXME: Support vector of pointers.
+ if (!LHS->getType()->isPointerTy())
return nullptr;
- APInt Offset(DL.getIndexTypeSizeInBits(GEPLHS->getType()), 0);
+ APInt Offset(DL.getIndexTypeSizeInBits(LHS->getType()), 0);
Value *PtrBase =
- GEPLHS->stripAndAccumulateConstantOffsets(DL, Offset,
- /*AllowNonInbounds*/ false);
+ LHS->stripAndAccumulateConstantOffsets(DL, Offset,
+ /*AllowNonInbounds*/ false);
// Bail if we looked through addrspacecast.
- if (PtrBase->getType() != GEPLHS->getType())
+ if (PtrBase->getType() != LHS->getType())
return nullptr;
// The set of nodes that will take part in this transformation.
@@ -771,10 +771,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return replaceInstUsesWith(I, Cmp);
}
- // Otherwise, the base pointers are different and the indices are
- // different. Try convert this to an indexed compare by looking through
- // PHIs/casts.
- return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
+ return nullptr;
}
bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
@@ -841,9 +838,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
}
- // Try convert this to an indexed compare by looking through PHIs/casts as a
- // last resort.
- return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
+ return nullptr;
}
bool InstCombinerImpl::foldAllocaCmp(AllocaInst *Alloca) {
@@ -6909,6 +6904,10 @@ Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred,
if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI))
return NI;
+ if (Instruction *Res =
+ transformToIndexedCompare(Op0, Op1, Pred, getDataLayout(), *this))
+ return Res;
+
if (auto *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI))
return NI;
diff --git a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
index e5b8ba151e04c2..e2df11ddfb4c51 100644
--- a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
+++ b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
@@ -622,16 +622,16 @@ while.end.i:
define i1 @recursiveGEP_withPtrSub1_notKnownNonEqual2(ptr %val1) {
; CHECK-LABEL: @recursiveGEP_withPtrSub1_notKnownNonEqual2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TEST_VAL1:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 -1
; CHECK-NEXT: br label [[WHILE_COND_I:%.*]]
; CHECK: while.cond.i:
-; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[TEST_VAL1]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds i8, ptr [[A_PN_I]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 2
+; CHECK-NEXT: [[A_PN_I_IDX:%.*]] = phi i64 [ [[A_PN_I_ADD:%.*]], [[WHILE_COND_I]] ], [ -1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A_PN_I_ADD]] = add nsw i64 [[A_PN_I_IDX]], 1
+; CHECK-NEXT: [[TEST_0_I_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 [[A_PN_I_ADD]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I_PTR]], align 2
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
; CHECK: while.end.i:
-; CHECK-NEXT: [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]
+; CHECK-NEXT: [[BOOL:%.*]] = icmp eq i64 [[A_PN_I_ADD]], 0
; CHECK-NEXT: ret i1 [[BOOL]]
;
entry:
@@ -656,16 +656,16 @@ while.end.i:
define i1 @recursiveGEP_withPtrSub1_notKnownNonEqual3(ptr %val1) {
; CHECK-LABEL: @recursiveGEP_withPtrSub1_notKnownNonEqual3(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TEST_VAL1:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 5
; CHECK-NEXT: br label [[WHILE_COND_I:%.*]]
; CHECK: while.cond.i:
-; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[TEST_VAL1]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds i8, ptr [[A_PN_I]], i64 -1
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 2
+; CHECK-NEXT: [[A_PN_I_IDX:%.*]] = phi i64 [ [[A_PN_I_ADD:%.*]], [[WHILE_COND_I]] ], [ 5, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A_PN_I_ADD]] = add nsw i64 [[A_PN_I_IDX]], -1
+; CHECK-NEXT: [[TEST_0_I_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 [[A_PN_I_ADD]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I_PTR]], align 2
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
; CHECK: while.end.i:
-; CHECK-NEXT: [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]
+; CHECK-NEXT: [[BOOL:%.*]] = icmp eq i64 [[A_PN_I_ADD]], 0
; CHECK-NEXT: ret i1 [[BOOL]]
;
entry:
@@ -692,13 +692,14 @@ define i1 @recursiveGEP_withPtrSub_maybeZero(ptr %val1) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[WHILE_COND_I:%.*]]
; CHECK: while.cond.i:
-; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_PN_I]], align 2
-; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds i8, ptr [[A_PN_I]], i64 1
+; CHECK-NEXT: [[A_PN_I_IDX:%.*]] = phi i64 [ [[A_PN_I_ADD:%.*]], [[WHILE_COND_I]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A_PN_I_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 [[A_PN_I_IDX]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_PN_I_PTR]], align 2
+; CHECK-NEXT: [[A_PN_I_ADD]] = add nuw nsw i64 [[A_PN_I_IDX]], 1
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
; CHECK: while.end.i:
-; CHECK-NEXT: [[BOOL:%.*]] = icmp eq ptr [[A_PN_I]], [[VAL1]]
+; CHECK-NEXT: [[BOOL:%.*]] = icmp eq i64 [[A_PN_I_IDX]], 0
; CHECK-NEXT: ret i1 [[BOOL]]
;
entry:
@@ -963,13 +964,16 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP_inbounds(ptr %val1) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[WHILE_COND_I:%.*]]
; CHECK: while.cond.i:
-; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[A_PN_I]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 1
-; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: [[A_PN_I_IDX:%.*]] = phi i64 [ [[A_PN_I_IDX1:%.*]], [[WHILE_COND_I]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[A_PN_I_IDX1]] = add i64 [[A_PN_I_IDX]], [[TMP1]]
+; CHECK-NEXT: [[TEST_0_I_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 [[A_PN_I_IDX1]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TEST_0_I_PTR]], align 1
+; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
; CHECK: while.end.i:
-; CHECK-NEXT: [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]
+; CHECK-NEXT: [[BOOL:%.*]] = icmp eq i64 [[A_PN_I_IDX1]], 0
; CHECK-NEXT: ret i1 [[BOOL]]
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 08931eb7f02b80..155d24c92c4f2e 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -116,6 +116,7 @@ define void @test_overaligned_vec(i8 %B) {
; CHECK-LABEL: @test_overaligned_vec(
; CHECK-NEXT: store i8 [[B:%.*]], ptr getelementptr inbounds ([10 x i8], ptr @Global, i64 0, i64 2), align 1
; CHECK-NEXT: ret void
+;
%A = getelementptr <2 x half>, ptr @Global, i64 0, i64 1
store i8 %B, ptr %A
ret void
@@ -623,15 +624,15 @@ define i32 @test28() nounwind {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ORIENTATIONS:%.*]] = alloca [1 x [1 x %struct.x]], align 8
; CHECK-NEXT: [[T3:%.*]] = call i32 @puts(ptr noundef nonnull dereferenceable(1) @.str) #[[ATTR0]]
-; CHECK-NEXT: [[T45:%.*]] = getelementptr inbounds i8, ptr [[ORIENTATIONS]], i64 1
; CHECK-NEXT: br label [[BB10:%.*]]
; CHECK: bb10:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[BB10]] ]
; CHECK-NEXT: [[T12_REC:%.*]] = xor i32 [[INDVAR]], -1
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T12_REC]] to i64
-; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds [[STRUCT_X:%.*]], ptr [[T45]], i64 [[TMP0]]
-; CHECK-NEXT: [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12]]) #[[ATTR0]]
-; CHECK-NEXT: [[T84:%.*]] = icmp eq ptr [[T12]], [[ORIENTATIONS]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[ORIENTATIONS]], i64 [[TMP0]]
+; CHECK-NEXT: [[T12_PTR:%.*]] = getelementptr i8, ptr [[TMP1]], i64 1
+; CHECK-NEXT: [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12_PTR]]) #[[ATTR0]]
+; CHECK-NEXT: [[T84:%.*]] = icmp eq i32 [[INDVAR]], 0
; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[T84]], label [[BB17:%.*]], label [[BB10]]
; CHECK: bb17:
diff --git a/llvm/test/Transforms/InstCombine/pr39908.ll b/llvm/test/Transforms/InstCombine/pr39908.ll
index ca143f417fb27d..5d13a331c6d2e0 100644
--- a/llvm/test/Transforms/InstCombine/pr39908.ll
+++ b/llvm/test/Transforms/InstCombine/pr39908.ll
@@ -7,9 +7,7 @@ target datalayout = "p:32:32"
define i1 @test(ptr %p, i32 %n) {
; CHECK-LABEL: @test(
-; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[N:%.*]], i32 0, i32 0
-; CHECK-NEXT: [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%end = getelementptr inbounds [0 x %S], ptr %p, i32 0, i32 %n, i32 0, i32 0
@@ -22,9 +20,7 @@ define i1 @test(ptr %p, i32 %n) {
define i1 @test64(ptr %p, i64 %n) {
; CHECK-LABEL: @test64(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[TMP1]], i32 0, i32 0
-; CHECK-NEXT: [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%end = getelementptr inbounds [0 x %S], ptr %p, i64 0, i64 %n, i32 0, i64 0
@@ -37,9 +33,7 @@ define i1 @test64(ptr %p, i64 %n) {
define i1 @test64_overflow(ptr %p, i64 %n) {
; CHECK-LABEL: @test64_overflow(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[TMP1]], i32 0, i32 0
-; CHECK-NEXT: [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%end = getelementptr inbounds [0 x %S], ptr %p, i64 0, i64 %n, i32 0, i64 8589934592
diff --git a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
index bd509509c321f8..d987f9f5b3b46e 100644
--- a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
@@ -24,7 +24,7 @@ define void @test_fill_with_foreach([2 x i64] %elems.coerce) {
; CHECK-NEXT: [[ELEMS_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 0
; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ELEMS_COERCE_FCA_0_EXTRACT]] to ptr
; CHECK-NEXT: [[ELEMS_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 1
-; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[ELEMS_COERCE_FCA_1_EXTRACT]]
+; CHECK-NEXT: [[ADD_PTR_I_IDX:%.*]] = shl nsw i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 2
; CHECK-NEXT: [[CMP_NOT_I_I_I_I:%.*]] = icmp slt i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 0
; CHECK-NEXT: br i1 [[CMP_NOT_I_I_I_I]], label [[ERROR:%.*]], label [[FOR_COND_PREHEADER_SPLIT:%.*]]
; CHECK: for.cond.preheader.split:
@@ -36,10 +36,12 @@ define void @test_fill_with_foreach([2 x i64] %elems.coerce) {
; CHECK-NEXT: tail call void @error()
; CHECK-NEXT: br label [[COMMON_RET]]
; CHECK: for.body:
-; CHECK-NEXT: [[__BEGIN1_SROA_0_03:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_COND_PREHEADER_SPLIT]] ]
-; CHECK-NEXT: tail call void @use(ptr noundef nonnull align 4 dereferenceable(4) [[__BEGIN1_SROA_0_03]])
-; CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__BEGIN1_SROA_0_03]], i64 4
-; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR_I]], [[ADD_PTR_I]]
+; CHECK-NEXT: [[__BEGIN1_SROA_0_0_PTR4:%.*]] = phi ptr [ [[__BEGIN1_SROA_0_0_PTR:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_COND_PREHEADER_SPLIT]] ]
+; CHECK-NEXT: [[__BEGIN1_SROA_0_0_IDX3:%.*]] = phi i64 [ [[__BEGIN1_SROA_0_0_ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER_SPLIT]] ]
+; CHECK-NEXT: tail call void @use(ptr noundef nonnull align 4 dereferenceable(4) [[__BEGIN1_SROA_0_0_PTR4]])
+; CHECK-NEXT: [[__BEGIN1_SROA_0_0_ADD]] = add nuw nsw i64 [[__BEGIN1_SROA_0_0_IDX3]], 4
+; CHECK-NEXT: [[__BEGIN1_SROA_0_0_PTR]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[__BEGIN1_SROA_0_0_ADD]]
+; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq i64 [[__BEGIN1_SROA_0_0_ADD]], [[ADD_PTR_I_IDX]]
; CHECK-NEXT: br i1 [[CMP_I_NOT]], label [[COMMON_RET]], label [[FOR_BODY]]
;
entry:
|
Huh, this seems to have a larger impact than I expected: http://llvm-compile-time-tracker.com/compare.php?from=f0db35b93f31ea5d6ff9bd4791fb6755b5a5bb9b&to=c77f948ef66114ea99f0daf5a72d3646bb5845ff&stat=instructions%3Au — clang stage2 becomes 0.6% larger and 0.5% faster as a result of this change. |
stage2 is using the bootstrapped clang with this commit? Is the |
Yes.
Right. This is likely correlated with the code size increase. |
You would expect the same logic to apply to the rest of stage2 CTMark no? |
I don't really follow. The stage2 CTMark results use the stage2 compiler and are affected by optimization improvements that affect the stage2 compiler. If there are substantial differences between stage1 and stage2 CTMark results, that's usually the cause. |
Sorry, I had a misunderstanding; that makes sense! |
@dtcxzyw Can you please test this patch? |
Done. |
The indexed compare fold folds comparisons like p+a == p+b to a == b, even in cases where the a/b are complex (e.g. via multiple geps, or phis). Currently, it requires that the LHS is actually a GEP, but this requirement isn't really necessary: We can handle the pattern p == p+b as well. This patch removes the GEP requirement, allowing additional comparisons to be optimized away.
58df564
to
fef6e3f
Compare
Looks like this patch causes a regression in
|
The indexed compare fold folds comparisons like p+a == p+b to a == b, even in cases where the a/b are complex (e.g. via multiple geps, or phis).
Currently, it requires that the LHS is actually a GEP, but this requirement isn't really necessary: We can handle the pattern p == p+b as well.
This patch removes the GEP requirement, allowing additional comparisons to be optimized away.