-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SROA] Unfold gep of index phi #83087
Conversation
If a gep has only one phi as one of its operands and the remaining indexes are constant, we can unfold `gep ptr, (phi idx1, idx2)` to `phi ((gep ptr, idx1), (gep ptr, idx2))`. Take care not to unfold recursive phis. Followup to llvm#80983.
@llvm/pr-subscribers-llvm-transforms Author: Arthur Eubanks (aeubanks) Changes: If a gep has only one phi as one of its operands and the remaining indexes are constant, we can unfold `gep ptr, (phi idx1, idx2)` to `phi ((gep ptr, idx1), (gep ptr, idx2))`. Take care not to unfold recursive phis. Followup to #80983. Full diff: https://github.com/llvm/llvm-project/pull/83087.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 6c8785d52c4eab..f99f608ea3fe0f 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3985,8 +3985,8 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
return false;
LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):"
- << "\n original: " << *Sel
- << "\n " << GEPI);
+ << "\n original: " << *Sel << "\n "
+ << GEPI);
auto GetNewOps = [&](Value *SelOp) {
SmallVector<Value *> NewOps;
@@ -4023,64 +4023,88 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
Visited.insert(NSelI);
enqueueUsers(*NSelI);
- LLVM_DEBUG(dbgs() << "\n to: " << *NTrue
- << "\n " << *NFalse
- << "\n " << *NSel << '\n');
+ LLVM_DEBUG(dbgs() << "\n to: " << *NTrue << "\n "
+ << *NFalse << "\n " << *NSel << '\n');
return true;
}
- // Fold gep (phi ptr1, ptr2) => phi gep(ptr1), gep(ptr2)
+ // Fold gep (phi ptr1, ptr2), idx
+ // => phi ((gep ptr1, idx), (gep ptr2, idx))
+ // and gep ptr, (phi idx1, idx2)
+ // => phi ((gep ptr, idx1), (gep ptr, idx2))
bool foldGEPPhi(GetElementPtrInst &GEPI) {
- if (!GEPI.hasAllConstantIndices())
+ // Check whether the GEP has exactly one phi operand and all indices
+ // will become constant after the transform.
+ PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
+ // To prevent infinitely expanding recursive phis, bail if the GEP pointer
+ // operand is the phi and any of its incoming values is not an alloca or a
+ // constant.
+ if (Phi && any_of(Phi->operands(), [](Value *V) {
+ return isa<Instruction>(V) && !isa<AllocaInst>(V);
+ })) {
return false;
+ }
+ for (Value *Op : GEPI.indices()) {
+ if (auto *SI = dyn_cast<PHINode>(Op)) {
+ if (Phi)
+ return false;
+
+ Phi = SI;
+ if (!all_of(Phi->incoming_values(),
+ [](Value *V) { return isa<ConstantInt>(V); }))
+ return false;
+ continue;
+ }
+
+ if (!isa<ConstantInt>(Op))
+ return false;
+ }
- PHINode *PHI = cast<PHINode>(GEPI.getPointerOperand());
- if (GEPI.getParent() != PHI->getParent() ||
- llvm::any_of(PHI->incoming_values(), [](Value *In)
- { Instruction *I = dyn_cast<Instruction>(In);
- return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
- succ_empty(I->getParent()) ||
- !I->getParent()->isLegalToHoistInto();
- }))
+ if (!Phi)
return false;
LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):"
- << "\n original: " << *PHI
- << "\n " << GEPI
- << "\n to: ");
+ << "\n original: " << *Phi << "\n "
+ << GEPI);
- SmallVector<Value *, 4> Index(GEPI.indices());
- bool IsInBounds = GEPI.isInBounds();
- IRB.SetInsertPoint(GEPI.getParent(), GEPI.getParent()->getFirstNonPHIIt());
- PHINode *NewPN = IRB.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(),
- PHI->getName() + ".sroa.phi");
- for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
- BasicBlock *B = PHI->getIncomingBlock(I);
- Value *NewVal = nullptr;
- int Idx = NewPN->getBasicBlockIndex(B);
- if (Idx >= 0) {
- NewVal = NewPN->getIncomingValue(Idx);
- } else {
- Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
+ auto GetNewOps = [&](Value *PhiOp) {
+ SmallVector<Value *> NewOps;
+ for (Value *Op : GEPI.operands())
+ if (Op == Phi)
+ NewOps.push_back(PhiOp);
+ else
+ NewOps.push_back(Op);
+ return NewOps;
+ };
- IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));
- Type *Ty = GEPI.getSourceElementType();
- NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",
- IsInBounds);
- }
- NewPN->addIncoming(NewVal, B);
+ IRB.SetInsertPoint(Phi);
+ PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
+ Phi->getName() + ".sroa.phi");
+
+ bool IsInBounds = GEPI.isInBounds();
+ Type *SourceTy = GEPI.getSourceElementType();
+ // We only handle constants and static allocas here, so we can insert GEPs
+ // at the beginning of the function after static allocas.
+ IRB.SetInsertPointPastAllocas(GEPI.getFunction());
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+ Value *Op = Phi->getIncomingValue(I);
+ BasicBlock *BB = Phi->getIncomingBlock(I);
+ SmallVector<Value *> NewOps = GetNewOps(Op);
+
+ Value *NewGEP =
+ IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
+ Phi->getName() + ".sroa.gep", IsInBounds);
+ NewPhi->addIncoming(NewGEP, BB);
}
Visited.erase(&GEPI);
- GEPI.replaceAllUsesWith(NewPN);
+ GEPI.replaceAllUsesWith(NewPhi);
GEPI.eraseFromParent();
- Visited.insert(NewPN);
- enqueueUsers(*NewPN);
+ Visited.insert(NewPhi);
+ enqueueUsers(*NewPhi);
- LLVM_DEBUG(for (Value *In : NewPN->incoming_values())
- dbgs() << "\n " << *In;
- dbgs() << "\n " << *NewPN << '\n');
+ LLVM_DEBUG(dbgs() << "\n to: " << *NewPhi << '\n');
return true;
}
@@ -4089,8 +4113,7 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
if (foldGEPSelect(GEPI))
return true;
- if (isa<PHINode>(GEPI.getPointerOperand()) &&
- foldGEPPhi(GEPI))
+ if (foldGEPPhi(GEPI))
return true;
enqueueUsers(GEPI);
diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll
index 54cfb10793a1ac..7c8b27c9de9c0b 100644
--- a/llvm/test/Transforms/SROA/phi-and-select.ll
+++ b/llvm/test/Transforms/SROA/phi-and-select.ll
@@ -114,13 +114,13 @@ define i32 @test3(i32 %x) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[X:%.*]], label [[BB0:%.*]] [
-; CHECK-NEXT: i32 1, label [[BB1:%.*]]
-; CHECK-NEXT: i32 2, label [[BB2:%.*]]
-; CHECK-NEXT: i32 3, label [[BB3:%.*]]
-; CHECK-NEXT: i32 4, label [[BB4:%.*]]
-; CHECK-NEXT: i32 5, label [[BB5:%.*]]
-; CHECK-NEXT: i32 6, label [[BB6:%.*]]
-; CHECK-NEXT: i32 7, label [[BB7:%.*]]
+; CHECK-NEXT: i32 1, label [[BB1:%.*]]
+; CHECK-NEXT: i32 2, label [[BB2:%.*]]
+; CHECK-NEXT: i32 3, label [[BB3:%.*]]
+; CHECK-NEXT: i32 4, label [[BB4:%.*]]
+; CHECK-NEXT: i32 5, label [[BB5:%.*]]
+; CHECK-NEXT: i32 6, label [[BB6:%.*]]
+; CHECK-NEXT: i32 7, label [[BB7:%.*]]
; CHECK-NEXT: ]
; CHECK: bb0:
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -733,6 +733,7 @@ define void @PR20822(i1 %c1, i1 %c2, ptr %ptr) {
; CHECK-LABEL: @PR20822(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[F_SROA_0:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[F1_SROA_GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[PTR:%.*]], i32 0, i32 0
; CHECK-NEXT: br i1 [[C1:%.*]], label [[IF_END:%.*]], label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: br label [[IF_END]]
@@ -742,9 +743,8 @@ define void @PR20822(i1 %c1, i1 %c2, ptr %ptr) {
; CHECK: if.then2:
; CHECK-NEXT: br label [[IF_THEN5]]
; CHECK: if.then5:
-; CHECK-NEXT: [[F1:%.*]] = phi ptr [ [[PTR:%.*]], [[IF_THEN2]] ], [ [[F_SROA_0]], [[IF_END]] ]
-; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[F1]], i32 0, i32 0
-; CHECK-NEXT: store i32 0, ptr [[DOTFCA_0_GEP]], align 4
+; CHECK-NEXT: [[F1_SROA_PHI:%.*]] = phi ptr [ [[F1_SROA_GEP]], [[IF_THEN2]] ], [ [[F_SROA_0]], [[IF_END]] ]
+; CHECK-NEXT: store i32 0, ptr [[F1_SROA_PHI]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll
index c5aa1cdd9cf654..d8a49cc5c54b6c 100644
--- a/llvm/test/Transforms/SROA/phi-gep.ll
+++ b/llvm/test/Transforms/SROA/phi-gep.ll
@@ -65,15 +65,13 @@ end:
define i32 @test_sroa_phi_gep_poison(i1 %cond) {
; CHECK-LABEL: @test_sroa_phi_gep_poison(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [[PAIR:%.*]], align 4
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[END:%.*]]
; CHECK: if.then:
+; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATE_LOAD_IF_THEN:%.*]] = load i32, ptr poison, align 4
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ poison, [[IF_THEN]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i32 0, i32 1
-; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT: ret i32 [[LOAD]]
+; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[PHI_SROA_PHI_SROA_SPECULATE_LOAD_IF_THEN]], [[IF_THEN]] ]
+; CHECK-NEXT: ret i32 [[PHI_SROA_PHI_SROA_SPECULATED]]
;
entry:
%a = alloca %pair, align 4
@@ -94,17 +92,13 @@ end:
define i32 @test_sroa_phi_gep_global(i1 %cond) {
; CHECK-LABEL: @test_sroa_phi_gep_global(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [[PAIR:%.*]], align 4
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [[PAIR]], ptr [[A]], i32 0, i32 1
-; CHECK-NEXT: store i32 1, ptr [[GEP_A]], align 4
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[END:%.*]]
; CHECK: if.then:
+; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATE_LOAD_IF_THEN:%.*]] = load i32, ptr getelementptr inbounds ([[PAIR:%.*]], ptr @g, i32 0, i32 1), align 4
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ @g, [[IF_THEN]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i32 0, i32 1
-; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT: ret i32 [[LOAD]]
+; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[PHI_SROA_PHI_SROA_SPECULATE_LOAD_IF_THEN]], [[IF_THEN]] ]
+; CHECK-NEXT: ret i32 [[PHI_SROA_PHI_SROA_SPECULATED]]
;
entry:
%a = alloca %pair, align 4
@@ -245,7 +239,7 @@ define i32 @test_sroa_invoke_phi_gep(i1 %cond) personality ptr @__gxx_personalit
; CHECK-NEXT: br i1 [[COND:%.*]], label [[CALL:%.*]], label [[END:%.*]]
; CHECK: call:
; CHECK-NEXT: [[B:%.*]] = invoke ptr @foo()
-; CHECK-NEXT: to label [[END]] unwind label [[INVOKE_CATCH:%.*]]
+; CHECK-NEXT: to label [[END]] unwind label [[INVOKE_CATCH:%.*]]
; CHECK: end:
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[B]], [[CALL]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i32 0, i32 1
@@ -253,7 +247,7 @@ define i32 @test_sroa_invoke_phi_gep(i1 %cond) personality ptr @__gxx_personalit
; CHECK-NEXT: ret i32 [[LOAD]]
; CHECK: invoke_catch:
; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 }
-; CHECK-NEXT: catch ptr null
+; CHECK-NEXT: catch ptr null
; CHECK-NEXT: ret i32 0
;
entry:
@@ -468,10 +462,10 @@ define i32 @test_sroa_phi_gep_multiple_values_from_same_block(i32 %arg) {
; CHECK-LABEL: @test_sroa_phi_gep_multiple_values_from_same_block(
; CHECK-NEXT: bb.1:
; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[BB_3:%.*]] [
-; CHECK-NEXT: i32 1, label [[BB_2:%.*]]
-; CHECK-NEXT: i32 2, label [[BB_2]]
-; CHECK-NEXT: i32 3, label [[BB_4:%.*]]
-; CHECK-NEXT: i32 4, label [[BB_4]]
+; CHECK-NEXT: i32 1, label [[BB_2:%.*]]
+; CHECK-NEXT: i32 2, label [[BB_2]]
+; CHECK-NEXT: i32 3, label [[BB_4:%.*]]
+; CHECK-NEXT: i32 4, label [[BB_4]]
; CHECK-NEXT: ]
; CHECK: bb.2:
; CHECK-NEXT: br label [[BB_4]]
@@ -504,6 +498,77 @@ bb.4: ; preds = %bb.1, %bb.1, %bb
ret i32 %load
}
+define i64 @test_phi_idx_mem2reg_const(i1 %arg) {
+; CHECK-LABEL: @test_phi_idx_mem2reg_const(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi i64 [ 2, [[BB1]] ], [ 3, [[BB2]] ]
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[BB1]] ], [ 1, [[BB2]] ]
+; CHECK-NEXT: ret i64 [[PHI_SROA_PHI_SROA_SPECULATED]]
+;
+bb:
+ %alloca = alloca [2 x i64], align 8
+ %gep1 = getelementptr inbounds i64, ptr %alloca, i64 1
+ store i64 2, ptr %alloca
+ store i64 3, ptr %gep1
+ br i1 %arg, label %bb1, label %bb2
+
+bb1:
+ br label %end
+
+bb2:
+ br label %end
+
+end:
+ %phi = phi i64 [ 0, %bb1 ], [ 1, %bb2 ]
+ %getelementptr = getelementptr inbounds i64, ptr %alloca, i64 %phi
+ %load = load i64, ptr %getelementptr
+ ret i64 %load
+}
+
+define i64 @test_phi_idx_mem2reg_not_const(i1 %arg, i64 %idx) {
+; CHECK-LABEL: @test_phi_idx_mem2reg_not_const(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i64], align 8
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[ALLOCA]], i64 1
+; CHECK-NEXT: store i64 2, ptr [[ALLOCA]], align 4
+; CHECK-NEXT: store i64 3, ptr [[GEP1]], align 4
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[IDX:%.*]], [[BB2]] ]
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i64, ptr [[ALLOCA]], i64 [[PHI]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GETELEMENTPTR]], align 4
+; CHECK-NEXT: ret i64 [[LOAD]]
+;
+bb:
+ %alloca = alloca [2 x i64], align 8
+ %gep1 = getelementptr inbounds i64, ptr %alloca, i64 1
+ store i64 2, ptr %alloca
+ store i64 3, ptr %gep1
+ br i1 %arg, label %bb1, label %bb2
+
+bb1:
+ br label %end
+
+bb2:
+ br label %end
+
+end:
+ %phi = phi i64 [ 0, %bb1 ], [ %idx, %bb2 ]
+ %getelementptr = getelementptr inbounds i64, ptr %alloca, i64 %phi
+ %load = load i64, ptr %getelementptr
+ ret i64 %load
+}
+
declare ptr @foo()
declare i32 @__gxx_personality_v0(...)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM overall, with some nits and comments.
llvm/lib/Transforms/Scalar/SROA.cpp
Outdated
// operand is the phi and any of its incoming values is not an alloca or a | ||
// constant. | ||
if (Phi && any_of(Phi->operands(), [](Value *V) { | ||
return isa<Instruction>(V) && !isa<AllocaInst>(V); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any of its incoming values is not an alloca or a constant.
- Restricting it to `alloca` only is too conservative. E.g. we may have a chain of `GEP`s of an `alloca`. I'm not sure if we're guaranteed that instcombine will always be able to collapse it for us into alloca+offset. There may also be various shenanigans with int/pointer conversions.
- Does negation of `isa<Instruction>(V)` guarantee that what we've got is a constant? E.g. what if we get a function argument as an incoming value? Can we ever get an operator?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oops, I meant allocas/constants/arguments, will update
(will address other comments in a bit)
if (!all_of(Phi->incoming_values(), | ||
[](Value *V) { return isa<ConstantInt>(V); })) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There may be merit in folding if we see any constant values. It should still be useful for SROA, but there's obviously a trade-off vs having the non-const GEPs around.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
folding as in changing the GEP to an i8 GEP with one constant index?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Not sure if "folding" is the right term here (or a good name for the function). Considering that we're creating more IR instructions, "unfolding"/transforming would fit better, IMO.
What I mean is that instead of replacing phi of constant values with phi of geps with constant index only, we may want to allow replacing phis of mix of GEPs with constant and non-constant indices. GEP with constant index will help SROA directly, and gep with non-constant index may or may not be useful. It would likely be more amenable to further optimizations, as "phi" is often a roadblock for some optimizations that only look within one basic block.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
renamed to "unfold"
SROA is just focused on removing allocas. Other transforms may be blocked by phis, but it's not SROA's job to fix those. If we decide that's a canonical form, instcombine should do that transformation.
llvm/lib/Transforms/Scalar/SROA.cpp
Outdated
// Check whether the GEP has exactly one phi operand and all indices | ||
// will become constant after the transform. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
single phi-only will do for now.
That said, it would give us a bit more flexibility, if we would still expand one phi at a time, but would do that for GEPs with up to N phis we could convert that way.
The folding itself will work pretty much as is, and up-front checks on the number of phis we can handle guarantee that the recursion will end, as all expanded GEPs will have one less expandable phi.
Instead of bailing out on the first unacceptable phi, we can count acceptable ones, and only expand the last one, if we've found no more than N. With N=1 we'll have the behavior identical to the current version of the algorithm, but if we add a CLI option to control N, we can then experiment on real world code and see whether N>1 buys us anything in practice.
Up to you.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm doubtful that more than one will be useful, but yes some data would be good
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. now that most geps get normalized to a single-index variant, we only have gep pointer and gep index. Still, expanding both may be useful. E.g. if some code conditionally iterates over one of two buffers, folding both pointer and the index would allow SROA to see all accesses.
llvm/lib/Transforms/Scalar/SROA.cpp
Outdated
bool IsInBounds = GEPI.isInBounds(); | ||
Type *SourceTy = GEPI.getSourceElementType(); | ||
// We only handle constants and static allocas here, so we can insert GEPs | ||
// at the beginning of the function after static allocas. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. This explains limiting GEP pointers to allocas only.
I think we should check whether accepting only allocas is sufficient to deal with the regression that prompted this SROA improvement. Just in case, can you check what happens w/ this patch to the test I've added in #82425.
In general case, I believe we do need to deal with GEPs, too, even if that means that we need to put GEPs in different basic blocks, but that's an improvement that may be done in a separate patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the test case ends up with the same IR as in your patch
yes, handling GEPs can be done in a follow-up. we should be able to `IRB.SetInsertPoint(OldGEP)` in that case instead of `IRB.SetInsertPointPastAllocas()`
✅ With the latest revision this PR passed the C/C++ code formatter. |
llvm/lib/Transforms/Scalar/SROA.cpp
Outdated
// Check whether the GEP has exactly one phi operand and all indices | ||
// will become constant after the transform. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. now that most geps get normalized to a single-index variant, we only have gep pointer and gep index. Still, expanding both may be useful. E.g. if some code conditionally iterates over one of two buffers, folding both pointer and the index would allow SROA to see all accesses.
if (!all_of(Phi->incoming_values(), | ||
[](Value *V) { return isa<ConstantInt>(V); })) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Not sure if "folding" is the right term here (or a good name for the function). Considering that we're creating more IR instructions, "unfolding"/transforming would fit better, IMO.
What I mean is that instead of replacing phi of constant values with phi of geps with constant index only, we may want to allow replacing phis of mix of GEPs with constant and non-constant indices. GEP with constant index will help SROA directly, and gep with non-constant index may or may not be useful. It would likely be more amenable to further optimizations, as "phi" is often a roadblock for some optimizations that only look within one basic block.
This reverts commit 2eb6398. This caused verifier error ``` Instruction does not dominate all uses! ``` for some projects using Halide. The verifier error happens inside `Halide::Internal::CodeGen_LLVM::optimize_module` and looks like a genuine SROA issue.
the issue causing the revert is that I assumed SROA only handled allocas at the very beginning of the entry block, but it actually handles all allocas in the entry block, so we might insert a GEP before an alloca if there is a non-alloca instruction between the alloca and the function entry |
If a gep has only one phi as one of its operands and the remaining indexes are constant, we can unfold `gep ptr, (phi idx1, idx2)` to `phi ((gep ptr, idx1), (gep ptr, idx2))`. Take care not to unfold recursive phis. Followup to #80983. This was initially #83087. Initial PR did not handle allocas in entry block that weren't at the beginning of the function, causing GEPs to be inserted after the first chunk of allocas but potentially before an alloca not at the beginning. Insert GEPs at the end of the entry block instead since constants/arguments/static allocas can all be used there.
If a gep has only one phi as one of its operands and the remaining indexes are constant, we can unfold `gep ptr, (phi idx1, idx2)` to `phi ((gep ptr, idx1), (gep ptr, idx2))`.
Take care not to unfold recursive phis.
Followup to #80983.