-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SROA] analyze gep(ptr,phi(const...)) #82425
base: main
Are you sure you want to change the base?
Conversation
Recent changes to normalize GEPs to single-index i8 variant allow more optimizations, and some of them result in gep(alloca, phi(C1,C2,...)) which SROA could not handle. This patch splits such GEPs into phi(gep(alloca, C1), gep(alloca, C2)) so SROA can analyze alloca accesses.
@llvm/pr-subscribers-llvm-transforms Author: Artem Belevich (Artem-B) ChangesRecent changes to normalize GEPs to single-index i8 variant (90ba330) allow more optimizations, This patch splits such GEPs into phi(gep(alloca, C1), gep(alloca, C2)) so SROA can analyze alloca slices. Full diff: https://github.com/llvm/llvm-project/pull/82425.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 138dc38b5c14ce..19f092245c6308 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -4070,12 +4070,67 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
return true;
}
+ bool foldGEPPhiConst(GetElementPtrInst &GEPI) {
+ if (GEPI.getNumIndices() != 1)
+ return false;
+
+ PHINode *PHI = cast<PHINode>(GEPI.getOperand(1));
+ if (!all_of(PHI->incoming_values(), [&](auto &V) {
+ auto *Parent = PHI->getIncomingBlock(V);
+ return isa<ConstantInt>(V) && !succ_empty(Parent) &&
+ Parent->isLegalToHoistInto();
+ }))
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << " Rewriting gep(ptr, phi(const)) -> phi(gep(ptr, const)):"
+ << "\n original: " << *PHI << "\n " << GEPI
+ << "\n to: ");
+
+ SmallVector<Value *, 4> Index(GEPI.indices());
+ bool IsInBounds = GEPI.isInBounds();
+ IRB.SetInsertPoint(GEPI.getParent(), GEPI.getParent()->getFirstNonPHIIt());
+ PHINode *NewPN = IRB.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(),
+ PHI->getName() + ".sroa.phi");
+ for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *B = PHI->getIncomingBlock(I);
+ Value *NewVal = nullptr;
+ int Idx = NewPN->getBasicBlockIndex(B);
+ if (Idx >= 0) {
+ NewVal = NewPN->getIncomingValue(Idx);
+ } else {
+ Value *In = PHI->getIncomingValue(I);
+ BasicBlock *Parent = PHI->getIncomingBlock(I);
+ IRB.SetInsertPoint(Parent, Parent->getTerminator()->getIterator());
+ Type *Ty = GEPI.getSourceElementType();
+ NewVal = IRB.CreateGEP(Ty, GEPI.getPointerOperand(), In,
+ In->getName() + ".sroa.gep", IsInBounds);
+ }
+ NewPN->addIncoming(NewVal, B);
+ }
+
+ Visited.erase(&GEPI);
+ GEPI.replaceAllUsesWith(NewPN);
+ GEPI.eraseFromParent();
+ Visited.insert(NewPN);
+ enqueueUsers(*NewPN);
+
+ LLVM_DEBUG(for (Value *In
+ : NewPN->incoming_values()) dbgs()
+ << "\n " << *In;
+ dbgs() << "\n " << *NewPN << '\n');
+
+ return true;
+ }
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (foldGEPSelect(GEPI))
return true;
- if (isa<PHINode>(GEPI.getPointerOperand()) &&
- foldGEPPhi(GEPI))
+ if (isa<PHINode>(GEPI.getPointerOperand()) && foldGEPPhi(GEPI))
+ return true;
+
+ if (GEPI.hasIndices() && isa<PHINode>(GEPI.getOperand(1)) &&
+ foldGEPPhiConst(GEPI))
return true;
enqueueUsers(GEPI);
diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll
index c5aa1cdd9cf654..7a69e6d40dd9d2 100644
--- a/llvm/test/Transforms/SROA/phi-gep.ll
+++ b/llvm/test/Transforms/SROA/phi-gep.ll
@@ -3,6 +3,7 @@
; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
%pair = type { i32, i32 }
+%t1 = type { i64, i32, i32, i64, i32, i32 }
define i32 @test_sroa_phi_gep(i1 %cond) {
; CHECK-LABEL: @test_sroa_phi_gep(
@@ -245,7 +246,7 @@ define i32 @test_sroa_invoke_phi_gep(i1 %cond) personality ptr @__gxx_personalit
; CHECK-NEXT: br i1 [[COND:%.*]], label [[CALL:%.*]], label [[END:%.*]]
; CHECK: call:
; CHECK-NEXT: [[B:%.*]] = invoke ptr @foo()
-; CHECK-NEXT: to label [[END]] unwind label [[INVOKE_CATCH:%.*]]
+; CHECK-NEXT: to label [[END]] unwind label [[INVOKE_CATCH:%.*]]
; CHECK: end:
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[B]], [[CALL]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i32 0, i32 1
@@ -253,7 +254,7 @@ define i32 @test_sroa_invoke_phi_gep(i1 %cond) personality ptr @__gxx_personalit
; CHECK-NEXT: ret i32 [[LOAD]]
; CHECK: invoke_catch:
; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 }
-; CHECK-NEXT: catch ptr null
+; CHECK-NEXT: catch ptr null
; CHECK-NEXT: ret i32 0
;
entry:
@@ -468,10 +469,10 @@ define i32 @test_sroa_phi_gep_multiple_values_from_same_block(i32 %arg) {
; CHECK-LABEL: @test_sroa_phi_gep_multiple_values_from_same_block(
; CHECK-NEXT: bb.1:
; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[BB_3:%.*]] [
-; CHECK-NEXT: i32 1, label [[BB_2:%.*]]
-; CHECK-NEXT: i32 2, label [[BB_2]]
-; CHECK-NEXT: i32 3, label [[BB_4:%.*]]
-; CHECK-NEXT: i32 4, label [[BB_4]]
+; CHECK-NEXT: i32 1, label [[BB_2:%.*]]
+; CHECK-NEXT: i32 2, label [[BB_2]]
+; CHECK-NEXT: i32 3, label [[BB_4:%.*]]
+; CHECK-NEXT: i32 4, label [[BB_4]]
; CHECK-NEXT: ]
; CHECK: bb.2:
; CHECK-NEXT: br label [[BB_4]]
@@ -504,6 +505,47 @@ bb.4: ; preds = %bb.1, %bb.1, %bb
ret i32 %load
}
+
+define void @test_gep_phi_const(i32 %arg) {
+; CHECK-LABEL: @test_gep_phi_const(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[BB9:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB11:%.*]]
+; CHECK-NEXT: i32 1, label [[BB10:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb9:
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 8, [[BB10]] ], [ 16, [[BB:%.*]] ]
+; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi ptr [ undef, [[BB10]] ], [ undef, [[BB]] ]
+; CHECK-NEXT: br label [[BB11]]
+; CHECK: bb10:
+; CHECK-NEXT: br label [[BB9]]
+; CHECK: bb11:
+; CHECK-NEXT: [[PHI12:%.*]] = phi ptr [ [[PHI_SROA_PHI_SROA_SPECULATED]], [[BB9]] ], [ undef, [[BB]] ]
+; CHECK-NEXT: store i8 0, ptr [[PHI12]], align 1
+; CHECK-NEXT: ret void
+;
+bb:
+ %alloca = alloca %t1, align 8
+ switch i32 %arg, label %bb9 [
+ i32 0, label %bb11
+ i32 1, label %bb10
+ ]
+
+bb9: ; preds = %bb10, %bb
+ %phi = phi i64 [ 8, %bb10 ], [ 16, %bb ]
+ %getelementptr = getelementptr i8, ptr %alloca, i64 %phi
+ %load = load ptr, ptr %getelementptr, align 8
+ br label %bb11
+
+bb10: ; preds = %bb
+ br label %bb9
+
+bb11: ; preds = %bb9, %bb
+ %phi12 = phi ptr [ %load, %bb9 ], [ undef, %bb ]
+ store i8 0, ptr %phi12, align 1
+ ret void
+}
+
declare ptr @foo()
declare i32 @__gxx_personality_v0(...)
|
@@ -4070,12 +4070,67 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { | |||
return true; | |||
} | |||
|
|||
bool foldGEPPhiConst(GetElementPtrInst &GEPI) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we copy foldGEPSelect
and handle both gep(phi(ptr), idx) and gep(ptr, phi(idx)) in one function? and also handle GEPs with more than one index
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we copy foldGEPSelect and handle both gep(phi(ptr), idx) and gep(ptr, phi(idx)) in one function?
They are deceptively similar on the surface, but they do literally everything differently. The first one operates on pointer, the other one on the indices. Combined function will literally have to provide different if/else for each of the steps, except for a just a few common lines . I've tried and the code is rather unreadable.
and also handle GEPs with more than one index
That is probably unnecessary, now that instcombine normalizes GEPs to untyped ones with a single byte offset.
Also, if we were to do try expanding multiple indices this way, it would result in a combinatorial explosion of the number of GEPs we may need to create.
If/when we run into this, and do discover that we want to handle multiple indices, we should be able to relax the algorithm a bit and iteratively expand one index at a time.
%phi12 = phi ptr [ %load, %bb9 ], [ undef, %bb ] | ||
store i8 0, ptr %phi12, align 1 | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please also add a negative test with non-constant incoming value?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would the test below do? It verifies that we only handle all-const phi indices. Or did you have something else in mind?
That got me thinking. Perhaps we should relax "phi(all-const)" into phi(any-const)
as it will give SROA some more slices to analyze.
Can be closed now that #83494 has landed? |
Recent changes to normalize GEPs to single-index i8 variant (90ba330) allow more optimizations,
and some of them result in gep(alloca, phi(C1,C2,...)) which SROA could not handle.
This patch splits such GEPs into phi(gep(alloca, C1), gep(alloca, C2)) so SROA can analyze alloca slices.