-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Stack array visit #70816
base: main
Are you sure you want to change the base?
Stack array visit #70816
Conversation
Added a check preventing AllocationAnalysis from visiting an operation more than once
@llvm/pr-subscribers-flang-fir-hlfir @llvm/pr-subscribers-llvm-transforms Author: Dmitriy Smirnov (d-smirnov) Changes: This PR fixes compile-time performance degradation observed on 521.wrf_r with -Ofast. Full diff: https://github.com/llvm/llvm-project/pull/70816.diff 3 Files Affected:
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index 9b90aed5a17ae73..41e5dafd04e71bb 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -154,6 +154,9 @@ class AllocationAnalysis
/// Visit control flow operations and decide whether to call visitOperation
/// to apply the transfer function
void processOperation(mlir::Operation *op) override;
+
+private:
+ llvm::DenseSet<mlir::Operation *> visited;
};
/// Drives analysis to find candidate fir.allocmem operations which could be
@@ -326,6 +329,9 @@ std::optional<AllocationState> LatticePoint::get(mlir::Value val) const {
void AllocationAnalysis::visitOperation(mlir::Operation *op,
const LatticePoint &before,
LatticePoint *after) {
+ if (!visited.insert(op).second)
+ return;
+
LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
<< "\n");
LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");
diff --git a/flang/test/Transforms/if.fir b/flang/test/Transforms/if.fir
new file mode 100644
index 000000000000000..abddd682986ea57
--- /dev/null
+++ b/flang/test/Transforms/if.fir
@@ -0,0 +1,39 @@
+// RUN: fir-opt --stack-arrays --debug-only=stack-arrays %s 2>&1 | grep -v '\-\-' | FileCheck %s
+
+// Check the data-flow-analysis can detect cases where we aren't sure if memory
+// is freed by the end of the function
+func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
+ %7 = arith.constant 42 : index
+ %8 = fir.allocmem !fir.array<?xi32>, %7 {uniq_name = "_QFdfa1Earr.alloc"}
+ %9 = fir.load %arg0 : !fir.ref<!fir.logical<4>>
+ %10 = fir.convert %9 : (!fir.logical<4>) -> i1
+ fir.if %10 {
+ fir.freemem %8 : !fir.heap<!fir.array<?xi32>>
+ } else {
+ }
+ return
+}
+
+// 8 visits:
+// CHECK: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+// CHECK-NEXT: StackArrays: Visiting operation:
+///CHECK-NEXT: module {
+// CHECK-NEXT: func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
+// CHECK-NEXT: %[[C42:.*]] = arith.constant 42 : index
+// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[C42]] {uniq_name = "_QFdfa1Earr.alloc"}
+// CHECK-NEXT: %[[LOGICAL:.*]] = fir.load %arg0 : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT: %[[BOOL:.*]] = fir.convert %[[LOGICAL]] : (!fir.logical<4>) -> i1
+// CHECK-NEXT: fir.if %[[BOOL]] {
+// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT: } else {
+// CHECK-NEXT: }
+// CHECK-NEXT: return
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b6895c649f838c1..1cc6248caa76e44 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -138,8 +138,8 @@ MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
static cl::opt<unsigned>
-MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
- cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
+ MaxVFOption("slp-max-vf", cl::init(192), cl::Hidden,
+ cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
/// Limits the size of scheduling regions in a block.
/// It avoid long compile times for _very_ large blocks where vector
@@ -4135,7 +4135,7 @@ static bool areTwoInsertFromSameBuildVector(
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
- SmallSet<int, 8> ReusedIdx;
+ SmallDenseSet<int, 8> ReusedIdx;
bool IsReusedIdx = false;
do {
if (IE2 == VU && !IE1)
|
0e84649
to
4d5f334
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure this is the correct solution. AllocationAnalysis may have to visit the same operation multiple times for the algorithm to converge.
Was the issue you were seeing related to very slow convergence?
This PR fixes compile-time performance degradation observed on 521.wrf_r with -Ofast.