-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Pipelines] Perform mergefunc after constmerge #92498
Conversation
ebc89c7
to
91eadcf
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add a test in llvm/test/Transforms/PhaseOrdering (e.g. with -O2 -enable-merge-functions)?
Constmerge can fold switch jump tables, possibly making functions identical again. It can help mergefunc. On the other hand, the opposite seems unlikely. Fixes llvm#92201.
91eadcf
to
a6d30c8
Compare
I added a few tests.
@llvm/pr-subscribers-llvm-transforms Author: YAMAMOTO Takashi (yamt). Changes: Constmerge can fold switch jump tables, possibly making functions identical again. It can help mergefunc. Fixes #92201. Full diff: https://github.com/llvm/llvm-project/pull/92498.diff 4 Files Affected:
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 926515c9508a97..4fd5ee1946bb77 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1527,10 +1527,6 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (EnableIROutliner)
MPM.addPass(IROutlinerPass());
- // Merge functions if requested.
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
// Now we need to do some global optimization transforms.
// FIXME: It would seem like these should come first in the optimization
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
@@ -1538,6 +1534,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
MPM.addPass(GlobalDCEPass());
MPM.addPass(ConstantMergePass());
+ // Merge functions if requested. It has a better chance to merge functions
+ // after ConstantMerge folded jump tables.
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
if (PTO.CallGraphProfile && !LTOPreLink)
MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 489aed40c190b4..588337c15625e6 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -281,9 +281,9 @@
; CHECK-HOT-COLD-SPLIT-NEXT: Running pass: HotColdSplittingPass
; CHECK-IR-OUTLINER-NEXT: Running pass: IROutlinerPass
; CHECK-IR-OUTLINER-NEXT: Running analysis: IRSimilarityAnalysis
-; CHECK-MERGE-FUNCS-NEXT: Running pass: MergeFunctionsPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-O-NEXT: Running pass: ConstantMergePass
+; CHECK-MERGE-FUNCS-NEXT: Running pass: MergeFunctionsPass
; CHECK-DEFAULT-NEXT: Running pass: CGProfilePass
; CHECK-DEFAULT-NEXT: Running pass: RelLookupTableConverterPass
; CHECK-LTO-NOT: Running pass: RelLookupTableConverterPass
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
new file mode 100644
index 00000000000000..5d650d5f080bac
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
@@ -0,0 +1,67 @@
+; RUN: opt -passes="default<O3>" -enable-merge-functions -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx12.0.0"
+
+; Function Attrs: noinline nounwind optsize ssp uwtable
+define i32 @f(i32 noundef %x) #0 {
+; CHECK-LABEL: @f(
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, ptr %x.addr, align 4, !tbaa !5
+ %0 = load i32, ptr %x.addr, align 4, !tbaa !5
+ switch i32 %0, label %sw.default [
+ i32 0, label %sw.bb
+ i32 2, label %sw.bb
+ i32 4, label %sw.bb
+ i32 6, label %sw.bb
+ i32 7, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry
+ store i32 1, ptr %x.addr, align 4, !tbaa !5
+ br label %sw.epilog
+
+sw.default: ; preds = %entry
+ store i32 0, ptr %x.addr, align 4, !tbaa !5
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.default, %sw.bb
+ %1 = load i32, ptr %x.addr, align 4, !tbaa !5
+ ret i32 %1
+}
+
+; Function Attrs: noinline nounwind optsize ssp uwtable
+define i32 @g(i32 noundef %x) #0 {
+; CHECK-LABEL: @g(
+; CHECK-NEXT: [[TMP2:%.*]] = tail call range(i32 0, 2) i32 @f(i32 noundef [[TMP0:%.*]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, ptr %x.addr, align 4, !tbaa !5
+ %0 = load i32, ptr %x.addr, align 4, !tbaa !5
+ switch i32 %0, label %sw.default [
+ i32 0, label %sw.bb
+ i32 2, label %sw.bb
+ i32 4, label %sw.bb
+ i32 6, label %sw.bb
+ i32 7, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry
+ store i32 1, ptr %x.addr, align 4, !tbaa !5
+ br label %sw.epilog
+
+sw.default: ; preds = %entry
+ store i32 0, ptr %x.addr, align 4, !tbaa !5
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.default, %sw.bb
+ %1 = load i32, ptr %x.addr, align 4, !tbaa !5
+ ret i32 %1
+}
+
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
new file mode 100644
index 00000000000000..bce8f08ceda5e0
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
@@ -0,0 +1,47 @@
+; RUN: opt -passes="default<O3>" -enable-merge-functions -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx12.0.0"
+
+@switch.table.f = private unnamed_addr constant [8 x i32] [i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1], align 4
+@switch.table.g = private unnamed_addr constant [8 x i32] [i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1], align 4
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable
+define range(i32 0, 2) i32 @f(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: @f(
+entry:
+ %0 = icmp ult i32 %x, 8
+ br i1 %0, label %switch.lookup, label %sw.epilog
+
+switch.lookup: ; preds = %entry
+ %1 = zext nneg i32 %x to i64
+ %switch.gep = getelementptr inbounds [8 x i32], ptr @switch.table.f, i64 0, i64 %1
+ %switch.load = load i32, ptr %switch.gep, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %switch.lookup
+ %x.addr.0 = phi i32 [ %switch.load, %switch.lookup ], [ 0, %entry ]
+ ret i32 %x.addr.0
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable
+define range(i32 0, 2) i32 @g(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: @g(
+; CHECK-NEXT: [[TMP2:%.*]] = tail call range(i32 0, 2) i32 @f(i32 noundef [[TMP0:%.*]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+entry:
+ %0 = icmp ult i32 %x, 8
+ br i1 %0, label %switch.lookup, label %sw.epilog
+
+switch.lookup: ; preds = %entry
+ %1 = zext nneg i32 %x to i64
+ %switch.gep = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 %1
+ %switch.load = load i32, ptr %switch.gep, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %switch.lookup
+ %x.addr.0 = phi i32 [ %switch.load, %switch.lookup ], [ 0, %entry ]
+ ret i32 %x.addr.0
+}
+
+attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
; Function Attrs: noinline nounwind optsize ssp uwtable | ||
define i32 @f(i32 noundef %x) #0 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
; Function Attrs: noinline nounwind optsize ssp uwtable | |
define i32 @f(i32 noundef %x) #0 { | |
define i32 @f(i32 noundef %x) { |
Here and elsewhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done. thank you.
can this land? |
Constmerge can fold switch jump tables, possibly making functions identical again. It can help mergefunc. On the other hand, the opposite seems unlikely. Fixes llvm#92201.
Constmerge can fold switch jump tables, possibly making functions identical again. It can help mergefunc.
On the other hand, the opposite seems unlikely.
Fixes #92201