From 6d4271eb874959870d67b85d0c1f59a9e498b98a Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 25 Sep 2025 03:39:49 +0000 Subject: [PATCH 1/3] Reapply "[ControlHeightReduction] Drop lifetime annotations where necessary (#159686)" This reverts commit 4f33d7b7a9f39d733b7572f9afbf178bca8da127. The original landing of this patch had an issue where it would try to hoist allocas into the entry block even when they were already in the entry block. This would actually end up sinking them and moving them after their users, resulting in invalid IR. This update fixes the issue by ensuring that we only hoist static allocas that have been sunk into a split basic block. A regression test has been added. --- .../ControlHeightReduction.cpp | 48 +++- .../Transforms/PGOProfile/chr-lifetimes.ll | 245 ++++++++++++++++++ 2 files changed, 285 insertions(+), 8 deletions(-) create mode 100644 llvm/test/Transforms/PGOProfile/chr-lifetimes.ll diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index c14bbecf0d4e1..6cf50b6bfa80d 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1591,7 +1591,16 @@ static void insertTrivialPHIs(CHRScope *Scope, } TrivialPHIs.insert(PN); CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); + bool FoundLifetimeAnnotation = false; for (Instruction *UI : Users) { + // If we found a lifetime annotation, remove it, but set a flag + // to ensure that we remove all other lifetime annotations attached + // to the alloca. 
+ if (UI->isLifetimeStartOrEnd()) { + UI->eraseFromParent(); + FoundLifetimeAnnotation = true; + continue; + } for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { if (UI->getOperand(J) == &I) { UI->setOperand(J, PN); @@ -1599,6 +1608,14 @@ static void insertTrivialPHIs(CHRScope *Scope, } CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); } + // Erase any leftover lifetime annotations for a dynamic alloca. + if (FoundLifetimeAnnotation) { + for (User *U : make_early_inc_range(I.users())) { + if (auto *UI = dyn_cast<Instruction>(U)) + if (UI->isLifetimeStartOrEnd()) + UI->eraseFromParent(); + } + } } } } @@ -1693,14 +1710,12 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { BasicBlock *ExitBlock = LastRegion->getExit(); std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); - if (ExitBlock) { - // Insert a trivial phi at the exit block (where the CHR hot path and the - // cold path merges) for a value that's defined in the scope but used - // outside it (meaning it's alive at the exit block). We will add the - // incoming values for the CHR cold paths to it below. Without this, we'd - // miss updating phi's for such values unless there happens to already be a - // phi for that value there. - insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); + SmallVector<AllocaInst *> StaticAllocas; + for (Instruction &I : *EntryBlock) { + if (auto *AI = dyn_cast<AllocaInst>(&I)) { + if (AI->isStaticAlloca()) + StaticAllocas.push_back(AI); + } } // Split the entry block of the first region. The new block becomes the new @@ -1719,6 +1734,23 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { FirstRegion->replaceEntryRecursive(NewEntryBlock); BasicBlock *PreEntryBlock = EntryBlock; + // Move static allocas into the pre-entry block so they stay static. Do not + // move allocas that have not moved from the entry block as otherwise we + // might end up moving them after users. 
+ for (AllocaInst *AI : StaticAllocas) + if (AI->getParent() != EntryBlock) + AI->moveBefore(EntryBlock->getTerminator()->getIterator()); + + if (ExitBlock) { + // Insert a trivial phi at the exit block (where the CHR hot path and the + // cold path merges) for a value that's defined in the scope but used + // outside it (meaning it's alive at the exit block). We will add the + // incoming values for the CHR cold paths to it below. Without this, we'd + // miss updating phi's for such values unless there happens to already be a + // phi for that value there. + insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); + } + ValueToValueMapTy VMap; // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a // hot path (originals) and a cold path (clones) and update the PHIs at the diff --git a/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll b/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll new file mode 100644 index 0000000000000..3e8cebcff3eb1 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll @@ -0,0 +1,245 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='require<profile-summary>,chr' -S | FileCheck %s + +declare void @foo() +declare void @bar() +declare void @baz(i64) + +; Test that when we have a static alloca in an entry block that will get split, +; the alloca remains static and we preserve its lifetime annotations. 
+define void @test_chr_with_lifetimes(ptr %i) !prof !14 { +; CHECK-LABEL: @test_chr_with_lifetimes( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = select i1 true, i1 [[TMP9]], i1 false +; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP10]], i1 [[TMP11]], i1 false +; CHECK-NEXT: br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15:![0-9]+]] +; CHECK: entry.split: +; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16:![0-9]+]] +; CHECK-NEXT: call void @baz(i64 [[TMP6]]) +; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17:![0-9]+]] +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]] +; CHECK-NEXT: call void @baz(i64 [[TMP7]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]] +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TEST]]) +; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8 +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP2:%.*]] = phi ptr [ [[TMP3:%.*]], [[BB2]] ], [ null, [[BB1]] ] +; CHECK-NEXT: [[TMP3]] = getelementptr i8, ptr [[TMP2]], i64 24 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP2]], [[I]] +; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %1 = load i32, ptr %i + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i64 4, i64 0, !prof !15 + %test = alloca i32, align 8 + call void @baz(i64 %3) + br i1 %2, label %bb1, label %bb0, !prof !15 + 
+bb0: + call void @foo() + br label %bb1 + +bb1: + call void @llvm.lifetime.start.p0(ptr %test) + store ptr %test, ptr %i, align 8 + br label %bb2 + +bb2: + %4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ] + %5 = getelementptr i8, ptr %4, i64 24 + %6 = icmp eq ptr %4, %i + br i1 %6, label %bb3, label %bb2 + +bb3: + ret void +} + +; Test that we remove lifetime markers that would otherwise refer to phi +; nodes given the dynamic allocas they referred to have been duplicated. +define void @test_chr_dynamic_alloca(ptr %i) !prof !14 { +; CHECK-LABEL: @test_chr_dynamic_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEST1:%.*]] = load i32, ptr [[I:%.*]], align 4 +; CHECK-NEXT: [[TEST2:%.*]] = icmp eq i32 [[TEST1]], 5 +; CHECK-NEXT: br i1 [[TEST2]], label [[BB4:%.*]], label [[BB3:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 true, i1 [[TMP2]], i1 false +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false +; CHECK-NEXT: br i1 [[TMP5]], label [[BB4_SPLIT:%.*]], label [[BB4_SPLIT_NONCHR:%.*]], !prof [[PROF15]] +; CHECK: bb4.split: +; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16]] +; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8 +; CHECK-NEXT: call void @baz(i64 [[TMP6]]) +; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17]] +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb4.split.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]] +; CHECK-NEXT: [[TEST_NONCHR:%.*]] = alloca i32, align 8 +; CHECK-NEXT: call void @baz(i64 [[TMP7]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]] +; CHECK: bb0.nonchr: +; CHECK-NEXT: 
call void @foo() +; CHECK-NEXT: store ptr [[TEST_NONCHR]], ptr [[I]], align 8 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP8:%.*]] = phi ptr [ [[TEST]], [[BB0]] ], [ [[TEST]], [[BB4_SPLIT]] ], [ [[TEST_NONCHR]], [[BB0_NONCHR]] ], [ [[TEST_NONCHR]], [[BB4_SPLIT_NONCHR]] ] +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: store ptr [[TMP8]], ptr [[I]], align 8 +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP9:%.*]] = phi ptr [ [[TMP10:%.*]], [[BB2]] ], [ null, [[BB1]] ] +; CHECK-NEXT: [[TMP10]] = getelementptr i8, ptr [[TMP9]], i64 24 +; CHECK-NEXT: [[TEST5:%.*]] = load ptr, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq ptr [[TMP9]], [[TEST5]] +; CHECK-NEXT: br i1 [[TMP11]], label [[BB3]], label [[BB2]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %test1 = load i32, ptr %i + %test2 = icmp eq i32 %test1, 5 + br i1 %test2, label %bb4, label %bb3 + +bb4: + %1 = load i32, ptr %i + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i64 4, i64 0, !prof !15 + %test = alloca i32, align 8 + call void @baz(i64 %3) + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + call void @llvm.lifetime.start.p0(ptr %test) + store ptr %test, ptr %i, align 8 + br label %bb1 + +bb1: + call void @bar() + call void @llvm.lifetime.start.p0(ptr %test) + store ptr %test, ptr %i, align 8 + br label %bb2 + +bb2: + %4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ] + %5 = getelementptr i8, ptr %4, i64 24 + %test5 = load ptr, ptr %test + call void @llvm.lifetime.end.p0(ptr %test) + %6 = icmp eq ptr %4, %test5 + br i1 %6, label %bb3, label %bb2 + +bb3: + ret void +} + +; Test that we do not move around allocas that occur in the entry block +; before splitting. If we accidentally sink them, we can move them after +; their users. 
+define void @test_no_move_allocas(ptr %i) !prof !14 { +; CHECK-LABEL: @test_no_move_allocas( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TEST]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 true, i1 [[TMP2]], i1 false +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false +; CHECK-NEXT: br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] +; CHECK: entry.split: +; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16]] +; CHECK-NEXT: call void @baz(i64 [[TMP6]]) +; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17]] +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]] +; CHECK-NEXT: call void @baz(i64 [[TMP7]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]] +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP8:%.*]] = phi ptr [ [[TMP9:%.*]], [[BB2]] ], [ null, [[BB1]] ] +; CHECK-NEXT: [[TMP9]] = getelementptr i8, ptr [[TMP8]], i64 24 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP8]], [[I]] +; CHECK-NEXT: br i1 [[TMP10]], label [[BB3:%.*]], label [[BB2]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %test = alloca i32, align 8 + call void @llvm.lifetime.start.p0(ptr %test) + %1 = load i32, ptr %i + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i64 4, i64 0, !prof !15 + call void @baz(i64 %3) + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: 
+ call void @foo() + br label %bb1 + +bb1: + call void @bar() + br label %bb2 + +bb2: + %4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ] + %5 = getelementptr i8, ptr %4, i64 24 + %6 = icmp eq ptr %4, %i + br i1 %6, label %bb3, label %bb2 + +bb3: + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} + +!14 = !{!"function_entry_count", i64 100} +!15 = !{!"branch_weights", i32 0, i32 1} +; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0} From 83c42a51a663eb2dabd354fa5086adb0fc18d87b Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 25 Sep 2025 13:19:36 +0000 Subject: [PATCH 2/3] feedback --- .../Transforms/Instrumentation/ControlHeightReduction.cpp | 7 ++----- llvm/test/Transforms/PGOProfile/chr-lifetimes.ll | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 6cf50b6bfa80d..ba35ca4c58e2a 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1734,12 +1734,9 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet &TrivialPHIs) { FirstRegion->replaceEntryRecursive(NewEntryBlock); BasicBlock *PreEntryBlock = EntryBlock; - // Move static allocas into the pre-entry block so they stay static. Do not - // move allocas that have not moved from the entry block as otherwise we - // might end up moving them after users. 
+ // Move static allocas into the pre-entry block so they stay static. for (AllocaInst *AI : StaticAllocas) - if (AI->getParent() != EntryBlock) - AI->moveBefore(EntryBlock->getTerminator()->getIterator()); + AI->moveBefore(EntryBlock->begin()->getIterator()); if (ExitBlock) { // Insert a trivial phi at the exit block (where the CHR hot path and the diff --git a/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll b/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll index 3e8cebcff3eb1..b29834f9fe960 100644 --- a/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll +++ b/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll @@ -10,9 +10,9 @@ declare void @baz(i64) define void @test_chr_with_lifetimes(ptr %i) !prof !14 { ; CHECK-LABEL: @test_chr_with_lifetimes( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 -; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8 ; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = select i1 true, i1 [[TMP9]], i1 false ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP1]] From ea162f5077521b63e29896eaf5402e0993cdafcd Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 25 Sep 2025 13:24:36 +0000 Subject: [PATCH 3/3] more feedback --- llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index ba35ca4c58e2a..7c78eb35a865a 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1736,7 +1736,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet &TrivialPHIs) { // Move static allocas into the pre-entry block so they stay static. 
for (AllocaInst *AI : StaticAllocas) - AI->moveBefore(EntryBlock->begin()->getIterator()); + AI->moveBefore(EntryBlock->begin()); if (ExitBlock) { // Insert a trivial phi at the exit block (where the CHR hot path and the