diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index c14bbecf0d4e1..6f6245020ae5c 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1591,7 +1591,16 @@ static void insertTrivialPHIs(CHRScope *Scope, } TrivialPHIs.insert(PN); CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); + bool FoundLifetimeAnnotation = false; for (Instruction *UI : Users) { + // If we found a lifetime annotation, remove it, but set a flag + // to ensure that we remove all other lifetime annotations attached + // to the alloca. + if (UI->isLifetimeStartOrEnd()) { + UI->eraseFromParent(); + FoundLifetimeAnnotation = true; + continue; + } for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { if (UI->getOperand(J) == &I) { UI->setOperand(J, PN); @@ -1599,6 +1608,14 @@ static void insertTrivialPHIs(CHRScope *Scope, } CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); } + // Erase any leftover lifetime annotations for a dynamic alloca. + if (FoundLifetimeAnnotation) { + for (User *U : make_early_inc_range(I.users())) { + if (auto *UI = dyn_cast<Instruction>(U)) + if (UI->isLifetimeStartOrEnd()) + UI->eraseFromParent(); + } + } } } } @@ -1693,14 +1710,12 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { BasicBlock *ExitBlock = LastRegion->getExit(); std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); - if (ExitBlock) { - // Insert a trivial phi at the exit block (where the CHR hot path and the - // cold path merges) for a value that's defined in the scope but used - // outside it (meaning it's alive at the exit block). We will add the - // incoming values for the CHR cold paths to it below. Without this, we'd - // miss updating phi's for such values unless there happens to already be a - // phi for that value there. 
- insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); + SmallVector<AllocaInst *> StaticAllocas; + for (Instruction &I : *EntryBlock) { + if (auto *AI = dyn_cast<AllocaInst>(&I)) { + if (AI->isStaticAlloca()) + StaticAllocas.push_back(AI); + } } // Split the entry block of the first region. The new block becomes the new @@ -1719,6 +1734,20 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { FirstRegion->replaceEntryRecursive(NewEntryBlock); BasicBlock *PreEntryBlock = EntryBlock; + // Move static allocas into the pre-entry block so they stay static. + for (AllocaInst *AI : StaticAllocas) + AI->moveBefore(EntryBlock->getTerminator()->getIterator()); + + if (ExitBlock) { + // Insert a trivial phi at the exit block (where the CHR hot path and the + // cold path merges) for a value that's defined in the scope but used + // outside it (meaning it's alive at the exit block). We will add the + // incoming values for the CHR cold paths to it below. Without this, we'd + // miss updating phi's for such values unless there happens to already be a + // phi for that value there. + insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); + } + ValueToValueMapTy VMap; // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a // hot path (originals) and a cold path (clones) and update the PHIs at the diff --git a/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll b/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll new file mode 100644 index 0000000000000..4f926237974fc --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='require<profile-summary>,chr' -S | FileCheck %s + +declare void @foo() +declare void @bar() +declare void @baz(i64) + +; Test that when we have a static alloca in an entry block that will get split, +; the alloca remains static and we preserve its lifetime annotations. 
+define void @test_chr_with_lifetimes(ptr %i) !prof !14 { +; CHECK-LABEL: @test_chr_with_lifetimes( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = select i1 true, i1 [[TMP9]], i1 false +; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP10]], i1 [[TMP11]], i1 false +; CHECK-NEXT: br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15:![0-9]+]] +; CHECK: entry.split: +; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16:![0-9]+]] +; CHECK-NEXT: call void @baz(i64 [[TMP6]]) +; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17:![0-9]+]] +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]] +; CHECK-NEXT: call void @baz(i64 [[TMP7]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]] +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TEST]]) +; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8 +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP2:%.*]] = phi ptr [ [[TMP3:%.*]], [[BB2]] ], [ null, [[BB1]] ] +; CHECK-NEXT: [[TMP3]] = getelementptr i8, ptr [[TMP2]], i64 24 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP2]], [[I]] +; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %1 = load i32, ptr %i + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i64 4, i64 0, !prof !15 + %test = alloca i32, align 8 + call void @baz(i64 %3) + br i1 %2, label %bb1, label %bb0, !prof !15 + 
+bb0: + call void @foo() + br label %bb1 + +bb1: + call void @llvm.lifetime.start.p0(ptr %test) + store ptr %test, ptr %i, align 8 + br label %bb2 + +bb2: + %4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ] + %5 = getelementptr i8, ptr %4, i64 24 + %6 = icmp eq ptr %4, %i + br i1 %6, label %bb3, label %bb2 + +bb3: + ret void +} + +; Test that we remove lifetime markers that would otherwise refer to phi +; nodes given the dynamic allocas they referred to have been duplicated. +define void @test_chr_dynamic_alloca(ptr %i) !prof !14 { +; CHECK-LABEL: @test_chr_dynamic_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEST1:%.*]] = load i32, ptr [[I:%.*]], align 4 +; CHECK-NEXT: [[TEST2:%.*]] = icmp eq i32 [[TEST1]], 5 +; CHECK-NEXT: br i1 [[TEST2]], label [[BB4:%.*]], label [[BB3:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 true, i1 [[TMP2]], i1 false +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false +; CHECK-NEXT: br i1 [[TMP5]], label [[BB4_SPLIT:%.*]], label [[BB4_SPLIT_NONCHR:%.*]], !prof [[PROF15]] +; CHECK: bb4.split: +; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16]] +; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8 +; CHECK-NEXT: call void @baz(i64 [[TMP6]]) +; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17]] +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb4.split.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]] +; CHECK-NEXT: [[TEST_NONCHR:%.*]] = alloca i32, align 8 +; CHECK-NEXT: call void @baz(i64 [[TMP7]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]] +; CHECK: bb0.nonchr: +; CHECK-NEXT: 
call void @foo() +; CHECK-NEXT: store ptr [[TEST_NONCHR]], ptr [[I]], align 8 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP8:%.*]] = phi ptr [ [[TEST]], [[BB0]] ], [ [[TEST]], [[BB4_SPLIT]] ], [ [[TEST_NONCHR]], [[BB0_NONCHR]] ], [ [[TEST_NONCHR]], [[BB4_SPLIT_NONCHR]] ] +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: store ptr [[TMP8]], ptr [[I]], align 8 +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP9:%.*]] = phi ptr [ [[TMP10:%.*]], [[BB2]] ], [ null, [[BB1]] ] +; CHECK-NEXT: [[TMP10]] = getelementptr i8, ptr [[TMP9]], i64 24 +; CHECK-NEXT: [[TEST5:%.*]] = load ptr, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq ptr [[TMP9]], [[TEST5]] +; CHECK-NEXT: br i1 [[TMP11]], label [[BB3]], label [[BB2]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %test1 = load i32, ptr %i + %test2 = icmp eq i32 %test1, 5 + br i1 %test2, label %bb4, label %bb3 + +bb4: + %1 = load i32, ptr %i + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i64 4, i64 0, !prof !15 + %test = alloca i32, align 8 + call void @baz(i64 %3) + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + call void @llvm.lifetime.start.p0(ptr %test) + store ptr %test, ptr %i, align 8 + br label %bb1 + +bb1: + call void @bar() + call void @llvm.lifetime.start.p0(ptr %test) + store ptr %test, ptr %i, align 8 + br label %bb2 + +bb2: + %4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ] + %5 = getelementptr i8, ptr %4, i64 24 + %test5 = load ptr, ptr %test + call void @llvm.lifetime.end.p0(ptr %test) + %6 = icmp eq ptr %4, %test5 + br i1 %6, label %bb3, label %bb2 + +bb3: + ret void +} + + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = 
!{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} + +!14 = !{!"function_entry_count", i64 100} +!15 = !{!"branch_weights", i32 0, i32 1} +; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0}