Skip to content

Commit

Permalink
Reapply Move "auto-init" instructions to the dominator of their users
Browse files Browse the repository at this point in the history
Original patch (50b2a11) ignored the
fact that -ftrivial-auto-var-init could affect function parameters with
the sret attribute.
Just do not move instructions that don't affect an alloca.
Also add missing test case for volatile instruction.

Differential Revision: https://reviews.llvm.org/D148507
  • Loading branch information
serge-sans-paille committed Apr 24, 2023
1 parent 5379189 commit afa13ba
Show file tree
Hide file tree
Showing 21 changed files with 725 additions and 0 deletions.
29 changes: 29 additions & 0 deletions llvm/include/llvm/Transforms/Utils/MoveAutoInit.h
@@ -0,0 +1,29 @@
//===- MoveAutoInit.h - Move insts marked as auto-init Pass --*- C++ -*-======//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions marked as auto-init closer to their use if
// profitable, generally because it moves them under a guard, potentially
// skipping the overhead of the auto-init under some execution paths.
//
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
#define LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H

#include "llvm/IR/PassManager.h"

namespace llvm {

/// New-pass-manager function pass that moves instructions marked as auto-init
/// closer to their users when that places them under a guard.
class MoveAutoInitPass : public PassInfoMixin<MoveAutoInitPass> {
public:
/// Runs the pass on \p F, fetching the analyses it needs from \p AM, and
/// reports which analyses remain valid afterwards.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm

#endif // LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Expand Up @@ -245,6 +245,7 @@
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Expand Up @@ -124,6 +124,7 @@
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
Expand Down Expand Up @@ -696,6 +697,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(MemCpyOptPass());

FPM.addPass(DSEPass());
FPM.addPass(MoveAutoInitPass());

FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
Expand Down Expand Up @@ -1809,6 +1812,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,

// Nuke dead stores.
MainFPM.addPass(DSEPass());
MainFPM.addPass(MoveAutoInitPass());
MainFPM.addPass(MergedLoadStoreMotionPass());

LoopPassManager LPM;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Expand Up @@ -331,6 +331,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
FUNCTION_PASS("newgvn", NewGVNPass())
FUNCTION_PASS("jump-threading", JumpThreadingPass())
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Utils/CMakeLists.txt
Expand Up @@ -58,6 +58,7 @@ add_llvm_component_library(LLVMTransformUtils
MetaRenamer.cpp
MisExpect.cpp
ModuleUtils.cpp
MoveAutoInit.cpp
NameAnonGlobals.cpp
PredicateInfo.cpp
PromoteMemoryToRegister.cpp
Expand Down
233 changes: 233 additions & 0 deletions llvm/lib/Transforms/Utils/MoveAutoInit.cpp
@@ -0,0 +1,233 @@
//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions marked as auto-init closer to the basic block
// that uses them, possibly removing them from some control paths of the
// function.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

#define DEBUG_TYPE "move-auto-init"

STATISTIC(NumMoved, "Number of instructions moved");

// Upper bound on the number of distinct memory accesses usersDominator() may
// visit while analyzing a single candidate. Once exceeded, the analysis gives
// up and the candidate instruction is left where it is.
static cl::opt<unsigned> MoveAutoInitThreshold(
"move-auto-init-threshold", cl::Hidden, cl::init(128),
cl::desc("Maximum instructions to analyze per moved initialization"));

static bool hasAutoInitMetadata(const Instruction &I) {
return I.hasMetadata(LLVMContext::MD_annotation) &&
any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(),
[](const MDOperand &Op) {
return cast<MDString>(Op.get())->getString() == "auto-init";
});
}

static std::optional<MemoryLocation> writeToAlloca(const Instruction &I) {
MemoryLocation ML;
if (auto *MI = dyn_cast<MemIntrinsic>(&I))
ML = MemoryLocation::getForDest(MI);
else if (auto *SI = dyn_cast<StoreInst>(&I))
ML = MemoryLocation::get(SI);
else
assert(false && "memory location set");

if (isa<AllocaInst>(getUnderlyingObject(ML.Ptr)))
return ML;
else
return {};
}

/// Computes the nearest common dominator of all the blocks holding an
/// instruction that may read or write \p ML after \p I, following the users of
/// \p I's memory access in \p MSSA.
///
/// Accesses that cannot touch \p ML are looked through: their own users are
/// explored instead. Lifetime markers and \p I itself are never counted as
/// users. Returns nullptr when no such user exists or when more than
/// MoveAutoInitThreshold accesses had to be visited.
static BasicBlock *usersDominator(const MemoryLocation &ML, Instruction *I,
                                  DominatorTree &DT, MemorySSA &MSSA) {
  MemoryUseOrDef &InitAccess = *MSSA.getMemoryAccess(I);
  BatchAAResults AA(MSSA.getAA());

  auto ToMemoryAccess = [](User *U) { return cast<MemoryAccess>(U); };

  BasicBlock *CommonDom = nullptr;
  SmallPtrSet<MemoryAccess *, 8> Seen;
  SmallVector<MemoryAccess *> Pending(
      map_range(InitAccess.users(), ToMemoryAccess));

  while (!Pending.empty()) {
    MemoryAccess *Access = Pending.pop_back_val();

    // Each access is processed at most once.
    if (!Seen.insert(Access).second)
      continue;

    // Give up once the exploration budget is exhausted.
    if (Seen.size() > MoveAutoInitThreshold)
      return nullptr;

    // The instruction behind `Access` if it may read or write `ML`; lifetime
    // markers are valid users but must not pull the dominator towards them.
    Instruction *Clobber = nullptr;
    if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Access)) {
      Instruction *Candidate = UseOrDef->getMemoryInst();
      if (AA.getModRefInfo(Candidate, ML) != ModRefInfo::NoModRef &&
          !Candidate->isLifetimeStartOrEnd() && Candidate != I)
        Clobber = Candidate;
    }

    // Not a relevant user: keep walking through its own users.
    if (!Clobber) {
      append_range(Pending, map_range(Access->users(), ToMemoryAccess));
      continue;
    }

    BasicBlock *ClobberBB = Clobber->getParent();
    CommonDom = CommonDom
                    ? DT.findNearestCommonDominator(CommonDom, ClobberBB)
                    : ClobberBB;
  }

  return CommonDom;
}

/// Moves eligible auto-init instructions out of the entry block of \p F and
/// into a block that dominates all the users of the memory they initialize.
///
/// An instruction of the entry block is a candidate when it carries the
/// "auto-init" annotation, writes to an alloca and is not volatile. Its
/// destination block is the dominator of its users, adjusted so that:
///  - the instruction is not placed where a cycle could execute it several
///    times (the non back-edge predecessors are used instead), and
///  - it is not placed in a block starting with a catchswitch.
/// Candidates whose destination would still be the entry block are skipped.
///
/// \p MSSA is kept up to date for every moved instruction.
/// \returns true if at least one instruction was moved.
static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) {
  BasicBlock &EntryBB = F.getEntryBlock();
  SmallVector<std::pair<Instruction *, BasicBlock *>> JobList;

  //
  // Compute movable instructions.
  //
  for (Instruction &I : EntryBB) {
    if (!hasAutoInitMetadata(I))
      continue;

    std::optional<MemoryLocation> ML = writeToAlloca(I);
    if (!ML)
      continue;

    if (I.isVolatile())
      continue;

    BasicBlock *UsersDominator = usersDominator(*ML, &I, DT, MSSA);
    if (!UsersDominator)
      continue;

    if (UsersDominator == &EntryBB)
      continue;

    // Traverse the CFG to detect cycles `UsersDominator` would be part of.
    SmallPtrSet<BasicBlock *, 8> TransitiveSuccessors;
    SmallVector<BasicBlock *> WorkList(successors(UsersDominator));
    bool HasCycle = false;
    while (!WorkList.empty()) {
      BasicBlock *CurrBB = WorkList.pop_back_val();
      if (CurrBB == UsersDominator)
        // No early exit because we want to compute the full set of transitive
        // successors.
        HasCycle = true;
      for (BasicBlock *Successor : successors(CurrBB)) {
        if (!TransitiveSuccessors.insert(Successor).second)
          continue;
        WorkList.push_back(Successor);
      }
    }

    // Don't insert if that could create multiple execution of I,
    // but we can insert it in the non back-edge predecessors, if it exists.
    if (HasCycle) {
      BasicBlock *UsersDominatorHead = UsersDominator;
      while (BasicBlock *UniquePredecessor =
                 UsersDominatorHead->getUniquePredecessor())
        UsersDominatorHead = UniquePredecessor;

      if (UsersDominatorHead == &EntryBB)
        continue;

      BasicBlock *DominatingPredecessor = nullptr;
      for (BasicBlock *Pred : predecessors(UsersDominatorHead)) {
        // If one of the predecessor of the dominator also transitively is a
        // successor, moving to the dominator would do the inverse of loop
        // hoisting, and we don't want that.
        if (TransitiveSuccessors.count(Pred))
          continue;

        // Unreachable predecessors have no node in the dominator tree, so
        // they cannot take part in a common-dominator query.
        if (!DT.isReachableFromEntry(Pred))
          continue;

        DominatingPredecessor =
            DominatingPredecessor
                ? DT.findNearestCommonDominator(DominatingPredecessor, Pred)
                : Pred;
      }

      if (!DominatingPredecessor || DominatingPredecessor == &EntryBB)
        continue;

      UsersDominator = DominatingPredecessor;
    }

    // CatchSwitchInst blocks can only have one instruction, so they are not
    // good candidates for insertion. The test looks at the first non-PHI
    // instruction: getFirstInsertionPt() skips EH pads and therefore does not
    // designate the catchswitch itself.
    while (isa<CatchSwitchInst>(UsersDominator->getFirstNonPHI())) {
      for (BasicBlock *Pred : predecessors(UsersDominator))
        // Same as above: only reachable blocks are in the dominator tree.
        if (DT.isReachableFromEntry(Pred))
          UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred);
    }

    // We finally found a place where I can be moved while not introducing extra
    // execution, and guarded by at least one condition.
    if (UsersDominator != &EntryBB)
      JobList.emplace_back(&I, UsersDominator);
  }

  //
  // Perform the actual substitution.
  //
  if (JobList.empty())
    return false;

  MemorySSAUpdater MSSAU(&MSSA);

  // Reverse insertion to respect relative order between instructions:
  // if two instructions are moved from the same BB to the same BB, we insert
  // the second one in the front, then the first on top of it.
  for (auto &Job : reverse(JobList)) {
    Job.first->moveBefore(&*Job.second->getFirstInsertionPt());
    MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(),
                      MemorySSA::InsertionPlace::Beginning);
  }

  if (VerifyMemorySSA)
    MSSA.verifyMemorySSA();

  NumMoved += JobList.size();

  return true;
}

/// Pass entry point: requests the dominator tree and MemorySSA for \p F from
/// \p AM and hands them to runMoveAutoInit().
///
/// When nothing was moved every analysis is reported as preserved. Otherwise
/// the dominator tree, MemorySSA and the CFG analyses are reported as
/// preserved.
PreservedAnalyses MoveAutoInitPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  DominatorTree &DomTree = AM.getResult<DominatorTreeAnalysis>(F);
  MemorySSA &MemSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();

  const bool Changed = runMoveAutoInit(F, DomTree, MemSSA);
  if (Changed) {
    PreservedAnalyses Preserved;
    Preserved.preserve<DominatorTreeAnalysis>();
    Preserved.preserve<MemorySSAAnalysis>();
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }

  return PreservedAnalyses::all();
}
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-defaults.ll
Expand Up @@ -205,6 +205,7 @@
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-lto-defaults.ll
Expand Up @@ -106,6 +106,7 @@
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
Expand Up @@ -146,6 +146,7 @@
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
Expand Up @@ -133,6 +133,7 @@
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass
Expand Down
Expand Up @@ -140,6 +140,7 @@
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
Expand Up @@ -146,6 +146,7 @@
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
Expand Up @@ -171,6 +171,7 @@
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass
Expand Down
Expand Up @@ -136,6 +136,7 @@
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
; CHECK-O23SZ-NEXT: Running pass: DSEPass
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
; CHECK-O23SZ-NEXT: Running pass: LICMPass
Expand Down

0 comments on commit afa13ba

Please sign in to comment.