| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,341 @@ | ||
| //===- TLSVariableHoist.cpp -------- Remove Redundant TLS Loads ---------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This pass identifies and eliminates redundant TLS loads if the related | |
| // option is set. For an example, refer to the comment at the head of | |
| // TLSVariableHoist.h. | |
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "llvm/ADT/SmallVector.h" | ||
| #include "llvm/IR/BasicBlock.h" | ||
| #include "llvm/IR/Dominators.h" | ||
| #include "llvm/IR/Function.h" | ||
| #include "llvm/IR/InstrTypes.h" | ||
| #include "llvm/IR/Instruction.h" | ||
| #include "llvm/IR/Instructions.h" | ||
| #include "llvm/IR/IntrinsicInst.h" | ||
| #include "llvm/IR/Module.h" | ||
| #include "llvm/IR/Value.h" | ||
| #include "llvm/InitializePasses.h" | ||
| #include "llvm/Pass.h" | ||
| #include "llvm/Support/Casting.h" | ||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/Support/raw_ostream.h" | ||
| #include "llvm/Transforms/Scalar.h" | ||
| #include "llvm/Transforms/Scalar/TLSVariableHoist.h" | ||
| #include <algorithm> | ||
| #include <cassert> | ||
| #include <cstdint> | ||
| #include <iterator> | ||
| #include <tuple> | ||
| #include <utility> | ||
|
|
||
| using namespace llvm; | ||
| using namespace tlshoist; | ||
|
|
||
| #define DEBUG_TYPE "tlshoist" | ||
|
|
||
| // TODO: Support "strict" model if we need to strictly load TLS address, | ||
| // because "non-optimize" may also do some optimization in other passes. | ||
| static cl::opt<std::string> TLSLoadHoist( | ||
| "tls-load-hoist", | ||
| cl::desc( | ||
| "hoist the TLS loads in PIC model: " | ||
| "tls-load-hoist=optimize: Eleminate redundant TLS load(s)." | ||
| "tls-load-hoist=strict: Strictly load TLS address before every use." | ||
| "tls-load-hoist=non-optimize: Generally load TLS before use(s)."), | ||
| cl::init("non-optimize"), cl::Hidden); | ||
|
|
||
| namespace { | ||
|
|
||
| /// The TLS Variable hoist pass. | ||
| class TLSVariableHoistLegacyPass : public FunctionPass { | ||
| public: | ||
| static char ID; // Pass identification, replacement for typeid | ||
|
|
||
| TLSVariableHoistLegacyPass() : FunctionPass(ID) { | ||
| initializeTLSVariableHoistLegacyPassPass(*PassRegistry::getPassRegistry()); | ||
| } | ||
|
|
||
| bool runOnFunction(Function &Fn) override; | ||
|
|
||
| StringRef getPassName() const override { return "TLS Variable Hoist"; } | ||
|
|
||
| void getAnalysisUsage(AnalysisUsage &AU) const override { | ||
| AU.setPreservesCFG(); | ||
| AU.addRequired<DominatorTreeWrapperPass>(); | ||
| AU.addRequired<LoopInfoWrapperPass>(); | ||
| } | ||
|
|
||
| private: | ||
| TLSVariableHoistPass Impl; | ||
| }; | ||
|
|
||
| } // end anonymous namespace | ||
|
|
||
| char TLSVariableHoistLegacyPass::ID = 0; | ||
|
|
||
| INITIALIZE_PASS_BEGIN(TLSVariableHoistLegacyPass, "tlshoist", | ||
| "TLS Variable Hoist", false, false) | ||
| INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) | ||
| INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) | ||
| INITIALIZE_PASS_END(TLSVariableHoistLegacyPass, "tlshoist", | ||
| "TLS Variable Hoist", false, false) | ||
|
|
||
| FunctionPass *llvm::createTLSVariableHoistPass() { | ||
| return new TLSVariableHoistLegacyPass(); | ||
| } | ||
|
|
||
| /// Perform the TLS Variable Hoist optimization for the given function. | ||
| bool TLSVariableHoistLegacyPass::runOnFunction(Function &Fn) { | ||
| if (skipFunction(Fn)) | ||
| return false; | ||
|
|
||
| LLVM_DEBUG(dbgs() << "********** Begin TLS Variable Hoist **********\n"); | ||
| LLVM_DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); | ||
|
|
||
| bool MadeChange = | ||
| Impl.runImpl(Fn, getAnalysis<DominatorTreeWrapperPass>().getDomTree(), | ||
| getAnalysis<LoopInfoWrapperPass>().getLoopInfo()); | ||
|
|
||
| if (MadeChange) { | ||
| LLVM_DEBUG(dbgs() << "********** Function after TLS Variable Hoist: " | ||
| << Fn.getName() << '\n'); | ||
| LLVM_DEBUG(dbgs() << Fn); | ||
| } | ||
| LLVM_DEBUG(dbgs() << "********** End TLS Variable Hoist **********\n"); | ||
|
|
||
| return MadeChange; | ||
| } | ||
|
|
||
| void TLSVariableHoistPass::collectTLSCandidate(Instruction *Inst) { | ||
| // Skip all cast instructions. They are visited indirectly later on. | ||
| if (Inst->isCast()) | ||
| return; | ||
|
|
||
| // Scan all operands. | ||
| for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) { | ||
| auto *GV = dyn_cast<GlobalVariable>(Inst->getOperand(Idx)); | ||
| if (!GV || !GV->isThreadLocal()) | ||
| continue; | ||
|
|
||
| // Add Candidate to TLSCandMap (GV --> Candidate). | ||
| TLSCandMap[GV].addUser(Inst, Idx); | ||
| } | ||
| } | ||
|
|
||
| void TLSVariableHoistPass::collectTLSCandidates(Function &Fn) { | ||
| // First, quickly check if there is TLS Variable. | ||
| Module *M = Fn.getParent(); | ||
|
|
||
| bool HasTLS = llvm::any_of( | ||
| M->globals(), [](GlobalVariable &GV) { return GV.isThreadLocal(); }); | ||
|
|
||
| // If non, directly return. | ||
| if (!HasTLS) | ||
| return; | ||
|
|
||
| TLSCandMap.clear(); | ||
|
|
||
| // Then, collect TLS Variable info. | ||
| for (BasicBlock &BB : Fn) { | ||
| // Ignore unreachable basic blocks. | ||
| if (!DT->isReachableFromEntry(&BB)) | ||
| continue; | ||
|
|
||
| for (Instruction &Inst : BB) | ||
| collectTLSCandidate(&Inst); | ||
| } | ||
| } | ||
|
|
||
| static bool OneUseOutsideLoop(tlshoist::TLSCandidate &Cand, LoopInfo *LI) { | ||
| if (Cand.Users.size() != 1) | ||
| return false; | ||
|
|
||
| BasicBlock *BB = Cand.Users[0].Inst->getParent(); | ||
| if (LI && LI->getLoopFor(BB)) | ||
| return false; | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| BasicBlock::iterator | ||
| TLSVariableHoistPass::findInsertPosInEntry(Function &Fn, | ||
| tlshoist::TLSCandidate &Cand) { | ||
| BasicBlock &Entry = Fn.getEntryBlock(); | ||
|
|
||
| // The Entry BB is usually small, let quickly check if TLS used in it. | ||
| // If there is, directly use the first user as insert position. | ||
| for (auto &I : Entry) { | ||
| Instruction *Inst = &I; | ||
| bool UsedInEntry = llvm::any_of( | ||
| Cand.Users, [=](tlshoist::TLSUser &User) { return User.Inst == Inst; }); | ||
| if (UsedInEntry) | ||
| return Inst->getIterator(); | ||
| } | ||
|
|
||
| Instruction *Term = Entry.getTerminator(); | ||
| if (Term) | ||
| return Term->getIterator(); | ||
|
|
||
| // Entry is empty. | ||
| return Entry.end(); | ||
| } | ||
|
|
||
| Instruction *TLSVariableHoistPass::getNearestLoopDomInst(BasicBlock *BB) { | ||
| Loop *L = LI->getLoopFor(BB); | ||
| assert(L && "Unexcepted Loop status!"); | ||
|
|
||
| // Get the outmost loop. | ||
| while (Loop *Parent = L->getParentLoop()) | ||
| L = Parent; | ||
|
|
||
| BasicBlock *PreHeader = L->getLoopPredecessor(); | ||
|
|
||
| // There is unique predecessor outside the loop. | ||
| // Note the terminator maybe nullptr, because the PreHeader maybe an empty BB. | ||
| if (PreHeader) | ||
| return PreHeader->getTerminator(); | ||
|
|
||
| BasicBlock *Header = L->getHeader(); | ||
| BasicBlock *Dom = Header; | ||
| for (BasicBlock *PredBB : predecessors(Header)) | ||
| Dom = DT->findNearestCommonDominator(Dom, PredBB); | ||
|
|
||
| assert(Dom && "Not find dominator BB!"); | ||
| Instruction *Term = Dom->getTerminator(); | ||
|
|
||
| assert(Term && "Not find terminator instruction!"); | ||
| return Term; | ||
| } | ||
|
|
||
| Instruction *TLSVariableHoistPass::getDomInst(Instruction *I1, | ||
| Instruction *I2) { | ||
| if (!I1) | ||
| return I2; | ||
| if (DT->dominates(I1, I2)) | ||
| return I1; | ||
| if (DT->dominates(I2, I1)) | ||
| return I2; | ||
|
|
||
| // If there is no dominance relation, use common dominator. | ||
| BasicBlock *DomBB = | ||
| DT->findNearestCommonDominator(I1->getParent(), I2->getParent()); | ||
|
|
||
| Instruction *Dom = DomBB->getTerminator(); | ||
| assert(Dom && "Common dominator not found!"); | ||
|
|
||
| return Dom; | ||
| } | ||
|
|
||
| BasicBlock::iterator TLSVariableHoistPass::findInsertPos(Function &Fn, | ||
| GlobalVariable *GV, | ||
| BasicBlock *&PosBB) { | ||
| tlshoist::TLSCandidate &Cand = TLSCandMap[GV]; | ||
| if (!DT) | ||
| return findInsertPosInEntry(Fn, Cand); | ||
|
|
||
| // We should hoist the TLS use out of loop, so choose its nearest instruction | ||
| // which dominate the loop and the outside loops (if exist). | ||
| Instruction *LastPos = nullptr; | ||
| for (auto &User : Cand.Users) { | ||
| BasicBlock *BB = User.Inst->getParent(); | ||
| Instruction *Pos = User.Inst; | ||
| if (LI && LI->getLoopFor(BB)) { | ||
| Pos = getNearestLoopDomInst(BB); | ||
| // The dominator of loop is empty BB, that rarely happened, so let | ||
| // things be easy, directly insert in entry BB. | ||
| if (!Pos) | ||
| return findInsertPosInEntry(Fn, Cand); | ||
| } | ||
| Pos = getDomInst(LastPos, Pos); | ||
| LastPos = Pos; | ||
| } | ||
|
|
||
| assert(LastPos && "Unexpected insert position!"); | ||
| BasicBlock *Parent = LastPos->getParent(); | ||
| PosBB = Parent; | ||
| return LastPos->getIterator(); | ||
| } | ||
|
|
||
| // Generate a bitcast (no type change) to replace the uses of TLS Candidate. | ||
| Instruction *TLSVariableHoistPass::genBitCastInst(Function &Fn, | ||
| GlobalVariable *GV) { | ||
| BasicBlock *PosBB = &Fn.getEntryBlock(); | ||
| BasicBlock::iterator Iter = findInsertPos(Fn, GV, PosBB); | ||
| Type *Ty = GV->getType(); | ||
| auto *CastInst = new BitCastInst(GV, Ty, "tls_bitcast"); | ||
| PosBB->getInstList().insert(Iter, CastInst); | ||
| return CastInst; | ||
| } | ||
|
|
||
| bool TLSVariableHoistPass::tryReplaceTLSCandidate(Function &Fn, | ||
| GlobalVariable *GV) { | ||
|
|
||
| tlshoist::TLSCandidate &Cand = TLSCandMap[GV]; | ||
|
|
||
| // If only used 1 time and not in loops, we no need to replace it. | ||
| if (OneUseOutsideLoop(Cand, LI)) | ||
| return false; | ||
|
|
||
| // Generate a bitcast (no type change) | ||
| auto *CastInst = genBitCastInst(Fn, GV); | ||
|
|
||
| // to replace the uses of TLS Candidate | ||
| for (auto &User : Cand.Users) | ||
| User.Inst->setOperand(User.OpndIdx, CastInst); | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| bool TLSVariableHoistPass::tryReplaceTLSCandidates(Function &Fn) { | ||
| if (TLSCandMap.empty()) | ||
| return false; | ||
|
|
||
| bool Replaced = false; | ||
| for (auto &GV2Cand : TLSCandMap) { | ||
| GlobalVariable *GV = GV2Cand.first; | ||
| Replaced |= tryReplaceTLSCandidate(Fn, GV); | ||
| } | ||
|
|
||
| return Replaced; | ||
| } | ||
|
|
||
| /// Optimize expensive TLS variables in the given function. | ||
| bool TLSVariableHoistPass::runImpl(Function &Fn, DominatorTree &DT, | ||
| LoopInfo &LI) { | ||
| if (Fn.hasOptNone()) | ||
| return false; | ||
|
|
||
| if (TLSLoadHoist != "optimize" && | ||
| !Fn.getAttributes().hasFnAttr("tls-load-hoist")) | ||
| return false; | ||
|
|
||
| this->LI = &LI; | ||
| this->DT = &DT; | ||
| // Collect all TLS variable candidates. | ||
| collectTLSCandidates(Fn); | ||
|
|
||
| bool MadeChange = tryReplaceTLSCandidates(Fn); | ||
|
|
||
| return MadeChange; | ||
| } | ||
|
|
||
| PreservedAnalyses TLSVariableHoistPass::run(Function &F, | ||
| FunctionAnalysisManager &AM) { | ||
|
|
||
| auto &LI = AM.getResult<LoopAnalysis>(F); | ||
| auto &DT = AM.getResult<DominatorTreeAnalysis>(F); | ||
|
|
||
| if (!runImpl(F, DT, LI)) | ||
| return PreservedAnalyses::all(); | ||
|
|
||
| PreservedAnalyses PA; | ||
| PA.preserveSet<CFGAnalyses>(); | ||
| return PA; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,248 @@ | ||
| ; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic --tls-load-hoist=optimize --stop-after=tlshoist -o - %s | FileCheck %s | ||
| ; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic --stop-after=tlshoist -o - %s | FileCheck %s | ||
|
|
||
| ; This test comes from compiling clang/test/CodeGen/intel/tls_loads.cpp with: | |
| ; (clang tls_loads.cpp -fPIC -ftls-model=global-dynamic -O2 -S -emit-llvm) | ||
|
|
||
| ; // Variable declaration and definition: | ||
| ; thread_local int thl_x; | ||
| ; thread_local int thl_x2; | ||
| ; | ||
| ; struct SS { | ||
| ; char thl_c; | ||
| ; int num; | ||
| ; }; | ||
| ; | ||
| ; int gfunc(); | ||
| ; int gfunc2(int); | ||
|
|
||
| ; // First function (@_Z2f1i): | ||
| ; int f1(int c) { | ||
| ; while (c) | ||
| ; c++; | ||
| ; | ||
| ; int *px = &thl_x; | ||
| ; c -= gfunc(); | ||
| ; | ||
| ; while(c++) { | ||
| ; c = gfunc(); | ||
| ; while (c--) | ||
| ; *px += gfunc2(thl_x2); | ||
| ; } | ||
| ; return *px; | ||
| ; } | ||
|
|
||
| $_ZTW5thl_x = comdat any | ||
|
|
||
| $_ZTW6thl_x2 = comdat any | ||
|
|
||
| @thl_x = thread_local global i32 0, align 4 | ||
| @thl_x2 = thread_local global i32 0, align 4 | ||
| @_ZZ2f2iE2st.0 = internal thread_local unnamed_addr global i8 0, align 4 | ||
| @_ZZ2f2iE2st.1 = internal thread_local unnamed_addr global i32 0, align 4 | ||
|
|
||
| ; Function Attrs: mustprogress uwtable | ||
| define noundef i32 @_Z2f1i(i32 noundef %c) local_unnamed_addr #0 { | ||
| ; CHECK-LABEL: _Z2f1i | ||
| ; CHECK: entry: | ||
| ; CHECK-NEXT: %call = tail call noundef i32 @_Z5gfuncv() | ||
| ; CHECK-NEXT: %phi.cmp = icmp eq i32 %call, 0 | ||
| ; CHECK-NEXT: %tls_bitcast1 = bitcast i32* @thl_x to i32* | ||
| ; CHECK-NEXT: br i1 %phi.cmp, label %while.end11, label %while.body4.preheader | ||
|
|
||
| ; CHECK: while.body4.preheader: | ||
| ; CHECK-NEXT: %tls_bitcast = bitcast i32* @thl_x2 to i32* | ||
| ; CHECK-NEXT: br label %while.body4 | ||
|
|
||
| ; CHECK: while.body4: | ||
| ; CHECK-NEXT: %call5 = tail call noundef i32 @_Z5gfuncv() | ||
| ; CHECK-NEXT: %tobool7.not18 = icmp eq i32 %call5, 0 | ||
| ; CHECK-NEXT: br i1 %tobool7.not18, label %while.body4.backedge, label %while.body8.preheader | ||
|
|
||
| ; CHECK: while.body8.preheader: | ||
| ; CHECK-NEXT: br label %while.body8 | ||
|
|
||
| ; CHECK: while.body4.backedge.loopexit: | ||
| ; CHECK-NEXT: br label %while.body4.backedge | ||
|
|
||
| ; CHECK: while.body4.backedge: | ||
| ; CHECK-NEXT: br label %while.body4, !llvm.loop !4 | ||
|
|
||
| ; CHECK: while.body8: | ||
| ; CHECK-NEXT: %c.addr.219 = phi i32 [ %dec, %while.body8 ], [ %call5, %while.body8.preheader ] | ||
| ; CHECK-NEXT: %dec = add i32 %c.addr.219, -1 | ||
| ; CHECK-NEXT: %0 = load i32, i32* %tls_bitcast, align 4 | ||
| ; CHECK-NEXT: %call9 = tail call noundef i32 @_Z6gfunc2i(i32 noundef %0) | ||
| ; CHECK-NEXT: %1 = load i32, i32* %tls_bitcast1, align 4 | ||
| ; CHECK-NEXT: %add = add nsw i32 %1, %call9 | ||
| ; CHECK-NEXT: store i32 %add, i32* %tls_bitcast1, align 4 | ||
| ; CHECK-NEXT: %tobool7.not = icmp eq i32 %dec, 0 | ||
| ; CHECK-NEXT: br i1 %tobool7.not, label %while.body4.backedge.loopexit, label %while.body8, !llvm.loop !4 | ||
|
|
||
| ; CHECK: while.end11: | ||
| ; CHECK-NEXT: %2 = load i32, i32* %tls_bitcast1, align 4 | ||
| ; CHECK-NEXT: ret i32 %2 | ||
|
|
||
| entry: | ||
| %call = tail call noundef i32 @_Z5gfuncv() | ||
| %phi.cmp = icmp eq i32 %call, 0 | ||
| br i1 %phi.cmp, label %while.end11, label %while.body4 | ||
|
|
||
| while.body4: ; preds = %entry, %while.body4.backedge | ||
| %call5 = tail call noundef i32 @_Z5gfuncv() | ||
| %tobool7.not18 = icmp eq i32 %call5, 0 | ||
| br i1 %tobool7.not18, label %while.body4.backedge, label %while.body8 | ||
|
|
||
| while.body4.backedge: ; preds = %while.body8, %while.body4 | ||
| br label %while.body4, !llvm.loop !4 | ||
|
|
||
| while.body8: ; preds = %while.body4, %while.body8 | ||
| %c.addr.219 = phi i32 [ %dec, %while.body8 ], [ %call5, %while.body4 ] | ||
| %dec = add nsw i32 %c.addr.219, -1 | ||
| %0 = load i32, i32* @thl_x2, align 4 | ||
| %call9 = tail call noundef i32 @_Z6gfunc2i(i32 noundef %0) | ||
| %1 = load i32, i32* @thl_x, align 4 | ||
| %add = add nsw i32 %1, %call9 | ||
| store i32 %add, i32* @thl_x, align 4 | ||
| %tobool7.not = icmp eq i32 %dec, 0 | ||
| br i1 %tobool7.not, label %while.body4.backedge, label %while.body8, !llvm.loop !4 | ||
|
|
||
| while.end11: ; preds = %entry | ||
| %2 = load i32, i32* @thl_x, align 4 | ||
| ret i32 %2 | ||
| } | ||
|
|
||
| ; // Second function (@_Z2f2i): | |
| ; int f2(int c) { | ||
| ; thread_local struct SS st; | ||
| ; c += gfunc(); | ||
| ; while (c--) { | ||
| ; thl_x += gfunc(); | ||
| ; st.thl_c += (char)gfunc(); | ||
| ; st.num += gfunc(); | ||
| ; } | ||
| ; return thl_x; | ||
| ; } | ||
| declare noundef i32 @_Z5gfuncv() local_unnamed_addr #1 | ||
|
|
||
| declare noundef i32 @_Z6gfunc2i(i32 noundef) local_unnamed_addr #1 | ||
|
|
||
| ; Function Attrs: mustprogress uwtable | ||
| define noundef i32 @_Z2f2i(i32 noundef %c) local_unnamed_addr #0 { | ||
| ; CHECK-LABEL: _Z2f2i | ||
| ; CHECK: entry: | ||
| ; CHECK-NEXT: %call = tail call noundef i32 @_Z5gfuncv() | ||
| ; CHECK-NEXT: %add = add nsw i32 %call, %c | ||
| ; CHECK-NEXT: %tobool.not12 = icmp eq i32 %add, 0 | ||
| ; CHECK-NEXT: %tls_bitcast = bitcast i32* @thl_x to i32* | ||
| ; CHECK-NEXT: br i1 %tobool.not12, label %while.end, label %while.body.preheader | ||
|
|
||
| ; CHECK: while.body.preheader: | ||
| ; CHECK-NEXT: %tls_bitcast1 = bitcast i8* @_ZZ2f2iE2st.0 to i8* | ||
| ; CHECK-NEXT: %tls_bitcast2 = bitcast i32* @_ZZ2f2iE2st.1 to i32* | ||
| ; CHECK-NEXT: br label %while.body | ||
|
|
||
| ; CHECK: while.body: | ||
| ; CHECK-NEXT: %c.addr.013 = phi i32 [ %dec, %while.body ], [ %add, %while.body.preheader ] | ||
| ; CHECK-NEXT: %dec = add i32 %c.addr.013, -1 | ||
| ; CHECK-NEXT: %call1 = tail call noundef i32 @_Z5gfuncv() | ||
| ; CHECK-NEXT: %0 = load i32, i32* %tls_bitcast, align 4 | ||
| ; CHECK-NEXT: %add2 = add nsw i32 %0, %call1 | ||
| ; CHECK-NEXT: store i32 %add2, i32* %tls_bitcast, align 4 | ||
| ; CHECK-NEXT: %call3 = tail call noundef i32 @_Z5gfuncv() | ||
| ; CHECK-NEXT: %1 = load i8, i8* %tls_bitcast1, align 4 | ||
| ; CHECK-NEXT: %2 = trunc i32 %call3 to i8 | ||
| ; CHECK-NEXT: %conv7 = add i8 %1, %2 | ||
| ; CHECK-NEXT: store i8 %conv7, i8* %tls_bitcast1, align 4 | ||
| ; CHECK-NEXT: %call8 = tail call noundef i32 @_Z5gfuncv() | ||
| ; CHECK-NEXT: %3 = load i32, i32* %tls_bitcast2, align 4 | ||
| ; CHECK-NEXT: %add9 = add nsw i32 %3, %call8 | ||
| ; CHECK-NEXT: store i32 %add9, i32* %tls_bitcast2, align 4 | ||
| ; CHECK-NEXT: %tobool.not = icmp eq i32 %dec, 0 | ||
| ; CHECK-NEXT: br i1 %tobool.not, label %while.end.loopexit, label %while.body | ||
|
|
||
| ; CHECK: while.end.loopexit: | ||
| ; CHECK-NEXT: br label %while.end | ||
|
|
||
| ; CHECK: while.end: | ||
| ; CHECK-NEXT: %4 = load i32, i32* %tls_bitcast, align 4 | ||
| ; CHECK-NEXT: ret i32 %4 | ||
| entry: | ||
| %call = tail call noundef i32 @_Z5gfuncv() | ||
| %add = add nsw i32 %call, %c | ||
| %tobool.not12 = icmp eq i32 %add, 0 | ||
| br i1 %tobool.not12, label %while.end, label %while.body | ||
|
|
||
| while.body: ; preds = %entry, %while.body | ||
| %c.addr.013 = phi i32 [ %dec, %while.body ], [ %add, %entry ] | ||
| %dec = add nsw i32 %c.addr.013, -1 | ||
| %call1 = tail call noundef i32 @_Z5gfuncv() | ||
| %0 = load i32, i32* @thl_x, align 4 | ||
| %add2 = add nsw i32 %0, %call1 | ||
| store i32 %add2, i32* @thl_x, align 4 | ||
| %call3 = tail call noundef i32 @_Z5gfuncv() | ||
| %1 = load i8, i8* @_ZZ2f2iE2st.0, align 4 | ||
| %2 = trunc i32 %call3 to i8 | ||
| %conv7 = add i8 %1, %2 | ||
| store i8 %conv7, i8* @_ZZ2f2iE2st.0, align 4 | ||
| %call8 = tail call noundef i32 @_Z5gfuncv() | ||
| %3 = load i32, i32* @_ZZ2f2iE2st.1, align 4 | ||
| %add9 = add nsw i32 %3, %call8 | ||
| store i32 %add9, i32* @_ZZ2f2iE2st.1, align 4 | ||
| %tobool.not = icmp eq i32 %dec, 0 | ||
| br i1 %tobool.not, label %while.end, label %while.body | ||
|
|
||
| while.end: ; preds = %while.body, %entry | ||
| %4 = load i32, i32* @thl_x, align 4 | ||
| ret i32 %4 | ||
| } | ||
|
|
||
| ; // Third function (@_Z2f3i): | ||
| ; int f3(int c) { | ||
| ; int *px = &thl_x; | ||
| ; gfunc2(*px); | ||
| ; gfunc2(*px); | ||
| ; return 1; | ||
| ; } | ||
|
|
||
| ; Function Attrs: mustprogress uwtable | ||
| define noundef i32 @_Z2f3i(i32 noundef %c) local_unnamed_addr #0 { | ||
| ; CHECK-LABEL: _Z2f3i | ||
| ; CHECK: entry: | ||
| ; CHECK-NEXT: %tls_bitcast = bitcast i32* @thl_x to i32* | ||
| ; CHECK-NEXT: %0 = load i32, i32* %tls_bitcast, align 4 | ||
| ; CHECK-NEXT: %call = tail call noundef i32 @_Z6gfunc2i(i32 noundef %0) | ||
| ; CHECK-NEXT: %1 = load i32, i32* %tls_bitcast, align 4 | ||
| ; CHECK-NEXT: %call1 = tail call noundef i32 @_Z6gfunc2i(i32 noundef %1) | ||
| ; CHECK-NEXT: ret i32 1 | ||
| entry: | ||
| %0 = load i32, i32* @thl_x, align 4 | ||
| %call = tail call noundef i32 @_Z6gfunc2i(i32 noundef %0) | ||
| %1 = load i32, i32* @thl_x, align 4 | ||
| %call1 = tail call noundef i32 @_Z6gfunc2i(i32 noundef %1) | ||
| ret i32 1 | ||
| } | ||
|
|
||
| ; Function Attrs: uwtable | ||
| define weak_odr hidden noundef i32* @_ZTW5thl_x() local_unnamed_addr #2 comdat { | ||
| ret i32* @thl_x | ||
| } | ||
|
|
||
| ; Function Attrs: uwtable | ||
| define weak_odr hidden noundef i32* @_ZTW6thl_x2() local_unnamed_addr #2 comdat { | ||
| ret i32* @thl_x2 | ||
| } | ||
|
|
||
| attributes #0 = { mustprogress uwtable "tls-load-hoist" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
| attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
| attributes #2 = { uwtable "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
|
|
||
| !llvm.module.flags = !{!0, !1, !2} | ||
| !llvm.ident = !{!3} | ||
|
|
||
| !0 = !{i32 1, !"wchar_size", i32 4} | ||
| !1 = !{i32 7, !"PIC Level", i32 2} | ||
| !2 = !{i32 7, !"uwtable", i32 2} | ||
| !3 = !{!"clang version 15.0.0"} | ||
| !4 = distinct !{!4, !5} | ||
| !5 = !{!"llvm.loop.mustprogress"} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| ; RUN: opt -S -mtriple=x86_64-unknown-unknown -tlshoist --relocation-model=pic --tls-load-hoist=optimize -o - %s | FileCheck %s --check-prefix=HOIST0 | ||
| ; RUN: opt -S -mtriple=x86_64-unknown-unknown -tlshoist --relocation-model=pic --tls-load-hoist=non-optimize -o - %s | FileCheck %s --check-prefix=HOIST2 | ||
| ; RUN: opt -S -mtriple=x86_64-unknown-unknown -tlshoist --relocation-model=pic -o - %s | FileCheck %s --check-prefix=HOIST2 | ||
|
|
||
| $_ZTW5thl_x = comdat any | ||
|
|
||
| @thl_x = thread_local global i32 0, align 4 | ||
|
|
||
| ; Function Attrs: mustprogress uwtable | ||
| define i32 @_Z2f1i(i32 %c) local_unnamed_addr #0 { | ||
| entry: | ||
| %0 = load i32, i32* @thl_x, align 4 | ||
| %call = tail call i32 @_Z5gfunci(i32 %0) | ||
| %1 = load i32, i32* @thl_x, align 4 | ||
| %call1 = tail call i32 @_Z5gfunci(i32 %1) | ||
| ret i32 1 | ||
| } | ||
|
|
||
| ;HOIST0-LABEL: _Z2f1i | ||
| ;HOIST0: entry: | ||
| ;HOIST0-NEXT: %tls_bitcast = bitcast i32* @thl_x to i32* | ||
| ;HOIST0-NEXT: %0 = load i32, i32* %tls_bitcast, align 4 | ||
| ;HOIST0-NEXT: %call = tail call i32 @_Z5gfunci(i32 %0) | ||
| ;HOIST0-NEXT: %1 = load i32, i32* %tls_bitcast, align 4 | ||
| ;HOIST0-NEXT: %call1 = tail call i32 @_Z5gfunci(i32 %1) | ||
| ;HOIST0-NEXT: ret i32 1 | ||
|
|
||
| ;HOIST2-LABEL: _Z2f1i | ||
| ;HOIST2: entry: | ||
| ;HOIST2-NEXT: %0 = load i32, i32* @thl_x, align 4 | ||
| ;HOIST2-NEXT: %call = tail call i32 @_Z5gfunci(i32 %0) | ||
| ;HOIST2-NEXT: %1 = load i32, i32* @thl_x, align 4 | ||
| ;HOIST2-NEXT: %call1 = tail call i32 @_Z5gfunci(i32 %1) | ||
| ;HOIST2-NEXT: ret i32 1 | ||
|
|
||
| declare i32 @_Z5gfunci(i32) local_unnamed_addr #1 | ||
|
|
||
| ; Function Attrs: uwtable | ||
| define weak_odr hidden i32* @_ZTW5thl_x() local_unnamed_addr #2 comdat { | ||
| ret i32* @thl_x | ||
| } | ||
|
|
||
| attributes #0 = { mustprogress uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
| attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
| attributes #2 = { uwtable "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
|
|
||
| !llvm.module.flags = !{!0, !1, !2} | ||
|
|
||
| !0 = !{i32 1, !"wchar_size", i32 4} | ||
| !1 = !{i32 7, !"PIC Level", i32 2} | ||
| !2 = !{i32 7, !"uwtable", i32 1} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,358 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
| ; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic --tls-load-hoist=optimize -o - %s | FileCheck %s --check-prefix=HOIST0 | ||
| ; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic --tls-load-hoist=non-optimize -o - %s | FileCheck %s --check-prefix=HOIST2 | ||
| ; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic -o - %s | FileCheck %s --check-prefix=HOIST2 | ||
|
|
||
| ; This test has no module flag {"tls-load-hoist", i32 0}, so use --tls-load-hoist=x | ||
| ; to choose the way of loading thread_local address. | ||
|
|
||
| ; This test comes from compiling clang/test/CodeGen/intel/tls_loads.cpp with: | |
| ; (clang tls_loads.cpp -fPIC -ftls-model=global-dynamic -O2 -S -emit-llvm) | ||
|
|
||
| $_ZTW5thl_x = comdat any | ||
|
|
||
| $_ZTW6thl_x2 = comdat any | ||
|
|
||
| @thl_x = thread_local global i32 0, align 4 | ||
| @thl_x2 = thread_local global i32 0, align 4 | ||
| @_ZZ2f2iE2st.0 = internal thread_local unnamed_addr global i8 0, align 4 | ||
| @_ZZ2f2iE2st.1 = internal thread_local unnamed_addr global i32 0, align 4 | ||
|
|
||
| ; For HOIST0, check call __tls_get_addr@PLT only one time for each thread_local variable. | ||
| ; For HOIST2, Check the default way: usually call __tls_get_addr@PLT every time when use thread_local variable. | ||
|
|
||
| ; Function Attrs: mustprogress uwtable | ||
| define i32 @_Z2f1i(i32 %c) local_unnamed_addr #0 { | ||
| ; HOIST0-LABEL: _Z2f1i: | ||
| ; HOIST0: # %bb.0: # %entry | ||
| ; HOIST0-NEXT: pushq %r15 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST0-NEXT: pushq %r14 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 24 | ||
| ; HOIST0-NEXT: pushq %rbx | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 32 | ||
| ; HOIST0-NEXT: .cfi_offset %rbx, -32 | ||
| ; HOIST0-NEXT: .cfi_offset %r14, -24 | ||
| ; HOIST0-NEXT: .cfi_offset %r15, -16 | ||
| ; HOIST0-NEXT: movl %edi, %ebx | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: leaq thl_x@TLSGD(%rip), %rdi | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: rex64 | ||
| ; HOIST0-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST0-NEXT: movq %rax, %r14 | ||
| ; HOIST0-NEXT: testl %ebx, %ebx | ||
| ; HOIST0-NEXT: je .LBB0_4 | ||
| ; HOIST0-NEXT: # %bb.1: # %while.body.preheader | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: leaq thl_x2@TLSGD(%rip), %rdi | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: rex64 | ||
| ; HOIST0-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST0-NEXT: movq %rax, %r15 | ||
| ; HOIST0-NEXT: .p2align 4, 0x90 | ||
| ; HOIST0-NEXT: .LBB0_2: # %while.body | ||
| ; HOIST0-NEXT: # =>This Inner Loop Header: Depth=1 | ||
| ; HOIST0-NEXT: movl (%r15), %edi | ||
| ; HOIST0-NEXT: callq _Z6gfunc2i@PLT | ||
| ; HOIST0-NEXT: addl (%r14), %eax | ||
| ; HOIST0-NEXT: movl %eax, (%r14) | ||
| ; HOIST0-NEXT: decl %ebx | ||
| ; HOIST0-NEXT: jne .LBB0_2 | ||
| ; HOIST0-NEXT: jmp .LBB0_3 | ||
| ; HOIST0-NEXT: .LBB0_4: # %entry.while.end_crit_edge | ||
| ; HOIST0-NEXT: movl (%r14), %eax | ||
| ; HOIST0-NEXT: .LBB0_3: # %while.end | ||
| ; HOIST0-NEXT: popq %rbx | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 24 | ||
| ; HOIST0-NEXT: popq %r14 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST0-NEXT: popq %r15 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 8 | ||
| ; HOIST0-NEXT: retq | ||
| ; | ||
| ; HOIST2-LABEL: _Z2f1i: | ||
| ; HOIST2: # %bb.0: # %entry | ||
| ; HOIST2-NEXT: pushq %rbp | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST2-NEXT: pushq %rbx | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 24 | ||
| ; HOIST2-NEXT: pushq %rax | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 32 | ||
| ; HOIST2-NEXT: .cfi_offset %rbx, -24 | ||
| ; HOIST2-NEXT: .cfi_offset %rbp, -16 | ||
| ; HOIST2-NEXT: testl %edi, %edi | ||
| ; HOIST2-NEXT: je .LBB0_4 | ||
| ; HOIST2-NEXT: # %bb.1: | ||
| ; HOIST2-NEXT: movl %edi, %ebx | ||
| ; HOIST2-NEXT: .p2align 4, 0x90 | ||
| ; HOIST2-NEXT: .LBB0_2: # %while.body | ||
| ; HOIST2-NEXT: # =>This Inner Loop Header: Depth=1 | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: leaq thl_x2@TLSGD(%rip), %rdi | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: rex64 | ||
| ; HOIST2-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST2-NEXT: movl (%rax), %edi | ||
| ; HOIST2-NEXT: callq _Z6gfunc2i@PLT | ||
| ; HOIST2-NEXT: movl %eax, %ebp | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: rex64 | ||
| ; HOIST2-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST2-NEXT: addl (%rax), %ebp | ||
| ; HOIST2-NEXT: movl %ebp, (%rax) | ||
| ; HOIST2-NEXT: decl %ebx | ||
| ; HOIST2-NEXT: jne .LBB0_2 | ||
| ; HOIST2-NEXT: jmp .LBB0_3 | ||
| ; HOIST2-NEXT: .LBB0_4: # %entry.while.end_crit_edge | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: rex64 | ||
| ; HOIST2-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST2-NEXT: movl (%rax), %ebp | ||
| ; HOIST2-NEXT: .LBB0_3: # %while.end | ||
| ; HOIST2-NEXT: movl %ebp, %eax | ||
| ; HOIST2-NEXT: addq $8, %rsp | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 24 | ||
| ; HOIST2-NEXT: popq %rbx | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST2-NEXT: popq %rbp | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 8 | ||
| ; HOIST2-NEXT: retq | ||
| entry: | ||
| %tobool.not3 = icmp eq i32 %c, 0 | ||
| br i1 %tobool.not3, label %entry.while.end_crit_edge, label %while.body | ||
|
|
||
| entry.while.end_crit_edge: ; preds = %entry | ||
| %.pre = load i32, i32* @thl_x, align 4 | ||
| br label %while.end | ||
|
|
||
| while.body: ; preds = %entry, %while.body | ||
| %c.addr.04 = phi i32 [ %dec, %while.body ], [ %c, %entry ] | ||
| %dec = add nsw i32 %c.addr.04, -1 | ||
| %0 = load i32, i32* @thl_x2, align 4 | ||
| %call = tail call i32 @_Z6gfunc2i(i32 %0) | ||
| %1 = load i32, i32* @thl_x, align 4 | ||
| %add = add nsw i32 %1, %call | ||
| store i32 %add, i32* @thl_x, align 4 | ||
| %tobool.not = icmp eq i32 %dec, 0 | ||
| br i1 %tobool.not, label %while.end, label %while.body | ||
|
|
||
| while.end: ; preds = %while.body, %entry.while.end_crit_edge | ||
| %2 = phi i32 [ %.pre, %entry.while.end_crit_edge ], [ %add, %while.body ] | ||
| ret i32 %2 | ||
| } | ||
|
|
||
| declare i32 @_Z6gfunc2i(i32) local_unnamed_addr #1 | ||
|
|
||
| ; _Z2f2i - loop test touching three variables. While the i32 counter %c is | ||
| ; non-zero, the loop body calls @_Z5gfuncv three times and adds each result | ||
| ; into @thl_x, @_ZZ2f2iE2st.0 (as i8, after a trunc) and @_ZZ2f2iE2st.1 in | ||
| ; turn; afterwards @thl_x is reloaded and returned. | ||
| ; Expected codegen: with the HOIST0 prefix the __tls_get_addr calls sit outside | ||
| ; the loop and the addresses stay in %r14/%r15/%r12; with the HOIST2 prefix the | ||
| ; __tls_get_addr calls are repeated inside the loop and again in %while.end. | ||
| ; (Assumption to confirm: the RUN lines that bind each prefix to a | ||
| ; tls-load-hoist setting are outside this excerpt.) | ||
| ; Function Attrs: mustprogress nounwind uwtable | ||
| define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 { | ||
| ; HOIST0-LABEL: _Z2f2i: | ||
| ; HOIST0: # %bb.0: # %entry | ||
| ; HOIST0-NEXT: pushq %r15 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST0-NEXT: pushq %r14 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 24 | ||
| ; HOIST0-NEXT: pushq %r12 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 32 | ||
| ; HOIST0-NEXT: pushq %rbx | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 40 | ||
| ; HOIST0-NEXT: pushq %rax | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 48 | ||
| ; HOIST0-NEXT: .cfi_offset %rbx, -40 | ||
| ; HOIST0-NEXT: .cfi_offset %r12, -32 | ||
| ; HOIST0-NEXT: .cfi_offset %r14, -24 | ||
| ; HOIST0-NEXT: .cfi_offset %r15, -16 | ||
| ; HOIST0-NEXT: movl %edi, %ebx | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: leaq thl_x@TLSGD(%rip), %rdi | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: rex64 | ||
| ; HOIST0-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST0-NEXT: movq %rax, %r14 | ||
| ; HOIST0-NEXT: testl %ebx, %ebx | ||
| ; HOIST0-NEXT: je .LBB1_3 | ||
| ; HOIST0-NEXT: # %bb.1: # %while.body.preheader | ||
| ; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi | ||
| ; HOIST0-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST0-NEXT: movq %rax, %rcx | ||
| ; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@DTPOFF(%rax), %r15 | ||
| ; HOIST0-NEXT: leaq _ZZ2f2iE2st.1@DTPOFF(%rax), %r12 | ||
| ; HOIST0-NEXT: .p2align 4, 0x90 | ||
| ; HOIST0-NEXT: .LBB1_2: # %while.body | ||
| ; HOIST0-NEXT: # =>This Inner Loop Header: Depth=1 | ||
| ; HOIST0-NEXT: callq _Z5gfuncv@PLT | ||
| ; HOIST0-NEXT: addl %eax, (%r14) | ||
| ; HOIST0-NEXT: callq _Z5gfuncv@PLT | ||
| ; HOIST0-NEXT: addb %al, (%r15) | ||
| ; HOIST0-NEXT: callq _Z5gfuncv@PLT | ||
| ; HOIST0-NEXT: addl %eax, (%r12) | ||
| ; HOIST0-NEXT: decl %ebx | ||
| ; HOIST0-NEXT: jne .LBB1_2 | ||
| ; HOIST0-NEXT: .LBB1_3: # %while.end | ||
| ; HOIST0-NEXT: movl (%r14), %eax | ||
| ; HOIST0-NEXT: addq $8, %rsp | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 40 | ||
| ; HOIST0-NEXT: popq %rbx | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 32 | ||
| ; HOIST0-NEXT: popq %r12 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 24 | ||
| ; HOIST0-NEXT: popq %r14 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST0-NEXT: popq %r15 | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 8 | ||
| ; HOIST0-NEXT: retq | ||
| ; | ||
| ; HOIST2-LABEL: _Z2f2i: | ||
| ; HOIST2: # %bb.0: # %entry | ||
| ; HOIST2-NEXT: pushq %rbp | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST2-NEXT: pushq %r14 | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 24 | ||
| ; HOIST2-NEXT: pushq %rbx | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 32 | ||
| ; HOIST2-NEXT: .cfi_offset %rbx, -32 | ||
| ; HOIST2-NEXT: .cfi_offset %r14, -24 | ||
| ; HOIST2-NEXT: .cfi_offset %rbp, -16 | ||
| ; HOIST2-NEXT: testl %edi, %edi | ||
| ; HOIST2-NEXT: je .LBB1_3 | ||
| ; HOIST2-NEXT: # %bb.1: # %while.body.preheader | ||
| ; HOIST2-NEXT: movl %edi, %ebx | ||
| ; HOIST2-NEXT: .p2align 4, 0x90 | ||
| ; HOIST2-NEXT: .LBB1_2: # %while.body | ||
| ; HOIST2-NEXT: # =>This Inner Loop Header: Depth=1 | ||
| ; HOIST2-NEXT: callq _Z5gfuncv@PLT | ||
| ; HOIST2-NEXT: movl %eax, %ebp | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: rex64 | ||
| ; HOIST2-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST2-NEXT: addl %ebp, (%rax) | ||
| ; HOIST2-NEXT: callq _Z5gfuncv@PLT | ||
| ; HOIST2-NEXT: movl %eax, %ebp | ||
| ; HOIST2-NEXT: leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi | ||
| ; HOIST2-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST2-NEXT: movq %rax, %r14 | ||
| ; HOIST2-NEXT: addb %bpl, _ZZ2f2iE2st.0@DTPOFF(%rax) | ||
| ; HOIST2-NEXT: callq _Z5gfuncv@PLT | ||
| ; HOIST2-NEXT: movl %eax, %ecx | ||
| ; HOIST2-NEXT: movq %r14, %rax | ||
| ; HOIST2-NEXT: addl %ecx, _ZZ2f2iE2st.1@DTPOFF(%r14) | ||
| ; HOIST2-NEXT: decl %ebx | ||
| ; HOIST2-NEXT: jne .LBB1_2 | ||
| ; HOIST2-NEXT: .LBB1_3: # %while.end | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: rex64 | ||
| ; HOIST2-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST2-NEXT: movl (%rax), %eax | ||
| ; HOIST2-NEXT: popq %rbx | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 24 | ||
| ; HOIST2-NEXT: popq %r14 | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST2-NEXT: popq %rbp | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 8 | ||
| ; HOIST2-NEXT: retq | ||
| ; Zero-trip guard: skip the loop entirely when %c is 0. | ||
| entry: | ||
| %tobool.not9 = icmp eq i32 %c, 0 | ||
| br i1 %tobool.not9, label %while.end, label %while.body | ||
|
|
||
| ; Loop body: three call + load/add/store sequences, one per variable, then the | ||
| ; counter is decremented and tested against 0. | ||
| while.body: ; preds = %entry, %while.body | ||
| %c.addr.010 = phi i32 [ %dec, %while.body ], [ %c, %entry ] | ||
| %dec = add nsw i32 %c.addr.010, -1 | ||
| %call = tail call i32 @_Z5gfuncv() | ||
| %0 = load i32, i32* @thl_x, align 4 | ||
| %add = add nsw i32 %0, %call | ||
| store i32 %add, i32* @thl_x, align 4 | ||
| %call1 = tail call i32 @_Z5gfuncv() | ||
| %1 = load i8, i8* @_ZZ2f2iE2st.0, align 4 | ||
| %2 = trunc i32 %call1 to i8 | ||
| %conv5 = add i8 %1, %2 | ||
| store i8 %conv5, i8* @_ZZ2f2iE2st.0, align 4 | ||
| %call6 = tail call i32 @_Z5gfuncv() | ||
| %3 = load i32, i32* @_ZZ2f2iE2st.1, align 4 | ||
| %add7 = add nsw i32 %3, %call6 | ||
| store i32 %add7, i32* @_ZZ2f2iE2st.1, align 4 | ||
| %tobool.not = icmp eq i32 %dec, 0 | ||
| br i1 %tobool.not, label %while.end, label %while.body | ||
|
|
||
| ; Reached both from the loop exit and from the zero-trip path in %entry; | ||
| ; @thl_x is loaded again here and returned. | ||
| while.end: ; preds = %while.body, %entry | ||
| %4 = load i32, i32* @thl_x, align 4 | ||
| ret i32 %4 | ||
| } | ||
|
|
||
| ; External callee, declaration only: takes no arguments and returns an i32. | ||
| ; It carries attribute group #1. | ||
| declare i32 @_Z5gfuncv() local_unnamed_addr #1 | ||
|
|
||
| ; _Z2f3i - straight-line case with no loop. @thl_x is loaded twice, each | ||
| ; loaded value is passed to @_Z6gfunc2i, and the constant 1 is returned; the | ||
| ; parameter %c is not used. | ||
| ; Expected codegen is the same for both check prefixes: a single | ||
| ; __tls_get_addr call whose result is kept in %rbx and reused for the second | ||
| ; load. | ||
| ; Function Attrs: mustprogress nounwind uwtable | ||
| define i32 @_Z2f3i(i32 %c) local_unnamed_addr #0 { | ||
| ; HOIST0-LABEL: _Z2f3i: | ||
| ; HOIST0: # %bb.0: # %entry | ||
| ; HOIST0-NEXT: pushq %rbx | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST0-NEXT: .cfi_offset %rbx, -16 | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: leaq thl_x@TLSGD(%rip), %rdi | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: data16 | ||
| ; HOIST0-NEXT: rex64 | ||
| ; HOIST0-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST0-NEXT: movq %rax, %rbx | ||
| ; HOIST0-NEXT: movl (%rax), %edi | ||
| ; HOIST0-NEXT: callq _Z6gfunc2i@PLT | ||
| ; HOIST0-NEXT: movl (%rbx), %edi | ||
| ; HOIST0-NEXT: callq _Z6gfunc2i@PLT | ||
| ; HOIST0-NEXT: movl $1, %eax | ||
| ; HOIST0-NEXT: popq %rbx | ||
| ; HOIST0-NEXT: .cfi_def_cfa_offset 8 | ||
| ; HOIST0-NEXT: retq | ||
| ; | ||
| ; HOIST2-LABEL: _Z2f3i: | ||
| ; HOIST2: # %bb.0: # %entry | ||
| ; HOIST2-NEXT: pushq %rbx | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 16 | ||
| ; HOIST2-NEXT: .cfi_offset %rbx, -16 | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: leaq thl_x@TLSGD(%rip), %rdi | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: data16 | ||
| ; HOIST2-NEXT: rex64 | ||
| ; HOIST2-NEXT: callq __tls_get_addr@PLT | ||
| ; HOIST2-NEXT: movq %rax, %rbx | ||
| ; HOIST2-NEXT: movl (%rax), %edi | ||
| ; HOIST2-NEXT: callq _Z6gfunc2i@PLT | ||
| ; HOIST2-NEXT: movl (%rbx), %edi | ||
| ; HOIST2-NEXT: callq _Z6gfunc2i@PLT | ||
| ; HOIST2-NEXT: movl $1, %eax | ||
| ; HOIST2-NEXT: popq %rbx | ||
| ; HOIST2-NEXT: .cfi_def_cfa_offset 8 | ||
| ; HOIST2-NEXT: retq | ||
| ; Two back-to-back load-and-call sequences on @thl_x; both call results are | ||
| ; left unused. | ||
| entry: | ||
| %0 = load i32, i32* @thl_x, align 4 | ||
| %call = tail call i32 @_Z6gfunc2i(i32 %0) | ||
| %1 = load i32, i32* @thl_x, align 4 | ||
| %call1 = tail call i32 @_Z6gfunc2i(i32 %1) | ||
| ret i32 1 | ||
| } | ||
|
|
||
| ; Attribute groups. #0 is attached to the definitions of @_Z2f2i and @_Z2f3i; | ||
| ; #1 is attached to the external declarations @_Z6gfunc2i and @_Z5gfuncv. | ||
| ; #2 has no user in the nearby functions (assumption to confirm: it may be | ||
| ; referenced earlier in the file). | ||
| attributes #0 = { nounwind mustprogress uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
| attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
| attributes #2 = { uwtable "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } | ||
|
|
||
| ; Module flags: the named list below references the three flag nodes !0-!2, | ||
| ; which set "wchar_size" to 4, "PIC Level" to 2 and "uwtable" to 1. | ||
| !llvm.module.flags = !{!0, !1, !2} | ||
|
|
||
| !0 = !{i32 1, !"wchar_size", i32 4} | ||
| !1 = !{i32 7, !"PIC Level", i32 2} | ||
| !2 = !{i32 7, !"uwtable", i32 1} | ||