diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index f2e00aab8d5da..4c2815679efc9 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -596,6 +596,42 @@ class TargetLoweringBase { /// avoided. bool isJumpExpensive() const { return JumpIsExpensive; } + // Costs parameters used by + // SelectionDAGBuilder::shouldKeepJumpConditionsTogether. + // shouldKeepJumpConditionsTogether will use these parameter values to + // determine if two conditions in the form `br (and/or cond1, cond2)` should + // be split into two branches or left as one. + // + // BaseCost is the cost threshold (in latency). If the estimated latency of + // computing both `cond1` and `cond2` is below the cost of just computing + // `cond1` + BaseCost, the two conditions will be kept together. Otherwise + // they will be split. + // + // LikelyBias increases BaseCost if branch probability info indicates that it + // is likely that both `cond1` and `cond2` will be computed. + // + // UnlikelyBias decreases BaseCost if branch probability info indicates that + // it is unlikely that both `cond1` and `cond2` will be computed. + // + // Set any field to -1 to make it ignored (setting BaseCost to -1 results in + // `shouldKeepJumpConditionsTogether` always returning false). + struct CondMergingParams { + int BaseCost; + int LikelyBias; + int UnlikelyBias; + }; + // Return params for deciding if we should keep two branch conditions merged + // or split them into two separate branches. + // Arg0: The binary op joining the two conditions (and/or). + // Arg1: The first condition (cond1) + // Arg2: The second condition (cond2) + virtual CondMergingParams + getJumpConditionMergingParams(Instruction::BinaryOps, const Value *, + const Value *) const { + // -1 will always result in splitting.
+ return {-1, -1, -1}; + } + /// Return true if selects are only cheaper than branches if the branch is /// unlikely to be predicted right. bool isPredictableSelectExpensive() const { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ab2f42d2024cc..48476b0ef9705 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" @@ -93,6 +94,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/InstructionCost.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -2446,6 +2448,152 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, SL->SwitchCases.push_back(CB); } +// Collect dependencies on V recursively. This is used for the cost analysis in +// `shouldKeepJumpConditionsTogether`. +static bool collectInstructionDeps( + SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V, + SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr, + unsigned Depth = 0) { + // Return false if we have an incomplete count. + if (Depth >= SelectionDAG::MaxRecursionDepth) + return false; + + auto *I = dyn_cast<Instruction>(V); + if (I == nullptr) + return true; + + if (Necessary != nullptr) { + // This instruction is necessary for the other side of the condition so + // don't count it. + if (Necessary->contains(I)) + return true; + } + + // Already added this dep.
+ if (!Deps->try_emplace(I, false).second) + return true; + for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx) + if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary, + Depth + 1)) + return false; + return true; +} + +bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether( + const FunctionLoweringInfo &FuncInfo, const BranchInst &I, + Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs, + TargetLoweringBase::CondMergingParams Params) const { + if (I.getNumSuccessors() != 2) + return false; + + if (!I.isConditional()) + return false; + + if (Params.BaseCost < 0) + return false; + + // Baseline cost. + InstructionCost CostThresh = Params.BaseCost; + + BranchProbabilityInfo *BPI = nullptr; + if (Params.LikelyBias || Params.UnlikelyBias) + BPI = FuncInfo.BPI; + if (BPI != nullptr) { + // See if we are either likely to get an early out or compute both lhs/rhs + // of the condition. + BasicBlock *IfFalse = I.getSuccessor(0); + BasicBlock *IfTrue = I.getSuccessor(1); + + std::optional<bool> Likely; + if (BPI->isEdgeHot(I.getParent(), IfTrue)) + Likely = true; + else if (BPI->isEdgeHot(I.getParent(), IfFalse)) + Likely = false; + + if (Likely) { + if (Opc == (*Likely ? Instruction::And : Instruction::Or)) + // It's likely we will have to compute both lhs and rhs of condition + CostThresh += Params.LikelyBias; + else { + if (Params.UnlikelyBias < 0) + return false; + // It's likely we will get an early out. + CostThresh -= Params.UnlikelyBias; + } + } + } + + if (CostThresh <= 0) + return false; + + // Collect "all" instructions that lhs condition is dependent on. + // Use map for stable iteration (to avoid non-determinism of iteration of + // SmallPtrSet). The `bool` value is just a dummy. + SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps; + collectInstructionDeps(&LhsDeps, Lhs); + // Collect "all" instructions that rhs condition is dependent on AND are + // dependencies of lhs.
This gives us an estimate on which instructions we + stand to save by splitting the condition. + if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps)) + return false; + // Add the compare instruction itself unless it's a dependency on the LHS. + if (const auto *RhsI = dyn_cast<Instruction>(Rhs)) + if (!LhsDeps.contains(RhsI)) + RhsDeps.try_emplace(RhsI, false); + + const auto &TLI = DAG.getTargetLoweringInfo(); + const auto &TTI = + TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); + + InstructionCost CostOfIncluding = 0; + // See if this instruction will need to be computed independently of whether + // RHS is. + Value *BrCond = I.getCondition(); + auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) { + for (const auto *U : Ins->users()) { + // If user is independent of RHS calculation we don't need to count it. + if (auto *UIns = dyn_cast<Instruction>(U)) + if (UIns != BrCond && !RhsDeps.contains(UIns)) + return false; + } + return true; + }; + + // Prune instructions from RHS Deps that are dependencies of unrelated + // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly + // arbitrary and just meant to cap how much time we spend in the pruning + // loop. It's highly unlikely to come into effect. + const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth; + // Stop after a certain point. No incorrectness from including too many + // instructions. + for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) { + const Instruction *ToDrop = nullptr; + for (const auto &InsPair : RhsDeps) { + if (!ShouldCountInsn(InsPair.first)) { + ToDrop = InsPair.first; + break; + } + } + if (ToDrop == nullptr) + break; + RhsDeps.erase(ToDrop); + } + + for (const auto &InsPair : RhsDeps) { + // Finally accumulate latency that we can only attribute to computing the + // RHS condition. Use latency because we are essentially trying to calculate + // the cost of the dependency chain.
+ // Possible TODO: We could try to estimate ILP and make this more precise. + CostOfIncluding += + TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency); + + if (CostOfIncluding > CostThresh) + return false; + } + return true; +} + void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -2660,8 +2808,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) Opcode = Instruction::Or; - if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && - match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { + if (Opcode && + !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) && + !shouldKeepJumpConditionsTogether( + FuncInfo, I, Opcode, BOp0, BOp1, + DAG.getTargetLoweringInfo().getJumpConditionMergingParams( + Opcode, BOp0, BOp1))) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), getEdgeProbability(BrMBB, Succ1MBB), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 47657313cb6a3..2084de473b806 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -385,6 +385,11 @@ class SelectionDAGBuilder { N = NewN; } + bool shouldKeepJumpConditionsTogether( + const FunctionLoweringInfo &FuncInfo, const BranchInst &I, + Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs, + TargetLoweringBase::CondMergingParams Params) const; + void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 866a2a94a0bfe..5ac00a7785ea4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ 
b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -77,6 +77,40 @@ static cl::opt ExperimentalPrefInnermostLoopAlignment( "alignment set by x86-experimental-pref-loop-alignment."), cl::Hidden); +static cl::opt<int> BrMergingBaseCostThresh( + "x86-br-merging-base-cost", cl::init(2), + cl::desc( + "Sets the cost threshold for when multiple conditionals will be merged " + "into one branch versus be split in multiple branches. Merging " + "conditionals saves branches at the cost of additional instructions. " + "This value sets the instruction cost limit, below which conditionals " + "will be merged, and above which conditionals will be split. Set to -1 " + "to never merge branches."), + cl::Hidden); + +static cl::opt<int> BrMergingLikelyBias( + "x86-br-merging-likely-bias", cl::init(0), + cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely " + "that all conditionals will be executed. For example for merging " + "the conditionals (a == b && c > d), if its known that a == b is " + "likely, then it is likely that if the conditionals are split " + "both sides will be executed, so it may be desirable to increase " + "the instruction cost threshold. Set to -1 to never merge likely " + "branches."), + cl::Hidden); + +static cl::opt<int> BrMergingUnlikelyBias( + "x86-br-merging-unlikely-bias", cl::init(-1), + cl::desc( + "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely " + "that all conditionals will be executed. For example for merging " + "the conditionals (a == b && c > d), if its known that a == b is " + "unlikely, then it is unlikely that if the conditionals are split " + "both sides will be executed, so it may be desirable to decrease " + "the instruction cost threshold.
Set to -1 to never merge unlikely " + "branches."), + cl::Hidden); + static cl::opt<bool> MulConstantOptimization( "mul-constant-optimization", cl::init(true), cl::desc("Replace 'mul x, Const' with more effective instructions like " @@ -3333,6 +3367,24 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand( return ISD::SRL; } +TargetLoweringBase::CondMergingParams +X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc, + const Value *Lhs, + const Value *Rhs) const { + using namespace llvm::PatternMatch; + int BaseCost = BrMergingBaseCostThresh.getValue(); + // a == b && a == c is a fast pattern on x86. + ICmpInst::Predicate Pred; + if (BaseCost >= 0 && Opc == Instruction::And && + match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) && + Pred == ICmpInst::ICMP_EQ && + match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) && + Pred == ICmpInst::ICMP_EQ) + BaseCost += 1; + return {BaseCost, BrMergingLikelyBias.getValue(), + BrMergingUnlikelyBias.getValue()}; +} + bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const { return N->getOpcode() != ISD::FP_EXTEND; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f93c54781846b..fe1943b576084 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1150,6 +1150,10 @@ namespace llvm { bool preferScalarizeSplat(SDNode *N) const override; + CondMergingParams + getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, + const Value *Rhs) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; diff --git a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll index 0044d1c356837..e6f28c2057f77 100644 --- a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll +++ b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll @@ -18,15 +18,16 @@ define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32
%tmp.21.reload, i32 %tmp ; CHECK-NEXT: movl _block, %esi ; CHECK-NEXT: movb %al, 1(%esi,%edx) ; CHECK-NEXT: cmpl %ecx, _last -; CHECK-NEXT: jge LBB0_3 -; CHECK-NEXT: ## %bb.1: ## %label.0 +; CHECK-NEXT: setl %cl ; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101 -; CHECK-NEXT: je LBB0_3 -; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %al, %cl +; CHECK-NEXT: je LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl -; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub +; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll index 7bdc4e19a1cf6..28b4541c1bfc7 100644 --- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll @@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: callq __ubyte_convert_to_ctype ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_4 +; CHECK-NEXT: js LBB0_6 ; CHECK-NEXT: ## %bb.1: ## %cond_next.i ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: movq %rbx, %rdi @@ -53,81 +53,84 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: sarl $31, %ecx ; CHECK-NEXT: andl %eax, %ecx ; CHECK-NEXT: cmpl $-2, %ecx -; CHECK-NEXT: je LBB0_8 +; CHECK-NEXT: je LBB0_10 ; CHECK-NEXT: ## %bb.2: ## %cond_next.i ; CHECK-NEXT: cmpl $-1, %ecx -; CHECK-NEXT: jne LBB0_6 -; CHECK-NEXT: LBB0_3: ## %bb4 +; CHECK-NEXT: jne LBB0_3 +; CHECK-NEXT: LBB0_8: ## %bb4 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 16(%rax), %rax -; CHECK-NEXT: jmp LBB0_10 -; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: jmp LBB0_9 +; CHECK-NEXT: LBB0_6: ## 
%_ubyte_convert2_to_ctypes.exit ; CHECK-NEXT: cmpl $-2, %eax -; CHECK-NEXT: je LBB0_8 -; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: je LBB0_10 +; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit ; CHECK-NEXT: cmpl $-1, %eax -; CHECK-NEXT: je LBB0_3 -; CHECK-NEXT: LBB0_6: ## %bb35 +; CHECK-NEXT: je LBB0_8 +; CHECK-NEXT: LBB0_3: ## %bb35 ; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14 ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *216(%rax) ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_11 -; CHECK-NEXT: ## %bb.7: ## %cond_false.i +; CHECK-NEXT: je LBB0_4 +; CHECK-NEXT: ## %bb.12: ## %cond_false.i +; CHECK-NEXT: setne %dil ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi ; CHECK-NEXT: movzbl %sil, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movl %eax, %r15d ; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB0_12 -; CHECK-NEXT: jmp LBB0_14 -; CHECK-NEXT: LBB0_8: ## %bb17 +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %dil, %al +; CHECK-NEXT: jne LBB0_5 +; CHECK-NEXT: LBB0_13: ## %cond_true.i200 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: jne LBB0_15 +; CHECK-NEXT: ## %bb.14: ## %cond_true14.i +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: callq _feraiseexcept +; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: jmp LBB0_16 +; CHECK-NEXT: LBB0_10: ## %bb17 ; CHECK-NEXT: callq _PyErr_Occurred ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jne LBB0_27 -; CHECK-NEXT: ## %bb.9: ## %cond_next +; CHECK-NEXT: jne LBB0_23 +; CHECK-NEXT: ## %bb.11: ## %cond_next ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 80(%rax), %rax -; CHECK-NEXT: LBB0_10: ## %bb4 +; CHECK-NEXT: LBB0_9: ## %bb4 ; CHECK-NEXT: movq 96(%rax), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq *40(%rax) -; CHECK-NEXT: jmp LBB0_28 -; CHECK-NEXT: LBB0_11: ## 
%cond_true.i +; CHECK-NEXT: jmp LBB0_24 +; CHECK-NEXT: LBB0_4: ## %cond_true.i ; CHECK-NEXT: movl $4, %edi ; CHECK-NEXT: callq _feraiseexcept ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi -; CHECK-NEXT: xorl %r15d, %r15d ; CHECK-NEXT: testb %sil, %sil -; CHECK-NEXT: je LBB0_14 -; CHECK-NEXT: LBB0_12: ## %cond_false.i +; CHECK-NEXT: sete %al ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_14 -; CHECK-NEXT: ## %bb.13: ## %cond_next17.i +; CHECK-NEXT: sete %cl +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: jne LBB0_13 +; CHECK-NEXT: LBB0_5: ## %cond_next17.i ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movzbl %ah, %ebx -; CHECK-NEXT: jmp LBB0_18 -; CHECK-NEXT: LBB0_14: ## %cond_true.i200 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne LBB0_17 -; CHECK-NEXT: ## %bb.16: ## %cond_true14.i -; CHECK-NEXT: movl $4, %edi -; CHECK-NEXT: callq _feraiseexcept -; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *224(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.19: ## %cond_true61 +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.17: ## %cond_true61 ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi @@ -136,8 +139,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: callq *200(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_27 -; CHECK-NEXT: ## %bb.20: ## %cond_next73 +; CHECK-NEXT: js LBB0_23 +; CHECK-NEXT: ## %bb.18: ## %cond_next73 ; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi @@ -146,13 +149,13 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: movl 
%ebp, %edx ; CHECK-NEXT: callq *232(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne LBB0_27 -; CHECK-NEXT: LBB0_21: ## %cond_next89 +; CHECK-NEXT: jne LBB0_23 +; CHECK-NEXT: LBB0_19: ## %cond_next89 ; CHECK-NEXT: movl $2, %edi ; CHECK-NEXT: callq _PyTuple_New ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_27 -; CHECK-NEXT: ## %bb.22: ## %cond_next97 +; CHECK-NEXT: je LBB0_23 +; CHECK-NEXT: ## %bb.20: ## %cond_next97 ; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12 ; CHECK-NEXT: movq (%r12), %rax @@ -160,8 +163,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_25 -; CHECK-NEXT: ## %bb.23: ## %cond_next135 +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.25: ## %cond_next135 ; CHECK-NEXT: movb %r15b, 16(%rax) ; CHECK-NEXT: movq %rax, 24(%r14) ; CHECK-NEXT: movq (%r12), %rax @@ -169,22 +172,22 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_25 -; CHECK-NEXT: ## %bb.24: ## %cond_next182 +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.26: ## %cond_next182 ; CHECK-NEXT: movb %bl, 16(%rax) ; CHECK-NEXT: movq %rax, 32(%r14) ; CHECK-NEXT: movq %r14, %rax -; CHECK-NEXT: jmp LBB0_28 -; CHECK-NEXT: LBB0_25: ## %cond_true113 +; CHECK-NEXT: jmp LBB0_24 +; CHECK-NEXT: LBB0_21: ## %cond_true113 ; CHECK-NEXT: decq (%r14) -; CHECK-NEXT: jne LBB0_27 -; CHECK-NEXT: ## %bb.26: ## %cond_true126 +; CHECK-NEXT: jne LBB0_23 +; CHECK-NEXT: ## %bb.22: ## %cond_true126 ; CHECK-NEXT: movq 8(%r14), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: callq *48(%rax) -; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock ; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: popq 
%rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll index 4482c5aec8e81..d9d4424267d73 100644 --- a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll +++ b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll @@ -16,15 +16,12 @@ define void @_ada_c34007g() { ; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: sete %cl ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: orl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: jne .LBB0_3 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: .LBB0_3: # %bb5507 +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %cl, %al ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: .cfi_def_cfa %esp, 4 diff --git a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll index dd60e641df254..6ffafc5587479 100644 --- a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll +++ b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll @@ -217,4 +217,4 @@ bb456: ; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371 ret void } -declare i32 @printf(ptr, ...) nounwind +declare i32 @printf(ptr, ...) 
nounwind diff --git a/llvm/test/CodeGen/X86/avx-cmp.ll b/llvm/test/CodeGen/X86/avx-cmp.ll index 502bbf3f5d118..4ab9c545ed90d 100644 --- a/llvm/test/CodeGen/X86/avx-cmp.ll +++ b/llvm/test/CodeGen/X86/avx-cmp.ll @@ -26,40 +26,33 @@ declare void @scale() nounwind define void @render(double %a0) nounwind { ; CHECK-LABEL: render: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB2_6 +; CHECK-NEXT: jne .LBB2_5 ; CHECK-NEXT: # %bb.1: # %for.cond5.preheader -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: movb $1, %bpl +; CHECK-NEXT: movb $1, %bl ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB2_2: # %for.cond5 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB2_2 -; CHECK-NEXT: # %bb.3: # %for.cond5 -; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: testb %bpl, %bpl -; CHECK-NEXT: jne .LBB2_2 -; CHECK-NEXT: # %bb.4: # %for.body33.preheader +; CHECK-NEXT: # %bb.3: # %for.body33.preheader ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: vmovsd (%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-NEXT: jne .LBB2_5 +; CHECK-NEXT: jne .LBB2_4 ; CHECK-NEXT: jnp .LBB2_2 -; CHECK-NEXT: .LBB2_5: # %if.then +; CHECK-NEXT: .LBB2_4: # %if.then ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: callq scale@PLT ; CHECK-NEXT: jmp .LBB2_2 -; CHECK-NEXT: .LBB2_6: # %for.end52 -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB2_5: # %for.end52 +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq entry: br i1 undef, label %for.cond5, label 
%for.end52 diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index b134d8a369634..e479ad20d6e4c 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -359,7 +359,6 @@ define void @unnatural_cfg2(ptr %p0, i32 %a0) { ; CHECK: %loop.body2 ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin -; CHECK: %loop.inner2.begin ; CHECK: %loop.body3 ; CHECK: %loop.inner1.begin ; CHECK: %bail diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll index 1372bd8047351..fa45afbb634c4 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -178,13 +178,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: subl $136, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: movl %edx, %edi +; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: orl %esi, %ecx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: sete %bl @@ -195,30 +195,33 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: sete %al ; X86-NEXT: orb %bl, %al ; X86-NEXT: movb %al, (%esp) # 1-byte Spill -; X86-NEXT: bsrl %esi, %edx +; X86-NEXT: bsrl %edi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl %edi, %ecx +; X86-NEXT: bsrl %esi, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx -; X86-NEXT: testl %esi, %esi +; X86-NEXT: testl %edi, %edi +; X86-NEXT: movl %edi, %ebx ; X86-NEXT: cmovnel %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; 
X86-NEXT: bsrl %eax, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %esi, %ebx +; X86-NEXT: bsrl %ebp, %ebp +; X86-NEXT: movl %esi, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: xorl $31, %ebp ; X86-NEXT: addl $32, %ebp ; X86-NEXT: testl %eax, %eax ; X86-NEXT: cmovnel %edx, %ebp ; X86-NEXT: addl $64, %ebp -; X86-NEXT: orl %ebx, %edi +; X86-NEXT: movl %edi, %edx +; X86-NEXT: orl %ebx, %edx ; X86-NEXT: cmovnel %ecx, %ebp ; X86-NEXT: bsrl %esi, %edx +; X86-NEXT: movl %esi, %ebx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: bsrl %ebx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bsrl %eax, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx ; X86-NEXT: testl %esi, %esi @@ -230,51 +233,51 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: xorl $31, %edx ; X86-NEXT: addl $32, %edx ; X86-NEXT: testl %edi, %edi -; X86-NEXT: movl %edi, %eax ; X86-NEXT: cmovnel %esi, %edx ; X86-NEXT: addl $64, %edx -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl %ebx, %eax ; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: subl %edx, %ebp +; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %edx, %edx ; X86-NEXT: movl $0, %esi ; X86-NEXT: sbbl %esi, %esi ; X86-NEXT: movl $0, %edi ; X86-NEXT: sbbl %edi, %edi -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %ebx, %ebx -; X86-NEXT: movl $127, %edx +; X86-NEXT: movl $127, %ecx +; X86-NEXT: cmpl %ebp, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %edi, %ecx +; X86-NEXT: setb %cl +; X86-NEXT: orb (%esp), %cl # 1-byte Folded Reload ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %ebp, %edx -; X86-NEXT: movl $0, %edx +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: xorl $127, %eax ; X86-NEXT: movl %esi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %esi, %edx -; X86-NEXT: movl $0, %edx +; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: movl $0, %edx -; X86-NEXT: sbbl %ebx, %edx -; X86-NEXT: setb %dl -; X86-NEXT: orb (%esp), %dl # 1-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: sete %al +; X86-NEXT: testb %cl, %cl +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovnel %edi, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovnel %ecx, %esi -; X86-NEXT: cmovnel %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmovnel %edi, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmovnel %ecx, %ebp -; X86-NEXT: jne .LBB4_8 -; X86-NEXT: # %bb.1: # %_udiv-special-cases -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: xorl $127, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: je .LBB4_8 -; X86-NEXT: # %bb.2: # %udiv-bb1 +; X86-NEXT: cmovnel %edi, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: cmovnel %edi, %ebx +; X86-NEXT: orb %cl, %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: jne .LBB4_7 +; X86-NEXT: # %bb.1: # %udiv-bb1 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -287,9 +290,8 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl 
%ecx, %eax -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: xorb $127, %al ; X86-NEXT: movb %al, %ch ; X86-NEXT: andb $7, %ch @@ -301,7 +303,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl 132(%esp,%eax), %esi ; X86-NEXT: movb %ch, %cl ; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shll %cl, %edx ; X86-NEXT: notb %cl ; X86-NEXT: movl 124(%esp,%eax), %ebp @@ -309,68 +311,69 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: shrl %esi ; X86-NEXT: shrl %cl, %esi ; X86-NEXT: orl %edx, %esi -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl 120(%esp,%eax), %ebp +; X86-NEXT: movl 120(%esp,%eax), %eax ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %ebp, %edx -; X86-NEXT: shll %cl, %ebp -; X86-NEXT: addl $1, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %eax, %ebp +; X86-NEXT: shll %cl, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: addl $1, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $0, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: adcl $0, %ebx -; X86-NEXT: jae .LBB4_3 -; X86-NEXT: # %bb.6: +; X86-NEXT: jae .LBB4_2 +; X86-NEXT: # %bb.5: ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: jmp .LBB4_7 -; X86-NEXT: .LBB4_3: # %udiv-preheader -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, 
{{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: jmp .LBB4_6 +; X86-NEXT: .LBB4_2: # %udiv-preheader +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movb %al, %ch ; X86-NEXT: andb $7, %ch -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl 84(%esp,%eax), %ebp -; X86-NEXT: movl %esi, %edi -; X86-NEXT: movl 80(%esp,%eax), %ebx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %ebp, %esi +; X86-NEXT: movl 84(%esp,%eax), %ebx ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 72(%esp,%eax), %esi -; X86-NEXT: movl 76(%esp,%eax), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: shrl %cl, %edx -; X86-NEXT: notb %cl -; X86-NEXT: addl %ebx, %ebx -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: orl %edx, %ebx +; X86-NEXT: movl 80(%esp,%eax), %esi +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, 
%edx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrl %cl, %ebp -; X86-NEXT: shrdl %cl, %eax, %esi +; X86-NEXT: shrdl %cl, %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 72(%esp,%eax), %ebp +; X86-NEXT: movl 76(%esp,%eax), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: notb %cl +; X86-NEXT: addl %esi, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: orl %eax, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movb %ch, %cl +; X86-NEXT: shrl %cl, %ebx +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: shrdl %cl, %edx, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -380,25 +383,25 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: adcl $-1, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: adcl $-1, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB4_4: # %udiv-do-while +; X86-NEXT: .LBB4_3: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, %ebp -; X86-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X86-NEXT: shldl $1, %ebx, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: shldl $1, %ebp, %ebx -; X86-NEXT: shldl 
$1, %esi, %ebp +; X86-NEXT: shldl $1, %ebp, %edi +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl $1, %ebx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: shldl $1, %esi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: shldl $1, %edi, %esi -; X86-NEXT: orl %ecx, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: shldl $1, %eax, %edi ; X86-NEXT: orl %ecx, %edi @@ -407,14 +410,16 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: shldl $1, %edi, %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %edi, %edi -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: shldl $1, %edx, %edi +; X86-NEXT: orl %ecx, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: addl %edx, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: cmpl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: sbbl %ebp, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload ; X86-NEXT: sarl $31, %ecx @@ -429,84 +434,81 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: subl %ecx, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl %ecx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %ebx ; 
X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: sbbl %edi, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: sbbl %eax, (%esp) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: adcl $-1, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: adcl $-1, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: adcl $-1, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edi, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: movl (%esp), %ebp # 4-byte Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: movl (%esp), %edi # 4-byte Reload ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: jne .LBB4_4 -; X86-NEXT: # %bb.5: -; X86-NEXT: movl %esi, (%esp) # 4-byte Spill -; X86-NEXT: movl %edi, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: jne .LBB4_3 +; X86-NEXT: # %bb.4: +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: .LBB4_7: # %udiv-loop-exit -; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: .LBB4_6: # %udiv-loop-exit +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: shldl $1, %esi, %edx ; X86-NEXT: orl %ecx, %edx -; X86-NEXT: shldl $1, %ebx, %esi +; X86-NEXT: shldl $1, %ebp, %esi ; X86-NEXT: orl %ecx, %esi -; X86-NEXT: shldl $1, %ebp, %ebx -; X86-NEXT: orl %ecx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %ebp, %ebp -; X86-NEXT: orl %eax, %ebp -; X86-NEXT: .LBB4_8: # %udiv-end +; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: shldl $1, %ebx, %ebp +; X86-NEXT: orl %ecx, %ebp +; X86-NEXT: addl %ebx, %ebx +; X86-NEXT: orl %eax, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebp, (%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: .LBB4_7: # %udiv-end +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: movl %ebp, 4(%eax) ; X86-NEXT: movl %esi, 8(%eax) ; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X86-NEXT: movl %esi, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, %esi -; X86-NEXT: imull %ecx, %esi -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: imull %ebp, %esi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %esi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: imull %ecx, %edi -; X86-NEXT: addl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: imull %ecx, %ebp +; X86-NEXT: addl %edx, %ebp ; X86-NEXT: movl 
{{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %ebx ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: imull %esi, %ebp -; X86-NEXT: addl %edx, %ebp +; X86-NEXT: imull %esi, %edi +; X86-NEXT: addl %edx, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: imull %eax, %ebx -; X86-NEXT: addl %ebp, %ebx -; X86-NEXT: addl (%esp), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: adcl %edi, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: addl %edi, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ebp, %ebx +; X86-NEXT: movl (%esp), %ebp # 4-byte Reload ; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -522,7 +524,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: adcl %edi, %ebp ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -530,11 +532,11 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl %ebx, %edx ; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: sbbl %eax, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx diff --git 
a/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll b/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll index 0250b1b4a7f86..f42c2f8f14476 100644 --- a/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll +++ b/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll @@ -28,78 +28,70 @@ define i32 @decode_sb(ptr %t, i32 %bl, i32 %_msprop1966, i32 %sub.i, i64 %idxpro ; CHECK-NEXT: .cfi_offset %r15, -24 ; CHECK-NEXT: movl %r9d, %ebx ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx -; CHECK-NEXT: movabsq $87960930222080, %r15 # imm = 0x500000000000 -; CHECK-NEXT: movl 0, %r11d -; CHECK-NEXT: movl %esi, %r12d -; CHECK-NEXT: # implicit-def: $r13d +; CHECK-NEXT: movabsq $87960930222080, %r14 # imm = 0x500000000000 +; CHECK-NEXT: movl 0, %r13d +; CHECK-NEXT: movl %esi, %r15d +; CHECK-NEXT: # implicit-def: $r12d ; CHECK-NEXT: testb $1, %bl -; CHECK-NEXT: jne .LBB0_7 +; CHECK-NEXT: jne .LBB0_6 ; CHECK-NEXT: # %bb.1: # %if.else -; CHECK-NEXT: movq %r8, %r14 -; CHECK-NEXT: movl %ecx, %r13d -; CHECK-NEXT: andl $1, %r13d -; CHECK-NEXT: movzbl 544(%r13), %r8d -; CHECK-NEXT: andl $1, %r8d -; CHECK-NEXT: movl %r15d, %r9d +; CHECK-NEXT: movl %ecx, %r12d +; CHECK-NEXT: andl $1, %r12d +; CHECK-NEXT: movzbl 544(%r12), %r9d ; CHECK-NEXT: andl $1, %r9d ; CHECK-NEXT: movl %r14d, %r10d ; CHECK-NEXT: andl $1, %r10d +; CHECK-NEXT: andl $1, %r8d ; CHECK-NEXT: movabsq $17592186044416, %rax # imm = 0x100000000000 -; CHECK-NEXT: orq %r10, %rax -; CHECK-NEXT: movl %esi, %r10d +; CHECK-NEXT: orq %r8, %rax +; CHECK-NEXT: movl %esi, %r8d ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shrl %cl, %r10d -; CHECK-NEXT: andl $2, %r10d +; CHECK-NEXT: shrl %cl, %r8d +; CHECK-NEXT: andl $2, %r8d ; CHECK-NEXT: testb $1, %bl -; CHECK-NEXT: cmoveq %r9, %rax -; CHECK-NEXT: orl %r8d, %edx -; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movq %r11, %rcx +; CHECK-NEXT: 
cmoveq %r10, %rax +; CHECK-NEXT: orl %r9d, %edx +; CHECK-NEXT: movq %r13, %rcx ; CHECK-NEXT: orq $1, %rcx -; CHECK-NEXT: orl %esi, %r10d +; CHECK-NEXT: orl %esi, %r8d ; CHECK-NEXT: movl $1, %r8d ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.2: # %if.else ; CHECK-NEXT: movl (%rax), %r8d ; CHECK-NEXT: .LBB0_3: # %if.else ; CHECK-NEXT: shlq $5, %rdx -; CHECK-NEXT: movq %r12, %rax +; CHECK-NEXT: movq %r15, %rax ; CHECK-NEXT: shlq $7, %rax ; CHECK-NEXT: leaq (%rax,%rdx), %rsi ; CHECK-NEXT: addq $1248, %rsi # imm = 0x4E0 ; CHECK-NEXT: movq %rcx, 0 -; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: movl %r8d, (%rdi) ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: callq *%rax -; CHECK-NEXT: xorq $1, %r14 -; CHECK-NEXT: cmpl $0, (%r14) -; CHECK-NEXT: je .LBB0_6 -; CHECK-NEXT: # %bb.4: # %if.else ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB0_5 -; CHECK-NEXT: .LBB0_6: # %bb19 +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: # %bb.5: # %bb19 ; CHECK-NEXT: testb $1, %bl -; CHECK-NEXT: movq %r15, %rdi -; CHECK-NEXT: movabsq $87960930222080, %r15 # imm = 0x500000000000 -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; CHECK-NEXT: jne .LBB0_8 -; CHECK-NEXT: .LBB0_7: # %if.end69 -; CHECK-NEXT: movl %r11d, 0 +; CHECK-NEXT: movq %r14, %rdi +; CHECK-NEXT: movabsq $87960930222080, %r14 # imm = 0x500000000000 +; CHECK-NEXT: jne .LBB0_7 +; CHECK-NEXT: .LBB0_6: # %if.end69 +; CHECK-NEXT: movl %r13d, 0 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %r8d, %r8d ; CHECK-NEXT: callq *%rax -; CHECK-NEXT: xorq %r15, %r12 -; CHECK-NEXT: movslq %r13d, %rax -; CHECK-NEXT: movzbl (%r12), %ecx +; CHECK-NEXT: xorq %r14, %r15 +; CHECK-NEXT: movslq %r12d, %rax +; CHECK-NEXT: movzbl (%r15), %ecx ; CHECK-NEXT: movb %cl, 544(%rax) -; CHECK-NEXT: .LBB0_8: # %land.lhs.true56 +; CHECK-NEXT: 
.LBB0_7: # %land.lhs.true56 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx @@ -110,7 +102,7 @@ define i32 @decode_sb(ptr %t, i32 %bl, i32 %_msprop1966, i32 %sub.i, i64 %idxpro ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB0_5: # %bb +; CHECK-NEXT: .LBB0_4: # %bb entry: %i = load i32, ptr null, align 8 br i1 %cmp54, label %if.end69, label %if.else diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index a7564c9622c5c..e8b3121ecfb52 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -4440,16 +4440,14 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2] ; SSE2-NEXT: movmskpd %xmm0, %eax ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: jne .LBB97_2 -; SSE2-NEXT: # %bb.1: # %entry -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: testb $1, %al -; SSE2-NEXT: jne .LBB97_2 -; SSE2-NEXT: # %bb.3: # %middle.block -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: retq -; SSE2-NEXT: .LBB97_2: +; SSE2-NEXT: setne %al +; SSE2-NEXT: movd %xmm1, %ecx +; SSE2-NEXT: orb %al, %cl +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: je .LBB97_2 +; SSE2-NEXT: # %bb.1: ; SSE2-NEXT: movw $0, 0 +; SSE2-NEXT: .LBB97_2: # %middle.block ; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: retq ; @@ -4460,16 +4458,14 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) { ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 ; SSE41-NEXT: movmskpd %xmm0, %eax ; SSE41-NEXT: testl %eax, %eax -; SSE41-NEXT: jne .LBB97_2 -; SSE41-NEXT: # %bb.1: # %entry -; SSE41-NEXT: movd %xmm0, %eax -; SSE41-NEXT: testb $1, %al -; SSE41-NEXT: jne .LBB97_2 -; SSE41-NEXT: # %bb.3: # %middle.block -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: retq -; SSE41-NEXT: .LBB97_2: +; SSE41-NEXT: setne %al +; SSE41-NEXT: movd %xmm0, %ecx +; SSE41-NEXT: orb %al, %cl +; SSE41-NEXT: testb $1, %cl +; SSE41-NEXT: je .LBB97_2 +; SSE41-NEXT: # %bb.1: ; 
SSE41-NEXT: movw $0, 0 +; SSE41-NEXT: .LBB97_2: # %middle.block ; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: retq ; @@ -4479,16 +4475,14 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) { ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vtestpd %xmm0, %xmm0 -; AVX1-NEXT: jne .LBB97_2 -; AVX1-NEXT: # %bb.1: # %entry -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: testb $1, %al -; AVX1-NEXT: jne .LBB97_2 -; AVX1-NEXT: # %bb.3: # %middle.block -; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: retq -; AVX1-NEXT: .LBB97_2: +; AVX1-NEXT: setne %al +; AVX1-NEXT: vmovd %xmm0, %ecx +; AVX1-NEXT: orb %al, %cl +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je .LBB97_2 +; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: movw $0, 0 +; AVX1-NEXT: .LBB97_2: # %middle.block ; AVX1-NEXT: xorl %eax, %eax ; AVX1-NEXT: retq ; @@ -4498,16 +4492,14 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) { ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vtestpd %xmm0, %xmm0 -; AVX2-NEXT: jne .LBB97_2 -; AVX2-NEXT: # %bb.1: # %entry -; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: testb $1, %al -; AVX2-NEXT: jne .LBB97_2 -; AVX2-NEXT: # %bb.3: # %middle.block -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: retq -; AVX2-NEXT: .LBB97_2: +; AVX2-NEXT: setne %al +; AVX2-NEXT: vmovd %xmm0, %ecx +; AVX2-NEXT: orb %al, %cl +; AVX2-NEXT: testb $1, %cl +; AVX2-NEXT: je .LBB97_2 +; AVX2-NEXT: # %bb.1: ; AVX2-NEXT: movw $0, 0 +; AVX2-NEXT: .LBB97_2: # %middle.block ; AVX2-NEXT: xorl %eax, %eax ; AVX2-NEXT: retq ; @@ -4517,18 +4509,15 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) { ; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: testb $3, %al -; KNL-NEXT: jne .LBB97_2 -; KNL-NEXT: # %bb.1: # %entry -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kmovw %k0, 
%ecx +; KNL-NEXT: testb $3, %cl +; KNL-NEXT: setne %cl +; KNL-NEXT: orb %cl, %al ; KNL-NEXT: testb $1, %al -; KNL-NEXT: jne .LBB97_2 -; KNL-NEXT: # %bb.3: # %middle.block -; KNL-NEXT: xorl %eax, %eax -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; KNL-NEXT: .LBB97_2: +; KNL-NEXT: je .LBB97_2 +; KNL-NEXT: # %bb.1: ; KNL-NEXT: movw $0, 0 +; KNL-NEXT: .LBB97_2: # %middle.block ; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -4539,16 +4528,14 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) { ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; SKX-NEXT: vptestnmq %xmm0, %xmm0, %k0 ; SKX-NEXT: kortestb %k0, %k0 -; SKX-NEXT: jne .LBB97_2 -; SKX-NEXT: # %bb.1: # %entry -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: testb $1, %al -; SKX-NEXT: jne .LBB97_2 -; SKX-NEXT: # %bb.3: # %middle.block -; SKX-NEXT: xorl %eax, %eax -; SKX-NEXT: retq -; SKX-NEXT: .LBB97_2: +; SKX-NEXT: setne %al +; SKX-NEXT: kmovd %k0, %ecx +; SKX-NEXT: orb %al, %cl +; SKX-NEXT: testb $1, %cl +; SKX-NEXT: je .LBB97_2 +; SKX-NEXT: # %bb.1: ; SKX-NEXT: movw $0, 0 +; SKX-NEXT: .LBB97_2: # %middle.block ; SKX-NEXT: xorl %eax, %eax ; SKX-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/or-branch.ll b/llvm/test/CodeGen/X86/or-branch.ll index 5d5cc2cb32f1c..c6df237393e4a 100644 --- a/llvm/test/CodeGen/X86/or-branch.ll +++ b/llvm/test/CodeGen/X86/or-branch.ll @@ -5,12 +5,13 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind { ; JUMP2-LABEL: foo: ; JUMP2: # %bb.0: # %entry -; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp) -; JUMP2-NEXT: jl bar@PLT # TAILCALL -; JUMP2-NEXT: # %bb.1: # %entry ; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; JUMP2-NEXT: setne %al +; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp) +; JUMP2-NEXT: setge %cl +; JUMP2-NEXT: testb %al, %cl ; JUMP2-NEXT: je bar@PLT # TAILCALL -; JUMP2-NEXT: # %bb.2: # %UnifiedReturnBlock +; JUMP2-NEXT: # %bb.1: # %UnifiedReturnBlock ; JUMP2-NEXT: retl ; ; JUMP1-LABEL: foo: diff --git 
a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll index 9069688c8037c..f3741dc202dc5 100644 --- a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll +++ b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -284,7 +284,7 @@ define i64 @test_two_live_flags(ptr %foo0, i64 %bar0, i64 %baz0, ptr %foo1, i64 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK32-NEXT: lock cmpxchg8b (%esi) -; CHECK32-NEXT: setne {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK32-NEXT: sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK32-NEXT: movl %ebp, %edx ; CHECK32-NEXT: movl %edi, %ecx @@ -292,17 +292,15 @@ define i64 @test_two_live_flags(ptr %foo0, i64 %bar0, i64 %baz0, ptr %foo1, i64 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK32-NEXT: lock cmpxchg8b (%esi) ; CHECK32-NEXT: sete %al -; CHECK32-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; CHECK32-NEXT: jne .LBB5_4 -; CHECK32-NEXT: # %bb.1: # %entry -; CHECK32-NEXT: testb %al, %al -; CHECK32-NEXT: je .LBB5_4 -; CHECK32-NEXT: # %bb.2: # %t +; CHECK32-NEXT: andb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload +; CHECK32-NEXT: cmpb $1, %al +; CHECK32-NEXT: jne .LBB5_3 +; CHECK32-NEXT: # %bb.1: # %t ; CHECK32-NEXT: movl $42, %eax -; CHECK32-NEXT: jmp .LBB5_3 -; CHECK32-NEXT: .LBB5_4: # %f +; CHECK32-NEXT: jmp .LBB5_2 +; CHECK32-NEXT: .LBB5_3: # %f ; CHECK32-NEXT: xorl %eax, %eax -; CHECK32-NEXT: .LBB5_3: # %t +; CHECK32-NEXT: .LBB5_2: # %t ; CHECK32-NEXT: xorl %edx, %edx ; CHECK32-NEXT: addl $4, %esp ; CHECK32-NEXT: popl %esi @@ -315,19 +313,17 @@ define i64 @test_two_live_flags(ptr %foo0, i64 %bar0, i64 %baz0, ptr %foo1, i64 ; CHECK64: # %bb.0: # %entry ; CHECK64-NEXT: movq %rsi, %rax ; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi) -; CHECK64-NEXT: setne %dl +; CHECK64-NEXT: sete %dl ; CHECK64-NEXT: movq %r8, %rax ; CHECK64-NEXT: lock 
cmpxchgq %r9, (%rcx) ; CHECK64-NEXT: sete %al -; CHECK64-NEXT: testb %dl, %dl -; CHECK64-NEXT: jne .LBB5_3 -; CHECK64-NEXT: # %bb.1: # %entry -; CHECK64-NEXT: testb %al, %al -; CHECK64-NEXT: je .LBB5_3 -; CHECK64-NEXT: # %bb.2: # %t +; CHECK64-NEXT: andb %dl, %al +; CHECK64-NEXT: cmpb $1, %al +; CHECK64-NEXT: jne .LBB5_2 +; CHECK64-NEXT: # %bb.1: # %t ; CHECK64-NEXT: movl $42, %eax ; CHECK64-NEXT: retq -; CHECK64-NEXT: .LBB5_3: # %f +; CHECK64-NEXT: .LBB5_2: # %f ; CHECK64-NEXT: xorl %eax, %eax ; CHECK64-NEXT: retq entry: @@ -353,7 +349,6 @@ define i1 @asm_clobbering_flags(ptr %mem) nounwind { ; CHECK32-NEXT: testl %edx, %edx ; CHECK32-NEXT: setg %al ; CHECK32-NEXT: #APP -; CHECK32-NOT: rep ; CHECK32-NEXT: bsfl %edx, %edx ; CHECK32-NEXT: #NO_APP ; CHECK32-NEXT: movl %edx, (%ecx) @@ -365,7 +360,6 @@ define i1 @asm_clobbering_flags(ptr %mem) nounwind { ; CHECK64-NEXT: testl %ecx, %ecx ; CHECK64-NEXT: setg %al ; CHECK64-NEXT: #APP -; CHECK64-NOT: rep ; CHECK64-NEXT: bsfl %ecx, %ecx ; CHECK64-NEXT: #NO_APP ; CHECK64-NEXT: movl %ecx, (%rdi) diff --git a/llvm/test/CodeGen/X86/pr33747.ll b/llvm/test/CodeGen/X86/pr33747.ll index e261486dd5924..c8ba2b2e3a790 100644 --- a/llvm/test/CodeGen/X86/pr33747.ll +++ b/llvm/test/CodeGen/X86/pr33747.ll @@ -5,18 +5,19 @@ define void @PR33747(ptr nocapture) { ; CHECK-LABEL: PR33747: ; CHECK: # %bb.0: ; CHECK-NEXT: movl 24(%rdi), %eax +; CHECK-NEXT: leal 1(%rax), %ecx +; CHECK-NEXT: cmpl $3, %ecx +; CHECK-NEXT: setb %cl ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: incl %eax -; CHECK-NEXT: cmpl $3, %eax -; CHECK-NEXT: jae .LBB0_3 +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %cl, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp .LBB0_2 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: 
.LBB0_3: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp .LBB0_3 %2 = getelementptr inbounds i32, ptr %0, i64 6 %3 = load i32, ptr %2, align 4 %4 = add i32 %3, 1 diff --git a/llvm/test/CodeGen/X86/pr37025.ll b/llvm/test/CodeGen/X86/pr37025.ll index a758ddc91541b..8ac28d6286a60 100644 --- a/llvm/test/CodeGen/X86/pr37025.ll +++ b/llvm/test/CodeGen/X86/pr37025.ll @@ -18,11 +18,13 @@ define void @test_dec_select(ptr nocapture %0, ptr readnone %1) { ; CHECK-LABEL: test_dec_select: ; CHECK: # %bb.0: ; CHECK-NEXT: lock decq (%rdi) -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sete %al ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: jne func2 # TAILCALL -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: setne %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: cmpb $1, %cl +; CHECK-NEXT: je func2 # TAILCALL +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: retq %3 = atomicrmw sub ptr %0, i64 1 seq_cst %4 = icmp eq i64 %3, 1 @@ -44,11 +46,11 @@ define void @test_dec_select_commute(ptr nocapture %0, ptr readnone %1) { ; CHECK-NEXT: lock decq (%rdi) ; CHECK-NEXT: sete %al ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: je .LBB1_2 +; CHECK-NEXT: setne %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: cmpb $1, %cl +; CHECK-NEXT: je func2 # TAILCALL ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne func2 # TAILCALL -; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: retq %3 = atomicrmw sub ptr %0, i64 1 seq_cst %4 = icmp eq i64 %3, 1 @@ -69,12 +71,13 @@ define void @test_dec_and(ptr nocapture %0, ptr readnone %1) { ; CHECK: # %bb.0: ; CHECK-NEXT: lock decq (%rdi) ; CHECK-NEXT: sete %al +; CHECK-NEXT: notb %al ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: je .LBB2_2 +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je func2 # TAILCALL ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne func2 # TAILCALL -; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: retq %3 = atomicrmw sub ptr %0, i64 1 seq_cst %4 = icmp eq i64 %3, 1 @@ 
-94,11 +97,14 @@ define void @test_dec_and_commute(ptr nocapture %0, ptr readnone %1) { ; CHECK-LABEL: test_dec_and_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: lock decq (%rdi) -; CHECK-NEXT: jne .LBB3_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sete %al +; CHECK-NEXT: notb %al ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: jne func2 # TAILCALL -; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je func2 # TAILCALL +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: retq %3 = atomicrmw sub ptr %0, i64 1 seq_cst %4 = icmp eq i64 %3, 1 diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index 03629a353d84d..f64c70e8fc79a 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -25,141 +25,126 @@ define dso_local void @fn() { ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: # implicit-def: $ecx ; CHECK-NEXT: # implicit-def: $edi +; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $al ; CHECK-NEXT: # kill: killed $al -; CHECK-NEXT: # implicit-def: $al ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_16: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movb %dh, %al +; CHECK-NEXT: .LBB0_15: # %for.inc +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_22 Depth 2 -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: cmpb $8, %al -; CHECK-NEXT: ja .LBB0_3 -; CHECK-NEXT: # %bb.2: # %for.cond -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # Child Loop BB0_19 Depth 2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.4: # %if.end +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # %bb.2: # %if.then +; 
CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movl $.str, (%esp) +; CHECK-NEXT: calll printf +; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_3: # %if.end ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl a -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: movb %cl, %dh +; CHECK-NEXT: movl %ecx, %edx ; CHECK-NEXT: movl $0, h -; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; CHECK-NEXT: cmpb $8, %al -; CHECK-NEXT: jg .LBB0_8 -; CHECK-NEXT: # %bb.5: # %if.then13 +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload +; CHECK-NEXT: cmpb $8, %dh +; CHECK-NEXT: jg .LBB0_7 +; CHECK-NEXT: # %bb.4: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: je .LBB0_6 -; CHECK-NEXT: jmp .LBB0_18 +; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: jne .LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_3: # %if.then -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: calll printf -; CHECK-NEXT: # implicit-def: 
$eax -; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; CHECK-NEXT: .LBB0_6: # %for.cond35 +; CHECK-NEXT: # %bb.5: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_7 -; CHECK-NEXT: .LBB0_11: # %af +; CHECK-NEXT: je .LBB0_6 +; CHECK-NEXT: .LBB0_10: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_12 -; CHECK-NEXT: .LBB0_19: # %if.end39 +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: .LBB0_16: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_21 -; CHECK-NEXT: # %bb.20: # %if.then41 +; CHECK-NEXT: je .LBB0_18 +; CHECK-NEXT: # %bb.17: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_21: # %for.end46 +; CHECK-NEXT: .LBB0_18: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $al ; CHECK-NEXT: # implicit-def: $dh +; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_22 +; CHECK-NEXT: jmp .LBB0_19 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_8: # %if.end21 +; CHECK-NEXT: .LBB0_7: # %if.end21 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_9 +; CHECK-NEXT: jmp .LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_6: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb %dl, %dh -; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_22: # %for.cond47 +; CHECK-NEXT: .LBB0_19: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; 
CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_22 -; CHECK-NEXT: # %bb.23: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_22 Depth=2 -; CHECK-NEXT: jne .LBB0_22 -; CHECK-NEXT: .LBB0_9: # %ae +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: .LBB0_8: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: jne .LBB0_9 +; CHECK-NEXT: # %bb.12: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB0_14 -; CHECK-NEXT: # %bb.15: # %if.end26 +; CHECK-NEXT: testb %dh, %dh +; CHECK-NEXT: je .LBB0_15 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_16 -; CHECK-NEXT: # %bb.17: # %if.then31 +; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: # %bb.14: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: .LBB0_18: # %for.inc -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dh, %al -; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: jmp .LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_9: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl ; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: jmp .LBB0_7 -; CHECK-NEXT: .p2align 4, 0x90 -; 
CHECK-NEXT: .LBB0_14: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movb %dh, %al -; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: jmp .LBB0_6 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll index 3faa493ebccd0..c98aae7fbf405 100644 --- a/llvm/test/CodeGen/X86/setcc-logic.ll +++ b/llvm/test/CodeGen/X86/setcc-logic.ll @@ -132,15 +132,12 @@ return: define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: js .LBB9_3 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: js .LBB9_3 -; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: orl %esi, %edi +; CHECK-NEXT: js .LBB9_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB9_3: # %return +; CHECK-NEXT: .LBB9_2: # %return ; CHECK-NEXT: movl $192, %eax ; CHECK-NEXT: retq entry: @@ -159,15 +156,13 @@ return: define i32 @all_bits_set_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_bits_set_branch: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: cmpl $-1, %edi -; CHECK-NEXT: jne .LBB10_3 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: cmpl $-1, %esi -; CHECK-NEXT: jne .LBB10_3 -; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: jne .LBB10_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB10_3: # %return +; CHECK-NEXT: .LBB10_2: # %return ; CHECK-NEXT: movl $192, %eax ; CHECK-NEXT: retq entry: @@ -186,15 +181,12 @@ return: define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_sign_bits_set_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jns .LBB11_3 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: jns .LBB11_3 -; 
CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: testl %esi, %edi +; CHECK-NEXT: jns .LBB11_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB11_3: # %return +; CHECK-NEXT: .LBB11_2: # %return ; CHECK-NEXT: movl $192, %eax ; CHECK-NEXT: retq entry: @@ -238,17 +230,14 @@ return: define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_sign_bits_set_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: js .LBB13_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: js .LBB13_2 -; CHECK-NEXT: # %bb.3: # %return -; CHECK-NEXT: movl $192, %eax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB13_2: # %bb1 +; CHECK-NEXT: orl %esi, %edi +; CHECK-NEXT: jns .LBB13_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB13_2: # %return +; CHECK-NEXT: movl $192, %eax +; CHECK-NEXT: retq entry: %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 @@ -265,17 +254,15 @@ return: define i32 @any_bits_clear_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_bits_clear_branch: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: cmpl $-1, %edi -; CHECK-NEXT: jne .LBB14_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: cmpl $-1, %esi -; CHECK-NEXT: jne .LBB14_2 -; CHECK-NEXT: # %bb.3: # %return -; CHECK-NEXT: movl $192, %eax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB14_2: # %bb1 +; CHECK-NEXT: je .LBB14_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB14_2: # %return +; CHECK-NEXT: movl $192, %eax +; CHECK-NEXT: retq entry: %a = icmp ne i32 %P, -1 %b = icmp ne i32 %Q, -1 @@ -292,17 +279,14 @@ return: define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jns .LBB15_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: testl %esi, %esi -; 
CHECK-NEXT: jns .LBB15_2 -; CHECK-NEXT: # %bb.3: # %return -; CHECK-NEXT: movl $192, %eax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB15_2: # %bb1 +; CHECK-NEXT: testl %esi, %edi +; CHECK-NEXT: js .LBB15_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB15_2: # %return +; CHECK-NEXT: movl $192, %eax +; CHECK-NEXT: retq entry: %a = icmp sgt i32 %P, -1 %b = icmp sgt i32 %Q, -1 diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll index 75252309790b1..1489b0295e935 100644 --- a/llvm/test/CodeGen/X86/swifterror.ll +++ b/llvm/test/CodeGen/X86/swifterror.ll @@ -1259,12 +1259,7 @@ entry: define swiftcc void @dont_crash_on_new_isel_blocks(ptr nocapture swifterror, i1, ptr) { ; CHECK-APPLE-LABEL: dont_crash_on_new_isel_blocks: ; CHECK-APPLE: ## %bb.0: ## %entry -; CHECK-APPLE-NEXT: xorl %eax, %eax -; CHECK-APPLE-NEXT: testb %al, %al -; CHECK-APPLE-NEXT: jne LBB15_2 -; CHECK-APPLE-NEXT: ## %bb.1: ## %entry ; CHECK-APPLE-NEXT: testb $1, %dil -; CHECK-APPLE-NEXT: LBB15_2: ## %cont ; CHECK-APPLE-NEXT: pushq %rax ; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 16 ; CHECK-APPLE-NEXT: callq *%rax @@ -1290,12 +1285,7 @@ define swiftcc void @dont_crash_on_new_isel_blocks(ptr nocapture swifterror, i1, ; ; CHECK-i386-LABEL: dont_crash_on_new_isel_blocks: ; CHECK-i386: ## %bb.0: ## %entry -; CHECK-i386-NEXT: xorl %eax, %eax -; CHECK-i386-NEXT: testb %al, %al -; CHECK-i386-NEXT: jne LBB15_2 -; CHECK-i386-NEXT: ## %bb.1: ## %entry ; CHECK-i386-NEXT: testb $1, 8(%esp) -; CHECK-i386-NEXT: LBB15_2: ## %cont ; CHECK-i386-NEXT: jmpl *%eax ## TAILCALL entry: %3 = or i1 false, %1 diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll index 9cd3731518120..8d84e887d3f27 100644 --- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -91,116 +91,97 @@ define i32 @loop_shared_header(ptr %exe, i32 
%exesz, i32 %headsize, i32 %min, i3 ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_24 +; CHECK-NEXT: jne .LBB1_12 ; CHECK-NEXT: # %bb.1: # %if.end19 -; CHECK-NEXT: movl %esi, %ebp -; CHECK-NEXT: movq %rdi, %r15 -; CHECK-NEXT: movl (%rax), %r13d -; CHECK-NEXT: leal (,%r13,4), %ebx -; CHECK-NEXT: movl %ebx, %r12d +; CHECK-NEXT: movl (%rax), %r12d +; CHECK-NEXT: leal (,%r12,4), %ebp +; CHECK-NEXT: movl %ebp, %r15d ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movq %r12, %rdi +; CHECK-NEXT: movq %r15, %rdi ; CHECK-NEXT: callq cli_calloc@PLT -; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: je .LBB1_23 -; CHECK-NEXT: # %bb.2: # %if.end19 -; CHECK-NEXT: testl %r13d, %r13d -; CHECK-NEXT: je .LBB1_23 -; CHECK-NEXT: # %bb.3: # %if.end19 ; CHECK-NEXT: movq %rax, %r14 -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_23 -; CHECK-NEXT: # %bb.4: # %if.end19 -; CHECK-NEXT: cmpq %r15, %r14 -; CHECK-NEXT: jb .LBB1_23 -; CHECK-NEXT: # %bb.5: # %if.end50 +; CHECK-NEXT: jne .LBB1_12 +; CHECK-NEXT: # %bb.2: # %if.end50 ; CHECK-NEXT: movq %r14, %rdi -; CHECK-NEXT: movq %r12, %rdx +; CHECK-NEXT: movq %r15, %rdx ; CHECK-NEXT: callq memcpy@PLT -; CHECK-NEXT: cmpl $4, %ebx -; CHECK-NEXT: jb .LBB1_26 -; CHECK-NEXT: # %bb.6: # %shared_preheader +; CHECK-NEXT: cmpl $4, %ebp +; CHECK-NEXT: jb .LBB1_19 +; CHECK-NEXT: # %bb.3: # %shared_preheader ; CHECK-NEXT: movb $32, %cl ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: jmp .LBB1_8 +; CHECK-NEXT: jmp .LBB1_4 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: .LBB1_15: # %merge_predecessor_split +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: movb 
$32, %cl -; CHECK-NEXT: .LBB1_8: # %outer_loop_header +; CHECK-NEXT: .LBB1_4: # %outer_loop_header ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB1_9 Depth 2 -; CHECK-NEXT: testl %r13d, %r13d -; CHECK-NEXT: je .LBB1_16 +; CHECK-NEXT: # Child Loop BB1_8 Depth 2 +; CHECK-NEXT: testl %r12d, %r12d +; CHECK-NEXT: je .LBB1_5 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB1_9: # %shared_loop_header -; CHECK-NEXT: # Parent Loop BB1_8 Depth=1 +; CHECK-NEXT: .LBB1_8: # %shared_loop_header +; CHECK-NEXT: # Parent Loop BB1_4 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: jne .LBB1_25 -; CHECK-NEXT: # %bb.10: # %inner_loop_body -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=2 +; CHECK-NEXT: jne .LBB1_18 +; CHECK-NEXT: # %bb.9: # %inner_loop_body +; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=2 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB1_9 -; CHECK-NEXT: # %bb.11: # %if.end96.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 -; CHECK-NEXT: cmpl $3, %r13d -; CHECK-NEXT: jae .LBB1_20 -; CHECK-NEXT: # %bb.12: # %if.end287.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: je .LBB1_8 +; CHECK-NEXT: # %bb.10: # %if.end96.i +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 +; CHECK-NEXT: cmpl $3, %r12d +; CHECK-NEXT: jae .LBB1_11 +; CHECK-NEXT: # %bb.13: # %if.end287.i +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: jne .LBB1_8 -; CHECK-NEXT: # %bb.13: # %if.end308.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jne .LBB1_4 +; CHECK-NEXT: # %bb.14: # %if.end308.i +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB1_7 -; CHECK-NEXT: # %bb.14: # %if.end335.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: je .LBB1_15 +; CHECK-NEXT: # %bb.16: # %if.end335.i +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx ; 
CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne .LBB1_8 -; CHECK-NEXT: # %bb.15: # %merge_other -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jne .LBB1_4 +; CHECK-NEXT: # %bb.17: # %merge_other +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: jmp .LBB1_8 -; CHECK-NEXT: .LBB1_23: -; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: jmp .LBB1_24 -; CHECK-NEXT: .LBB1_16: # %while.cond.us1412.i +; CHECK-NEXT: jmp .LBB1_4 +; CHECK-NEXT: .LBB1_5: # %while.cond.us1412.i ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: jne .LBB1_18 -; CHECK-NEXT: # %bb.17: # %while.cond.us1412.i +; CHECK-NEXT: jne .LBB1_7 +; CHECK-NEXT: # %bb.6: # %while.cond.us1412.i ; CHECK-NEXT: decb %cl -; CHECK-NEXT: jne .LBB1_24 -; CHECK-NEXT: .LBB1_18: # %if.end41.us1436.i -; CHECK-NEXT: .LBB1_20: # %if.then99.i +; CHECK-NEXT: jne .LBB1_12 +; CHECK-NEXT: .LBB1_7: # %if.end41.us1436.i +; CHECK-NEXT: .LBB1_11: # %if.then99.i ; CHECK-NEXT: movq .str.6@GOTPCREL(%rip), %rdi ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq cli_dbgmsg@PLT -; CHECK-NEXT: .LBB1_24: # %cleanup +; CHECK-NEXT: .LBB1_12: # %cleanup ; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB1_25: # %wunpsect.exit.thread.loopexit389 -; CHECK-NEXT: .LBB1_26: # %wunpsect.exit.thread.loopexit391 +; CHECK-NEXT: .LBB1_18: # %wunpsect.exit.thread.loopexit389 +; CHECK-NEXT: .LBB1_19: # %wunpsect.exit.thread.loopexit391 entry: %0 = load i32, ptr undef, align 4 %mul = shl nsw i32 %0, 2 diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll index d54110d1fa811..d9ab2f7d1f5fb 100644 --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -300,10 +300,9 @@ define fastcc 
void @c_expand_expr_stmt(ptr %expr) nounwind { ; CHECK-NEXT: cmpl $23, %ecx ; CHECK-NEXT: jne .LBB3_9 ; CHECK-NEXT: .LBB3_16: # %lvalue_p.exit4 -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB3_9 -; CHECK-NEXT: # %bb.17: # %lvalue_p.exit4 ; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl entry: %tmp4 = load i8, ptr null, align 8 ; [#uses=3] switch i8 %tmp4, label %bb3 [ diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll index ed43cabbdaee1..953a0d65c5386 100644 --- a/llvm/test/CodeGen/X86/test-shrink-bug.ll +++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll @@ -48,37 +48,39 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) { ; CHECK-X86: ## %bb.0: ; CHECK-X86-NEXT: subl $12, %esp ; CHECK-X86-NEXT: .cfi_def_cfa_offset 16 -; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-X86-NEXT: cmpb $123, {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: sete %al -; CHECK-X86-NEXT: testl $263, %ecx ## imm = 0x107 -; CHECK-X86-NEXT: je LBB1_3 -; CHECK-X86-NEXT: ## %bb.1: -; CHECK-X86-NEXT: testb %al, %al -; CHECK-X86-NEXT: jne LBB1_3 -; CHECK-X86-NEXT: ## %bb.2: ## %no +; CHECK-X86-NEXT: setne %cl +; CHECK-X86-NEXT: testl $263, %eax ## imm = 0x107 +; CHECK-X86-NEXT: setne %al +; CHECK-X86-NEXT: testb %cl, %al +; CHECK-X86-NEXT: jne LBB1_2 +; CHECK-X86-NEXT: ## %bb.1: ## %yes +; CHECK-X86-NEXT: addl $12, %esp +; CHECK-X86-NEXT: retl +; CHECK-X86-NEXT: LBB1_2: ## %no ; CHECK-X86-NEXT: calll _bar -; CHECK-X86-NEXT: LBB1_3: ## %yes ; CHECK-X86-NEXT: addl $12, %esp ; CHECK-X86-NEXT: retl ; ; CHECK-X64-LABEL: fail: ; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107 -; CHECK-X64-NEXT: je .LBB1_3 -; CHECK-X64-NEXT: # %bb.1: ; CHECK-X64-NEXT: pslld $8, %xmm0 ; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax -; CHECK-X64-NEXT: testb $1, %al -; CHECK-X64-NEXT: jne .LBB1_3 -; CHECK-X64-NEXT: # 
%bb.2: # %no +; CHECK-X64-NEXT: xorb $1, %al +; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107 +; CHECK-X64-NEXT: setne %cl +; CHECK-X64-NEXT: testb %al, %cl +; CHECK-X64-NEXT: jne .LBB1_2 +; CHECK-X64-NEXT: # %bb.1: # %yes +; CHECK-X64-NEXT: retq +; CHECK-X64-NEXT: .LBB1_2: # %no ; CHECK-X64-NEXT: pushq %rax ; CHECK-X64-NEXT: .cfi_def_cfa_offset 16 ; CHECK-X64-NEXT: callq bar@PLT ; CHECK-X64-NEXT: popq %rax ; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X64-NEXT: .LBB1_3: # %yes ; CHECK-X64-NEXT: retq %1 = icmp eq <2 x i8> %b, %2 = extractelement <2 x i1> %1, i32 1 diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index b9e490888d9bf..3349d31cad4b9 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -181,38 +181,40 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; CHECK-LABEL: segmentedStack: ; CHECK: ## %bb.0: ; CHECK-NEXT: cmpq %gs:816, %rsp -; CHECK-NEXT: jbe LBB3_7 +; CHECK-NEXT: jbe LBB3_6 ; CHECK-NEXT: LBB3_1: ## %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: sete %al +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: sete %al -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: je LBB3_5 -; CHECK-NEXT: ## %bb.2: ## %entry -; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: je LBB3_5 -; CHECK-NEXT: ## %bb.3: ## %if.end4.i +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: jne LBB3_4 +; CHECK-NEXT: ## %bb.2: ## %if.end4.i ; CHECK-NEXT: movq 8(%rdi), %rdx ; CHECK-NEXT: cmpq 8(%rsi), %rdx -; CHECK-NEXT: jne LBB3_6 -; CHECK-NEXT: ## %bb.4: ## %land.rhs.i.i +; CHECK-NEXT: jne LBB3_5 +; CHECK-NEXT: ## %bb.3: ## %land.rhs.i.i ; CHECK-NEXT: movq (%rsi), %rsi ; CHECK-NEXT: movq (%rdi), %rdi ; CHECK-NEXT: callq _memcmp ; CHECK-NEXT: testl %eax, %eax ; 
CHECK-NEXT: sete %al -; CHECK-NEXT: LBB3_5: ## %__go_ptr_strings_equal.exit +; CHECK-NEXT: LBB3_4: ## %__go_ptr_strings_equal.exit ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_6: +; CHECK-NEXT: LBB3_5: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_7: +; CHECK-NEXT: LBB3_6: ; CHECK-NEXT: movl $8, %r10d ; CHECK-NEXT: movl $0, %r11d ; CHECK-NEXT: callq ___morestack @@ -222,41 +224,43 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; NOCOMPACTUNWIND-LABEL: segmentedStack: ; NOCOMPACTUNWIND: # %bb.0: ; NOCOMPACTUNWIND-NEXT: cmpq %fs:112, %rsp -; NOCOMPACTUNWIND-NEXT: jbe .LBB3_7 +; NOCOMPACTUNWIND-NEXT: jbe .LBB3_6 ; NOCOMPACTUNWIND-NEXT: .LBB3_1: # %entry ; NOCOMPACTUNWIND-NEXT: pushq %rax ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 +; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi +; NOCOMPACTUNWIND-NEXT: sete %al +; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi +; NOCOMPACTUNWIND-NEXT: sete %cl +; NOCOMPACTUNWIND-NEXT: orb %al, %cl ; NOCOMPACTUNWIND-NEXT: movq %rdi, %rax ; NOCOMPACTUNWIND-NEXT: orq %rsi, %rax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi -; NOCOMPACTUNWIND-NEXT: je .LBB3_5 -; NOCOMPACTUNWIND-NEXT: # %bb.2: # %entry -; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi -; NOCOMPACTUNWIND-NEXT: je .LBB3_5 -; NOCOMPACTUNWIND-NEXT: # %bb.3: # %if.end4.i +; NOCOMPACTUNWIND-NEXT: testb %cl, %cl +; NOCOMPACTUNWIND-NEXT: jne .LBB3_4 +; NOCOMPACTUNWIND-NEXT: # %bb.2: # %if.end4.i ; NOCOMPACTUNWIND-NEXT: movq 8(%rdi), %rdx ; NOCOMPACTUNWIND-NEXT: cmpq 8(%rsi), %rdx -; NOCOMPACTUNWIND-NEXT: jne .LBB3_6 -; NOCOMPACTUNWIND-NEXT: # %bb.4: # %land.rhs.i.i +; NOCOMPACTUNWIND-NEXT: jne .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.3: # %land.rhs.i.i ; NOCOMPACTUNWIND-NEXT: movq (%rsi), %rsi ; NOCOMPACTUNWIND-NEXT: movq (%rdi), %rdi ; NOCOMPACTUNWIND-NEXT: callq 
memcmp@PLT ; NOCOMPACTUNWIND-NEXT: testl %eax, %eax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: .LBB3_5: # %__go_ptr_strings_equal.exit +; NOCOMPACTUNWIND-NEXT: .LBB3_4: # %__go_ptr_strings_equal.exit ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_6: +; NOCOMPACTUNWIND-NEXT: .LBB3_5: ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 ; NOCOMPACTUNWIND-NEXT: xorl %eax, %eax ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_7: +; NOCOMPACTUNWIND-NEXT: .LBB3_6: ; NOCOMPACTUNWIND-NEXT: movl $8, %r10d ; NOCOMPACTUNWIND-NEXT: movl $0, %r11d ; NOCOMPACTUNWIND-NEXT: callq __morestack