diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 61615cb0f7b301..c892a3936cd6da 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4965,6 +4965,11 @@ class TargetLowering : public TargetLoweringBase { /// Memory, Other, Unknown. TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; + /// The register may be folded. This is used if the constraint is "rm", + /// where we prefer using a register, but can fall back to a memory slot + /// under register pressure. + bool MayFoldRegister = false; + /// If this is the result output operand or a clobber, this is null, /// otherwise it is the incoming operand to the CallInst. This gets /// modified as the asm is processed. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8450553743074c..f055f2d3b23be0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1018,7 +1018,8 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, } void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching, - unsigned MatchingIdx, const SDLoc &dl, + unsigned MatchingIdx, + bool MayFoldRegister, const SDLoc &dl, SelectionDAG &DAG, std::vector &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -1034,7 +1035,9 @@ void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching, // from the def. 
const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); + Flag.setRegClass(RC->getID()); + Flag.setRegMayBeFolded(MayFoldRegister); } SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); @@ -10063,8 +10066,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, AsmNodeOperands.push_back(OpInfo.CallOperand); } else { // Otherwise, this outputs to a register (directly for C_Register / - // C_RegisterClass, and a target-defined fashion for - // C_Immediate/C_Other). Find a register that we can use. + // C_RegisterClass, and a target-defined fashion for C_Immediate / + // C_Other). Find a register that we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( Call, "couldn't allocate output register for constraint '" + @@ -10080,7 +10083,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, OpInfo.AssignedRegs.AddInlineAsmOperands( OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber : InlineAsm::Kind::RegDef, - false, 0, getCurSDLoc(), DAG, AsmNodeOperands); + false, 0, OpInfo.MayFoldRegister, getCurSDLoc(), DAG, + AsmNodeOperands); } break; @@ -10122,9 +10126,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call); - MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, true, - OpInfo.getMatchedOperand(), dl, DAG, - AsmNodeOperands); + MatchedRegs.AddInlineAsmOperands( + InlineAsm::Kind::RegUse, true, OpInfo.getMatchedOperand(), + OpInfo.MayFoldRegister, dl, DAG, AsmNodeOperands); break; } @@ -10256,7 +10260,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, &Call); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, false, - 0, dl, DAG, AsmNodeOperands); + 0, OpInfo.MayFoldRegister, dl, + DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: @@ 
-10264,8 +10269,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::Clobber, - false, 0, getCurSDLoc(), DAG, - AsmNodeOperands); + false, 0, false, getCurSDLoc(), + DAG, AsmNodeOperands); break; } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 3f8a3e7ffb65bb..0b6251b190ee19 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -787,8 +787,9 @@ struct RegsForValue { /// code marker, matching input operand index (if applicable), and includes /// the number of values added into it. void AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching, - unsigned MatchingIdx, const SDLoc &dl, - SelectionDAG &DAG, std::vector &Ops) const; + unsigned MatchingIdx, bool MayFoldRegister, + const SDLoc &dl, SelectionDAG &DAG, + std::vector &Ops) const; /// Check if the total RegCount is greater than one. 
bool occupiesMultipleRegs() const { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 758b3a5fc526e7..3dcb3bdefbbd59 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -33,6 +34,7 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/TargetParser/Triple.h" #include using namespace llvm; @@ -5706,6 +5708,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, unsigned ResNo = 0; // ResNo - The result number of the next output. unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number. + const Triple &T = getTargetMachine().getTargetTriple(); for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { ConstraintOperands.emplace_back(std::move(CI)); AsmOperandInfo &OpInfo = ConstraintOperands.back(); @@ -5716,6 +5719,16 @@ TargetLowering::ParseConstraints(const DataLayout &DL, OpInfo.ConstraintVT = MVT::Other; + // Special treatment for all platforms (currently only x86) that can fold a + // register into a spill. This is used for the "rm" constraint, where we + // would vastly prefer to use 'r' over 'm', but can't because LLVM's + // architecture picks the most "conservative" constraint to ensure that (in + // the case of "rm") register pressure doesn't cause bad things to happen. + if (T.isX86() && !OpInfo.hasMatchingInput() && OpInfo.Codes.size() == 2 && + llvm::is_contained(OpInfo.Codes, "r") && + llvm::is_contained(OpInfo.Codes, "m")) + OpInfo.MayFoldRegister = true; + // Compute the value type for each operand. 
switch (OpInfo.Type) { case InlineAsm::isOutput: @@ -5995,7 +6008,12 @@ TargetLowering::ConstraintWeight /// 1) If there is an 'other' constraint, and if the operand is valid for /// that constraint, use it. This makes us take advantage of 'i' /// constraints when available. -/// 2) Otherwise, pick the most general constraint present. This prefers +/// 2) Special processing is done for the "rm" constraint. If specified, we +/// opt for the 'r' constraint, but mark the operand as being "foldable." +/// In the face of register exhaustion, the register allocator is free to +/// choose to use a stack slot. This only applies to the greedy and default +/// register allocators. FIXME: Support other allocators (fast?). +/// 3) Otherwise, pick the most general constraint present. This prefers /// 'm' over 'r', for example. /// TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences( @@ -6003,6 +6021,16 @@ TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences( ConstraintGroup Ret; Ret.reserve(OpInfo.Codes.size()); + + // If we can fold the register (i.e. it has an "rm" constraint), opt for the + // 'r' constraint, and allow the register allocator to spill if need be. + // Applies only to the greedy and default register allocators. 
+ if (OpInfo.MayFoldRegister) { + Ret.emplace_back(ConstraintPair("r", getConstraintType("r"))); + Ret.emplace_back(ConstraintPair("m", getConstraintType("m"))); + return Ret; + } + for (StringRef Code : OpInfo.Codes) { TargetLowering::ConstraintType CType = getConstraintType(Code); diff --git a/llvm/test/CodeGen/X86/asm-constraints-rm.ll b/llvm/test/CodeGen/X86/asm-constraints-rm.ll new file mode 100644 index 00000000000000..6031eb7b22e6d9 --- /dev/null +++ b/llvm/test/CodeGen/X86/asm-constraints-rm.ll @@ -0,0 +1,363 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter "^\t#" --version 4 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -regalloc=greedy < %s | FileCheck --check-prefix=GREEDY-X86_64 %s +; RUN: llc -mtriple=i386-unknown-linux-gnu -regalloc=greedy < %s | FileCheck --check-prefix=GREEDY-I386 %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -regalloc=basic < %s | FileCheck --check-prefix=BASIC-X86_64 %s +; RUN: llc -mtriple=i386-unknown-linux-gnu -regalloc=basic < %s | FileCheck --check-prefix=BASIC-I386 %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -regalloc=fast < %s | FileCheck --check-prefix=FAST-X86_64 %s +; RUN: llc -mtriple=i386-unknown-linux-gnu -regalloc=fast < %s | FileCheck --check-prefix=FAST-I386 %s + +; The Greedy register allocator should use registers when there isn't register +; pressure. 
+ +define dso_local i32 @test1(ptr nocapture noundef readonly %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test1: +; GREEDY-X86_64: #APP +; GREEDY-X86_64: # 'rm' input no pressure -> %eax %ecx +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test1: +; GREEDY-I386: #APP +; GREEDY-I386: # 'rm' input no pressure -> %ecx %edx +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test1: +; BASIC-X86_64: #APP +; BASIC-X86_64: # 'rm' input no pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test1: +; BASIC-I386: #APP +; BASIC-I386: # 'rm' input no pressure -> {{[0-9]+}}(%esp) (%esp) +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test1: +; FAST-X86_64: #APP +; FAST-X86_64: # 'rm' input no pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test1: +; FAST-I386: #APP +; FAST-I386: # 'rm' input no pressure -> {{[0-9]+}}(%esp) (%esp) +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + %i = load i32, ptr %b, align 4 + %d = getelementptr inbounds i8, ptr %ptr, i64 12 + %i1 = load i32, ptr %d, align 4 + tail call void asm sideeffect "# 'rm' input no pressure -> $0 $1", "rm,rm,~{dirflag},~{fpsr},~{flags}"(i32 %i, i32 %i1) #1 + %i2 = load i32, ptr %ptr, align 4 + ret i32 %i2 +} + +define dso_local i32 @test2(ptr nocapture noundef readonly %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test2: +; GREEDY-X86_64: #APP # 8-byte Folded Reload +; GREEDY-X86_64: # 'rm' input pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test2: +; GREEDY-I386: #APP # 8-byte Folded Reload +; GREEDY-I386: # 'rm' input pressure -> {{[0-9]+}}(%esp) (%esp) +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test2: +; BASIC-X86_64: #APP +; BASIC-X86_64: # 'rm' input pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test2: +; BASIC-I386: #APP +; BASIC-I386: # 'rm' input pressure -> 
{{[0-9]+}}(%esp) (%esp) +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test2: +; FAST-X86_64: #APP +; FAST-X86_64: # 'rm' input pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp) +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test2: +; FAST-I386: #APP +; FAST-I386: # 'rm' input pressure -> {{[0-9]+}}(%esp) {{[0-9]+}}(%esp) +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + %i = load i32, ptr %b, align 4 + %d = getelementptr inbounds i8, ptr %ptr, i64 12 + %i1 = load i32, ptr %d, align 4 + tail call void asm sideeffect "# 'rm' input pressure -> $0 $1", "rm,rm,~{ax},~{cx},~{dx},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{bx},~{bp},~{r14},~{r15},~{r12},~{r13},~{dirflag},~{fpsr},~{flags}"(i32 %i, i32 %i1) #1 + %i2 = load i32, ptr %ptr, align 4 + ret i32 %i2 +} + +define dso_local i32 @test3(ptr noundef %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test3: +; GREEDY-X86_64: #APP +; GREEDY-X86_64: # 'rm' output no pressure -> %eax %ecx +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test3: +; GREEDY-I386: #APP +; GREEDY-I386: # 'rm' output no pressure -> %ecx %edx +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test3: +; BASIC-X86_64: #APP +; BASIC-X86_64: # 'rm' output no pressure -> 4(%rdi) 12(%rdi) +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test3: +; BASIC-I386: #APP +; BASIC-I386: # 'rm' output no pressure -> 4(%eax) 12(%eax) +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test3: +; FAST-X86_64: #APP +; FAST-X86_64: # 'rm' output no pressure -> 4(%rdi) 12(%rdi) +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test3: +; FAST-I386: #APP +; FAST-I386: # 'rm' output no pressure -> 4(%eax) 12(%eax) +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + %d = getelementptr inbounds i8, ptr %ptr, i64 12 + tail call void asm sideeffect "# 'rm' output no pressure -> $0 $1", "=*rm,=*rm,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %b, ptr nonnull elementtype(i32) %d) #1 + %i = load i32, ptr %ptr, 
align 4 + ret i32 %i +} + +define dso_local i32 @test4(ptr noundef %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test4: +; GREEDY-X86_64: #APP +; GREEDY-X86_64: # tied 'rm' no pressure -> %eax %ecx %eax %ecx +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test4: +; GREEDY-I386: #APP +; GREEDY-I386: # tied 'rm' no pressure -> %ecx %edx %ecx %edx +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test4: +; BASIC-X86_64: #APP +; BASIC-X86_64: # tied 'rm' no pressure -> %eax %ecx %eax %ecx +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test4: +; BASIC-I386: #APP +; BASIC-I386: # tied 'rm' no pressure -> %eax %ecx %eax %ecx +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test4: +; FAST-X86_64: #APP +; FAST-X86_64: # tied 'rm' no pressure -> %ecx %eax %ecx %eax +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test4: +; FAST-I386: #APP +; FAST-I386: # tied 'rm' no pressure -> %edx %ecx %edx %ecx +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + %i = load i32, ptr %b, align 4 + %d = getelementptr inbounds i8, ptr %ptr, i64 12 + %i1 = load i32, ptr %d, align 4 + tail call void asm sideeffect "# tied 'rm' no pressure -> $0 $1 $2 $3", "=*rm,=*rm,0,1,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %b, ptr nonnull elementtype(i32) %d, i32 %i, i32 %i1) #1 + %i2 = load i32, ptr %ptr, align 4 + ret i32 %i2 +} + +define dso_local i32 @test5(ptr nocapture noundef readonly %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test5: +; GREEDY-X86_64: #APP +; GREEDY-X86_64: # 'rm' input -> %eax +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test5: +; GREEDY-I386: #APP +; GREEDY-I386: # 'rm' input -> %ecx +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test5: +; BASIC-X86_64: #APP +; BASIC-X86_64: # 'rm' input -> -{{[0-9]+}}(%rsp) +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test5: +; BASIC-I386: #APP +; BASIC-I386: # 'rm' input -> (%esp) +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test5: +; FAST-X86_64: #APP +; 
FAST-X86_64: # 'rm' input -> -{{[0-9]+}}(%rsp) +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test5: +; FAST-I386: #APP +; FAST-I386: # 'rm' input -> (%esp) +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + %i = load i32, ptr %b, align 4 + tail call void asm sideeffect "# 'rm' input -> $0", "rm,~{dirflag},~{fpsr},~{flags}"(i32 %i) #1 + %i1 = load i32, ptr %ptr, align 4 + ret i32 %i1 +} + +define dso_local i32 @test6(ptr nocapture noundef readonly %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test6: +; GREEDY-X86_64: #APP +; GREEDY-X86_64: # 'rm' and 'r' input -> %eax %ecx +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test6: +; GREEDY-I386: #APP +; GREEDY-I386: # 'rm' and 'r' input -> %ecx %edx +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test6: +; BASIC-X86_64: #APP +; BASIC-X86_64: # 'rm' and 'r' input -> -{{[0-9]+}}(%rsp) %ecx +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test6: +; BASIC-I386: #APP +; BASIC-I386: # 'rm' and 'r' input -> (%esp) %ecx +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test6: +; FAST-X86_64: #APP +; FAST-X86_64: # 'rm' and 'r' input -> -{{[0-9]+}}(%rsp) %eax +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test6: +; FAST-I386: #APP +; FAST-I386: # 'rm' and 'r' input -> (%esp) %ecx +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + %i = load i32, ptr %b, align 4 + %d = getelementptr inbounds i8, ptr %ptr, i64 12 + %i1 = load i32, ptr %d, align 4 + tail call void asm sideeffect "# 'rm' and 'r' input -> $0 $1", "rm,r,~{dirflag},~{fpsr},~{flags}"(i32 %i, i32 %i1) #1 + %i2 = load i32, ptr %ptr, align 4 + ret i32 %i2 +} + +define dso_local i32 @test7(ptr noundef %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test7: +; GREEDY-X86_64: #APP +; GREEDY-X86_64: # 'rm' output -> %eax +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test7: +; GREEDY-I386: #APP +; GREEDY-I386: # 'rm' output -> %ecx +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test7: +; 
BASIC-X86_64: #APP +; BASIC-X86_64: # 'rm' output -> 4(%rdi) +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test7: +; BASIC-I386: #APP +; BASIC-I386: # 'rm' output -> 4(%eax) +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test7: +; FAST-X86_64: #APP +; FAST-X86_64: # 'rm' output -> 4(%rdi) +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test7: +; FAST-I386: #APP +; FAST-I386: # 'rm' output -> 4(%eax) +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + tail call void asm sideeffect "# 'rm' output -> $0", "=*rm,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %b) #1 + %i = load i32, ptr %ptr, align 4 + ret i32 %i +} + +define dso_local i32 @test8(ptr noundef %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test8: +; GREEDY-X86_64: #APP +; GREEDY-X86_64: # 'rm' tied -> %eax +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test8: +; GREEDY-I386: #APP +; GREEDY-I386: # 'rm' tied -> %ecx +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test8: +; BASIC-X86_64: #APP +; BASIC-X86_64: # 'rm' tied -> %eax +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test8: +; BASIC-I386: #APP +; BASIC-I386: # 'rm' tied -> %eax +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test8: +; FAST-X86_64: #APP +; FAST-X86_64: # 'rm' tied -> %eax +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test8: +; FAST-I386: #APP +; FAST-I386: # 'rm' tied -> %ecx +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + %i = load i32, ptr %b, align 4 + tail call void asm sideeffect "# 'rm' tied -> $0", "=*rm,0,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %b, i32 %i) #1 + %i1 = load i32, ptr %ptr, align 4 + ret i32 %i1 +} + +define dso_local i32 @test9(ptr nocapture noundef %ptr) local_unnamed_addr #0 { +; GREEDY-X86_64-LABEL: test9: +; GREEDY-X86_64: #APP +; GREEDY-X86_64: # 'r' output == input location -> %eax +; GREEDY-X86_64: #NO_APP +; +; GREEDY-I386-LABEL: test9: +; GREEDY-I386: #APP +; GREEDY-I386: # 'r' output == input 
location -> %ecx +; GREEDY-I386: #NO_APP +; +; BASIC-X86_64-LABEL: test9: +; BASIC-X86_64: #APP +; BASIC-X86_64: # 'r' output == input location -> %eax +; BASIC-X86_64: #NO_APP +; +; BASIC-I386-LABEL: test9: +; BASIC-I386: #APP +; BASIC-I386: # 'r' output == input location -> %eax +; BASIC-I386: #NO_APP +; +; FAST-X86_64-LABEL: test9: +; FAST-X86_64: #APP +; FAST-X86_64: # 'r' output == input location -> %eax +; FAST-X86_64: #NO_APP +; +; FAST-I386-LABEL: test9: +; FAST-I386: #APP +; FAST-I386: # 'r' output == input location -> %ecx +; FAST-I386: #NO_APP +entry: + %b = getelementptr inbounds i8, ptr %ptr, i64 4 + %i = load i32, ptr %b, align 4 + %i1 = tail call i32 asm sideeffect "# 'r' output == input location -> $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %i) #1 + store i32 %i1, ptr %b, align 4 + %i2 = load i32, ptr %ptr, align 4 + ret i32 %i2 +} + +attributes #0 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/X86/inlineasm-sched-bug.ll b/llvm/test/CodeGen/X86/inlineasm-sched-bug.ll index be4d1c29332f77..a322bd3003a58b 100644 --- a/llvm/test/CodeGen/X86/inlineasm-sched-bug.ll +++ b/llvm/test/CodeGen/X86/inlineasm-sched-bug.ll @@ -6,16 +6,13 @@ define i32 @foo(i32 %treemap) nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: negl %ecx ; CHECK-NEXT: andl %eax, %ecx -; CHECK-NEXT: movl %ecx, (%esp) ; CHECK-NEXT: #APP -; CHECK-NEXT: bsfl (%esp), %eax +; CHECK-NEXT: bsfl %ecx, %eax ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: popl %ecx ; CHECK-NEXT: retl entry: %sub = sub i32 0, %treemap