diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 628b8f5040d99..7ba90f6c1bea0 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1454,6 +1454,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
     unsigned StackAdjust = 0;
     unsigned StackSize = 0;
     unsigned NumDefCFAOffsets = 0;
+    int MinAbsOffset = std::numeric_limits<int>::max();
 
     for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
       const MCCFIInstruction &Inst = Instrs[i];
@@ -1482,6 +1483,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         memset(SavedRegs, 0, sizeof(SavedRegs));
         StackAdjust = 0;
         SavedRegIdx = 0;
+        MinAbsOffset = std::numeric_limits<int>::max();
         InstrOffset += MoveInstrSize;
         break;
       }
@@ -1525,6 +1527,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
         SavedRegs[SavedRegIdx++] = Reg;
         StackAdjust += OffsetSize;
+        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
         InstrOffset += PushInstrSize(Reg);
         break;
       }
@@ -1538,6 +1541,11 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         // Offset was too big for a compact unwind encoding.
         return CU::UNWIND_MODE_DWARF;
 
+      // We don't attempt to track a real StackAdjust, so if the saved registers
+      // aren't adjacent to rbp we can't cope.
+      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
+        return CU::UNWIND_MODE_DWARF;
+
       // Get the encoding of the saved registers when we have a frame pointer.
       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index cf8d5d6c5b773..8a6b9e75efe03 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3068,6 +3068,7 @@ bool X86FastISel::fastLowerArguments() {
         Arg.hasAttribute(Attribute::InReg) ||
         Arg.hasAttribute(Attribute::StructRet) ||
         Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftAsync) ||
         Arg.hasAttribute(Attribute::SwiftError) ||
         Arg.hasAttribute(Attribute::Nest))
       return false;
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 3c093661680a9..8ddcd52795dd7 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -409,7 +409,12 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
     return 0;
 
   PI = MBB.erase(PI);
-  if (PI != MBB.end() && PI->isCFIInstruction()) PI = MBB.erase(PI);
+  if (PI != MBB.end() && PI->isCFIInstruction()) {
+    auto CIs = MBB.getParent()->getFrameInstructions();
+    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
+    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset)
+      PI = MBB.erase(PI);
+  }
   if (!doMergeWithPrevious)
     MBBI = skipDebugInstructionsForward(PI, MBB.end());
 
@@ -1356,6 +1361,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       STI.getTargetLowering()->hasStackProbeSymbol(MF);
   unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
 
+  if (HasFP && X86FI->hasSwiftAsyncContext()) {
+    BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8),
+            MachineFramePtr)
+        .addUse(MachineFramePtr)
+        .addImm(60)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
   // Re-align the stack on 64-bit if the x86-interrupt calling convention is
   // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
   // stack alignment.
@@ -1470,11 +1483,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
 
     if (!IsWin64Prologue && !IsFunclet) {
       // Update EBP with the new base value.
-      BuildMI(MBB, MBBI, DL,
-              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
-              FramePtr)
-          .addReg(StackPtr)
-          .setMIFlag(MachineInstr::FrameSetup);
+      if (!X86FI->hasSwiftAsyncContext()) {
+        BuildMI(MBB, MBBI, DL,
+                TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
+                FramePtr)
+            .addReg(StackPtr)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        // Before we update the live frame pointer we have to ensure there's a
+        // valid (or null) asynchronous context in its slot just before FP in
+        // the frame record, so store it now.
+        const auto &Attrs = MF.getFunction().getAttributes();
+
+        if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
+          // We have an initial context in r14, store it just before the frame
+          // pointer.
+          BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+              .addReg(X86::R14)
+              .setMIFlag(MachineInstr::FrameSetup);
+        } else {
+          // No initial context, store null so that there's no pointer that
+          // could be misused.
+          BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
+              .addImm(0)
+              .setMIFlag(MachineInstr::FrameSetup);
+        }
+        BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
+            .addUse(X86::RSP)
+            .addImm(1)
+            .addUse(X86::NoRegister)
+            .addImm(8)
+            .addUse(X86::NoRegister)
+            .setMIFlag(MachineInstr::FrameSetup);
+        BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
+            .addUse(X86::RSP)
+            .addImm(8)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
 
       if (NeedsDwarfCFI) {
         // Mark effective beginning of when frame pointer becomes valid.
@@ -1979,10 +2024,26 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   // AfterPop is the position to insert .cfi_restore.
   MachineBasicBlock::iterator AfterPop = MBBI;
   if (HasFP) {
+    if (X86FI->hasSwiftAsyncContext()) {
+      // Discard the context.
+      int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
+      emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
+    }
     // Pop EBP.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
             MachineFramePtr)
         .setMIFlag(MachineInstr::FrameDestroy);
+
+    // We need to reset FP to its untagged state on return. Bit 60 is currently
+    // used to show the presence of an extended frame.
+    if (X86FI->hasSwiftAsyncContext()) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
+              MachineFramePtr)
+          .addUse(MachineFramePtr)
+          .addImm(60)
+          .setMIFlag(MachineInstr::FrameDestroy);
+    }
+
     if (NeedsDwarfCFI) {
       unsigned DwarfStackPtr =
           TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
@@ -2007,7 +2068,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
       if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
-          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)))
+          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
         break;
       FirstCSPop = PI;
     }
 
@@ -2039,6 +2102,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     uint64_t LEAAmount =
         IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
 
+    if (X86FI->hasSwiftAsyncContext())
+      LEAAmount -= 16;
+
     // There are only two legal forms of epilogue:
     // - add SEHAllocationSize, %rsp
     // - lea SEHAllocationSize(%FramePtr), %rsp
@@ -2367,6 +2433,14 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
     SpillSlotOffset -= SlotSize;
     MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
 
+    // The async context lives directly before the frame pointer, and we
+    // allocate a second slot to preserve stack alignment.
+    if (X86FI->hasSwiftAsyncContext()) {
+      SpillSlotOffset -= SlotSize;
+      MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+      SpillSlotOffset -= SlotSize;
+    }
+
     // Since emitPrologue and emitEpilogue will handle spilling and restoring of
     // the frame register, we can delete it from CSI list and not have to worry
     // about avoiding it later.
@@ -3267,7 +3341,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
   assert(MBB.getParent() && "Block is not attached to a function!");
   const MachineFunction &MF = *MBB.getParent();
-  return !TRI->hasStackRealignment(MF) || !MBB.isLiveIn(X86::EFLAGS);
+  if (!MBB.isLiveIn(X86::EFLAGS))
+    return true;
+
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
 }
 
 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
@@ -3280,6 +3358,12 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
   if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
     return false;
 
+  // Swift async context epilogue has a BTR instruction that clobbers parts of
+  // EFLAGS.
+  const MachineFunction &MF = *MBB.getParent();
+  if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
+    return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
+
   if (canUseLEAForSPInEpilogue(*MBB.getParent()))
     return true;
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 66df93319b6a7..4057e7817fcce 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3747,6 +3747,20 @@ SDValue X86TargetLowering::LowerFormalArguments(
   }
 
   for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
+    if (Ins[I].Flags.isSwiftAsync()) {
+      auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      if (Subtarget.is64Bit())
+        X86FI->setHasSwiftAsyncContext(true);
+      else {
+        int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
+        X86FI->setSwiftAsyncContextFrameIdx(FI);
+        SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
+                                  DAG.getFrameIndex(FI, MVT::i32),
+                                  MachinePointerInfo::getFixedStack(MF, FI));
+        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
+      }
+    }
+
     // Swift calling convention does not require we copy the sret argument
     // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
     if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
@@ -25856,7 +25870,27 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     }
     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   }
-
+  case Intrinsic::swift_async_context_addr: {
+    auto &MF = DAG.getMachineFunction();
+    auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+    if (Subtarget.is64Bit()) {
+      MF.getFrameInfo().setFrameAddressIsTaken(true);
+      X86FI->setHasSwiftAsyncContext(true);
+      return SDValue(
+          DAG.getMachineNode(
+              X86::SUB64ri8, dl, MVT::i64,
+              DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64),
+              DAG.getTargetConstant(8, dl, MVT::i32)),
+          0);
+    } else {
+      // 32-bit so no special extended frame, create or reuse an existing stack
+      // slot.
+      if (!X86FI->getSwiftAsyncContextFrameIdx())
+        X86FI->setSwiftAsyncContextFrameIdx(
+            MF.getFrameInfo().CreateStackObject(4, Align(4), false));
+      return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
+    }
+  }
   case Intrinsic::x86_avx512_vp2intersect_q_512:
   case Intrinsic::x86_avx512_vp2intersect_q_256:
   case Intrinsic::x86_avx512_vp2intersect_q_128:
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index ecb86bb9e8c1f..46d2e2a66fd62 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -108,6 +108,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// True if this function has any preallocated calls.
   bool HasPreallocatedCall = false;
 
+  /// Whether this function has an extended frame record [Ctx, RBP, Return
+  /// addr]. If so, bit 60 of the in-memory frame pointer will be 1 to enable
+  /// other tools to detect the extended record.
+  bool HasSwiftAsyncContext = false;
+
+  Optional<int> SwiftAsyncContextFrameIdx;
+
   ValueMap<const Value *, size_t> PreallocatedIds;
   SmallVector<size_t, 0> PreallocatedStackSizes;
   SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
@@ -197,6 +204,14 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   bool hasPreallocatedCall() const { return HasPreallocatedCall; }
   void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
 
+  bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
+  void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
+
+  Optional<int> getSwiftAsyncContextFrameIdx() const {
+    return SwiftAsyncContextFrameIdx;
+  }
+  void setSwiftAsyncContextFrameIdx(int v) { SwiftAsyncContextFrameIdx = v; }
+
   size_t getPreallocatedIdForCallSite(const Value *CS) {
     auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
     if (Insert.second) {
diff --git a/llvm/test/CodeGen/X86/swift-async-reg.ll b/llvm/test/CodeGen/X86/swift-async-reg.ll
new file mode 100644
index 0000000000000..59b41cc19a51d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/swift-async-reg.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - -fast-isel | FileCheck %s
+
+define i8* @argument(i8* swiftasync %in) {
+; CHECK-LABEL: argument:
+; CHECK: movq %r14, %rax
+
+  ret i8* %in
+}
+
+define void @call(i8* %in) {
+; CHECK-LABEL: call:
+; CHECK: movq %rdi, %r14
+
+  call i8* @argument(i8* swiftasync %in)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/swift-async.ll b/llvm/test/CodeGen/X86/swift-async.ll
new file mode 100644
index 0000000000000..9716fe7364896
--- /dev/null
+++ b/llvm/test/CodeGen/X86/swift-async.ll
@@ -0,0 +1,111 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=i686-apple-darwin %s -o - | FileCheck %s --check-prefix=CHECK-32
+
+
+define void @simple(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: simple:
+; CHECK: btsq $60, %rbp
+; CHECK: pushq %rbp
+; CHECK: pushq %r14
+; CHECK: leaq 8(%rsp), %rbp
+; CHECK: pushq
+; [...]
+
+; CHECK: addq $16, %rsp
+; CHECK: popq %rbp
+; CHECK: btrq $60, %rbp
+; CHECK: retq
+
+; CHECK-32-LABEL: simple:
+; CHECK-32: movl 8(%ebp), [[TMP:%.*]]
+; CHECK-32: movl [[TMP]], {{.*}}(%ebp)
+
+  ret void
+}
+
+define void @more_csrs(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: more_csrs:
+; CHECK: btsq $60, %rbp
+; CHECK: pushq %rbp
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: pushq %r14
+; CHECK: leaq 8(%rsp), %rbp
+; CHECK: subq $8, %rsp
+; CHECK: pushq %r15
+; CHECK: .cfi_offset %r15, -40
+
+; [...]
+
+; CHECK: popq %r15
+; CHECK: addq $16, %rsp
+; CHECK: popq %rbp
+; CHECK: btrq $60, %rbp
+; CHECK: retq
+  call void asm sideeffect "", "~{r15}"()
+  ret void
+}
+
+define void @locals(i8* swiftasync %ctx) "frame-pointer"="all" {
+; CHECK-LABEL: locals:
+; CHECK: btsq $60, %rbp
+; CHECK: pushq %rbp
+; CHECK: .cfi_def_cfa_offset 16
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: pushq %r14
+; CHECK: leaq 8(%rsp), %rbp
+; CHECK: .cfi_def_cfa_register %rbp
+; CHECK: subq $56, %rsp
+
+; CHECK: leaq -48(%rbp), %rdi
+; CHECK: callq _bar
+
+; CHECK: addq $48, %rsp
+; CHECK: addq $16, %rsp
+; CHECK: popq %rbp
+; CHECK: btrq $60, %rbp
+; CHECK: retq
+
+  %var = alloca i32, i32 10
+  call void @bar(i32* %var)
+  ret void
+}
+
+define void @use_input_context(i8* swiftasync %ctx, i8** %ptr) "frame-pointer"="all" {
+; CHECK-LABEL: use_input_context:
+; CHECK: movq %r14, (%rdi)
+
+  store i8* %ctx, i8** %ptr
+  ret void
+}
+
+define i8** @context_in_func() "frame-pointer"="non-leaf" {
+; CHECK-LABEL: context_in_func:
+; CHECK: leaq -8(%rbp), %rax
+
+; CHECK-32-LABEL: context_in_func
+; CHECK-32: movl %esp, %eax
+
+  %ptr = call i8** @llvm.swift.async.context.addr()
+  ret i8** %ptr
+}
+
+define void @write_frame_context(i8* swiftasync %ctx, i8* %newctx) "frame-pointer"="non-leaf" {
+; CHECK-LABEL: write_frame_context:
+; CHECK: movq %rbp, [[TMP:%.*]]
+; CHECK: subq $8, [[TMP]]
+; CHECK: movq %rdi, ([[TMP]])
+
+  %ptr = call i8** @llvm.swift.async.context.addr()
+  store i8* %newctx, i8** %ptr
+  ret void
+}
+
+define void @simple_fp_elim(i8* swiftasync %ctx) "frame-pointer"="non-leaf" {
+; CHECK-LABEL: simple_fp_elim:
+; CHECK-NOT: btsq
+
+  ret void
+}
+
+declare void @bar(i32*)
+declare i8** @llvm.swift.async.context.addr()
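
Note for readers less familiar with the extended frame record this patch emits: the prologue stores the async context in the slot immediately below the saved RBP and tags the live frame pointer by setting bit 60 (BTS), and the epilogue untags it (BTR) before returning. The sketch below shows how an out-of-process tool walking frame pointers might detect and read that slot under those assumptions; it is not part of this change, and the `readSwiftAsyncContext` function and `ReadMem` callback are hypothetical names used only for illustration.

```cpp
// Minimal sketch, assuming the extended frame layout [Ctx, RBP, Return addr]
// and the bit-60 tag described in this patch. Not part of the LLVM change.
#include <cstdint>
#include <functional>
#include <optional>

// Callback that reads 8 bytes from the target process; returns false on failure.
using ReadMem = std::function<bool(uint64_t Addr, uint64_t &Value)>;

std::optional<uint64_t> readSwiftAsyncContext(uint64_t FP, const ReadMem &Read) {
  constexpr uint64_t ExtendedFrameBit = 1ULL << 60; // set by BTS $60 in the prologue

  // An untagged frame pointer means an ordinary [RBP, return addr] record.
  if (!(FP & ExtendedFrameBit))
    return std::nullopt;

  // Untag before dereferencing, then load the context slot just below the
  // saved RBP (the same address the swift_async_context_addr lowering uses).
  uint64_t RealFP = FP & ~ExtendedFrameBit;
  uint64_t Ctx = 0;
  if (!Read(RealFP - 8, Ctx))
    return std::nullopt;
  return Ctx;
}
```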