diff --git a/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp b/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
index 0b7b406e2ba8e..541789fc9d339 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
@@ -40,7 +40,7 @@ void case_calls_dll_import() NO_TAIL {
 // CHECK: .seh_endprologue
 // CHECK: .Limpcall{{[0-9]+}}:
 // CHECK-NEXT: rex64
-// CHECK-NEXT: call __imp_some_dll_import
+// CHECK-NEXT: call qword ptr [rip + __imp_some_dll_import]
 // CHECK-NEXT: nop dword ptr {{\[.*\]}}
 // CHECK-NEXT: nop
 // CHECK-NEXT: .seh_startepilogue
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 4a9b824b0db14..f8582cd5cc750 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -63,6 +63,8 @@ class X86ExpandPseudo : public MachineFunctionPass {
                            MachineBasicBlock::iterator MBBI);
   void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
+  void expandCALL_ImpCall(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI);
   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
   bool expandMBB(MachineBasicBlock &MBB);
@@ -254,6 +256,20 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                  std::next(RtCall->getIterator()));
 }
 
+void X86ExpandPseudo::expandCALL_ImpCall(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MBBI) {
+  // Expand CALL64_ImpCall pseudo to a RIP-relative CALL64m through the
+  // imported function's address.
+  MachineInstr &MI = *MBBI;
+  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64m))
+      .addReg(X86::RIP)
+      .addImm(1)
+      .addReg(0)
+      .addGlobalAddress(MI.getOperand(0).getGlobal(), 0,
+                        MI.getOperand(0).getTargetFlags())
+      .addReg(0);
+  MI.eraseFromParent();
+}
+
 /// If \p MBBI is a pseudo instruction, this method expands
 /// it to the corresponding (sequence of) actual instruction(s).
 /// \returns true if \p MBBI has been expanded.
@@ -886,6 +902,9 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case X86::CALL64r_ImpCall:
     MI.setDesc(TII->get(X86::CALL64r));
     return true;
+  case X86::CALL64_ImpCall:
+    expandCALL_ImpCall(MBB, MBBI);
+    return true;
   case X86::ADD32mi_ND:
   case X86::ADD64mi32_ND:
   case X86::SUB32mi_ND:
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index f007886115d35..5f8830469e893 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3317,11 +3317,6 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   if (Flag.isSwiftError() || Flag.isPreallocated())
     return false;
 
-  // Can't handle import call optimization.
-  if (Is64Bit &&
-      MF->getFunction().getParent()->getModuleFlag("import-call-optimization"))
-    return false;
-
   SmallVector OutVTs;
   SmallVector ArgTys;
   SmallVector ArgRegs;
@@ -3563,6 +3558,17 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   if (CalleeOp) {
     // Register-indirect call.
     unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
+
+    const Module *M = FuncInfo.MF->getFunction().getParent();
+    if (CalleeOp != X86::RAX && Is64Bit &&
+        M->getModuleFlag("import-call-optimization")) {
+      // Import call optimization requires all indirect calls to go through RAX.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+              TII.get(TargetOpcode::COPY), X86::RAX)
+          .addReg(CalleeOp);
+      CalleeOp = X86::RAX;
+    }
+
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc))
               .addReg(CalleeOp);
   } else {
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 0fd44b74fd449..cbcc958df2d52 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1313,9 +1313,6 @@ def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
 def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
           (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;
 
-def : Pat<(X86imp_call (i64 tglobaladdr:$dst)),
-          (CALL64pcrel32 tglobaladdr:$dst)>;
-
 // Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
 // can never use callee-saved registers. That is the purpose of the GR64_TC
 // register classes.
@@ -1350,25 +1347,25 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off),
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
           (TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>,
-      Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>;
+      Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;
 
 def : Pat<(X86tcret GR64_TCW64:$dst, timm:$off),
           (TCRETURN_WIN64ri GR64_TCW64:$dst, timm:$off)>,
-      Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>;
+      Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
           (TCRETURNri64_ImpCall ptr_rc_tailcall:$dst, timm:$off)>,
-      Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationEnabled]>;
+      Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationEnabledAndCFGuardDisabled]>;
 
 // Don't fold loads into X86tcret requiring more than 6 regs.
 // There wouldn't be enough scratch registers for base+index.
 def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
           (TCRETURNmi64 addr:$dst, timm:$off)>,
-      Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls]>;
+      Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;
 
 def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
           (TCRETURN_WINmi64 addr:$dst, timm:$off)>,
-      Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls]>;
+      Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
           (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>,
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index e8527cd73abb5..6a0cbfb52dce4 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -331,11 +331,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
                       Requires<[In64BitMode]>;
     def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
                           "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
-                        Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationDisabled]>;
+                        Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationDisabledOrCFGuardEnabled]>;
     def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
                           "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
                         Requires<[In64BitMode,FavorMemIndirectCall,
-                                  NotUseIndirectThunkCalls]>;
+                                  NotUseIndirectThunkCalls,ImportCallOptimizationDisabledOrCFGuardEnabled]>;
 
     // Non-tracking calls for IBT, use with caution.
     let isCodeGenOnly = 1 in {
@@ -433,9 +433,13 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
       PseudoI<(outs), (ins i64imm:$rvfunc, i64i32imm_brtarget:$dst), []>,
       Requires<[In64BitMode]>;
 
+  def CALL64_ImpCall :
+      PseudoI<(outs), (ins i64imm:$dst), [(X86imp_call tglobaladdr:$dst)]>,
+      Requires<[In64BitMode]>;
+
   def CALL64r_ImpCall :
       PseudoI<(outs), (ins GR64_A:$dst), [(X86call GR64_A:$dst)]>,
-      Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationEnabled]>;
+      Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationEnabledAndCFGuardDisabled]>;
 }
 
 // Conditional tail calls are similar to the above, but they are branches
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index c20bb05018b4d..f99cd37376fa5 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -234,8 +234,10 @@ let RecomputePerFunction = 1 in {
                                              "shouldOptForSize(MF)">;
   def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || "
                                         "!Subtarget->hasSSE41()">;
-  def ImportCallOptimizationEnabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">;
-  def ImportCallOptimizationDisabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">;
+  def ImportCallOptimizationEnabledAndCFGuardDisabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\") && "
+                                                                  "!MF->getFunction().getParent()->getModuleFlag(\"cfguard\")">;
+  def ImportCallOptimizationDisabledOrCFGuardEnabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\") || "
+                                                                 "MF->getFunction().getParent()->getModuleFlag(\"cfguard\")">;
   def IsWin64CCFunc : Predicate<"Subtarget->isCallingConvWin64(MF->getFunction().getCallingConv())">;
   def IsNotWin64CCFunc : Predicate<"!Subtarget->isCallingConvWin64(MF->getFunction().getCallingConv())">;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 481a9be8374ab..21e3589a6d8dd 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2346,7 +2346,8 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
   case X86::TAILJMPr64_REX: {
     if (EnableImportCallOptimization) {
-      assert(MI->getOperand(0).getReg() == X86::RAX &&
+      assert((MI->getOperand(0).getReg() == X86::RAX ||
+              MF->getFunction().getParent()->getModuleFlag("cfguard")) &&
              "Indirect tail calls with impcall enabled must go through RAX (as "
              "enforced by TCRETURNImpCallri64)");
       emitLabelAndRecordForImportCallOptimization(
@@ -2547,28 +2548,18 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
       EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
 
-    if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) {
-      emitLabelAndRecordForImportCallOptimization(
-          IMAGE_RETPOLINE_AMD64_IMPORT_CALL);
-
-      MCInst TmpInst;
-      MCInstLowering.Lower(MI, TmpInst);
-
-      // For Import Call Optimization to work, we need a the call instruction
-      // with a rex prefix, and a 5-byte nop after the call instruction.
-      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
-      emitCallInstruction(TmpInst);
-      emitNop(*OutStreamer, 5, Subtarget);
-      maybeEmitNopAfterCallForWindowsEH(MI);
-      return;
-    }
+    assert((!EnableImportCallOptimization ||
+            !isImportedFunction(MI->getOperand(0))) &&
+           "Calls to imported functions with import call optimization "
+           "should be lowered to CALL64m via CALL64_ImpCall");
     break;
 
   case X86::CALL64r:
     if (EnableImportCallOptimization) {
       assert(MI->getOperand(0).getReg() == X86::RAX &&
-             "Indirect calls with impcall enabled must go through RAX (as "
+             "Indirect calls with import call optimization enabled must go "
+             "through RAX (as "
              "enforced by CALL64r_ImpCall)");
       emitLabelAndRecordForImportCallOptimization(
@@ -2586,9 +2577,25 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
     break;
 
   case X86::CALL64m:
-    if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
-      emitLabelAndRecordForImportCallOptimization(
-          IMAGE_RETPOLINE_AMD64_CFG_CALL);
+    if (EnableImportCallOptimization) {
+      if (isCallToCFGuardFunction(MI)) {
+        emitLabelAndRecordForImportCallOptimization(
+            IMAGE_RETPOLINE_AMD64_CFG_CALL);
+      } else if (isImportedFunction(MI->getOperand(3))) {
+        emitLabelAndRecordForImportCallOptimization(
+            IMAGE_RETPOLINE_AMD64_IMPORT_CALL);
+
+        MCInst TmpInst;
+        MCInstLowering.Lower(MI, TmpInst);
+
+        // For Import Call Optimization to work, we need the call instruction
+        // with a rex prefix, and a 5-byte nop after the call instruction.
+        EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
+        emitCallInstruction(TmpInst);
+        emitNop(*OutStreamer, 5, Subtarget);
+        maybeEmitNopAfterCallForWindowsEH(MI);
+        return;
+      }
     }
     break;
diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll b/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll
index 12be910d68ee9..6cda56ea98b0e 100644
--- a/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll
+++ b/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll
@@ -1,13 +1,48 @@
-; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s
+
+; FIXME: FastISel is emitting calls to the CFG dispatch function as indirect
+; calls via registers.
+; Normally this would work, but it is not the pattern that Import Call
+; Optimization expects.
+
+@global_func_ptr = external dso_local local_unnamed_addr global ptr, align 8
+
+declare dllimport void @a() local_unnamed_addr
+declare dllimport void @b() local_unnamed_addr
 
 define dso_local void @normal_call(ptr noundef readonly %func_ptr) local_unnamed_addr section "nc_sect" {
 entry:
+  call void @a()
+  call void @a()
   call void %func_ptr()
+  %0 = load ptr, ptr @global_func_ptr, align 8
+  call void %0()
   ret void
 }
 ; CHECK-LABEL: normal_call:
-; CHECK: .Limpcall0:
+; CHECK: movq %rcx, %rsi
+; CHECK-NEXT: .Limpcall0:
+; CHECK-NEXT: rex64
+; CHECK-NEXT: callq *__imp_a(%rip)
+; CHECK-NEXT: nopl 8(%rax,%rax)
+; CHECK-NEXT: .Limpcall1:
+; CHECK-NEXT: rex64
+; CHECK-NEXT: callq *__imp_a(%rip)
+; CHECK-NEXT: nopl 8(%rax,%rax)
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: .Limpcall2:
+; CHECK-NEXT: callq *__guard_dispatch_icall_fptr(%rip)
+; CHECK-NEXT: movq global_func_ptr(%rip), %rax
+; CHECK-NEXT: .Limpcall3:
 ; CHECK-NEXT: callq *__guard_dispatch_icall_fptr(%rip)
+; CHECK-NEXT: nop
+
+define dso_local void @tail_call() local_unnamed_addr section "tc_sect" {
+entry:
+  tail call void @b()
+  ret void
+}
+; CHECK-LABEL: tail_call:
+; CHECK: .Limpcall4:
+; CHECK-NEXT: jmp __imp_b
 
 define dso_local void @tail_call_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
 entry:
@@ -15,19 +50,41 @@ entry:
   ret void
 }
 ; CHECK-LABEL: tail_call_fp:
-; CHECK: .Limpcall1:
+; CHECK: movq %rcx, %rax
+; CHECK-NEXT: .Limpcall5:
 ; CHECK-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip)
 
+define dso_local void @tail_call_global_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
+entry:
+  %0 = load ptr, ptr @global_func_ptr, align 8
+  tail call void %0()
+  ret void
+}
+; CHECK-LABEL: tail_call_global_fp:
+; CHECK: movq global_func_ptr(%rip), %rax
+; CHECK-NEXT: .Limpcall6:
+; CHECK-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip)
+
 ; CHECK-LABEL .section .retplne,"yi"
 ; CHECK-NEXT .asciz "RetpolineV1"
-; CHECK-NEXT .long 16
-; CHECK-NEXT .secnum tc_sect
-; CHECK-NEXT .long 10
-; CHECK-NEXT .secoffset .Limpcall1
-; CHECK-NEXT .long 16
+; CHECK-NEXT .long 40
 ; CHECK-NEXT .secnum nc_sect
-; CHECK-NEXT .long 9
+; CHECK-NEXT .long 3
 ; CHECK-NEXT .secoffset .Limpcall0
+; CHECK-NEXT .long 3
+; CHECK-NEXT .secoffset .Limpcall1
+; CHECK-NEXT .long 9
+; CHECK-NEXT .secoffset .Limpcall2
+; CHECK-NEXT .long 9
+; CHECK-NEXT .secoffset .Limpcall3
+; CHECK-NEXT .long 32
+; CHECK-NEXT .secnum tc_sect
+; CHECK-NEXT .long 2
+; CHECK-NEXT .secoffset .Limpcall4
+; CHECK-NEXT .long 4
+; CHECK-NEXT .secoffset .Limpcall5
+; CHECK-NEXT .long 4
+; CHECK-NEXT .secoffset .Limpcall6
 
 !llvm.module.flags = !{!0, !1}
 !0 = !{i32 1, !"import-call-optimization", i32 1}
diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll b/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll
index fe22b251685e6..c2389a10415d1 100644
--- a/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll
+++ b/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s
+; RUN: llc --fast-isel -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s
 
 ; CHECK-LABEL: uses_rax:
 ; CHECK: .Limpcall0:
diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization.ll b/llvm/test/CodeGen/X86/win-import-call-optimization.ll
index cc7e1a9f81e34..eba0dab7e1559 100644
--- a/llvm/test/CodeGen/X86/win-import-call-optimization.ll
+++ b/llvm/test/CodeGen/X86/win-import-call-optimization.ll
@@ -1,27 +1,36 @@
-; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
-; RUN: llc --fast-isel -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
-; RUN: llc --global-isel --global-isel-abort=2 -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s
+; RUN: llc --fast-isel -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s
+; RUN: llc --global-isel --global-isel-abort=2 -mtriple=x86_64-pc-windows-msvc -o - %s | \
+; RUN:   FileCheck %s
+
+@global_func_ptr = external dso_local local_unnamed_addr global ptr, align 8
 
 define dso_local void @normal_call(ptr noundef readonly %func_ptr) local_unnamed_addr section "nc_sect" {
 entry:
   call void @a()
   call void @a()
   call void %func_ptr()
+  %0 = load ptr, ptr @global_func_ptr, align 8
+  call void %0()
   ret void
 }
 ; CHECK-LABEL: normal_call:
 ; CHECK: .Limpcall0:
 ; CHECK-NEXT: rex64
-; CHECK-NEXT: callq __imp_a
+; CHECK-NEXT: callq *__imp_a(%rip)
 ; CHECK-NEXT: nopl 8(%rax,%rax)
 ; CHECK-NEXT: .Limpcall1:
 ; CHECK-NEXT: rex64
-; CHECK-NEXT: callq __imp_a
+; CHECK-NEXT: callq *__imp_a(%rip)
 ; CHECK-NEXT: nopl 8(%rax,%rax)
 ; CHECK-NEXT: movq %rsi, %rax
 ; CHECK-NEXT: .Limpcall2:
 ; CHECK-NEXT: callq *%rax
 ; CHECK-NEXT: nopl (%rax)
+; CHECK-NEXT: movq global_func_ptr(%rip), %rax
+; CHECK-NEXT: .Limpcall3:
+; CHECK-NEXT: callq *%rax
+; CHECK-NEXT: nopl (%rax)
 ; CHECK-NEXT: nop
 
 define dso_local void @tail_call() local_unnamed_addr section "tc_sect" {
@@ -30,7 +39,7 @@ entry:
   ret void
 }
 ; CHECK-LABEL: tail_call:
-; CHECK: .Limpcall3:
+; CHECK: .Limpcall4:
 ; CHECK-NEXT: jmp __imp_b
 
 define dso_local void @tail_call_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
@@ -40,7 +49,18 @@ entry:
   ret void
 }
 ; CHECK-LABEL: tail_call_fp:
 ; CHECK: movq %rcx, %rax
-; CHECK-NEXT: .Limpcall4:
+; CHECK-NEXT: .Limpcall5:
+; CHECK-NEXT: rex64 jmpq *%rax
+
+define dso_local void @tail_call_global_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
+entry:
+  %0 = load ptr, ptr @global_func_ptr, align 8
+  tail call void %0()
+  ret void
+}
+; CHECK-LABEL: tail_call_global_fp:
+; CHECK: movq global_func_ptr(%rip), %rax
+; CHECK-NEXT: .Limpcall6:
 ; CHECK-NEXT: rex64 jmpq *%rax
 
 declare dllimport void @a() local_unnamed_addr
@@ -48,13 +68,7 @@ declare dllimport void @b() local_unnamed_addr
 
 ; CHECK-LABEL .section .retplne,"yi"
 ; CHECK-NEXT .asciz "RetpolineV1"
-; CHECK-NEXT .long 24
-; CHECK-NEXT .secnum tc_sect
-; CHECK-NEXT .long 3
-; CHECK-NEXT .secoffset .Limpcall3
-; CHECK-NEXT .long 5
-; CHECK-NEXT .secoffset .Limpcall4
-; CHECK-NEXT .long 32
+; CHECK-NEXT .long 40
 ; CHECK-NEXT .secnum nc_sect
 ; CHECK-NEXT .long 3
 ; CHECK-NEXT .secoffset .Limpcall0
@@ -62,6 +76,16 @@ declare dllimport void @b() local_unnamed_addr
 ; CHECK-NEXT .secoffset .Limpcall1
 ; CHECK-NEXT .long 5
 ; CHECK-NEXT .secoffset .Limpcall2
+; CHECK-NEXT .long 5
+; CHECK-NEXT .secoffset .Limpcall3
+; CHECK-NEXT .long 32
+; CHECK-NEXT .secnum tc_sect
+; CHECK-NEXT .long 2
+; CHECK-NEXT .secoffset .Limpcall4
+; CHECK-NEXT .long 4
+; CHECK-NEXT .secoffset .Limpcall5
+; CHECK-NEXT .long 4
+; CHECK-NEXT .secoffset .Limpcall6
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"import-call-optimization", i32 1}
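
Usage note (not part of the patch): a minimal sketch of the end-to-end behavior the new CALL64_ImpCall path produces for a direct dllimport call, modeled on the updated win-import-call-optimization.ll checks above. The names @f and @caller and the section name are illustrative only, and the .Limpcall numbering depends on what else is in the module.

; RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s

declare dllimport void @f()

define dso_local void @caller() local_unnamed_addr section "sect" {
entry:
  ; SelectionDAG selects this call to the CALL64_ImpCall pseudo, and
  ; X86ExpandPseudo then rewrites it into a RIP-relative CALL64m through
  ; the import address table slot for @f.
  call void @f()
  ret void
}
; The asm printer brackets the memory-form call with the rex64 prefix and the
; 5-byte nop that Import Call Optimization requires:
; CHECK-LABEL: caller:
; CHECK: .Limpcall0:
; CHECK-NEXT: rex64
; CHECK-NEXT: callq *__imp_f(%rip)
; CHECK-NEXT: nopl 8(%rax,%rax)

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"import-call-optimization", i32 1}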