2 changes: 1 addition & 1 deletion clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
@@ -40,7 +40,7 @@ void case_calls_dll_import() NO_TAIL {
// CHECK: .seh_endprologue
// CHECK: .Limpcall{{[0-9]+}}:
// CHECK-NEXT: rex64
// CHECK-NEXT: call __imp_some_dll_import
// CHECK-NEXT: call qword ptr [rip + __imp_some_dll_import]
// CHECK-NEXT: nop dword ptr {{\[.*\]}}
// CHECK-NEXT: nop
// CHECK-NEXT: .seh_startepilogue
19 changes: 19 additions & 0 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -63,6 +63,8 @@ class X86ExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI);
void expandCALL_RVMARKER(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
void expandCALL_ImpCall(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandMBB(MachineBasicBlock &MBB);

@@ -254,6 +256,20 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
std::next(RtCall->getIterator()));
}

void X86ExpandPseudo::expandCALL_ImpCall(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
// Expand CALL64_ImpCall pseudo to CALL64m.
MachineInstr &MI = *MBBI;
  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64m))
      .addReg(X86::RIP) // Base register: RIP-relative addressing.
      .addImm(1)        // Scale.
      .addReg(0)        // No index register.
      .addGlobalAddress(MI.getOperand(0).getGlobal(), 0,
                        MI.getOperand(0).getTargetFlags()) // Displacement.
      .addReg(0);       // No segment register.
MI.eraseFromParent();
}
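For orientation, a minimal sketch of the call shape this expansion serves (hypothetical names; the expected assembly mirrors the updated clang test above):

declare dllimport void @some_import()

define void @calls_import() {
entry:
  call void @some_import()
  ret void
}
; With the "import-call-optimization" module flag set, the CALL64_ImpCall
; pseudo expands to a RIP-relative memory call through the function's
; import address table entry:
;   rex64
;   callq *__imp_some_import(%rip)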

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
@@ -886,6 +902,9 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::CALL64r_ImpCall:
MI.setDesc(TII->get(X86::CALL64r));
return true;
case X86::CALL64_ImpCall:
expandCALL_ImpCall(MBB, MBBI);
return true;
case X86::ADD32mi_ND:
case X86::ADD64mi32_ND:
case X86::SUB32mi_ND:
16 changes: 11 additions & 5 deletions llvm/lib/Target/X86/X86FastISel.cpp
@@ -3317,11 +3317,6 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (Flag.isSwiftError() || Flag.isPreallocated())
return false;

// Can't handle import call optimization.
if (Is64Bit &&
MF->getFunction().getParent()->getModuleFlag("import-call-optimization"))
return false;

SmallVector<MVT, 16> OutVTs;
SmallVector<Type *, 16> ArgTys;
SmallVector<Register, 16> ArgRegs;
@@ -3563,6 +3558,17 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (CalleeOp) {
// Register-indirect call.
unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;

const Module *M = FuncInfo.MF->getFunction().getParent();
if (CalleeOp != X86::RAX && Is64Bit &&
M->getModuleFlag("import-call-optimization")) {
// Import call optimization requires all indirect calls to be via RAX.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), X86::RAX)
.addReg(CalleeOp);
CalleeOp = X86::RAX;
}

MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc))
.addReg(CalleeOp);
} else {
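A hedged sketch of the FastISel case the copy above handles (hypothetical names): an indirect call whose callee was materialized in some other register is funneled through RAX, since the import call section must record a fixed register for indirect call sites:

define void @indirect(ptr %fp) {
entry:
  call void %fp()
  ret void
}
; Expected lowering under import-call-optimization (the callee pointer
; arrives in %rcx per the Windows x64 calling convention):
;   movq %rcx, %rax
;   callq *%rax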
13 changes: 5 additions & 8 deletions llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1313,9 +1313,6 @@ def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
(CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;

def : Pat<(X86imp_call (i64 tglobaladdr:$dst)),
(CALL64pcrel32 tglobaladdr:$dst)>;

// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
// can never use callee-saved registers. That is the purpose of the GR64_TC
// register classes.
@@ -1350,25 +1347,25 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off),

def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>;
Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;

def : Pat<(X86tcret GR64_TCW64:$dst, timm:$off),
(TCRETURN_WIN64ri GR64_TCW64:$dst, timm:$off)>,
Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>;
Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;

def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(TCRETURNri64_ImpCall ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationEnabled]>;
Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationEnabledAndCFGuardDisabled]>;

// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
(TCRETURNmi64 addr:$dst, timm:$off)>,
Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls]>;
Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;

def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
(TCRETURN_WINmi64 addr:$dst, timm:$off)>,
Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls]>;
Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;

def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>,
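For context, a hedged sketch of the tail-call case the predicate split above governs (hypothetical names): with import-call-optimization set and no cfguard flag, only the TCRETURNri64_ImpCall pattern can match, which forces the target into RAX:

define void @tail(ptr %fp) {
entry:
  tail call void %fp()
  ret void
}
; Expected lowering with import-call-optimization enabled and cfguard absent:
;   movq %rcx, %rax
;   rex64 jmpq *%rax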
10 changes: 7 additions & 3 deletions llvm/lib/Target/X86/X86InstrControl.td
@@ -331,11 +331,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationDisabled]>;
Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationDisabledOrCFGuardEnabled]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
NotUseIndirectThunkCalls]>;
NotUseIndirectThunkCalls,ImportCallOptimizationDisabledOrCFGuardEnabled]>;

// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@@ -433,9 +433,13 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
PseudoI<(outs), (ins i64imm:$rvfunc, i64i32imm_brtarget:$dst), []>,
Requires<[In64BitMode]>;

def CALL64_ImpCall :
PseudoI<(outs), (ins i64imm:$dst), [(X86imp_call tglobaladdr:$dst)]>,
Requires<[In64BitMode]>;

def CALL64r_ImpCall :
PseudoI<(outs), (ins GR64_A:$dst), [(X86call GR64_A:$dst)]>,
Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationEnabled]>;
Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationEnabledAndCFGuardDisabled]>;
}

// Conditional tail calls are similar to the above, but they are branches
6 changes: 4 additions & 2 deletions llvm/lib/Target/X86/X86InstrPredicates.td
@@ -234,8 +234,10 @@ let RecomputePerFunction = 1 in {
"shouldOptForSize(MF)">;
def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || "
"!Subtarget->hasSSE41()">;
def ImportCallOptimizationEnabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">;
def ImportCallOptimizationDisabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">;
def ImportCallOptimizationEnabledAndCFGuardDisabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\") && "
                                                                "!MF->getFunction().getParent()->getModuleFlag(\"cfguard\")">;
def ImportCallOptimizationDisabledOrCFGuardEnabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\") || "
                                                               "MF->getFunction().getParent()->getModuleFlag(\"cfguard\")">;

def IsWin64CCFunc : Predicate<"Subtarget->isCallingConvWin64(MF->getFunction().getCallingConv())">;
def IsNotWin64CCFunc : Predicate<"!Subtarget->isCallingConvWin64(MF->getFunction().getCallingConv())">;
47 changes: 27 additions & 20 deletions llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2346,7 +2346,8 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {

case X86::TAILJMPr64_REX: {
if (EnableImportCallOptimization) {
assert(MI->getOperand(0).getReg() == X86::RAX &&
assert((MI->getOperand(0).getReg() == X86::RAX ||
MF->getFunction().getParent()->getModuleFlag("cfguard")) &&
"Indirect tail calls with impcall enabled must go through RAX (as "
"enforced by TCRETURNImpCallri64)");
emitLabelAndRecordForImportCallOptimization(
@@ -2547,28 +2548,18 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));

if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) {
emitLabelAndRecordForImportCallOptimization(
IMAGE_RETPOLINE_AMD64_IMPORT_CALL);

MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);

// For Import Call Optimization to work, we need the call instruction
// with a rex prefix, and a 5-byte nop after the call instruction.
EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
emitCallInstruction(TmpInst);
emitNop(*OutStreamer, 5, Subtarget);
maybeEmitNopAfterCallForWindowsEH(MI);
return;
}
assert((!EnableImportCallOptimization ||
        !isImportedFunction(MI->getOperand(0))) &&
       "Calls to imported functions with import call optimization "
       "should be lowered to CALL64m via CALL64_ImpCall");

break;

case X86::CALL64r:
if (EnableImportCallOptimization) {
assert(MI->getOperand(0).getReg() == X86::RAX &&
"Indirect calls with impcall enabled must go through RAX (as "
"Indirect calls with import call optimization enabled must go "
"through RAX (as "
"enforced by CALL64r_ImpCall)");

emitLabelAndRecordForImportCallOptimization(
@@ -2586,9 +2577,25 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
break;

case X86::CALL64m:
if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
emitLabelAndRecordForImportCallOptimization(
IMAGE_RETPOLINE_AMD64_CFG_CALL);
if (EnableImportCallOptimization) {
if (isCallToCFGuardFunction(MI)) {
emitLabelAndRecordForImportCallOptimization(
IMAGE_RETPOLINE_AMD64_CFG_CALL);
} else if (isImportedFunction(MI->getOperand(3))) {
Review comment — efriedma-quic (Collaborator), Sep 24, 2025:
This is referring to a CALL64m where the symbol is an __imp_ symbol?
What happens if there's an indirect call to a function which isn't an imported function? Or is that impossible because we don't have CALL64m_ImpCall?

Reply — PR author (Contributor):
Despite the name, Import Call Optimization also wants to know all the places where there are CF Guard calls, whether they are for imported functions or not.

emitLabelAndRecordForImportCallOptimization(
IMAGE_RETPOLINE_AMD64_IMPORT_CALL);

MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);

// For Import Call Optimization to work, we need the call instruction
// with a rex prefix, and a 5-byte nop after the call instruction.
EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
emitCallInstruction(TmpInst);
emitNop(*OutStreamer, 5, Subtarget);
maybeEmitNopAfterCallForWindowsEH(MI);
return;
}
}
break;

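Per the review reply above, a short sketch of the non-imported CF Guard case that still gets recorded (hypothetical names; the expected lines mirror the cfguard test below):

define void @guarded(ptr %fp) {
entry:
  call void %fp()
  ret void
}
; With both module flags set, the indirect call is routed through the CF
; Guard dispatch thunk and still receives an import call section label:
;   movq %rcx, %rax
; .Limpcall0:
;   callq *__guard_dispatch_icall_fptr(%rip)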
75 changes: 66 additions & 9 deletions llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll
Original file line number Diff line number Diff line change
@@ -1,33 +1,90 @@
; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s

; FIXME: FastISel emits calls to the CF Guard dispatch function as indirect
; calls via registers. Normally this would work, but it is the wrong pattern
; for Import Call Optimization.

@global_func_ptr = external dso_local local_unnamed_addr global ptr, align 8
declare dllimport void @a() local_unnamed_addr
declare dllimport void @b() local_unnamed_addr

define dso_local void @normal_call(ptr noundef readonly %func_ptr) local_unnamed_addr section "nc_sect" {
entry:
call void @a()
call void @a()
call void %func_ptr()
%0 = load ptr, ptr @global_func_ptr, align 8
call void %0()
ret void
}
; CHECK-LABEL: normal_call:
; CHECK: .Limpcall0:
; CHECK: movq %rcx, %rsi
; CHECK-NEXT: .Limpcall0:
; CHECK-NEXT: rex64
; CHECK-NEXT: callq *__imp_a(%rip)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Limpcall1:
; CHECK-NEXT: rex64
; CHECK-NEXT: callq *__imp_a(%rip)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .Limpcall2:
; CHECK-NEXT: callq *__guard_dispatch_icall_fptr(%rip)
; CHECK-NEXT: movq global_func_ptr(%rip), %rax
; CHECK-NEXT: .Limpcall3:
; CHECK-NEXT: callq *__guard_dispatch_icall_fptr(%rip)
; CHECK-NEXT: nop

define dso_local void @tail_call() local_unnamed_addr section "tc_sect" {
entry:
tail call void @b()
ret void
}
; CHECK-LABEL: tail_call:
; CHECK: .Limpcall4:
; CHECK-NEXT: jmp __imp_b

define dso_local void @tail_call_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
entry:
tail call void %func_ptr()
ret void
}
; CHECK-LABEL: tail_call_fp:
; CHECK: .Limpcall1:
; CHECK: movq %rcx, %rax
; CHECK-NEXT: .Limpcall5:
; CHECK-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip)

define dso_local void @tail_call_global_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
entry:
%0 = load ptr, ptr @global_func_ptr, align 8
tail call void %0()
ret void
}
; CHECK-LABEL: tail_call_global_fp:
; CHECK: movq global_func_ptr(%rip), %rax
; CHECK-NEXT: .Limpcall6:
; CHECK-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip)

; CHECK-LABEL: .section .retplne,"yi"
; CHECK-NEXT: .asciz "RetpolineV1"
; CHECK-NEXT: .long 16
; CHECK-NEXT: .secnum tc_sect
; CHECK-NEXT: .long 10
; CHECK-NEXT: .secoffset .Limpcall1
; CHECK-NEXT: .long 16
; CHECK-NEXT: .long 40
; CHECK-NEXT: .secnum nc_sect
; CHECK-NEXT: .long 9
; CHECK-NEXT: .long 3
; CHECK-NEXT: .secoffset .Limpcall0
; CHECK-NEXT: .long 3
; CHECK-NEXT: .secoffset .Limpcall1
; CHECK-NEXT: .long 9
; CHECK-NEXT: .secoffset .Limpcall2
; CHECK-NEXT: .long 9
; CHECK-NEXT: .secoffset .Limpcall3
; CHECK-NEXT: .long 32
; CHECK-NEXT: .secnum tc_sect
; CHECK-NEXT: .long 2
; CHECK-NEXT: .secoffset .Limpcall4
; CHECK-NEXT: .long 4
; CHECK-NEXT: .secoffset .Limpcall5
; CHECK-NEXT: .long 4
; CHECK-NEXT: .secoffset .Limpcall6

!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"import-call-optimization", i32 1}
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/win-import-call-optimization.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s
; RUN: llc --fast-isel -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s

; CHECK-LABEL: uses_rax:
; CHECK: .Limpcall0: