Skip to content

Commit

Permalink
[X86] Resolve FIXME: Create cld only when needed (#82415)
Browse files Browse the repository at this point in the history
Only use cld when we also have rep instructions, are calling a function, or contain inline asm.
  • Loading branch information
AtariDreams committed Feb 28, 2024
1 parent 6287b7b commit 0a54b36
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 25 deletions.
67 changes: 63 additions & 4 deletions llvm/lib/Target/X86/X86FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,34 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions
static bool isOpcodeRep(unsigned Opcode) {
switch (Opcode) {
case X86::REPNE_PREFIX:
case X86::REP_MOVSB_32:
case X86::REP_MOVSB_64:
case X86::REP_MOVSD_32:
case X86::REP_MOVSD_64:
case X86::REP_MOVSQ_32:
case X86::REP_MOVSQ_64:
case X86::REP_MOVSW_32:
case X86::REP_MOVSW_64:
case X86::REP_PREFIX:
case X86::REP_STOSB_32:
case X86::REP_STOSB_64:
case X86::REP_STOSD_32:
case X86::REP_STOSD_64:
case X86::REP_STOSQ_32:
case X86::REP_STOSQ_64:
case X86::REP_STOSW_32:
case X86::REP_STOSW_64:
return true;
default:
break;
}
return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
Expand Down Expand Up @@ -2194,13 +2222,44 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
// in each prologue of interrupt handler function.
//
// FIXME: Create "cld" instruction only in these cases:
// Create "cld" instruction only in these cases:
// 1. The interrupt handling function uses any of the "rep" instructions.
// 2. Interrupt handling function calls another function.
// 3. If there are any inline asm blocks, as we do not know what they do
//
if (Fn.getCallingConv() == CallingConv::X86_INTR)
BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
.setMIFlag(MachineInstr::FrameSetup);
// TODO: We should also emit cld if we detect the use of std, but as of now,
// the compiler does not even emit that instruction or even define it, so in
// practice, this would only happen with inline asm, which we cover anyway.
if (Fn.getCallingConv() == CallingConv::X86_INTR) {
bool NeedsCLD = false;

for (const MachineBasicBlock &B : MF) {
for (const MachineInstr &MI : B) {
if (MI.isCall()) {
NeedsCLD = true;
break;
}

if (isOpcodeRep(MI.getOpcode())) {
NeedsCLD = true;
break;
}

if (MI.isInlineAsm()) {
// TODO: Parse asm for rep instructions or call sites?
// For now, let's play it safe and emit a cld instruction
// just in case.
NeedsCLD = true;
break;
}
}
}

if (NeedsCLD) {
BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
.setMIFlag(MachineInstr::FrameSetup);
}
}

// At this point we know if the function has WinCFI or not.
MF.setHasWinCFI(HasWinCFI);
Expand Down
10 changes: 0 additions & 10 deletions llvm/test/CodeGen/X86/x86-32-intrcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: fldt f80
; CHECK-NEXT: fld1
; CHECK-NEXT: faddp %st, %st(1)
Expand All @@ -163,7 +162,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
; CHECK0-NEXT: pushl %ebp
; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: fldt f80
; CHECK0-NEXT: fld1
; CHECK0-NEXT: faddp %st, %st(1)
Expand All @@ -188,7 +186,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: leal 20(%ebp), %eax
; CHECK-NEXT: leal 4(%ebp), %ecx
; CHECK-NEXT: movl %ecx, sink_address
Expand All @@ -206,7 +203,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: leal 4(%ebp), %ecx
; CHECK0-NEXT: movl %ecx, %eax
; CHECK0-NEXT: addl $16, %eax
Expand Down Expand Up @@ -234,7 +230,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: movl 4(%ebp), %eax
; CHECK-NEXT: leal 24(%ebp), %ecx
; CHECK-NEXT: leal 8(%ebp), %edx
Expand All @@ -257,7 +252,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: leal 8(%ebp), %edx
; CHECK0-NEXT: movl %edx, %ecx
Expand Down Expand Up @@ -288,7 +282,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: leal 4(%ebp), %eax
; CHECK-NEXT: movl %eax, sink_address
; CHECK-NEXT: leal -4(%ebp), %esp
Expand All @@ -303,7 +296,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: leal 4(%ebp), %eax
; CHECK0-NEXT: movl %eax, sink_address
Expand Down Expand Up @@ -358,7 +350,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-32, %esp
; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: movl 4(%ebp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: leal -4(%ebp), %esp
Expand All @@ -374,7 +365,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-32, %esp
; CHECK0-NEXT: subl $32, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: movl %eax, (%esp)
; CHECK0-NEXT: leal -4(%ebp), %esp
Expand Down
8 changes: 0 additions & 8 deletions llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,24 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_empty(ptr nocapture byval(%s
; CHECK-USER-LABEL: test_uintr_isr_cc_empty:
; CHECK-USER: # %bb.0: # %entry
; CHECK-USER-NEXT: pushq %rax
; CHECK-USER-NEXT: cld
; CHECK-USER-NEXT: addq $16, %rsp
; CHECK-USER-NEXT: uiret
;
; CHECK0-USER-LABEL: test_uintr_isr_cc_empty:
; CHECK0-USER: # %bb.0: # %entry
; CHECK0-USER-NEXT: pushq %rax
; CHECK0-USER-NEXT: cld
; CHECK0-USER-NEXT: addq $16, %rsp
; CHECK0-USER-NEXT: uiret
;
; CHECK-KERNEL-LABEL: test_uintr_isr_cc_empty:
; CHECK-KERNEL: # %bb.0: # %entry
; CHECK-KERNEL-NEXT: pushq %rax
; CHECK-KERNEL-NEXT: cld
; CHECK-KERNEL-NEXT: addq $16, %rsp
; CHECK-KERNEL-NEXT: iretq
;
; CHECK0-KERNEL-LABEL: test_uintr_isr_cc_empty:
; CHECK0-KERNEL: # %bb.0: # %entry
; CHECK0-KERNEL-NEXT: pushq %rax
; CHECK0-KERNEL-NEXT: cld
; CHECK0-KERNEL-NEXT: addq $16, %rsp
; CHECK0-KERNEL-NEXT: iretq
entry:
Expand Down Expand Up @@ -75,7 +71,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK-USER-NEXT: pushq %rax
; CHECK-USER-NEXT: pushq %rdx
; CHECK-USER-NEXT: pushq %rcx
; CHECK-USER-NEXT: cld
; CHECK-USER-NEXT: movq 32(%rsp), %rax
; CHECK-USER-NEXT: movq 40(%rsp), %rcx
; CHECK-USER-NEXT: movq 48(%rsp), %rdx
Expand All @@ -96,7 +91,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK0-USER-NEXT: pushq %rax
; CHECK0-USER-NEXT: pushq %rdx
; CHECK0-USER-NEXT: pushq %rcx
; CHECK0-USER-NEXT: cld
; CHECK0-USER-NEXT: movq 32(%rsp), %rax
; CHECK0-USER-NEXT: leaq 40(%rsp), %rcx
; CHECK0-USER-NEXT: movq (%rcx), %rdx
Expand All @@ -118,7 +112,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK-KERNEL-NEXT: pushq %rax
; CHECK-KERNEL-NEXT: pushq %rdx
; CHECK-KERNEL-NEXT: pushq %rcx
; CHECK-KERNEL-NEXT: cld
; CHECK-KERNEL-NEXT: movq 32(%rsp), %rax
; CHECK-KERNEL-NEXT: movq 40(%rsp), %rcx
; CHECK-KERNEL-NEXT: movq 48(%rsp), %rdx
Expand All @@ -139,7 +132,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK0-KERNEL-NEXT: pushq %rax
; CHECK0-KERNEL-NEXT: pushq %rdx
; CHECK0-KERNEL-NEXT: pushq %rcx
; CHECK0-KERNEL-NEXT: cld
; CHECK0-KERNEL-NEXT: movq 32(%rsp), %rax
; CHECK0-KERNEL-NEXT: leaq 40(%rsp), %rcx
; CHECK0-KERNEL-NEXT: movq (%rcx), %rdx
Expand Down
3 changes: 0 additions & 3 deletions llvm/test/CodeGen/X86/x86-64-intrcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]]
; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]]
; CHECK: movq %[[R1]], sink_address
Expand All @@ -136,7 +135,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]]
; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]]
; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]]
Expand Down Expand Up @@ -164,7 +162,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]]
; CHECK: movq %[[R1]], sink_address(%rip)
entry:
Expand Down

0 comments on commit 0a54b36

Please sign in to comment.