diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index be416fb0db069..d914e1b61ab07 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1418,6 +1418,34 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const { return !isWin64Prologue(MF) && MF.needsFrameMoves(); } +/// Return true if an opcode is part of the REP group of instructions +static bool isOpcodeRep(unsigned Opcode) { + switch (Opcode) { + case X86::REPNE_PREFIX: + case X86::REP_MOVSB_32: + case X86::REP_MOVSB_64: + case X86::REP_MOVSD_32: + case X86::REP_MOVSD_64: + case X86::REP_MOVSQ_32: + case X86::REP_MOVSQ_64: + case X86::REP_MOVSW_32: + case X86::REP_MOVSW_64: + case X86::REP_PREFIX: + case X86::REP_STOSB_32: + case X86::REP_STOSB_64: + case X86::REP_STOSD_32: + case X86::REP_STOSD_64: + case X86::REP_STOSQ_32: + case X86::REP_STOSQ_64: + case X86::REP_STOSW_32: + case X86::REP_STOSW_64: + return true; + default: + break; + } + return false; +} + /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to @@ -2194,13 +2222,44 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction // in each prologue of interrupt handler function. // - // FIXME: Create "cld" instruction only in these cases: + // Create "cld" instruction only in these cases: // 1. The interrupt handling function uses any of the "rep" instructions. // 2. Interrupt handling function calls another function. + // 3. If there are any inline asm blocks, as we do not know what they do // - if (Fn.getCallingConv() == CallingConv::X86_INTR) - BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) - .setMIFlag(MachineInstr::FrameSetup); + // TODO: We should also emit cld if we detect the use of std, but as of now, + // the compiler does not even emit that instruction or even define it, so in + // practice, this would only happen with inline asm, which we cover anyway. + if (Fn.getCallingConv() == CallingConv::X86_INTR) { + bool NeedsCLD = false; + + for (const MachineBasicBlock &B : MF) { + for (const MachineInstr &MI : B) { + if (MI.isCall()) { + NeedsCLD = true; + break; + } + + if (isOpcodeRep(MI.getOpcode())) { + NeedsCLD = true; + break; + } + + if (MI.isInlineAsm()) { + // TODO: Parse asm for rep instructions or call sites? + // For now, let's play it safe and emit a cld instruction + // just in case. + NeedsCLD = true; + break; + } + } + } + + if (NeedsCLD) { + BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) + .setMIFlag(MachineInstr::FrameSetup); + } + } // At this point we know if the function has WinCFI or not. MF.setHasWinCFI(HasWinCFI); diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll index 2e482753e2685..3c3944c2082bd 100644 --- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll +++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll @@ -149,7 +149,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame) ; CHECK-NEXT: pushl %ebp ; CHECK-NEXT: movl %esp, %ebp ; CHECK-NEXT: andl $-16, %esp -; CHECK-NEXT: cld ; CHECK-NEXT: fldt f80 ; CHECK-NEXT: fld1 ; CHECK-NEXT: faddp %st, %st(1) @@ -163,7 +162,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame) ; CHECK0-NEXT: pushl %ebp ; CHECK0-NEXT: movl %esp, %ebp ; CHECK0-NEXT: andl $-16, %esp -; CHECK0-NEXT: cld ; CHECK0-NEXT: fldt f80 ; CHECK0-NEXT: fld1 ; CHECK0-NEXT: faddp %st, %st(1) @@ -188,7 +186,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) % ; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: andl $-16, %esp -; CHECK-NEXT: cld ; CHECK-NEXT: leal 20(%ebp), %eax ; CHECK-NEXT: leal 4(%ebp), %ecx ; CHECK-NEXT: movl %ecx, sink_address @@ -206,7 +203,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) % ; CHECK0-NEXT: pushl %ecx ; CHECK0-NEXT: pushl %eax ; CHECK0-NEXT: andl $-16, %esp -; CHECK0-NEXT: cld ; CHECK0-NEXT: leal 4(%ebp), %ecx ; CHECK0-NEXT: movl %ecx, %eax ; CHECK0-NEXT: addl $16, %eax @@ -234,7 +230,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) % ; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: andl $-16, %esp -; CHECK-NEXT: cld ; CHECK-NEXT: movl 4(%ebp), %eax ; CHECK-NEXT: leal 24(%ebp), %ecx ; CHECK-NEXT: leal 8(%ebp), %edx @@ -257,7 +252,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) % ; CHECK0-NEXT: pushl %ecx ; CHECK0-NEXT: pushl %eax ; CHECK0-NEXT: andl $-16, %esp -; CHECK0-NEXT: cld ; CHECK0-NEXT: movl 4(%ebp), %eax ; CHECK0-NEXT: leal 8(%ebp), %edx ; CHECK0-NEXT: movl %edx, %ecx @@ -288,7 +282,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram ; CHECK-NEXT: movl %esp, %ebp ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: andl $-16, %esp -; CHECK-NEXT: cld ; CHECK-NEXT: leal 4(%ebp), %eax ; CHECK-NEXT: movl %eax, sink_address ; CHECK-NEXT: leal -4(%ebp), %esp @@ -303,7 +296,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram ; CHECK0-NEXT: movl %esp, %ebp ; CHECK0-NEXT: pushl %eax ; CHECK0-NEXT: andl $-16, %esp -; CHECK0-NEXT: cld ; CHECK0-NEXT: movl 4(%ebp), %eax ; CHECK0-NEXT: leal 4(%ebp), %eax ; CHECK0-NEXT: movl %eax, sink_address @@ -358,7 +350,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: andl $-32, %esp ; CHECK-NEXT: subl $32, %esp -; CHECK-NEXT: cld ; CHECK-NEXT: movl 4(%ebp), %eax ; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: leal -4(%ebp), %esp @@ -374,7 +365,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra ; CHECK0-NEXT: pushl %eax ; CHECK0-NEXT: andl $-32, %esp ; CHECK0-NEXT: subl $32, %esp -; CHECK0-NEXT: cld ; CHECK0-NEXT: movl 4(%ebp), %eax ; CHECK0-NEXT: movl %eax, (%esp) ; CHECK0-NEXT: leal -4(%ebp), %esp diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll b/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll index a46b9d9ba5a10..1fe395b84d46c 100644 --- a/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll +++ b/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll @@ -21,28 +21,24 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_empty(ptr nocapture byval(%s ; CHECK-USER-LABEL: test_uintr_isr_cc_empty: ; CHECK-USER: # %bb.0: # %entry ; CHECK-USER-NEXT: pushq %rax -; CHECK-USER-NEXT: cld ; CHECK-USER-NEXT: addq $16, %rsp ; CHECK-USER-NEXT: uiret ; ; CHECK0-USER-LABEL: test_uintr_isr_cc_empty: ; CHECK0-USER: # %bb.0: # %entry ; CHECK0-USER-NEXT: pushq %rax -; CHECK0-USER-NEXT: cld ; CHECK0-USER-NEXT: addq $16, %rsp ; CHECK0-USER-NEXT: uiret ; ; CHECK-KERNEL-LABEL: test_uintr_isr_cc_empty: ; CHECK-KERNEL: # %bb.0: # %entry ; CHECK-KERNEL-NEXT: pushq %rax -; CHECK-KERNEL-NEXT: cld ; CHECK-KERNEL-NEXT: addq $16, %rsp ; CHECK-KERNEL-NEXT: iretq ; ; CHECK0-KERNEL-LABEL: test_uintr_isr_cc_empty: ; CHECK0-KERNEL: # %bb.0: # %entry ; CHECK0-KERNEL-NEXT: pushq %rax -; CHECK0-KERNEL-NEXT: cld ; CHECK0-KERNEL-NEXT: addq $16, %rsp ; CHECK0-KERNEL-NEXT: iretq entry: @@ -75,7 +71,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly ; CHECK-USER-NEXT: pushq %rax ; CHECK-USER-NEXT: pushq %rdx ; CHECK-USER-NEXT: pushq %rcx -; CHECK-USER-NEXT: cld ; CHECK-USER-NEXT: movq 32(%rsp), %rax ; CHECK-USER-NEXT: movq 40(%rsp), %rcx ; CHECK-USER-NEXT: movq 48(%rsp), %rdx @@ -96,7 +91,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly ; CHECK0-USER-NEXT: pushq %rax ; CHECK0-USER-NEXT: pushq %rdx ; CHECK0-USER-NEXT: pushq %rcx -; CHECK0-USER-NEXT: cld ; CHECK0-USER-NEXT: movq 32(%rsp), %rax ; CHECK0-USER-NEXT: leaq 40(%rsp), %rcx ; CHECK0-USER-NEXT: movq (%rcx), %rdx @@ -118,7 +112,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly ; CHECK-KERNEL-NEXT: pushq %rax ; CHECK-KERNEL-NEXT: pushq %rdx ; CHECK-KERNEL-NEXT: pushq %rcx -; CHECK-KERNEL-NEXT: cld ; CHECK-KERNEL-NEXT: movq 32(%rsp), %rax ; CHECK-KERNEL-NEXT: movq 40(%rsp), %rcx ; CHECK-KERNEL-NEXT: movq 48(%rsp), %rdx @@ -139,7 +132,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly ; CHECK0-KERNEL-NEXT: pushq %rax ; CHECK0-KERNEL-NEXT: pushq %rdx ; CHECK0-KERNEL-NEXT: pushq %rcx -; CHECK0-KERNEL-NEXT: cld ; CHECK0-KERNEL-NEXT: movq 32(%rsp), %rax ; CHECK0-KERNEL-NEXT: leaq 40(%rsp), %rcx ; CHECK0-KERNEL-NEXT: movq (%rcx), %rdx diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc.ll b/llvm/test/CodeGen/X86/x86-64-intrcc.ll index 443d4c2fa4649..5fc606eb566ea 100644 --- a/llvm/test/CodeGen/X86/x86-64-intrcc.ll +++ b/llvm/test/CodeGen/X86/x86-64-intrcc.ll @@ -114,7 +114,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: movq %rsp, %rbp - ; CHECK: cld ; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]] ; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]] ; CHECK: movq %[[R1]], sink_address @@ -136,7 +135,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) % ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: movq %rsp, %rbp - ; CHECK: cld ; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]] ; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]] ; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]] @@ -164,7 +162,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: movq %rsp, %rbp - ; CHECK: cld ; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]] ; CHECK: movq %[[R1]], sink_address(%rip) entry: