Skip to content

Commit

Permalink
[X86] Mark EMMS and FEMMS as clobbering MM0-7 and ST0-7.
Browse files Browse the repository at this point in the history
This fixes the test case in PR35982 by preventing MMX instructions that read MM0-7 from being moved below EMMS/FEMMS by the post-RA scheduler.

However, as discussed in Bugzilla, this is not a complete fix: reordering is still possible in IR or in the pre-RA scheduler.

Differential Revision: https://reviews.llvm.org/D57298

llvm-svn: 352660
  • Loading branch information
topperc committed Jan 30, 2019
1 parent e171ade commit 22b3de5
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 88 deletions.
4 changes: 3 additions & 1 deletion llvm/lib/Target/X86/X86Instr3DNow.td
Expand Up @@ -73,7 +73,9 @@ defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;

let SchedRW = [WriteEMMS] in
// FEMMS is modeled as defining (clobbering) all eight MMX registers and all
// eight x87 stack registers. With these Defs in place, MMX instructions that
// read MM0-7 are not moved below FEMMS by the post-RA scheduler (PR35982).
// NOTE(review): this is not a complete fix -- reordering in IR or by the
// pre-RA scheduler is still possible.
let SchedRW = [WriteEMMS],
Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)]>, TB;

Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/X86/X86InstrMMX.td
Expand Up @@ -152,7 +152,9 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//

let SchedRW = [WriteEMMS] in
// EMMS is modeled as defining (clobbering) all eight MMX registers and all
// eight x87 stack registers. With these Defs in place, MMX instructions that
// read MM0-7 are not moved below EMMS by the post-RA scheduler (PR35982).
// NOTE(review): this is not a complete fix -- reordering in IR or by the
// pre-RA scheduler is still possible.
let SchedRW = [WriteEMMS],
Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;

//===----------------------------------------------------------------------===//
Expand Down
128 changes: 42 additions & 86 deletions llvm/test/CodeGen/X86/pr35982.ll
Expand Up @@ -3,49 +3,27 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=true | FileCheck %s --check-prefixes=CHECK,POST

define float @PR35982_emms(<1 x i64>) nounwind {
; NOPOST-LABEL: PR35982_emms:
; NOPOST: # %bb.0:
; NOPOST-NEXT: pushl %ebp
; NOPOST-NEXT: movl %esp, %ebp
; NOPOST-NEXT: andl $-8, %esp
; NOPOST-NEXT: subl $16, %esp
; NOPOST-NEXT: movl 8(%ebp), %eax
; NOPOST-NEXT: movl 12(%ebp), %ecx
; NOPOST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; NOPOST-NEXT: movl %eax, {{[0-9]+}}(%esp)
; NOPOST-NEXT: movq {{[0-9]+}}(%esp), %mm0
; NOPOST-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; NOPOST-NEXT: movd %mm0, %ecx
; NOPOST-NEXT: emms
; NOPOST-NEXT: movl %eax, (%esp)
; NOPOST-NEXT: fildl (%esp)
; NOPOST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; NOPOST-NEXT: fiaddl {{[0-9]+}}(%esp)
; NOPOST-NEXT: movl %ebp, %esp
; NOPOST-NEXT: popl %ebp
; NOPOST-NEXT: retl
;
; POST-LABEL: PR35982_emms:
; POST: # %bb.0:
; POST-NEXT: pushl %ebp
; POST-NEXT: movl %esp, %ebp
; POST-NEXT: andl $-8, %esp
; POST-NEXT: subl $16, %esp
; POST-NEXT: movl 8(%ebp), %eax
; POST-NEXT: movl 12(%ebp), %ecx
; POST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; POST-NEXT: movl %eax, {{[0-9]+}}(%esp)
; POST-NEXT: movq {{[0-9]+}}(%esp), %mm0
; POST-NEXT: emms
; POST-NEXT: movl %eax, (%esp)
; POST-NEXT: fildl (%esp)
; POST-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; POST-NEXT: movd %mm0, %ecx
; POST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; POST-NEXT: fiaddl {{[0-9]+}}(%esp)
; POST-NEXT: movl %ebp, %esp
; POST-NEXT: popl %ebp
; POST-NEXT: retl
; CHECK-LABEL: PR35982_emms:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: movl 8(%ebp), %eax
; CHECK-NEXT: movl 12(%ebp), %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; CHECK-NEXT: movd %mm0, %ecx
; CHECK-NEXT: emms
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: fildl (%esp)
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: fiaddl {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
%2 = bitcast <1 x i64> %0 to <2 x i32>
%3 = extractelement <2 x i32> %2, i32 0
%4 = extractelement <1 x i64> %0, i32 0
Expand All @@ -61,49 +39,27 @@ define float @PR35982_emms(<1 x i64>) nounwind {
}

define float @PR35982_femms(<1 x i64>) nounwind {
; NOPOST-LABEL: PR35982_femms:
; NOPOST: # %bb.0:
; NOPOST-NEXT: pushl %ebp
; NOPOST-NEXT: movl %esp, %ebp
; NOPOST-NEXT: andl $-8, %esp
; NOPOST-NEXT: subl $16, %esp
; NOPOST-NEXT: movl 8(%ebp), %eax
; NOPOST-NEXT: movl 12(%ebp), %ecx
; NOPOST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; NOPOST-NEXT: movl %eax, {{[0-9]+}}(%esp)
; NOPOST-NEXT: movq {{[0-9]+}}(%esp), %mm0
; NOPOST-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; NOPOST-NEXT: movd %mm0, %ecx
; NOPOST-NEXT: femms
; NOPOST-NEXT: movl %eax, (%esp)
; NOPOST-NEXT: fildl (%esp)
; NOPOST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; NOPOST-NEXT: fiaddl {{[0-9]+}}(%esp)
; NOPOST-NEXT: movl %ebp, %esp
; NOPOST-NEXT: popl %ebp
; NOPOST-NEXT: retl
;
; POST-LABEL: PR35982_femms:
; POST: # %bb.0:
; POST-NEXT: pushl %ebp
; POST-NEXT: movl %esp, %ebp
; POST-NEXT: andl $-8, %esp
; POST-NEXT: subl $16, %esp
; POST-NEXT: movl 8(%ebp), %eax
; POST-NEXT: movl 12(%ebp), %ecx
; POST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; POST-NEXT: movl %eax, {{[0-9]+}}(%esp)
; POST-NEXT: movq {{[0-9]+}}(%esp), %mm0
; POST-NEXT: femms
; POST-NEXT: movl %eax, (%esp)
; POST-NEXT: fildl (%esp)
; POST-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; POST-NEXT: movd %mm0, %ecx
; POST-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; POST-NEXT: fiaddl {{[0-9]+}}(%esp)
; POST-NEXT: movl %ebp, %esp
; POST-NEXT: popl %ebp
; POST-NEXT: retl
; CHECK-LABEL: PR35982_femms:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: movl 8(%ebp), %eax
; CHECK-NEXT: movl 12(%ebp), %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; CHECK-NEXT: movd %mm0, %ecx
; CHECK-NEXT: femms
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: fildl (%esp)
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: fiaddl {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
%2 = bitcast <1 x i64> %0 to <2 x i32>
%3 = extractelement <2 x i32> %2, i32 0
%4 = extractelement <1 x i64> %0, i32 0
Expand Down

0 comments on commit 22b3de5

Please sign in to comment.