394 changes: 197 additions & 197 deletions llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=X86-ALL,X86-GOOD-RA
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=X86-ALL,X86-FAST-RA

; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefix=64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=X64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=X64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefix=X64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=X64-ALL
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefix=X64-ALL

declare i32 @foo()
declare i32 @bar(i64)
Expand All @@ -24,86 +24,86 @@ declare i32 @bar(i64)
; repeated saving and restoring logic and can be trivially managed by the
; register allocator.
define i64 @test_intervening_call(ptr %foo, i64 %bar, i64 %baz) nounwind {
; 32-GOOD-RA-LABEL: test_intervening_call:
; 32-GOOD-RA: # %bb.0: # %entry
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
; 32-GOOD-RA-NEXT: setne %bl
; 32-GOOD-RA-NEXT: subl $8, %esp
; 32-GOOD-RA-NEXT: pushl %edx
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: calll bar@PLT
; 32-GOOD-RA-NEXT: addl $16, %esp
; 32-GOOD-RA-NEXT: testb %bl, %bl
; 32-GOOD-RA-NEXT: jne .LBB0_3
; 32-GOOD-RA-NEXT: # %bb.1: # %t
; 32-GOOD-RA-NEXT: movl $42, %eax
; 32-GOOD-RA-NEXT: jmp .LBB0_2
; 32-GOOD-RA-NEXT: .LBB0_3: # %f
; 32-GOOD-RA-NEXT: xorl %eax, %eax
; 32-GOOD-RA-NEXT: .LBB0_2: # %t
; 32-GOOD-RA-NEXT: xorl %edx, %edx
; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
; X86-GOOD-RA-LABEL: test_intervening_call:
; X86-GOOD-RA: # %bb.0: # %entry
; X86-GOOD-RA-NEXT: pushl %ebx
; X86-GOOD-RA-NEXT: pushl %esi
; X86-GOOD-RA-NEXT: pushl %eax
; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
; X86-GOOD-RA-NEXT: setne %bl
; X86-GOOD-RA-NEXT: subl $8, %esp
; X86-GOOD-RA-NEXT: pushl %edx
; X86-GOOD-RA-NEXT: pushl %eax
; X86-GOOD-RA-NEXT: calll bar@PLT
; X86-GOOD-RA-NEXT: addl $16, %esp
; X86-GOOD-RA-NEXT: testb %bl, %bl
; X86-GOOD-RA-NEXT: jne .LBB0_3
; X86-GOOD-RA-NEXT: # %bb.1: # %t
; X86-GOOD-RA-NEXT: movl $42, %eax
; X86-GOOD-RA-NEXT: jmp .LBB0_2
; X86-GOOD-RA-NEXT: .LBB0_3: # %f
; X86-GOOD-RA-NEXT: xorl %eax, %eax
; X86-GOOD-RA-NEXT: .LBB0_2: # %t
; X86-GOOD-RA-NEXT: xorl %edx, %edx
; X86-GOOD-RA-NEXT: addl $4, %esp
; X86-GOOD-RA-NEXT: popl %esi
; X86-GOOD-RA-NEXT: popl %ebx
; X86-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_intervening_call:
; 32-FAST-RA: # %bb.0: # %entry
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
; 32-FAST-RA-NEXT: setne %bl
; 32-FAST-RA-NEXT: subl $8, %esp
; 32-FAST-RA-NEXT: pushl %edx
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: calll bar@PLT
; 32-FAST-RA-NEXT: addl $16, %esp
; 32-FAST-RA-NEXT: testb %bl, %bl
; 32-FAST-RA-NEXT: jne .LBB0_3
; 32-FAST-RA-NEXT: # %bb.1: # %t
; 32-FAST-RA-NEXT: movl $42, %eax
; 32-FAST-RA-NEXT: jmp .LBB0_2
; 32-FAST-RA-NEXT: .LBB0_3: # %f
; 32-FAST-RA-NEXT: xorl %eax, %eax
; 32-FAST-RA-NEXT: .LBB0_2: # %t
; 32-FAST-RA-NEXT: xorl %edx, %edx
; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
; X86-FAST-RA-LABEL: test_intervening_call:
; X86-FAST-RA: # %bb.0: # %entry
; X86-FAST-RA-NEXT: pushl %ebx
; X86-FAST-RA-NEXT: pushl %esi
; X86-FAST-RA-NEXT: pushl %eax
; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-RA-NEXT: lock cmpxchg8b (%esi)
; X86-FAST-RA-NEXT: setne %bl
; X86-FAST-RA-NEXT: subl $8, %esp
; X86-FAST-RA-NEXT: pushl %edx
; X86-FAST-RA-NEXT: pushl %eax
; X86-FAST-RA-NEXT: calll bar@PLT
; X86-FAST-RA-NEXT: addl $16, %esp
; X86-FAST-RA-NEXT: testb %bl, %bl
; X86-FAST-RA-NEXT: jne .LBB0_3
; X86-FAST-RA-NEXT: # %bb.1: # %t
; X86-FAST-RA-NEXT: movl $42, %eax
; X86-FAST-RA-NEXT: jmp .LBB0_2
; X86-FAST-RA-NEXT: .LBB0_3: # %f
; X86-FAST-RA-NEXT: xorl %eax, %eax
; X86-FAST-RA-NEXT: .LBB0_2: # %t
; X86-FAST-RA-NEXT: xorl %edx, %edx
; X86-FAST-RA-NEXT: addl $4, %esp
; X86-FAST-RA-NEXT: popl %esi
; X86-FAST-RA-NEXT: popl %ebx
; X86-FAST-RA-NEXT: retl
;
; 64-ALL-LABEL: test_intervening_call:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: pushq %rbx
; 64-ALL-NEXT: movq %rsi, %rax
; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
; 64-ALL-NEXT: setne %bl
; 64-ALL-NEXT: movq %rax, %rdi
; 64-ALL-NEXT: callq bar@PLT
; 64-ALL-NEXT: testb %bl, %bl
; 64-ALL-NEXT: jne .LBB0_2
; 64-ALL-NEXT: # %bb.1: # %t
; 64-ALL-NEXT: movl $42, %eax
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: retq
; 64-ALL-NEXT: .LBB0_2: # %f
; 64-ALL-NEXT: xorl %eax, %eax
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: retq
; X64-ALL-LABEL: test_intervening_call:
; X64-ALL: # %bb.0: # %entry
; X64-ALL-NEXT: pushq %rbx
; X64-ALL-NEXT: movq %rsi, %rax
; X64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
; X64-ALL-NEXT: setne %bl
; X64-ALL-NEXT: movq %rax, %rdi
; X64-ALL-NEXT: callq bar@PLT
; X64-ALL-NEXT: testb %bl, %bl
; X64-ALL-NEXT: jne .LBB0_2
; X64-ALL-NEXT: # %bb.1: # %t
; X64-ALL-NEXT: movl $42, %eax
; X64-ALL-NEXT: popq %rbx
; X64-ALL-NEXT: retq
; X64-ALL-NEXT: .LBB0_2: # %f
; X64-ALL-NEXT: xorl %eax, %eax
; X64-ALL-NEXT: popq %rbx
; X64-ALL-NEXT: retq
entry:
%cx = cmpxchg ptr %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
Expand All @@ -120,61 +120,61 @@ f:

; Interesting in that it produces a clobber without any function calls.
define i32 @test_control_flow(ptr %p, i32 %i, i32 %j) nounwind {
; 32-ALL-LABEL: test_control_flow:
; 32-ALL: # %bb.0: # %entry
; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT: jle .LBB1_6
; 32-ALL-NEXT: # %bb.1: # %loop_start
; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-ALL-NEXT: .p2align 4, 0x90
; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
; 32-ALL-NEXT: # =>This Loop Header: Depth=1
; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2
; 32-ALL-NEXT: movl (%ecx), %edx
; 32-ALL-NEXT: .p2align 4, 0x90
; 32-ALL-NEXT: .LBB1_3: # %while.cond.i
; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1
; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2
; 32-ALL-NEXT: movl %edx, %eax
; 32-ALL-NEXT: xorl %edx, %edx
; 32-ALL-NEXT: testl %eax, %eax
; 32-ALL-NEXT: je .LBB1_3
; 32-ALL-NEXT: # %bb.4: # %while.body.i
; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
; 32-ALL-NEXT: jne .LBB1_2
; 32-ALL-NEXT: # %bb.5:
; 32-ALL-NEXT: xorl %eax, %eax
; 32-ALL-NEXT: .LBB1_6: # %cond.end
; 32-ALL-NEXT: retl
; X86-ALL-LABEL: test_control_flow:
; X86-ALL: # %bb.0: # %entry
; X86-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-ALL-NEXT: jle .LBB1_6
; X86-ALL-NEXT: # %bb.1: # %loop_start
; X86-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ALL-NEXT: .p2align 4, 0x90
; X86-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
; X86-ALL-NEXT: # =>This Loop Header: Depth=1
; X86-ALL-NEXT: # Child Loop BB1_3 Depth 2
; X86-ALL-NEXT: movl (%ecx), %edx
; X86-ALL-NEXT: .p2align 4, 0x90
; X86-ALL-NEXT: .LBB1_3: # %while.cond.i
; X86-ALL-NEXT: # Parent Loop BB1_2 Depth=1
; X86-ALL-NEXT: # => This Inner Loop Header: Depth=2
; X86-ALL-NEXT: movl %edx, %eax
; X86-ALL-NEXT: xorl %edx, %edx
; X86-ALL-NEXT: testl %eax, %eax
; X86-ALL-NEXT: je .LBB1_3
; X86-ALL-NEXT: # %bb.4: # %while.body.i
; X86-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
; X86-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-ALL-NEXT: jne .LBB1_2
; X86-ALL-NEXT: # %bb.5:
; X86-ALL-NEXT: xorl %eax, %eax
; X86-ALL-NEXT: .LBB1_6: # %cond.end
; X86-ALL-NEXT: retl
;
; 64-ALL-LABEL: test_control_flow:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: movl %esi, %eax
; 64-ALL-NEXT: cmpl %edx, %esi
; 64-ALL-NEXT: jle .LBB1_5
; 64-ALL-NEXT: .p2align 4, 0x90
; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
; 64-ALL-NEXT: # =>This Loop Header: Depth=1
; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2
; 64-ALL-NEXT: movl (%rdi), %ecx
; 64-ALL-NEXT: .p2align 4, 0x90
; 64-ALL-NEXT: .LBB1_2: # %while.cond.i
; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2
; 64-ALL-NEXT: movl %ecx, %eax
; 64-ALL-NEXT: xorl %ecx, %ecx
; 64-ALL-NEXT: testl %eax, %eax
; 64-ALL-NEXT: je .LBB1_2
; 64-ALL-NEXT: # %bb.3: # %while.body.i
; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
; 64-ALL-NEXT: jne .LBB1_1
; 64-ALL-NEXT: # %bb.4:
; 64-ALL-NEXT: xorl %eax, %eax
; 64-ALL-NEXT: .LBB1_5: # %cond.end
; 64-ALL-NEXT: retq
; X64-ALL-LABEL: test_control_flow:
; X64-ALL: # %bb.0: # %entry
; X64-ALL-NEXT: movl %esi, %eax
; X64-ALL-NEXT: cmpl %edx, %esi
; X64-ALL-NEXT: jle .LBB1_5
; X64-ALL-NEXT: .p2align 4, 0x90
; X64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
; X64-ALL-NEXT: # =>This Loop Header: Depth=1
; X64-ALL-NEXT: # Child Loop BB1_2 Depth 2
; X64-ALL-NEXT: movl (%rdi), %ecx
; X64-ALL-NEXT: .p2align 4, 0x90
; X64-ALL-NEXT: .LBB1_2: # %while.cond.i
; X64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
; X64-ALL-NEXT: # => This Inner Loop Header: Depth=2
; X64-ALL-NEXT: movl %ecx, %eax
; X64-ALL-NEXT: xorl %ecx, %ecx
; X64-ALL-NEXT: testl %eax, %eax
; X64-ALL-NEXT: je .LBB1_2
; X64-ALL-NEXT: # %bb.3: # %while.body.i
; X64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
; X64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
; X64-ALL-NEXT: jne .LBB1_1
; X64-ALL-NEXT: # %bb.4:
; X64-ALL-NEXT: xorl %eax, %eax
; X64-ALL-NEXT: .LBB1_5: # %cond.end
; X64-ALL-NEXT: retq
entry:
%cmp = icmp sgt i32 %i, %j
br i1 %cmp, label %loop_start, label %cond.end
Expand Down Expand Up @@ -208,66 +208,66 @@ cond.end:
; This one is an interesting case because CMOV doesn't have a chain
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
define i32 @test_feed_cmov(ptr %addr, i32 %desired, i32 %new) nounwind {
; 32-GOOD-RA-LABEL: test_feed_cmov:
; 32-GOOD-RA: # %bb.0: # %entry
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; 32-GOOD-RA-NEXT: sete %bl
; 32-GOOD-RA-NEXT: calll foo@PLT
; 32-GOOD-RA-NEXT: testb %bl, %bl
; 32-GOOD-RA-NEXT: jne .LBB2_2
; 32-GOOD-RA-NEXT: # %bb.1: # %entry
; 32-GOOD-RA-NEXT: movl %eax, %esi
; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
; 32-GOOD-RA-NEXT: movl %esi, %eax
; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
; X86-GOOD-RA-LABEL: test_feed_cmov:
; X86-GOOD-RA: # %bb.0: # %entry
; X86-GOOD-RA-NEXT: pushl %ebx
; X86-GOOD-RA-NEXT: pushl %esi
; X86-GOOD-RA-NEXT: pushl %eax
; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; X86-GOOD-RA-NEXT: sete %bl
; X86-GOOD-RA-NEXT: calll foo@PLT
; X86-GOOD-RA-NEXT: testb %bl, %bl
; X86-GOOD-RA-NEXT: jne .LBB2_2
; X86-GOOD-RA-NEXT: # %bb.1: # %entry
; X86-GOOD-RA-NEXT: movl %eax, %esi
; X86-GOOD-RA-NEXT: .LBB2_2: # %entry
; X86-GOOD-RA-NEXT: movl %esi, %eax
; X86-GOOD-RA-NEXT: addl $4, %esp
; X86-GOOD-RA-NEXT: popl %esi
; X86-GOOD-RA-NEXT: popl %ebx
; X86-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_feed_cmov:
; 32-FAST-RA: # %bb.0: # %entry
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; 32-FAST-RA-NEXT: sete %bl
; 32-FAST-RA-NEXT: calll foo@PLT
; 32-FAST-RA-NEXT: testb %bl, %bl
; 32-FAST-RA-NEXT: jne .LBB2_2
; 32-FAST-RA-NEXT: # %bb.1: # %entry
; 32-FAST-RA-NEXT: movl %eax, %esi
; 32-FAST-RA-NEXT: .LBB2_2: # %entry
; 32-FAST-RA-NEXT: movl %esi, %eax
; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
; X86-FAST-RA-LABEL: test_feed_cmov:
; X86-FAST-RA: # %bb.0: # %entry
; X86-FAST-RA-NEXT: pushl %ebx
; X86-FAST-RA-NEXT: pushl %esi
; X86-FAST-RA-NEXT: pushl %eax
; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; X86-FAST-RA-NEXT: sete %bl
; X86-FAST-RA-NEXT: calll foo@PLT
; X86-FAST-RA-NEXT: testb %bl, %bl
; X86-FAST-RA-NEXT: jne .LBB2_2
; X86-FAST-RA-NEXT: # %bb.1: # %entry
; X86-FAST-RA-NEXT: movl %eax, %esi
; X86-FAST-RA-NEXT: .LBB2_2: # %entry
; X86-FAST-RA-NEXT: movl %esi, %eax
; X86-FAST-RA-NEXT: addl $4, %esp
; X86-FAST-RA-NEXT: popl %esi
; X86-FAST-RA-NEXT: popl %ebx
; X86-FAST-RA-NEXT: retl
;
; 64-ALL-LABEL: test_feed_cmov:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: pushq %rbp
; 64-ALL-NEXT: pushq %rbx
; 64-ALL-NEXT: pushq %rax
; 64-ALL-NEXT: movl %edx, %ebx
; 64-ALL-NEXT: movl %esi, %eax
; 64-ALL-NEXT: lock cmpxchgl %edx, (%rdi)
; 64-ALL-NEXT: sete %bpl
; 64-ALL-NEXT: callq foo@PLT
; 64-ALL-NEXT: testb %bpl, %bpl
; 64-ALL-NEXT: cmovnel %ebx, %eax
; 64-ALL-NEXT: addq $8, %rsp
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: popq %rbp
; 64-ALL-NEXT: retq
; X64-ALL-LABEL: test_feed_cmov:
; X64-ALL: # %bb.0: # %entry
; X64-ALL-NEXT: pushq %rbp
; X64-ALL-NEXT: pushq %rbx
; X64-ALL-NEXT: pushq %rax
; X64-ALL-NEXT: movl %edx, %ebx
; X64-ALL-NEXT: movl %esi, %eax
; X64-ALL-NEXT: lock cmpxchgl %edx, (%rdi)
; X64-ALL-NEXT: sete %bpl
; X64-ALL-NEXT: callq foo@PLT
; X64-ALL-NEXT: testb %bpl, %bpl
; X64-ALL-NEXT: cmovnel %ebx, %eax
; X64-ALL-NEXT: addq $8, %rsp
; X64-ALL-NEXT: popq %rbx
; X64-ALL-NEXT: popq %rbp
; X64-ALL-NEXT: retq
entry:
%res = cmpxchg ptr %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
; RUN: llc -mtriple=i686 %s -o - | FileCheck --check-prefixes=CHECK,32 %s
; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,64 %s
; RUN: llc -mtriple=i686 %s -o - | FileCheck --check-prefixes=CHECK,X86 %s
; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,X64 %s

;; -fpatchable-function-entry=0 -fcf-protection=branch
define void @f0() "patchable-function-entry"="0" {
; CHECK-LABEL: f0:
; CHECK-NEXT: .Lfunc_begin0:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: # %bb.0:
; 32-NEXT: endbr32
; 64-NEXT: endbr64
; X86-NEXT: endbr32
; X64-NEXT: endbr64
; CHECK-NEXT: ret
; CHECK-NOT: .section __patchable_function_entries
ret void
Expand All @@ -22,16 +22,16 @@ define void @f1() "patchable-function-entry"="1" {
; CHECK-NEXT: .Lfunc_begin1:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: # %bb.0:
; 32-NEXT: endbr32
; 64-NEXT: endbr64
; X86-NEXT: endbr32
; X64-NEXT: endbr64
; CHECK-NEXT: .Lpatch0:
; CHECK-NEXT: nop
; CHECK-NEXT: ret
; CHECK: .section __patchable_function_entries,"awo",@progbits,f1{{$}}
; 32-NEXT: .p2align 2
; 32-NEXT: .long .Lpatch0
; 64-NEXT: .p2align 3
; 64-NEXT: .quad .Lpatch0
; X86-NEXT: .p2align 2
; X86-NEXT: .long .Lpatch0
; X64-NEXT: .p2align 3
; X64-NEXT: .quad .Lpatch0
ret void
}

Expand All @@ -44,17 +44,17 @@ define void @f2_1() "patchable-function-entry"="1" "patchable-function-prefix"="
; CHECK-NEXT: .Lfunc_begin2:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: # %bb.0:
; 32-NEXT: endbr32
; 64-NEXT: endbr64
; X86-NEXT: endbr32
; X64-NEXT: endbr64
; CHECK-NEXT: nop
; CHECK-NEXT: ret
; CHECK: .Lfunc_end2:
; CHECK-NEXT: .size f2_1, .Lfunc_end2-f2_1
; CHECK: .section __patchable_function_entries,"awo",@progbits,f2_1{{$}}
; 32-NEXT: .p2align 2
; 32-NEXT: .long .Ltmp0
; 64-NEXT: .p2align 3
; 64-NEXT: .quad .Ltmp0
; X86-NEXT: .p2align 2
; X86-NEXT: .long .Ltmp0
; X64-NEXT: .p2align 3
; X64-NEXT: .quad .Ltmp0
ret void
}

Expand All @@ -74,10 +74,10 @@ define internal void @f1i() "patchable-function-entry"="1" {
;; Another basic block has ENDBR, but it doesn't affect our decision to not create .Lpatch0
; CHECK: endbr
; CHECK: .section __patchable_function_entries,"awo",@progbits,f1i{{$}}
; 32-NEXT: .p2align 2
; 32-NEXT: .long .Lfunc_begin3
; 64-NEXT: .p2align 3
; 64-NEXT: .quad .Lfunc_begin3
; X86-NEXT: .p2align 2
; X86-NEXT: .long .Lfunc_begin3
; X64-NEXT: .p2align 3
; X64-NEXT: .quad .Lfunc_begin3
entry:
tail call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
ret void
Expand All @@ -93,8 +93,8 @@ entry:
; CHECK-NEXT: .Lfunc_begin{{.*}}:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: # %bb.0:
; 32-NEXT: endbr32
; 64-NEXT: endbr64
; X86-NEXT: endbr32
; X64-NEXT: endbr64
; CHECK-NEXT: nop
; CHECK-NEXT: ret
define void @sanitize_function(ptr noundef %x) "patchable-function-prefix"="1" "patchable-function-entry"="1" !func_sanitize !1 {
Expand Down
62 changes: 31 additions & 31 deletions llvm/test/CodeGen/X86/patchable-function-entry.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=i386 %s -o - | FileCheck --check-prefixes=CHECK,32 %s
; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,64 %s
; RUN: llc -mtriple=x86_64 -function-sections %s -o - | FileCheck --check-prefixes=CHECK,64 %s
; RUN: llc -mtriple=i386 %s -o - | FileCheck --check-prefixes=CHECK,X86 %s
; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,X64 %s
; RUN: llc -mtriple=x86_64 -function-sections %s -o - | FileCheck --check-prefixes=CHECK,X64 %s

define void @f0() "patchable-function-entry"="0" {
; CHECK-LABEL: f0:
Expand All @@ -17,10 +17,10 @@ define void @f1() "patchable-function-entry"="1" {
; CHECK: nop
; CHECK-NEXT: ret
; CHECK: .section __patchable_function_entries,"awo",@progbits,f1{{$}}
; 32: .p2align 2
; 32-NEXT: .long .Lfunc_begin1
; 64: .p2align 3
; 64-NEXT: .quad .Lfunc_begin1
; X86: .p2align 2
; X86-NEXT: .long .Lfunc_begin1
; X64: .p2align 3
; X64-NEXT: .quad .Lfunc_begin1
ret void
}

Expand All @@ -31,46 +31,46 @@ define void @f1() "patchable-function-entry"="1" {
; f2 carries "patchable-function-entry"="2". The expectations below are:
;   * a single `xchgw %ax, %ax` after the function-begin label on both the
;     i386 and the x86_64 runs, followed directly by the return;
;   * a __patchable_function_entries record that refers back to
;     .Lfunc_begin2, emitted as `.p2align 2` + `.long` on i386 and as
;     `.p2align 3` + `.quad` on x86_64.
; NOTE(review): the numeric (32/64) and the X86/X64 prefix families state the
; same expectations here - presumably one family is left over from the prefix
; rename; confirm against the RUN lines before dropping either.
define void @f2() "patchable-function-entry"="2" {
; CHECK-LABEL: f2:
; CHECK-NEXT: .Lfunc_begin2:
; 32: xchgw %ax, %ax
; 64: xchgw %ax, %ax
; X86: xchgw %ax, %ax
; X64: xchgw %ax, %ax
; CHECK-NEXT: ret
; CHECK: .section __patchable_function_entries,"awo",@progbits,f2{{$}}
; 32: .p2align 2
; 32-NEXT: .long .Lfunc_begin2
; 64: .p2align 3
; 64-NEXT: .quad .Lfunc_begin2
; X86: .p2align 2
; X86-NEXT: .long .Lfunc_begin2
; X64: .p2align 3
; X64-NEXT: .quad .Lfunc_begin2
ret void
}

; f3 is placed in a comdat of the same name and carries
; "patchable-function-entry"="3". The expectations below are:
;   * entry padding of `xchgw %ax, %ax` followed by `nop` on i386, and a
;     single `nopl (%rax)` on x86_64;
;   * a __patchable_function_entries section whose flags include "G" and whose
;     directive names f3 twice before `comdat` (presumably the link-order
;     symbol and the comdat group - confirm against the assembler docs);
;   * a record that refers back to .Lfunc_begin3, `.long` on i386 and `.quad`
;     on x86_64.
; NOTE(review): the numeric (32/64) and the X86/X64 prefix families state the
; same expectations here - presumably one family is left over from the prefix
; rename; confirm against the RUN lines before dropping either.
$f3 = comdat any
define void @f3() "patchable-function-entry"="3" comdat {
; CHECK-LABEL: f3:
; CHECK-NEXT: .Lfunc_begin3:
; 32: xchgw %ax, %ax
; 32-NEXT: nop
; 64: nopl (%rax)
; X86: xchgw %ax, %ax
; X86-NEXT: nop
; X64: nopl (%rax)
; CHECK: ret
; CHECK: .section __patchable_function_entries,"awoG",@progbits,f3,f3,comdat{{$}}
; 32: .p2align 2
; 32-NEXT: .long .Lfunc_begin3
; 64: .p2align 3
; 64-NEXT: .quad .Lfunc_begin3
; X86: .p2align 2
; X86-NEXT: .long .Lfunc_begin3
; X64: .p2align 3
; X64-NEXT: .quad .Lfunc_begin3
ret void
}

; f5 is placed in a comdat of the same name and carries
; "patchable-function-entry"="5". The expectations below are:
;   * entry padding of two `xchgw %ax, %ax` followed by `nop` on i386, and a
;     single `nopl 8(%rax,%rax)` on x86_64, with the return right after it;
;   * a comdat-flagged ("awoG") __patchable_function_entries section for f5;
;   * a record that refers back to .Lfunc_begin4 (f5 is the fifth function in
;     the file, hence index 4), `.long` on i386 and `.quad` on x86_64.
; NOTE(review): the numeric (32/64) and the X86/X64 prefix families state the
; same expectations here - presumably one family is left over from the prefix
; rename; confirm against the RUN lines before dropping either.
$f5 = comdat any
define void @f5() "patchable-function-entry"="5" comdat {
; CHECK-LABEL: f5:
; CHECK-NEXT: .Lfunc_begin4:
; 32-COUNT-2: xchgw %ax, %ax
; 32-NEXT: nop
; 64: nopl 8(%rax,%rax)
; X86-COUNT-2: xchgw %ax, %ax
; X86-NEXT: nop
; X64: nopl 8(%rax,%rax)
; CHECK-NEXT: ret
; CHECK: .section __patchable_function_entries,"awoG",@progbits,f5,f5,comdat{{$}}
; 32: .p2align 2
; 32-NEXT: .long .Lfunc_begin4
; 64: .p2align 3
; 64-NEXT: .quad .Lfunc_begin4
; X86: .p2align 2
; X86-NEXT: .long .Lfunc_begin4
; X64: .p2align 3
; X64-NEXT: .quad .Lfunc_begin4
ret void
}

Expand All @@ -91,10 +91,10 @@ define void @f3_2() "patchable-function-entry"="1" "patchable-function-prefix"="
; CHECK: .Lfunc_end5:
; CHECK-NEXT: .size f3_2, .Lfunc_end5-f3_2
; CHECK: .section __patchable_function_entries,"awo",@progbits,f3_2{{$}}
; 32: .p2align 2
; 32-NEXT: .long .Ltmp0
; 64: .p2align 3
; 64-NEXT: .quad .Ltmp0
; X86: .p2align 2
; X86-NEXT: .long .Ltmp0
; X64: .p2align 3
; X64-NEXT: .quad .Ltmp0
%frame = alloca i8, i32 16
ret void
}
134 changes: 67 additions & 67 deletions llvm/test/CodeGen/X86/patchable-prologue.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
; RUN: llc -verify-machineinstrs -filetype=obj -o - -mtriple=x86_64-apple-macosx < %s | llvm-objdump --no-print-imm-hex --triple=x86_64-apple-macosx -d - | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx < %s | FileCheck %s --check-prefix=CHECK-ALIGN
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386 < %s | FileCheck %s --check-prefixes=32,32CFI,XCHG
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc < %s | FileCheck %s --check-prefixes=32,MOV
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium3 < %s | FileCheck %s --check-prefixes=32,MOV
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium4 < %s | FileCheck %s --check-prefixes=32,XCHG
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=64
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386 < %s | FileCheck %s --check-prefixes=X86,X86CFI,XCHG
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc < %s | FileCheck %s --check-prefixes=X86,MOV
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium3 < %s | FileCheck %s --check-prefixes=X86,MOV
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium4 < %s | FileCheck %s --check-prefixes=X86,XCHG
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=X64

declare void @callee(ptr)

Expand All @@ -15,18 +15,18 @@ define void @f0() "patchable-function"="prologue-short-redirect" {
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _f0:

; 32: f0:
; 32CFI-NEXT: .cfi_startproc
; 32-NEXT: # %bb.0:
; X86: f0:
; X86CFI-NEXT: .cfi_startproc
; X86-NEXT: # %bb.0:
; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
; 32-NEXT: retl
; X86-NEXT: retl

; X64: f0:
; X64-NEXT: # %bb.0:
; X64-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
; X64-NEXT: retq

; 64: f0:
; 64-NEXT: # %bb.0:
; 64-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
; 64-NEXT: retq

ret void
}

Expand All @@ -38,19 +38,19 @@ define void @f1() "patchable-function"="prologue-short-redirect" "frame-pointer"
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _f1:

; 32: f1:
; 32CFI-NEXT: .cfi_startproc
; 32-NEXT: # %bb.0:
; X86: f1:
; X86CFI-NEXT: .cfi_startproc
; X86-NEXT: # %bb.0:
; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
; 32-NEXT: pushl %ebp

; 64: f1:
; 64-NEXT: .seh_proc f1
; 64-NEXT: # %bb.0:
; 64-NEXT: xchgw %ax, %ax
; 64-NEXT: pushq %rbp
; X86-NEXT: pushl %ebp

; X64: f1:
; X64-NEXT: .seh_proc f1
; X64-NEXT: # %bb.0:
; X64-NEXT: xchgw %ax, %ax
; X64-NEXT: pushq %rbp

ret void
}

Expand All @@ -61,18 +61,18 @@ define void @f2() "patchable-function"="prologue-short-redirect" {
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _f2:

; 32: f2:
; 32CFI-NEXT: .cfi_startproc
; 32-NEXT: # %bb.0:
; X86: f2:
; X86CFI-NEXT: .cfi_startproc
; X86-NEXT: # %bb.0:
; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
; 32-NEXT: pushl %ebp
; X86-NEXT: pushl %ebp

; X64: f2:
; X64-NEXT: .seh_proc f2
; X64-NEXT: # %bb.0:
; X64-NEXT: subq $200, %rsp

; 64: f2:
; 64-NEXT: .seh_proc f2
; 64-NEXT: # %bb.0:
; 64-NEXT: subq $200, %rsp

%ptr = alloca i64, i32 20
call void @callee(ptr %ptr)
ret void
Expand All @@ -85,17 +85,17 @@ define void @f3() "patchable-function"="prologue-short-redirect" optsize {
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _f3:

; 32: f3:
; 32CFI-NEXT: .cfi_startproc
; 32-NEXT: # %bb.0:
; X86: f3:
; X86CFI-NEXT: .cfi_startproc
; X86-NEXT: # %bb.0:
; XCHG-NEXT: xchgw %ax, %ax
; MOV-NEXT: movl %edi, %edi
; 32-NEXT: retl
; X86-NEXT: retl

; 64: f3:
; 64-NEXT: # %bb.0:
; 64-NEXT: xchgw %ax, %ax
; 64-NEXT: retq
; X64: f3:
; X64-NEXT: # %bb.0:
; X64-NEXT: xchgw %ax, %ax
; X64-NEXT: retq

ret void
}
Expand All @@ -105,16 +105,16 @@ define void @f3() "patchable-function"="prologue-short-redirect" optsize {
; patchable one.
; CHECK-LABEL: f4{{>?}}:
; CHECK-NEXT: 8b 0c 37 movl (%rdi,%rsi), %ecx
; 32: f4:
; 32CFI-NEXT: .cfi_startproc
; 32-NEXT: # %bb.0:
; X86: f4:
; X86CFI-NEXT: .cfi_startproc
; X86-NEXT: # %bb.0:
; XCHG-NEXT: xchgw %ax, %ax
; MOV-NEXT: movl %edi, %edi
; 32-NEXT: pushl %ebx
; X86-NEXT: pushl %ebx

; 64: f4:
; 64-NEXT: # %bb.0:
; 64-NOT: xchgw %ax, %ax
; X64: f4:
; X64-NEXT: # %bb.0:
; X64-NOT: xchgw %ax, %ax

define i32 @f4(ptr %arg1, i64 %arg2, i32 %arg3) "patchable-function"="prologue-short-redirect" {
bb:
Expand Down Expand Up @@ -143,15 +143,15 @@ bb21:
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _emptyfunc:

; 32: emptyfunc:
; 32CFI-NEXT: .cfi_startproc
; 32-NEXT: # %bb.0:
; X86: emptyfunc:
; X86CFI-NEXT: .cfi_startproc
; X86-NEXT: # %bb.0:
; XCHG-NEXT: xchgw %ax, %ax
; MOV-NEXT: movl %edi, %edi

; 64: emptyfunc:
; 64-NEXT: # %bb.0:
; 64-NEXT: xchgw %ax, %ax
; X64: emptyfunc:
; X64-NEXT: # %bb.0:
; X64-NEXT: xchgw %ax, %ax

; From code: int emptyfunc() {}
define i32 @emptyfunc() "patchable-function"="prologue-short-redirect" {
Expand All @@ -169,15 +169,15 @@ define i32 @emptyfunc() "patchable-function"="prologue-short-redirect" {
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _jmp_to_start:

; 32: jmp_to_start:
; 32CFI-NEXT: .cfi_startproc
; 32-NEXT: # %bb.0:
; X86: jmp_to_start:
; X86CFI-NEXT: .cfi_startproc
; X86-NEXT: # %bb.0:
; XCHG-NEXT: xchgw %ax, %ax
; MOV-NEXT: movl %edi, %edi

; 64: jmp_to_start:
; 64-NEXT: # %bb.0:
; 64-NEXT: xchgw %ax, %ax
; X64: jmp_to_start:
; X64-NEXT: # %bb.0:
; X64-NEXT: xchgw %ax, %ax

define dso_local void @jmp_to_start(ptr inreg nocapture noundef %b) "patchable-function"="prologue-short-redirect" {
entry:
Expand All @@ -198,12 +198,12 @@ do.end: ; preds = %do.body
; Test that inline asm is properly hotpatched. We currently don't examine the
; asm instruction when printing it, thus we always emit patching NOPs.

; 64: inline_asm:
; 64-NEXT: # %bb.0:
; 64-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
; 64-NEXT: #APP
; 64-NEXT: int3 # encoding: [0xcc]
; 64-NEXT: #NO_APP
; X64: inline_asm:
; X64-NEXT: # %bb.0:
; X64-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
; X64-NEXT: #APP
; X64-NEXT: int3 # encoding: [0xcc]
; X64-NEXT: #NO_APP

define dso_local void @inline_asm() "patchable-function"="prologue-short-redirect" {
entry:
Expand Down
138 changes: 69 additions & 69 deletions llvm/test/CodeGen/X86/prefetch.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prfchw | FileCheck %s -check-prefix=PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=SSE
; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW
; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=3DNOW
; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prfchw | FileCheck %s -check-prefix=X86-PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mattr=+prfchw | FileCheck %s -check-prefix=X86-PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=X86-PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=X86-PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=X86-PREFETCHWT1
; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=X86-PREFETCHWT1
; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=X86-PREFETCHWT1
; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=X86-3DNOW
; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=X86-3DNOW

; Rules:
; 3dnow by itself gets you just the single prefetch instruction with no hints
Expand All @@ -22,67 +22,67 @@
; rdar://10538297

; t issues eight llvm.prefetch calls on the same pointer: four with the
; read/write operand set to 0 (read) and four with it set to 1 (write), each
; group walking the locality operand through 1, 2, 3, 0. The last operand is
; always 1 (data cache, per the LangRef description of the intrinsic).
;
; What the assertions below pin down, per feature set:
;   * plain sse/avx       - reads and writes both lower to
;                           prefetcht2, prefetcht1, prefetcht0, prefetchnta;
;   * prfchw available    - reads as above, every write becomes prefetchw;
;   * prefetchwt1         - reads as above, writes become prefetchwt1 except
;                           the locality-3 write, which becomes prefetchw;
;   * 3dnow               - every read is a plain prefetch, every write is
;                           prefetchw.
;
; The call sequence used to be present twice, the second copy sitting after
; `ret void`. Instructions after a terminator open a new, unnamed basic block
; that nothing branches to, so that copy was dead code; it has been removed
; and the expected assembly is unaffected.
define void @t(ptr %ptr) nounwind {
; SSE-LABEL: t:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT: prefetcht2 (%eax)
; SSE-NEXT: prefetcht1 (%eax)
; SSE-NEXT: prefetcht0 (%eax)
; SSE-NEXT: prefetchnta (%eax)
; SSE-NEXT: prefetcht2 (%eax)
; SSE-NEXT: prefetcht1 (%eax)
; SSE-NEXT: prefetcht0 (%eax)
; SSE-NEXT: prefetchnta (%eax)
; SSE-NEXT: retl
; X86-SSE-LABEL: t:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: prefetcht2 (%eax)
; X86-SSE-NEXT: prefetcht1 (%eax)
; X86-SSE-NEXT: prefetcht0 (%eax)
; X86-SSE-NEXT: prefetchnta (%eax)
; X86-SSE-NEXT: prefetcht2 (%eax)
; X86-SSE-NEXT: prefetcht1 (%eax)
; X86-SSE-NEXT: prefetcht0 (%eax)
; X86-SSE-NEXT: prefetchnta (%eax)
; X86-SSE-NEXT: retl
;
; PRFCHWSSE-LABEL: t:
; PRFCHWSSE: # %bb.0: # %entry
; PRFCHWSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; PRFCHWSSE-NEXT: prefetcht2 (%eax)
; PRFCHWSSE-NEXT: prefetcht1 (%eax)
; PRFCHWSSE-NEXT: prefetcht0 (%eax)
; PRFCHWSSE-NEXT: prefetchnta (%eax)
; PRFCHWSSE-NEXT: prefetchw (%eax)
; PRFCHWSSE-NEXT: prefetchw (%eax)
; PRFCHWSSE-NEXT: prefetchw (%eax)
; PRFCHWSSE-NEXT: prefetchw (%eax)
; PRFCHWSSE-NEXT: retl
; X86-PRFCHWSSE-LABEL: t:
; X86-PRFCHWSSE: # %bb.0: # %entry
; X86-PRFCHWSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-PRFCHWSSE-NEXT: prefetcht2 (%eax)
; X86-PRFCHWSSE-NEXT: prefetcht1 (%eax)
; X86-PRFCHWSSE-NEXT: prefetcht0 (%eax)
; X86-PRFCHWSSE-NEXT: prefetchnta (%eax)
; X86-PRFCHWSSE-NEXT: prefetchw (%eax)
; X86-PRFCHWSSE-NEXT: prefetchw (%eax)
; X86-PRFCHWSSE-NEXT: prefetchw (%eax)
; X86-PRFCHWSSE-NEXT: prefetchw (%eax)
; X86-PRFCHWSSE-NEXT: retl
;
; PREFETCHWT1-LABEL: t:
; PREFETCHWT1: # %bb.0: # %entry
; PREFETCHWT1-NEXT: movl {{[0-9]+}}(%esp), %eax
; PREFETCHWT1-NEXT: prefetcht2 (%eax)
; PREFETCHWT1-NEXT: prefetcht1 (%eax)
; PREFETCHWT1-NEXT: prefetcht0 (%eax)
; PREFETCHWT1-NEXT: prefetchnta (%eax)
; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
; PREFETCHWT1-NEXT: prefetchw (%eax)
; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
; PREFETCHWT1-NEXT: retl
; X86-PREFETCHWT1-LABEL: t:
; X86-PREFETCHWT1: # %bb.0: # %entry
; X86-PREFETCHWT1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-PREFETCHWT1-NEXT: prefetcht2 (%eax)
; X86-PREFETCHWT1-NEXT: prefetcht1 (%eax)
; X86-PREFETCHWT1-NEXT: prefetcht0 (%eax)
; X86-PREFETCHWT1-NEXT: prefetchnta (%eax)
; X86-PREFETCHWT1-NEXT: prefetchwt1 (%eax)
; X86-PREFETCHWT1-NEXT: prefetchwt1 (%eax)
; X86-PREFETCHWT1-NEXT: prefetchw (%eax)
; X86-PREFETCHWT1-NEXT: prefetchwt1 (%eax)
; X86-PREFETCHWT1-NEXT: retl
;
; 3DNOW-LABEL: t:
; 3DNOW: # %bb.0: # %entry
; 3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; 3DNOW-NEXT: prefetch (%eax)
; 3DNOW-NEXT: prefetch (%eax)
; 3DNOW-NEXT: prefetch (%eax)
; 3DNOW-NEXT: prefetch (%eax)
; 3DNOW-NEXT: prefetchw (%eax)
; 3DNOW-NEXT: prefetchw (%eax)
; 3DNOW-NEXT: prefetchw (%eax)
; 3DNOW-NEXT: prefetchw (%eax)
; 3DNOW-NEXT: retl
; X86-3DNOW-LABEL: t:
; X86-3DNOW: # %bb.0: # %entry
; X86-3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-3DNOW-NEXT: prefetch (%eax)
; X86-3DNOW-NEXT: prefetch (%eax)
; X86-3DNOW-NEXT: prefetch (%eax)
; X86-3DNOW-NEXT: prefetch (%eax)
; X86-3DNOW-NEXT: prefetchw (%eax)
; X86-3DNOW-NEXT: prefetchw (%eax)
; X86-3DNOW-NEXT: prefetchw (%eax)
; X86-3DNOW-NEXT: prefetchw (%eax)
; X86-3DNOW-NEXT: retl
entry:
  ; Read prefetches (second operand 0); locality 1, 2, 3, then 0.
  tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 1, i32 1 )
  tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 2, i32 1 )
  tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 3, i32 1 )
  tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 0, i32 1 )
  ; Write prefetches (second operand 1); same locality order.
  tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 1, i32 1 )
  tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 2, i32 1 )
  tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 3, i32 1 )
  tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 0, i32 1 )
  ret void
}

declare void @llvm.prefetch(ptr, i32, i32, i32) nounwind