58 changes: 29 additions & 29 deletions llvm/test/CodeGen/X86/bmi2-schedule.ll
@@ -22,10 +22,10 @@ define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) {
;
; ZNVER1-LABEL: test_bzhi_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: bzhil %edi, (%rdx), %ecx
; ZNVER1-NEXT: bzhil %edi, %esi, %eax
; ZNVER1-NEXT: bzhil %edi, (%rdx), %ecx # sched: [5:0.50]
; ZNVER1-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a2
%2 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %1, i32 %a0)
%3 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a1, i32 %a0)
@@ -51,10 +51,10 @@ define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) {
;
; ZNVER1-LABEL: test_bzhi_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: bzhiq %rdi, (%rdx), %rcx
; ZNVER1-NEXT: bzhiq %rdi, %rsi, %rax
; ZNVER1-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [5:0.50]
; ZNVER1-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.25]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a2
%2 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %1, i64 %a0)
%3 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a1, i64 %a0)
@@ -88,10 +88,10 @@ define i64 @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movq %rdx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: movq %rdi, %rdx # sched: [1:0.25]
; ZNVER1-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:2.00]
; ZNVER1-NEXT: mulxq (%rax), %rdx, %rax # sched: [8:2.00]
; ZNVER1-NEXT: mulxq %rsi, %rsi, %rcx # sched: [3:1.00]
; ZNVER1-NEXT: mulxq (%rax), %rdx, %rax # sched: [8:1.00]
; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a2
%2 = zext i64 %a0 to i128
%3 = zext i64 %a1 to i128
@@ -123,10 +123,10 @@ define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) {
;
; ZNVER1-LABEL: test_pdep_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: pdepl (%rdx), %edi, %ecx
; ZNVER1-NEXT: pdepl %esi, %edi, %eax
; ZNVER1-NEXT: pdepl (%rdx), %edi, %ecx # sched: [100:?]
; ZNVER1-NEXT: pdepl %esi, %edi, %eax # sched: [100:?]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a2
%2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %a0, i32 %1)
%3 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %a0, i32 %a1)
@@ -152,10 +152,10 @@ define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) {
;
; ZNVER1-LABEL: test_pdep_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: pdepq (%rdx), %rdi, %rcx
; ZNVER1-NEXT: pdepq %rsi, %rdi, %rax
; ZNVER1-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [100:?]
; ZNVER1-NEXT: pdepq %rsi, %rdi, %rax # sched: [100:?]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a2
%2 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %a0, i64 %1)
%3 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %a0, i64 %a1)
@@ -181,10 +181,10 @@ define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) {
;
; ZNVER1-LABEL: test_pext_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: pextl (%rdx), %edi, %ecx
; ZNVER1-NEXT: pextl %esi, %edi, %eax
; ZNVER1-NEXT: pextl (%rdx), %edi, %ecx # sched: [100:?]
; ZNVER1-NEXT: pextl %esi, %edi, %eax # sched: [100:?]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a2
%2 = tail call i32 @llvm.x86.bmi.pext.32(i32 %a0, i32 %1)
%3 = tail call i32 @llvm.x86.bmi.pext.32(i32 %a0, i32 %a1)
@@ -210,10 +210,10 @@ define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) {
;
; ZNVER1-LABEL: test_pext_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: pextq (%rdx), %rdi, %rcx
; ZNVER1-NEXT: pextq %rsi, %rdi, %rax
; ZNVER1-NEXT: pextq (%rdx), %rdi, %rcx # sched: [100:?]
; ZNVER1-NEXT: pextq %rsi, %rdi, %rax # sched: [100:?]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a2
%2 = tail call i64 @llvm.x86.bmi.pext.64(i64 %a0, i64 %1)
%3 = tail call i64 @llvm.x86.bmi.pext.64(i64 %a0, i64 %a1)
@@ -242,7 +242,7 @@ define i32 @test_rorx_i32(i32 %a0, i32 %a1, i32 *%a2) {
; ZNVER1-NEXT: rorxl $5, (%rdx), %eax # sched: [5:0.50]
; ZNVER1-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a2
%2 = lshr i32 %a0, 5
%3 = shl i32 %a0, 27
@@ -274,7 +274,7 @@ define i64 @test_rorx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1-NEXT: rorxq $5, (%rdx), %rax # sched: [5:0.50]
; ZNVER1-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.25]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a2
%2 = lshr i64 %a0, 5
%3 = shl i64 %a0, 59
@@ -306,7 +306,7 @@ define i32 @test_sarx_i32(i32 %a0, i32 %a1, i32 *%a2) {
; ZNVER1-NEXT: sarxl %esi, (%rdx), %eax # sched: [5:0.50]
; ZNVER1-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a2
%2 = ashr i32 %a0, %a1
%3 = ashr i32 %1, %a1
@@ -334,7 +334,7 @@ define i64 @test_sarx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1-NEXT: sarxq %rsi, (%rdx), %rax # sched: [5:0.50]
; ZNVER1-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.25]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a2
%2 = ashr i64 %a0, %a1
%3 = ashr i64 %1, %a1
@@ -362,7 +362,7 @@ define i32 @test_shlx_i32(i32 %a0, i32 %a1, i32 *%a2) {
; ZNVER1-NEXT: shlxl %esi, (%rdx), %eax # sched: [5:0.50]
; ZNVER1-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a2
%2 = shl i32 %a0, %a1
%3 = shl i32 %1, %a1
@@ -390,7 +390,7 @@ define i64 @test_shlx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1-NEXT: shlxq %rsi, (%rdx), %rax # sched: [5:0.50]
; ZNVER1-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.25]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a2
%2 = shl i64 %a0, %a1
%3 = shl i64 %1, %a1
@@ -418,7 +418,7 @@ define i32 @test_shrx_i32(i32 %a0, i32 %a1, i32 *%a2) {
; ZNVER1-NEXT: shrxl %esi, (%rdx), %eax # sched: [5:0.50]
; ZNVER1-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a2
%2 = lshr i32 %a0, %a1
%3 = lshr i32 %1, %a1
@@ -446,7 +446,7 @@ define i64 @test_shrx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1-NEXT: shrxq %rsi, (%rdx), %rax # sched: [5:0.50]
; ZNVER1-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.25]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a2
%2 = lshr i64 %a0, %a1
%3 = lshr i64 %1, %a1
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/X86/f16c-schedule.ll
@@ -44,10 +44,10 @@ define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
;
; ZNVER1-LABEL: test_vcvtph2ps_128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [100:?]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <8 x i16>, <8 x i16> *%a1
%2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
%3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
@@ -94,10 +94,10 @@ define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
;
; ZNVER1-LABEL: test_vcvtph2ps_256:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [100:?]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [100:?]
; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <8 x i16>, <8 x i16> *%a1
%2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
%3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
@@ -139,9 +139,9 @@ define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16>
;
; ZNVER1-LABEL: test_vcvtps2ph_128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [12:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
%2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0)
%3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -187,10 +187,10 @@ define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16>
;
; ZNVER1-LABEL: test_vcvtps2ph_256:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [12:1.00]
; ZNVER1-NEXT: vzeroupper
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [100:?]
; ZNVER1-NEXT: vzeroupper # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
%2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0)
store <8 x i16> %2, <8 x i16> *%a2
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/X86/lea32-schedule.ll
@@ -57,7 +57,7 @@ define i32 @test_lea_offset(i32) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -24(%rdi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = add nsw i32 %0, -24
ret i32 %2
}
@@ -109,7 +109,7 @@ define i32 @test_lea_offset_big(i32) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 1024(%rdi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = add nsw i32 %0, 1024
ret i32 %2
}
@@ -169,7 +169,7 @@ define i32 @test_lea_add(i32, i32) {
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = add nsw i32 %1, %0
ret i32 %3
}
@@ -231,7 +231,7 @@ define i32 @test_lea_add_offset(i32, i32) {
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = add i32 %0, 16
%4 = add i32 %3, %1
ret i32 %4
@@ -297,7 +297,7 @@ define i32 @test_lea_add_offset_big(i32, i32) {
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = add i32 %0, -4096
%4 = add i32 %3, %1
ret i32 %4
@@ -350,7 +350,7 @@ define i32 @test_lea_mul(i32) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = mul nsw i32 %0, 3
ret i32 %2
}
@@ -405,7 +405,7 @@ define i32 @test_lea_mul_offset(i32) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = mul nsw i32 %0, 3
%3 = add nsw i32 %2, -32
ret i32 %3
@@ -464,7 +464,7 @@ define i32 @test_lea_mul_offset_big(i32) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = mul nsw i32 %0, 9
%3 = add nsw i32 %2, 10000
ret i32 %3
@@ -524,7 +524,7 @@ define i32 @test_lea_add_scale(i32, i32) {
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = shl i32 %1, 1
%4 = add nsw i32 %3, %0
ret i32 %4
@@ -587,7 +587,7 @@ define i32 @test_lea_add_scale_offset(i32, i32) {
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = shl i32 %1, 2
%4 = add i32 %0, 96
%5 = add i32 %4, %3
@@ -654,7 +654,7 @@ define i32 @test_lea_add_scale_offset_big(i32, i32) {
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = shl i32 %1, 3
%4 = add i32 %0, -1200
%5 = add i32 %4, %3
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/X86/lea64-schedule.ll
@@ -50,7 +50,7 @@ define i64 @test_lea_offset(i64) {
; ZNVER1-LABEL: test_lea_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -24(%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = add nsw i64 %0, -24
ret i64 %2
}
@@ -95,7 +95,7 @@ define i64 @test_lea_offset_big(i64) {
; ZNVER1-LABEL: test_lea_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = add nsw i64 %0, 1024
ret i64 %2
}
@@ -141,7 +141,7 @@ define i64 @test_lea_add(i64, i64) {
; ZNVER1-LABEL: test_lea_add:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = add nsw i64 %1, %0
ret i64 %3
}
@@ -189,7 +189,7 @@ define i64 @test_lea_add_offset(i64, i64) {
; ZNVER1-LABEL: test_lea_add_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = add i64 %0, 16
%4 = add i64 %3, %1
ret i64 %4
@@ -241,7 +241,7 @@ define i64 @test_lea_add_offset_big(i64, i64) {
; ZNVER1-LABEL: test_lea_add_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = add i64 %0, -4096
%4 = add i64 %3, %1
ret i64 %4
@@ -287,7 +287,7 @@ define i64 @test_lea_mul(i64) {
; ZNVER1-LABEL: test_lea_mul:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = mul nsw i64 %0, 3
ret i64 %2
}
@@ -335,7 +335,7 @@ define i64 @test_lea_mul_offset(i64) {
; ZNVER1-LABEL: test_lea_mul_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = mul nsw i64 %0, 3
%3 = add nsw i64 %2, -32
ret i64 %3
@@ -387,7 +387,7 @@ define i64 @test_lea_mul_offset_big(i64) {
; ZNVER1-LABEL: test_lea_mul_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%2 = mul nsw i64 %0, 9
%3 = add nsw i64 %2, 10000
ret i64 %3
@@ -433,7 +433,7 @@ define i64 @test_lea_add_scale(i64, i64) {
; ZNVER1-LABEL: test_lea_add_scale:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = shl i64 %1, 1
%4 = add nsw i64 %3, %0
ret i64 %4
@@ -482,7 +482,7 @@ define i64 @test_lea_add_scale_offset(i64, i64) {
; ZNVER1-LABEL: test_lea_add_scale_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = shl i64 %1, 2
%4 = add i64 %0, 96
%5 = add i64 %4, %3
@@ -535,7 +535,7 @@ define i64 @test_lea_add_scale_offset_big(i64, i64) {
; ZNVER1-LABEL: test_lea_add_scale_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%3 = shl i64 %1, 3
%4 = add i64 %0, -1200
%5 = add i64 %4, %3
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/X86/lzcnt-schedule.ll
@@ -33,11 +33,11 @@ define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) {
;
; ZNVER1-LABEL: test_ctlz_i16:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: lzcntw (%rsi), %cx
; ZNVER1-NEXT: lzcntw %di, %ax
; ZNVER1-NEXT: lzcntw (%rsi), %cx # sched: [6:0.50]
; ZNVER1-NEXT: lzcntw %di, %ax # sched: [2:0.25]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i16, i16 *%a1
%2 = tail call i16 @llvm.ctlz.i16( i16 %1, i1 false )
%3 = tail call i16 @llvm.ctlz.i16( i16 %a0, i1 false )
@@ -70,10 +70,10 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) {
;
; ZNVER1-LABEL: test_ctlz_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: lzcntl (%rsi), %ecx
; ZNVER1-NEXT: lzcntl %edi, %eax
; ZNVER1-NEXT: lzcntl (%rsi), %ecx # sched: [6:0.50]
; ZNVER1-NEXT: lzcntl %edi, %eax # sched: [2:0.25]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a1
%2 = tail call i32 @llvm.ctlz.i32( i32 %1, i1 false )
%3 = tail call i32 @llvm.ctlz.i32( i32 %a0, i1 false )
@@ -106,10 +106,10 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) {
;
; ZNVER1-LABEL: test_ctlz_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: lzcntq (%rsi), %rcx
; ZNVER1-NEXT: lzcntq %rdi, %rax
; ZNVER1-NEXT: lzcntq (%rsi), %rcx # sched: [6:0.50]
; ZNVER1-NEXT: lzcntq %rdi, %rax # sched: [2:0.25]
; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a1
%2 = tail call i64 @llvm.ctlz.i64( i64 %1, i1 false )
%3 = tail call i64 @llvm.ctlz.i64( i64 %a0, i1 false )
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/movbe-schedule.ll
@@ -46,8 +46,8 @@ define i16 @test_ctlz_i16(i16 *%a0, i16 %a1, i16 *%a2) {
; ZNVER1-LABEL: test_ctlz_i16:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movbew (%rdi), %ax # sched: [5:0.50]
; ZNVER1-NEXT: movbew %si, (%rdx) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: movbew %si, (%rdx) # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i16, i16 *%a0
%2 = tail call i16 @llvm.bswap.i16( i16 %1 )
%3 = tail call i16 @llvm.bswap.i16( i16 %a1 )
@@ -94,8 +94,8 @@ define i32 @test_ctlz_i32(i32 *%a0, i32 %a1, i32 *%a2) {
; ZNVER1-LABEL: test_ctlz_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movbel (%rdi), %eax # sched: [5:0.50]
; ZNVER1-NEXT: movbel %esi, (%rdx) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: movbel %esi, (%rdx) # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a0
%2 = tail call i32 @llvm.bswap.i32( i32 %1 )
%3 = tail call i32 @llvm.bswap.i32( i32 %a1 )
@@ -142,8 +142,8 @@ define i64 @test_ctlz_i64(i64 *%a0, i64 %a1, i64 *%a2) {
; ZNVER1-LABEL: test_ctlz_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movbeq (%rdi), %rax # sched: [5:0.50]
; ZNVER1-NEXT: movbeq %rsi, (%rdx) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: movbeq %rsi, (%rdx) # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a0
%2 = tail call i64 @llvm.bswap.i64( i64 %1 )
%3 = tail call i64 @llvm.bswap.i64( i64 %a1 )
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/popcnt-schedule.ll
@@ -57,7 +57,7 @@ define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
; ZNVER1-NEXT: popcntw %di, %ax # sched: [3:1.00]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i16, i16 *%a1
%2 = tail call i16 @llvm.ctpop.i16( i16 %1 )
%3 = tail call i16 @llvm.ctpop.i16( i16 %a0 )
@@ -107,7 +107,7 @@ define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
; ZNVER1-NEXT: popcntl (%rsi), %ecx # sched: [10:1.00]
; ZNVER1-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i32, i32 *%a1
%2 = tail call i32 @llvm.ctpop.i32( i32 %1 )
%3 = tail call i32 @llvm.ctpop.i32( i32 %a0 )
@@ -157,7 +157,7 @@ define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
; ZNVER1-NEXT: popcntq (%rsi), %rcx # sched: [10:1.00]
; ZNVER1-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load i64, i64 *%a1
%2 = tail call i64 @llvm.ctpop.i64( i64 %1 )
%3 = tail call i64 @llvm.ctpop.i64( i64 %a0 )
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/X86/sha-schedule.ll
@@ -29,9 +29,9 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; ZNVER1-LABEL: test_sha1msg1:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: sha1msg1 %xmm1, %xmm0
; ZNVER1-NEXT: sha1msg1 (%rdi), %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [9:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <4 x i32>, <4 x i32>* %a2
%2 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a0, <4 x i32> %a1)
%3 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %2, <4 x i32> %1)
@@ -60,9 +60,9 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; ZNVER1-LABEL: test_sha1msg2:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: sha1msg2 %xmm1, %xmm0
; ZNVER1-NEXT: sha1msg2 (%rdi), %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <4 x i32>, <4 x i32>* %a2
%2 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a0, <4 x i32> %a1)
%3 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %2, <4 x i32> %1)
@@ -91,9 +91,9 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; ZNVER1-LABEL: test_sha1nexte:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: sha1nexte %xmm1, %xmm0
; ZNVER1-NEXT: sha1nexte (%rdi), %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: sha1nexte %xmm1, %xmm0 # sched: [1:1.00]
; ZNVER1-NEXT: sha1nexte (%rdi), %xmm0 # sched: [8:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <4 x i32>, <4 x i32>* %a2
%2 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a0, <4 x i32> %a1)
%3 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %2, <4 x i32> %1)
@@ -122,9 +122,9 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; ZNVER1-LABEL: test_sha1rnds4:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: sha1rnds4 $3, %xmm1, %xmm0
; ZNVER1-NEXT: sha1rnds4 $3, (%rdi), %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [6:1.00]
; ZNVER1-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [13:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <4 x i32>, <4 x i32>* %a2
%2 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, i8 3)
%3 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %2, <4 x i32> %1, i8 3)
@@ -157,9 +157,9 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
;
; ZNVER1-LABEL: test_sha256msg1:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: sha256msg1 %xmm1, %xmm0
; ZNVER1-NEXT: sha256msg1 (%rdi), %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [9:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <4 x i32>, <4 x i32>* %a2
%2 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a0, <4 x i32> %a1)
%3 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %2, <4 x i32> %1)
@@ -188,9 +188,9 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
;
; ZNVER1-LABEL: test_sha256msg2:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: sha256msg2 %xmm1, %xmm0
; ZNVER1-NEXT: sha256msg2 (%rdi), %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <4 x i32>, <4 x i32>* %a2
%2 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a0, <4 x i32> %a1)
%3 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %2, <4 x i32> %1)
@@ -230,10 +230,10 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2,
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.50]
; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3
; ZNVER1-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3
; ZNVER1-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
; ZNVER1-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
; ZNVER1-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <4 x i32>, <4 x i32>* %a3
%2 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
%3 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %2, <4 x i32> %1, <4 x i32> %a2)
116 changes: 58 additions & 58 deletions llvm/test/CodeGen/X86/sse-schedule.ll

Large diffs are not rendered by default.

322 changes: 161 additions & 161 deletions llvm/test/CodeGen/X86/sse2-schedule.ll

Large diffs are not rendered by default.

40 changes: 20 additions & 20 deletions llvm/test/CodeGen/X86/sse3-schedule.ll
@@ -56,7 +56,7 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
@@ -111,7 +111,7 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
@@ -164,9 +164,9 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
;
; ZNVER1-LABEL: test_haddpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
@@ -219,9 +219,9 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
;
; ZNVER1-LABEL: test_haddps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
@@ -274,9 +274,9 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
;
; ZNVER1-LABEL: test_hsubpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
@@ -329,9 +329,9 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
;
; ZNVER1-LABEL: test_hsubps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
@@ -380,7 +380,7 @@ define <16 x i8> @test_lddqu(i8* %a0) {
; ZNVER1-LABEL: test_lddqu:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
ret <16 x i8> %1
}
@@ -441,7 +441,7 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: monitor # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
ret void
}
@@ -503,7 +503,7 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
%2 = load <2 x double>, <2 x double> *%a1, align 16
%3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
@@ -567,7 +567,7 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
%2 = load <4 x float>, <4 x float> *%a1, align 16
%3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@@ -631,7 +631,7 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
%2 = load <4 x float>, <4 x float> *%a1, align 16
%3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@@ -694,7 +694,7 @@ define void @test_mwait(i32 %a0, i32 %a1) {
; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: mwait # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
ret void
}
126 changes: 63 additions & 63 deletions llvm/test/CodeGen/X86/sse41-schedule.ll

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/sse42-schedule.ll
@@ -56,7 +56,7 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00]
; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
%2 = load i8, i8 *%a2
%3 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %1, i8 %2)
@@ -112,7 +112,7 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; ZNVER1-NEXT: crc32w %si, %edi # sched: [3:1.00]
; ZNVER1-NEXT: crc32w (%rdx), %edi # sched: [10:1.00]
; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
%2 = load i16, i16 *%a2
%3 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %1, i16 %2)
@@ -168,7 +168,7 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; ZNVER1-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; ZNVER1-NEXT: crc32l (%rdx), %edi # sched: [10:1.00]
; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
%2 = load i32, i32 *%a2
%3 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %1, i32 %2)
@@ -224,7 +224,7 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00]
; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1)
%2 = load i8, i8 *%a2
%3 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %1, i8 %2)
@@ -280,7 +280,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; ZNVER1-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00]
; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
%2 = load i64, i64 *%a2
%3 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %1, i64 %2)
@@ -378,7 +378,7 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ZNVER1-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [100:?]
; ZNVER1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; ZNVER1-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %2, i32 7, i8 7)
@@ -456,7 +456,7 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7)
@@ -526,7 +526,7 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ZNVER1-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [100:?]
; ZNVER1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; ZNVER1-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %2, i8 7)
@@ -576,7 +576,7 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %1, <16 x i8> %2, i8 7)
@@ -623,9 +623,9 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
;
; ZNVER1-LABEL: test_pcmpgtq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = icmp sgt <2 x i64> %a0, %a1
%2 = sext <2 x i1> %1 to <2 x i64>
%3 = load <2 x i64>, <2 x i64>*%a2, align 16
@@ -675,7 +675,7 @@ define <2 x i64> @test_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = load <2 x i64>, <2 x i64> *%a2, align 16
%2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
%3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 0)
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/sse4a-schedule.ll
@@ -16,8 +16,8 @@ define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) {
;
; ZNVER1-LABEL: test_extrq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: extrq %xmm1, %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: extrq %xmm1, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %a0, <16 x i8> %a1)
ret <2 x i64> %1
}
@@ -36,8 +36,8 @@ define <2 x i64> @test_extrqi(<2 x i64> %a0) {
;
; ZNVER1-LABEL: test_extrqi:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: extrq $2, $3, %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: extrq $2, $3, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a0, i8 3, i8 2)
ret <2 x i64> %1
}
@@ -56,8 +56,8 @@ define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) {
;
; ZNVER1-LABEL: test_insertq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: insertq %xmm1, %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: insertq %xmm1, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %a0, <2 x i64> %a1)
ret <2 x i64> %1
}
@@ -76,8 +76,8 @@ define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) {
;
; ZNVER1-LABEL: test_insertqi:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: insertq $6, $5, %xmm1, %xmm0
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 6)
ret <2 x i64> %1
}
@@ -96,8 +96,8 @@ define void @test_movntsd(i8* %p, <2 x double> %a) {
;
; ZNVER1-LABEL: test_movntsd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movntsd %xmm0, (%rdi) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: movntsd %xmm0, (%rdi) # sched: [8:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a)
ret void
}
@@ -116,8 +116,8 @@ define void @test_movntss(i8* %p, <4 x float> %a) {
;
; ZNVER1-LABEL: test_movntss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movntss %xmm0, (%rdi) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: movntss %xmm0, (%rdi) # sched: [8:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a)
ret void
}
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/X86/ssse3-schedule.ll
@@ -65,7 +65,7 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
; ZNVER1-NEXT: vpabsb (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
%2 = load <16 x i8>, <16 x i8> *%a1, align 16
%3 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %2)
@@ -130,7 +130,7 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
; ZNVER1-NEXT: vpabsd (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
%2 = load <4 x i32>, <4 x i32> *%a1, align 16
%3 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %2)
@@ -195,7 +195,7 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
; ZNVER1-NEXT: vpabsw (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
%2 = load <8 x i16>, <8 x i16> *%a1, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %2)
@@ -256,7 +256,7 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25]
; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = shufflevector <8 x i16> %2, <8 x i16> %1, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
@@ -308,9 +308,9 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; ZNVER1-LABEL: test_phaddd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
%2 = load <4 x i32>, <4 x i32> *%a2, align 16
%3 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %1, <4 x i32> %2)
@@ -363,9 +363,9 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; ZNVER1-LABEL: test_phaddsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %1, <8 x i16> %2)
@@ -418,9 +418,9 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; ZNVER1-LABEL: test_phaddw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %1, <8 x i16> %2)
@@ -473,9 +473,9 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; ZNVER1-LABEL: test_phsubd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
%2 = load <4 x i32>, <4 x i32> *%a2, align 16
%3 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %1, <4 x i32> %2)
@@ -528,9 +528,9 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; ZNVER1-LABEL: test_phsubsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %1, <8 x i16> %2)
@@ -583,9 +583,9 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; ZNVER1-LABEL: test_phsubw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [100:?]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %1, <8 x i16> %2)
@@ -640,7 +640,7 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = bitcast <8 x i16> %1 to <16 x i8>
@@ -696,7 +696,7 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %1, <8 x i16> %2)
@@ -751,7 +751,7 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> %2)
@@ -810,7 +810,7 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %1, <16 x i8> %2)
@@ -869,7 +869,7 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
%2 = load <4 x i32>, <4 x i32> *%a2, align 16
%3 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %1, <4 x i32> %2)
@@ -928,7 +928,7 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %1, <8 x i16> %2)