508 changes: 254 additions & 254 deletions llvm/test/CodeGen/X86/avx-schedule.ll

Large diffs are not rendered by default.

116 changes: 58 additions & 58 deletions llvm/test/CodeGen/X86/avx2-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
;
; ZNVER1-LABEL: test_pabsb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [6:1.00]
; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
%2 = load <32 x i8>, <32 x i8> *%a1, align 32
%3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
Expand All @@ -35,10 +35,10 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
;
; ZNVER1-LABEL: test_pabsd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [6:1.00]
; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
%2 = load <8 x i32>, <8 x i32> *%a1, align 32
%3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
Expand All @@ -57,10 +57,10 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
;
; ZNVER1-LABEL: test_pabsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [6:1.00]
; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
%2 = load <16 x i16>, <16 x i16> *%a1, align 32
%3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
Expand All @@ -78,9 +78,9 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
;
; ZNVER1-LABEL: test_paddb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = add <32 x i8> %a0, %a1
%2 = load <32 x i8>, <32 x i8> *%a2, align 32
%3 = add <32 x i8> %1, %2
Expand All @@ -96,9 +96,9 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; ZNVER1-LABEL: test_paddd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = add <8 x i32> %a0, %a1
%2 = load <8 x i32>, <8 x i32> *%a2, align 32
%3 = add <8 x i32> %1, %2
Expand All @@ -114,9 +114,9 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
;
; ZNVER1-LABEL: test_paddq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = add <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = add <4 x i64> %1, %2
Expand All @@ -132,9 +132,9 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
;
; ZNVER1-LABEL: test_paddw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = add <16 x i16> %a0, %a1
%2 = load <16 x i16>, <16 x i16> *%a2, align 32
%3 = add <16 x i16> %1, %2
Expand All @@ -151,10 +151,10 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
;
; ZNVER1-LABEL: test_pand:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = and <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = and <4 x i64> %1, %2
Expand All @@ -172,10 +172,10 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
;
; ZNVER1-LABEL: test_pandn:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [6:1.00]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
%2 = and <4 x i64> %a1, %1
%3 = load <4 x i64>, <4 x i64> *%a2, align 32
Expand All @@ -194,9 +194,9 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; ZNVER1-LABEL: test_pmulld:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = mul <8 x i32> %a0, %a1
%2 = load <8 x i32>, <8 x i32> *%a2, align 32
%3 = mul <8 x i32> %1, %2
Expand All @@ -212,9 +212,9 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
;
; ZNVER1-LABEL: test_pmullw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = mul <16 x i16> %a0, %a1
%2 = load <16 x i16>, <16 x i16> *%a2, align 32
%3 = mul <16 x i16> %1, %2
Expand All @@ -231,10 +231,10 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
;
; ZNVER1-LABEL: test_por:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = or <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = or <4 x i64> %1, %2
Expand All @@ -251,9 +251,9 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
;
; ZNVER1-LABEL: test_psubb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = sub <32 x i8> %a0, %a1
%2 = load <32 x i8>, <32 x i8> *%a2, align 32
%3 = sub <32 x i8> %1, %2
Expand All @@ -269,9 +269,9 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; ZNVER1-LABEL: test_psubd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = sub <8 x i32> %a0, %a1
%2 = load <8 x i32>, <8 x i32> *%a2, align 32
%3 = sub <8 x i32> %1, %2
Expand All @@ -287,9 +287,9 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
;
; ZNVER1-LABEL: test_psubq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = sub <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = sub <4 x i64> %1, %2
Expand All @@ -305,9 +305,9 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
;
; ZNVER1-LABEL: test_psubw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = sub <16 x i16> %a0, %a1
%2 = load <16 x i16>, <16 x i16> *%a2, align 32
%3 = sub <16 x i16> %1, %2
Expand All @@ -324,10 +324,10 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = xor <4 x i64> %a0, %a1
%2 = load <4 x i64>, <4 x i64> *%a2, align 32
%3 = xor <4 x i64> %1, %2
Expand Down
103 changes: 102 additions & 1 deletion llvm/test/CodeGen/X86/bmi-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) {
; GENERIC-LABEL: test_andn_i16:
Expand Down Expand Up @@ -33,6 +33,15 @@ define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) {
; BTVER2-NEXT: addl %edi, %eax # sched: [1:0.50]
; BTVER2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_andn_i16:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: andnl %esi, %edi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: notl %edi # sched: [1:0.25]
; ZNVER1-NEXT: andw (%rdx), %di # sched: [5:0.50]
; ZNVER1-NEXT: addl %edi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i16, i16 *%a2
%2 = xor i16 %a0, -1
%3 = and i16 %2, %a1
Expand Down Expand Up @@ -62,6 +71,13 @@ define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
; BTVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50]
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_andn_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50]
; ZNVER1-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a2
%2 = xor i32 %a0, -1
%3 = and i32 %2, %a1
Expand Down Expand Up @@ -91,6 +107,13 @@ define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
; BTVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50]
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_andn_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50]
; ZNVER1-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.25]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a2
%2 = xor i64 %a0, -1
%3 = and i64 %2, %a1
Expand Down Expand Up @@ -120,6 +143,13 @@ define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
; BTVER2-NEXT: bextrl %edi, %esi, %eax # sched: [?:0.000000e+00]
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_bextr_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: bextrl %edi, (%rdx), %ecx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: bextrl %edi, %esi, %eax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a2
%2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %1, i32 %a0)
%3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a1, i32 %a0)
Expand Down Expand Up @@ -149,6 +179,13 @@ define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
; BTVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [?:0.000000e+00]
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_bextr_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: bextrq %rdi, %rsi, %rax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a2
%2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %1, i64 %a0)
%3 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a1, i64 %a0)
Expand Down Expand Up @@ -178,6 +215,13 @@ define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
; BTVER2-NEXT: blsil %edi, %eax # sched: [?:0.000000e+00]
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blsi_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: blsil (%rsi), %ecx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: blsil %edi, %eax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a1
%2 = sub i32 0, %1
%3 = sub i32 0, %a0
Expand Down Expand Up @@ -208,6 +252,13 @@ define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
; BTVER2-NEXT: blsiq %rdi, %rax # sched: [?:0.000000e+00]
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blsi_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: blsiq (%rsi), %rcx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: blsiq %rdi, %rax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a1
%2 = sub i64 0, %1
%3 = sub i64 0, %a0
Expand Down Expand Up @@ -238,6 +289,13 @@ define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
; BTVER2-NEXT: blsmskl %edi, %eax # sched: [?:0.000000e+00]
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blsmsk_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: blsmskl (%rsi), %ecx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: blsmskl %edi, %eax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a1
%2 = sub i32 %1, 1
%3 = sub i32 %a0, 1
Expand Down Expand Up @@ -268,6 +326,13 @@ define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
; BTVER2-NEXT: blsmskq %rdi, %rax # sched: [?:0.000000e+00]
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blsmsk_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: blsmskq (%rsi), %rcx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: blsmskq %rdi, %rax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a1
%2 = sub i64 %1, 1
%3 = sub i64 %a0, 1
Expand Down Expand Up @@ -298,6 +363,13 @@ define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
; BTVER2-NEXT: blsrl %edi, %eax # sched: [?:0.000000e+00]
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blsr_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: blsrl (%rsi), %ecx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: blsrl %edi, %eax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a1
%2 = sub i32 %1, 1
%3 = sub i32 %a0, 1
Expand Down Expand Up @@ -328,6 +400,13 @@ define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
; BTVER2-NEXT: blsrq %rdi, %rax # sched: [?:0.000000e+00]
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_blsr_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: blsrq (%rsi), %rcx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: blsrq %rdi, %rax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a1
%2 = sub i64 %1, 1
%3 = sub i64 %a0, 1
Expand Down Expand Up @@ -361,6 +440,14 @@ define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) {
; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cttz_i16:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: tzcntw (%rsi), %cx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: tzcntw %di, %ax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i16, i16 *%a1
%2 = tail call i16 @llvm.cttz.i16( i16 %1, i1 false )
%3 = tail call i16 @llvm.cttz.i16( i16 %a0, i1 false )
Expand Down Expand Up @@ -390,6 +477,13 @@ define i32 @test_cttz_i32(i32 %a0, i32 *%a1) {
; BTVER2-NEXT: tzcntl %edi, %eax # sched: [?:0.000000e+00]
; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cttz_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: tzcntl (%rsi), %ecx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: tzcntl %edi, %eax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a1
%2 = tail call i32 @llvm.cttz.i32( i32 %1, i1 false )
%3 = tail call i32 @llvm.cttz.i32( i32 %a0, i1 false )
Expand Down Expand Up @@ -419,6 +513,13 @@ define i64 @test_cttz_i64(i64 %a0, i64 *%a1) {
; BTVER2-NEXT: tzcntq %rdi, %rax # sched: [?:0.000000e+00]
; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cttz_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: tzcntq (%rsi), %rcx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: tzcntq %rdi, %rax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a1
%2 = tail call i64 @llvm.cttz.i64( i64 %1, i1 false )
%3 = tail call i64 @llvm.cttz.i64( i64 %a0, i1 false )
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/bmi2-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: bzhil %edi, (%rdx), %ecx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: bzhil %edi, %esi, %eax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a2
%2 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %1, i32 %a0)
%3 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a1, i32 %a0)
Expand Down Expand Up @@ -53,8 +53,8 @@ define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: bzhiq %rdi, %rsi, %rax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a2
%2 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %1, i64 %a0)
%3 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a1, i64 %a0)
Expand Down Expand Up @@ -82,8 +82,8 @@ define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: pdepl (%rdx), %edi, %ecx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: pdepl %esi, %edi, %eax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a2
%2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %a0, i32 %1)
%3 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %a0, i32 %a1)
Expand Down Expand Up @@ -111,8 +111,8 @@ define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: pdepq %rsi, %rdi, %rax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a2
%2 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %a0, i64 %1)
%3 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %a0, i64 %a1)
Expand Down Expand Up @@ -140,8 +140,8 @@ define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: pextl (%rdx), %edi, %ecx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: pextl %esi, %edi, %eax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a2
%2 = tail call i32 @llvm.x86.bmi.pext.32(i32 %a0, i32 %1)
%3 = tail call i32 @llvm.x86.bmi.pext.32(i32 %a0, i32 %a1)
Expand Down Expand Up @@ -169,8 +169,8 @@ define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) {
; ZNVER1: # BB#0:
; ZNVER1-NEXT: pextq (%rdx), %rdi, %rcx # sched: [?:0.000000e+00]
; ZNVER1-NEXT: pextq %rsi, %rdi, %rax # sched: [?:0.000000e+00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a2
%2 = tail call i64 @llvm.x86.bmi.pext.64(i64 %a0, i64 %1)
%3 = tail call i64 @llvm.x86.bmi.pext.64(i64 %a0, i64 %a1)
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/X86/f16c-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
;
; ZNVER1-LABEL: test_vcvtph2ps_128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load <8 x i16>, <8 x i16> *%a1
%2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
%3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
Expand Down Expand Up @@ -65,10 +65,10 @@ define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
;
; ZNVER1-LABEL: test_vcvtph2ps_256:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [5:1.00]
; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load <8 x i16>, <8 x i16> *%a1
%2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
%3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
Expand Down Expand Up @@ -98,9 +98,9 @@ define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16>
;
; ZNVER1-LABEL: test_vcvtps2ph_128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [12:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
%2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0)
%3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
Expand Down Expand Up @@ -132,10 +132,10 @@ define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16>
;
; ZNVER1-LABEL: test_vcvtps2ph_256:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [12:1.00]
; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
%2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0)
store <8 x i16> %2, <8 x i16> *%a2
Expand Down
74 changes: 73 additions & 1 deletion llvm/test/CodeGen/X86/lea32-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

define i32 @test_lea_offset(i32) {
; GENERIC-LABEL: test_lea_offset:
Expand Down Expand Up @@ -52,6 +52,12 @@ define i32 @test_lea_offset(i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -24(%rdi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = add nsw i32 %0, -24
ret i32 %2
}
Expand Down Expand Up @@ -98,6 +104,12 @@ define i32 @test_lea_offset_big(i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 1024(%rdi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = add nsw i32 %0, 1024
ret i32 %2
}
Expand Down Expand Up @@ -151,6 +163,13 @@ define i32 @test_lea_add(i32, i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add nsw i32 %1, %0
ret i32 %3
}
Expand Down Expand Up @@ -205,6 +224,13 @@ define i32 @test_lea_add_offset(i32, i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add i32 %0, 16
%4 = add i32 %3, %1
ret i32 %4
Expand Down Expand Up @@ -262,6 +288,13 @@ define i32 @test_lea_add_offset_big(i32, i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add i32 %0, -4096
%4 = add i32 %3, %1
ret i32 %4
Expand Down Expand Up @@ -309,6 +342,12 @@ define i32 @test_lea_mul(i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i32 %0, 3
ret i32 %2
}
Expand Down Expand Up @@ -357,6 +396,12 @@ define i32 @test_lea_mul_offset(i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i32 %0, 3
%3 = add nsw i32 %2, -32
ret i32 %3
Expand Down Expand Up @@ -408,6 +453,12 @@ define i32 @test_lea_mul_offset_big(i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i32 %0, 9
%3 = add nsw i32 %2, 10000
ret i32 %3
Expand Down Expand Up @@ -461,6 +512,13 @@ define i32 @test_lea_add_scale(i32, i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i32 %1, 1
%4 = add nsw i32 %3, %0
ret i32 %4
Expand Down Expand Up @@ -516,6 +574,13 @@ define i32 @test_lea_add_scale_offset(i32, i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i32 %1, 2
%4 = add i32 %0, 96
%5 = add i32 %4, %3
Expand Down Expand Up @@ -574,6 +639,13 @@ define i32 @test_lea_add_scale_offset_big(i32, i32) {
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i32 %1, 3
%4 = add i32 %0, -1200
%5 = add i32 %4, %3
Expand Down
57 changes: 56 additions & 1 deletion llvm/test/CodeGen/X86/lea64-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

define i64 @test_lea_offset(i64) {
; GENERIC-LABEL: test_lea_offset:
Expand Down Expand Up @@ -46,6 +46,11 @@ define i64 @test_lea_offset(i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -24(%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = add nsw i64 %0, -24
ret i64 %2
}
Expand Down Expand Up @@ -86,6 +91,11 @@ define i64 @test_lea_offset_big(i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = add nsw i64 %0, 1024
ret i64 %2
}
Expand Down Expand Up @@ -127,6 +137,11 @@ define i64 @test_lea_add(i64, i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add nsw i64 %1, %0
ret i64 %3
}
Expand Down Expand Up @@ -169,6 +184,11 @@ define i64 @test_lea_add_offset(i64, i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add i64 %0, 16
%4 = add i64 %3, %1
ret i64 %4
Expand Down Expand Up @@ -214,6 +234,11 @@ define i64 @test_lea_add_offset_big(i64, i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add i64 %0, -4096
%4 = add i64 %3, %1
ret i64 %4
Expand Down Expand Up @@ -255,6 +280,11 @@ define i64 @test_lea_mul(i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i64 %0, 3
ret i64 %2
}
Expand Down Expand Up @@ -297,6 +327,11 @@ define i64 @test_lea_mul_offset(i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i64 %0, 3
%3 = add nsw i64 %2, -32
ret i64 %3
Expand Down Expand Up @@ -342,6 +377,11 @@ define i64 @test_lea_mul_offset_big(i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i64 %0, 9
%3 = add nsw i64 %2, 10000
ret i64 %3
Expand Down Expand Up @@ -383,6 +423,11 @@ define i64 @test_lea_add_scale(i64, i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i64 %1, 1
%4 = add nsw i64 %3, %0
ret i64 %4
Expand Down Expand Up @@ -426,6 +471,11 @@ define i64 @test_lea_add_scale_offset(i64, i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i64 %1, 2
%4 = add i64 %0, 96
%5 = add i64 %4, %3
Expand Down Expand Up @@ -472,6 +522,11 @@ define i64 @test_lea_add_scale_offset_big(i64, i64) {
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i64 %1, 3
%4 = add i64 %0, -1200
%5 = add i64 %4, %3
Expand Down
24 changes: 23 additions & 1 deletion llvm/test/CodeGen/X86/lzcnt-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) {
; GENERIC-LABEL: test_ctlz_i16:
Expand All @@ -30,6 +30,14 @@ define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) {
; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctlz_i16:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: lzcntw (%rsi), %cx
; ZNVER1-NEXT: lzcntw %di, %ax
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i16, i16 *%a1
%2 = tail call i16 @llvm.ctlz.i16( i16 %1, i1 false )
%3 = tail call i16 @llvm.ctlz.i16( i16 %a0, i1 false )
Expand Down Expand Up @@ -59,6 +67,13 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) {
; BTVER2-NEXT: lzcntl %edi, %eax
; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctlz_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: lzcntl (%rsi), %ecx
; ZNVER1-NEXT: lzcntl %edi, %eax
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a1
%2 = tail call i32 @llvm.ctlz.i32( i32 %1, i1 false )
%3 = tail call i32 @llvm.ctlz.i32( i32 %a0, i1 false )
Expand Down Expand Up @@ -88,6 +103,13 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) {
; BTVER2-NEXT: lzcntq %rdi, %rax
; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctlz_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: lzcntq (%rsi), %rcx
; ZNVER1-NEXT: lzcntq %rdi, %rax
; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a1
%2 = tail call i64 @llvm.ctlz.i64( i64 %1, i1 false )
%3 = tail call i64 @llvm.ctlz.i64( i64 %a0, i1 false )
Expand Down
24 changes: 23 additions & 1 deletion llvm/test/CodeGen/X86/popcnt-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
; GENERIC-LABEL: test_ctpop_i16:
Expand Down Expand Up @@ -50,6 +50,14 @@ define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctpop_i16:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: popcntw (%rsi), %cx # sched: [10:1.00]
; ZNVER1-NEXT: popcntw %di, %ax # sched: [3:1.00]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i16, i16 *%a1
%2 = tail call i16 @llvm.ctpop.i16( i16 %1 )
%3 = tail call i16 @llvm.ctpop.i16( i16 %a0 )
Expand Down Expand Up @@ -93,6 +101,13 @@ define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
; BTVER2-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctpop_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: popcntl (%rsi), %ecx # sched: [10:1.00]
; ZNVER1-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a1
%2 = tail call i32 @llvm.ctpop.i32( i32 %1 )
%3 = tail call i32 @llvm.ctpop.i32( i32 %a0 )
Expand Down Expand Up @@ -136,6 +151,13 @@ define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
; BTVER2-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctpop_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: popcntq (%rsi), %rcx # sched: [10:1.00]
; ZNVER1-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a1
%2 = tail call i64 @llvm.ctpop.i64( i64 %1 )
%3 = tail call i64 @llvm.ctpop.i64( i64 %a0 )
Expand Down
327 changes: 326 additions & 1 deletion llvm/test/CodeGen/X86/sse-schedule.ll

Large diffs are not rendered by default.

824 changes: 823 additions & 1 deletion llvm/test/CodeGen/X86/sse2-schedule.ll

Large diffs are not rendered by default.

64 changes: 63 additions & 1 deletion llvm/test/CodeGen/X86/sse3-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_addsubpd:
Expand Down Expand Up @@ -45,6 +45,12 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addsubpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
Expand Down Expand Up @@ -88,6 +94,12 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addsubps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
Expand Down Expand Up @@ -131,6 +143,12 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_haddpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
Expand Down Expand Up @@ -174,6 +192,12 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_haddps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
Expand Down Expand Up @@ -217,6 +241,12 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_hsubpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
Expand Down Expand Up @@ -260,6 +290,12 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_hsubps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
Expand Down Expand Up @@ -299,6 +335,11 @@ define <16 x i8> @test_lddqu(i8* %a0) {
; BTVER2: # BB#0:
; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lddqu:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
ret <16 x i8> %1
}
Expand Down Expand Up @@ -347,6 +388,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movddup:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
%2 = load <2 x double>, <2 x double> *%a1, align 16
%3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
Expand Down Expand Up @@ -397,6 +445,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movshdup:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
%2 = load <4 x float>, <4 x float> *%a1, align 16
%3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
Expand Down Expand Up @@ -447,6 +502,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movsldup:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
%2 = load <4 x float>, <4 x float> *%a1, align 16
%3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
Expand Down
311 changes: 310 additions & 1 deletion llvm/test/CodeGen/X86/sse41-schedule.ll

Large diffs are not rendered by default.

81 changes: 80 additions & 1 deletion llvm/test/CodeGen/X86/sse42-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; GENERIC-LABEL: crc32_32_8:
Expand Down Expand Up @@ -43,6 +43,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: crc32_32_8:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00]
; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
%2 = load i8, i8 *%a2
%3 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %1, i8 %2)
Expand Down Expand Up @@ -85,6 +92,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; BTVER2-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: crc32_32_16:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: crc32w %si, %edi # sched: [3:1.00]
; ZNVER1-NEXT: crc32w (%rdx), %edi # sched: [10:1.00]
; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
%2 = load i16, i16 *%a2
%3 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %1, i16 %2)
Expand Down Expand Up @@ -127,6 +141,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; BTVER2-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.17]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: crc32_32_32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; ZNVER1-NEXT: crc32l (%rdx), %edi # sched: [10:1.00]
; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
%2 = load i32, i32 *%a2
%3 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %1, i32 %2)
Expand Down Expand Up @@ -169,6 +190,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
; BTVER2-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.17]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: crc32_64_8:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; ZNVER1-NEXT: crc32b (%rdx), %edi # sched: [10:1.00]
; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1)
%2 = load i8, i8 *%a2
%3 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %1, i8 %2)
Expand Down Expand Up @@ -211,6 +239,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
; BTVER2-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00]
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.17]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: crc32_64_64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; ZNVER1-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00]
; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
%2 = load i64, i64 *%a2
%3 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %1, i64 %2)
Expand Down Expand Up @@ -283,6 +318,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; BTVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pcmpestri:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [100:0.00]
; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
; ZNVER1-NEXT: movl %ecx, %esi # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [100:0.00]
; ZNVER1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; ZNVER1-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %2, i32 7, i8 7)
Expand Down Expand Up @@ -341,6 +389,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: movl $7, %edx # sched: [1:0.17]
; BTVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [18:2.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pcmpestrm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [100:0.00]
; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [100:0.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7)
Expand Down Expand Up @@ -393,6 +451,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; BTVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pcmpistri:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [100:0.00]
; ZNVER1-NEXT: movl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [100:0.00]
; ZNVER1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; ZNVER1-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %2, i8 7)
Expand Down Expand Up @@ -431,6 +498,12 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [12:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pcmpistrm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [100:0.00]
; ZNVER1-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [100:0.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %1, <16 x i8> %2, i8 7)
Expand Down Expand Up @@ -468,6 +541,12 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BTVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pcmpgtq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = icmp sgt <2 x i64> %a0, %a1
%2 = sext <2 x i1> %1 to <2 x i64>
%3 = load <2 x i64>, <2 x i64>*%a2, align 16
Expand Down
32 changes: 31 additions & 1 deletion llvm/test/CodeGen/X86/sse4a-schedule.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1

define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) {
; GENERIC-LABEL: test_extrq:
Expand All @@ -13,6 +13,11 @@ define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) {
; BTVER2: # BB#0:
; BTVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_extrq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: extrq %xmm1, %xmm0 # sched: [?:0.000000e+00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %a0, <16 x i8> %a1)
ret <2 x i64> %1
}
Expand All @@ -28,6 +33,11 @@ define <2 x i64> @test_extrqi(<2 x i64> %a0) {
; BTVER2: # BB#0:
; BTVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_extrqi:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: extrq $2, $3, %xmm0 # sched: [?:0.000000e+00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a0, i8 3, i8 2)
ret <2 x i64> %1
}
Expand All @@ -43,6 +53,11 @@ define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) {
; BTVER2: # BB#0:
; BTVER2-NEXT: insertq %xmm1, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_insertq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: insertq %xmm1, %xmm0 # sched: [?:0.000000e+00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %a0, <2 x i64> %a1)
ret <2 x i64> %1
}
Expand All @@ -58,6 +73,11 @@ define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) {
; BTVER2: # BB#0:
; BTVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_insertqi:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [?:0.000000e+00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 6)
ret <2 x i64> %1
}
Expand All @@ -73,6 +93,11 @@ define void @test_movntsd(i8* %p, <2 x double> %a) {
; BTVER2: # BB#0:
; BTVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movntsd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movntsd %xmm0, (%rdi) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a)
ret void
}
Expand All @@ -88,6 +113,11 @@ define void @test_movntss(i8* %p, <4 x float> %a) {
; BTVER2: # BB#0:
; BTVER2-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movntss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: movntss %xmm0, (%rdi) # sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a)
ret void
}
Expand Down
98 changes: 97 additions & 1 deletion llvm/test/CodeGen/X86/ssse3-schedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
; GENERIC-LABEL: test_pabsb:
Expand Down Expand Up @@ -52,6 +52,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
; BTVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pabsb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsb (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
%2 = load <16 x i8>, <16 x i8> *%a1, align 16
%3 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %2)
Expand Down Expand Up @@ -103,6 +110,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
; BTVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pabsd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsd (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
%2 = load <4 x i32>, <4 x i32> *%a1, align 16
%3 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %2)
Expand Down Expand Up @@ -147,6 +161,11 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
; BTVER2: # BB#0:
; BTVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pabsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
%2 = load <8 x i16>, <8 x i16> *%a1, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %2)
Expand Down Expand Up @@ -196,6 +215,12 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_palignr:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25]
; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = shufflevector <8 x i16> %2, <8 x i16> %1, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
Expand Down Expand Up @@ -238,6 +263,12 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_phaddd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
%2 = load <4 x i32>, <4 x i32> *%a2, align 16
%3 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %1, <4 x i32> %2)
Expand Down Expand Up @@ -289,6 +320,12 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_phaddsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %1, <8 x i16> %2)
Expand Down Expand Up @@ -332,6 +369,12 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_phaddw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %1, <8 x i16> %2)
Expand Down Expand Up @@ -375,6 +418,12 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_phsubd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
%2 = load <4 x i32>, <4 x i32> *%a2, align 16
%3 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %1, <4 x i32> %2)
Expand Down Expand Up @@ -426,6 +475,12 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_phsubsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %1, <8 x i16> %2)
Expand Down Expand Up @@ -469,6 +524,12 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_phsubw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %1, <8 x i16> %2)
Expand Down Expand Up @@ -512,6 +573,12 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pmaddubsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = bitcast <8 x i16> %1 to <16 x i8>
Expand Down Expand Up @@ -550,6 +617,11 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pmulhrsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %1, <8 x i16> %2)
Expand Down Expand Up @@ -593,6 +665,12 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pshufb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> %2)
Expand Down Expand Up @@ -644,6 +722,12 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_psignb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
%2 = load <16 x i8>, <16 x i8> *%a2, align 16
%3 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %1, <16 x i8> %2)
Expand Down Expand Up @@ -695,6 +779,12 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_psignd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
%2 = load <4 x i32>, <4 x i32> *%a2, align 16
%3 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %1, <4 x i32> %2)
Expand Down Expand Up @@ -746,6 +836,12 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_psignw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
%2 = load <8 x i16>, <8 x i16> *%a2, align 16
%3 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %1, <8 x i16> %2)
Expand Down