67 changes: 35 additions & 32 deletions llvm/test/CodeGen/X86/masked_gather_scatter.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_32
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=SKX
Expand Down Expand Up @@ -676,23 +675,25 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
;
; KNL_64-LABEL: test15:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_64: vpxor %ymm2, %ymm2, %ymm2
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
; KNL_64-NEXT: # kill
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test15:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_32: vpxor %ymm2, %ymm2, %ymm2
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
; KNL_32-NEXT: # kill
; KNL_32-NEXT: retl
;
; SKX-LABEL: test15:
Expand Down Expand Up @@ -723,7 +724,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
;
; KNL_64-LABEL: test16:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
Expand All @@ -737,7 +738,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
;
; KNL_32-LABEL: test16:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
Expand Down Expand Up @@ -777,7 +778,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
;
; KNL_64-LABEL: test17:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
Expand All @@ -787,7 +788,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
;
; KNL_32-LABEL: test17:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllvq .LCPI16_0, %zmm1, %zmm1
Expand Down Expand Up @@ -829,7 +830,7 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
;
; KNL_64-LABEL: test18:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
Expand All @@ -838,7 +839,7 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
;
; KNL_32-LABEL: test18:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
Expand Down Expand Up @@ -867,7 +868,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
;
; KNL_64-LABEL: test19:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
Expand All @@ -879,7 +880,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
;
; KNL_32-LABEL: test19:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
Expand Down Expand Up @@ -914,7 +915,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; KNL_64-LABEL: test20:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_64: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_64-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
Expand All @@ -925,7 +926,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; KNL_32-LABEL: test20:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_32: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_32-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
Expand All @@ -938,7 +939,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; SKX-LABEL: test20:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
Expand All @@ -963,7 +964,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; KNL_64-LABEL: test21:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
Expand All @@ -973,7 +974,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; KNL_32-LABEL: test21:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpsllvq .LCPI20_0, %zmm2, %zmm2
Expand All @@ -983,7 +984,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; SKX-LABEL: test21:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
Expand All @@ -993,7 +994,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; SKX_32-LABEL: test21:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX_32-NEXT: kshiftlb $6, %k0, %k0
; SKX_32-NEXT: kshiftrb $6, %k0, %k1
Expand All @@ -1012,7 +1013,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
;
; KNL_64-LABEL: test22:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_64: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_64-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
Expand All @@ -1026,7 +1027,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
;
; KNL_32-LABEL: test22:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
Expand Down Expand Up @@ -1074,7 +1075,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
;
; KNL_64-LABEL: test23:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
Expand All @@ -1084,7 +1085,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
;
; KNL_32-LABEL: test23:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllvq .LCPI22_0, %zmm1, %zmm1
Expand Down Expand Up @@ -1118,15 +1119,15 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64: # BB#0:
; KNL_64-NEXT: movb $3, %al
; KNL_64: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovaps %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test24:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL_32-NEXT: vinserti32x4 $0, .LCPI23_0, %zmm1, %zmm1
; KNL_32-NEXT: vpsllvq .LCPI23_1, %zmm1, %zmm1
Expand Down Expand Up @@ -1159,7 +1160,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
;
; KNL_64-LABEL: test25:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
Expand All @@ -1169,7 +1170,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
;
; KNL_32-LABEL: test25:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllvq .LCPI24_0, %zmm1, %zmm1
Expand Down Expand Up @@ -1204,15 +1205,15 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
;
; KNL_64-LABEL: test26:
; KNL_64: # BB#0:
; KNL_64-NEXT: movb $3, %al
; KNL_64: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovaps %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test26:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL_32-NEXT: vinserti32x4 $0, .LCPI25_0, %zmm2, %zmm2
; KNL_32-NEXT: vpsllvq .LCPI25_1, %zmm2, %zmm2
Expand Down Expand Up @@ -1251,6 +1252,7 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: # kill
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test27:
Expand All @@ -1261,6 +1263,7 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_32-NEXT: movb $3, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: # kill
; KNL_32-NEXT: retl
;
; SKX-LABEL: test27:
Expand All @@ -1282,15 +1285,15 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; KNL_64-LABEL: test28:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test28:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL_32-NEXT: vinserti32x4 $0, .LCPI27_0, %zmm2, %zmm2
; KNL_32-NEXT: vpsllvq .LCPI27_1, %zmm2, %zmm2
Expand All @@ -1300,15 +1303,15 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; SKX-LABEL: test28:
; SKX: # BB#0:
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test28:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: movb $3, %al
; SKX_32-NEXT: kmovb %eax, %k1
; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
Expand Down
8,037 changes: 8,036 additions & 1 deletion llvm/test/CodeGen/X86/masked_memop.ll

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions llvm/test/CodeGen/X86/materialize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ entry:
; CHECK32-LABEL: one16:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: # kill
; CHECK32-NEXT: retl
}

Expand All @@ -135,6 +136,7 @@ entry:
; CHECK32-LABEL: minus_one16:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: # kill
; CHECK32-NEXT: retl
}

Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/movmsk.ll
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ define void @float_call_signbit(double %n) {
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: movd %xmm0, %rdi
; CHECK-NEXT: shrq $63, %rdi
; CHECK-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<kill>
; CHECK-NEXT: jmp _float_call_signbit_callee ## TAILCALL
entry:
%t0 = bitcast double %n to i64
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/X86/or-lea.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
define i32 @or_shift1_and1(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift1_and1:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi,2), %eax
; CHECK-NEXT: retq
Expand All @@ -22,6 +24,8 @@ define i32 @or_shift1_and1(i32 %x, i32 %y) {
define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift1_and1_swapped:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi,2), %eax
; CHECK-NEXT: retq
Expand All @@ -35,6 +39,8 @@ define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) {
define i32 @or_shift2_and1(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift2_and1:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi,4), %eax
; CHECK-NEXT: retq
Expand All @@ -48,6 +54,8 @@ define i32 @or_shift2_and1(i32 %x, i32 %y) {
define i32 @or_shift3_and1(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift3_and1:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi,8), %eax
; CHECK-NEXT: retq
Expand All @@ -61,6 +69,8 @@ define i32 @or_shift3_and1(i32 %x, i32 %y) {
define i32 @or_shift3_and7(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift3_and7:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $7, %esi
; CHECK-NEXT: leal (%rsi,%rdi,8), %eax
; CHECK-NEXT: retq
Expand All @@ -76,6 +86,8 @@ define i32 @or_shift3_and7(i32 %x, i32 %y) {
define i32 @or_shift4_and1(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift4_and1:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: shll $4, %edi
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi), %eax
Expand All @@ -92,6 +104,7 @@ define i32 @or_shift4_and1(i32 %x, i32 %y) {
define i32 @or_shift3_and8(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift3_and8:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: leal (,%rdi,8), %eax
; CHECK-NEXT: andl $8, %esi
; CHECK-NEXT: orl %esi, %eax
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/X86/pmul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind {
; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
entry:
%A = mul <16 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
Expand Down Expand Up @@ -206,6 +207,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind {
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
entry:
%A = mul <16 x i8> %i, %j
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/pr28173.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
define i64 @foo64(i1 zeroext %i, i32 %j) #0 {
; CHECK-LABEL: foo64:
; CHECK: # BB#0:
; CHECK-NEXT: # kill
; CHECK-NEXT: orq $-2, %rdi
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/X86/promote-i16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ entry:
; CHECK-LABEL: foo:
; CHECK: movzwl 4(%esp), %eax
; CHECK-NEXT: xorl $21998, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retl
%0 = xor i16 %x, 21998
ret i16 %0
Expand All @@ -15,6 +16,7 @@ entry:
; CHECK-LABEL: bar:
; CHECK: movzwl 4(%esp), %eax
; CHECK-NEXT: xorl $54766, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retl
%0 = xor i16 %x, 54766
ret i16 %0
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ define i32 @test__blcfill_u32(i32 %a0) {
;
; X64-LABEL: test__blcfill_u32:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
Expand All @@ -47,6 +48,7 @@ define i32 @test__blci_u32(i32 %a0) {
;
; X64-LABEL: test__blci_u32:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: xorl $-1, %eax
; X64-NEXT: orl %edi, %eax
Expand Down Expand Up @@ -91,6 +93,7 @@ define i32 @test__blcmsk_u32(i32 %a0) {
;
; X64-LABEL: test__blcmsk_u32:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: retq
Expand All @@ -109,6 +112,7 @@ define i32 @test__blcs_u32(i32 %a0) {
;
; X64-LABEL: test__blcs_u32:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: orl %edi, %eax
; X64-NEXT: retq
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/urem-i8-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ define i8 @foo(i8 %tmp325) {
; CHECK-NEXT: andl $28672, %eax # imm = 0x7000
; CHECK-NEXT: shrl $12, %eax
; CHECK-NEXT: movb $37, %dl
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: mulb %dl
; CHECK-NEXT: subb %al, %cl
; CHECK-NEXT: movl %ecx, %eax
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/CodeGen/X86/urem-power-of-two.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,18 @@ define i8 @and_pow_2(i8 %x, i8 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: andb $4, %sil
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def>
; CHECK-NEXT: divb %sil
; CHECK-NEXT: movzbl %ah, %eax # NOREX
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
;
%and = and i8 %y, 4
%urem = urem i8 %x, %and
ret i8 %urem
}

; A vector splat constant divisor should get the same treatment as a scalar.
; A vector splat constant divisor should get the same treatment as a scalar.

define <4 x i32> @vec_const_pow_2(<4 x i32> %x) {
; CHECK-LABEL: vec_const_pow_2:
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/vec_fp_to_int.ll
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
;
; AVX-LABEL: fptosi_4f64_to_2i32:
; AVX: # BB#0:
; AVX-NEXT: # kill
; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/vec_insert-5.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ define void @t1(i32 %a, x86_mmx* %P) nounwind {
;
; X64-LABEL: t1:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: shll $12, %edi
; X64-NEXT: movd %rdi, %xmm0
; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/vec_insert-mmx.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ define x86_mmx @t0(i32 %A) nounwind {
;
; X64-LABEL: t0:
; X64: ## BB#0:
; X64-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: movd %rdi, %xmm0
; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/vec_int_to_fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
; AVX-LABEL: sitofp_4i32_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: # kill
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%cvt = sitofp <4 x i32> %a to <4 x double>
Expand Down Expand Up @@ -98,13 +99,15 @@ define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: sitofp_8i16_to_2f64:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = sitofp <8 x i16> %a to <8 x double>
Expand Down Expand Up @@ -144,6 +147,7 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
Expand All @@ -152,6 +156,7 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = sitofp <16 x i8> %a to <16 x double>
Expand Down Expand Up @@ -432,6 +437,7 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
Expand All @@ -445,6 +451,7 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <4 x i32> %a to <4 x double>
Expand Down Expand Up @@ -482,13 +489,15 @@ define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_8i16_to_2f64:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <8 x i16> %a to <8 x double>
Expand Down Expand Up @@ -528,6 +537,7 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
Expand All @@ -536,6 +546,7 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <16 x i8> %a to <16 x double>
Expand Down Expand Up @@ -890,13 +901,15 @@ define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: sitofp_8i16_to_4f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = sitofp <8 x i16> %a to <8 x float>
Expand Down Expand Up @@ -939,6 +952,7 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
Expand All @@ -947,6 +961,7 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = sitofp <16 x i8> %a to <16 x float>
Expand Down Expand Up @@ -1384,13 +1399,15 @@ define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_8i16_to_4f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <8 x i16> %a to <8 x float>
Expand Down Expand Up @@ -1433,6 +1450,7 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
Expand All @@ -1441,6 +1459,7 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <16 x i8> %a to <16 x float>
Expand Down
45 changes: 26 additions & 19 deletions llvm/test/CodeGen/X86/vec_ss_load_fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,20 @@ define i16 @test1(float %f) nounwind {
; CHECK-NEXT: minss LCPI0_2, %xmm0
; CHECK-NEXT: maxss %xmm1, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retl
%tmp = insertelement <4 x float> undef, float %f, i32 0 ; <<4 x float>> [#uses=1]
%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]
%tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1]
ret i16 %tmp69
;
%tmp = insertelement <4 x float> undef, float %f, i32 0 ; <<4 x float>> [#uses=1]
%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]
%tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1]
ret i16 %tmp69
}

define i16 @test2(float %f) nounwind {
Expand All @@ -37,15 +39,17 @@ define i16 @test2(float %f) nounwind {
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: maxss %xmm1, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retl
%tmp28 = fsub float %f, 1.000000e+00 ; <float> [#uses=1]
%tmp37 = fmul float %tmp28, 5.000000e-01 ; <float> [#uses=1]
%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]
%tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
ret i16 %tmp69
;
%tmp28 = fsub float %f, 1.000000e+00 ; <float> [#uses=1]
%tmp37 = fmul float %tmp28, 5.000000e-01 ; <float> [#uses=1]
%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]
%tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
ret i16 %tmp69
}

declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
Expand All @@ -68,6 +72,7 @@ define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: roundss $4, (%eax), %xmm0
; CHECK-NEXT: retl
;
%a = load float , float *%b
%B = insertelement <4 x float> undef, float %a, i32 0
%X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)
Expand All @@ -86,6 +91,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
; CHECK-NEXT: roundss $4, %xmm1, %xmm0
; CHECK-NEXT: addl $28, %esp
; CHECK-NEXT: retl
;
%a = load float , float *%b
%B = insertelement <4 x float> undef, float %a, i32 0
%q = call <4 x float> @f()
Expand All @@ -101,6 +107,7 @@ define <2 x double> @test5() nounwind uwtable readnone noinline {
; CHECK-NEXT: movl $128, %eax
; CHECK-NEXT: cvtsi2sdl %eax, %xmm0
; CHECK-NEXT: retl
;
entry:
%0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double
4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
;
; AVX512F-LABEL: test_uitofp_v4i32_to_v4f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_uitofp_v4i32_to_v4f32:
Expand Down Expand Up @@ -142,7 +144,9 @@ define <8 x float> @test_uitofp_v8i32_to_v8f32(<8 x i32> %arg) {
;
; AVX512F-LABEL: test_uitofp_v8i32_to_v8f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_uitofp_v8i32_to_v8f32:
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/X86/vector-bitreverse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; XOP-NEXT: vmovd %edi, %xmm0
; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
; XOP-NEXT: vpextrb $0, %xmm0, %eax
; XOP-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; XOP-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
Expand All @@ -88,6 +89,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
define i16 @test_bitreverse_i16(i16 %a) nounwind {
; SSE-LABEL: test_bitreverse_i16:
; SSE: # BB#0:
; SSE-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE-NEXT: movl %edi, %ecx
; SSE-NEXT: andl $32768, %ecx # imm = 0x8000
; SSE-NEXT: movl %edi, %eax
Expand Down Expand Up @@ -148,10 +150,12 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; SSE-NEXT: shrl $15, %ecx
; SSE-NEXT: orl %edi, %ecx
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_bitreverse_i16:
; AVX: # BB#0:
; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX-NEXT: movl %edi, %ecx
; AVX-NEXT: andl $32768, %ecx # imm = 0x8000
; AVX-NEXT: movl %edi, %eax
Expand Down Expand Up @@ -212,13 +216,15 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; AVX-NEXT: shrl $15, %ecx
; AVX-NEXT: orl %edi, %ecx
; AVX-NEXT: orl %ecx, %eax
; AVX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT: retq
;
; XOP-LABEL: test_bitreverse_i16:
; XOP: # BB#0:
; XOP-NEXT: vmovd %edi, %xmm0
; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; XOP-NEXT: retq
%b = call i16 @llvm.bitreverse.i16(i16 %a)
ret i16 %b
Expand All @@ -227,6 +233,7 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
define i32 @test_bitreverse_i32(i32 %a) nounwind {
; SSE-LABEL: test_bitreverse_i32:
; SSE: # BB#0:
; SSE-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: shll $31, %eax
; SSE-NEXT: movl %edi, %ecx
Expand Down Expand Up @@ -353,6 +360,7 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
;
; AVX-LABEL: test_bitreverse_i32:
; AVX: # BB#0:
; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: shll $31, %eax
; AVX-NEXT: movl %edi, %ecx
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/X86/vector-compare-results.ll
Original file line number Diff line number Diff line change
Expand Up @@ -148,13 +148,15 @@ define <4 x i1> @test_cmp_v4f64(<4 x double> %a0, <4 x double> %a1) nounwind {
; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_cmp_v4f64:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = fcmp ogt <4 x double> %a0, %a1
ret <4 x i1> %1
Expand Down Expand Up @@ -200,13 +202,15 @@ define <8 x i1> @test_cmp_v8f32(<8 x float> %a0, <8 x float> %a1) nounwind {
; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_cmp_v8f32:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = fcmp ogt <8 x float> %a0, %a1
ret <8 x i1> %1
Expand Down Expand Up @@ -267,13 +271,15 @@ define <4 x i1> @test_cmp_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_cmp_v4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = icmp sgt <4 x i64> %a0, %a1
ret <4 x i1> %1
Expand Down Expand Up @@ -319,13 +325,15 @@ define <8 x i1> @test_cmp_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_cmp_v8i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = icmp sgt <8 x i32> %a0, %a1
ret <8 x i1> %1
Expand Down Expand Up @@ -691,6 +699,7 @@ define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
Expand Down Expand Up @@ -874,6 +883,7 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
Expand Down
49 changes: 49 additions & 0 deletions llvm/test/CodeGen/X86/vector-half-conversions.ll

Large diffs are not rendered by default.

64 changes: 64 additions & 0 deletions llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions llvm/test/CodeGen/X86/vector-lzcnt-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
;
; AVX512CD-LABEL: testv2i64:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
;
; X32-SSE-LABEL: testv2i64:
Expand Down Expand Up @@ -221,7 +223,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
;
; AVX512CD-LABEL: testv2i64u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
;
; X32-SSE-LABEL: testv2i64u:
Expand Down Expand Up @@ -408,7 +412,9 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
;
; AVX512CD-LABEL: testv4i32:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
;
; X32-SSE-LABEL: testv4i32:
Expand Down Expand Up @@ -571,7 +577,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
;
; AVX512CD-LABEL: testv4i32u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
;
; X32-SSE-LABEL: testv4i32u:
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/X86/vector-lzcnt-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
;
; AVX512CD-LABEL: testv4i64:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq

%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
Expand Down Expand Up @@ -138,7 +140,9 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
;
; AVX512CD-LABEL: testv4i64u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq

%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
Expand Down Expand Up @@ -220,7 +224,9 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
;
; AVX512CD-LABEL: testv8i32:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq

%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
Expand Down Expand Up @@ -293,7 +299,9 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
;
; AVX512CD-LABEL: testv8i32u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq

%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/CodeGen/X86/vector-sext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
Expand Down Expand Up @@ -967,6 +968,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
Expand Down Expand Up @@ -1161,6 +1163,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_4i1_to_4i64:
Expand Down
7 changes: 7 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
Expand All @@ -323,7 +324,10 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v8i16:
Expand Down Expand Up @@ -1218,6 +1222,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; AVX2-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
Expand All @@ -1230,8 +1235,10 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v8i16:
Expand Down
5 changes: 5 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,10 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = ashr <16 x i16> %a, %b
ret <16 x i16> %shift
Expand Down Expand Up @@ -818,8 +821,10 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift
Expand Down
7 changes: 7 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
Expand All @@ -292,7 +293,10 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v8i16:
Expand Down Expand Up @@ -952,6 +956,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
Expand All @@ -964,8 +969,10 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v8i16:
Expand Down
5 changes: 5 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = lshr <16 x i16> %a, %b
ret <16 x i16> %shift
Expand Down Expand Up @@ -673,8 +676,10 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-shl-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
Expand All @@ -247,7 +248,10 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v8i16:
Expand Down Expand Up @@ -837,8 +841,10 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v8i16:
Expand Down
5 changes: 5 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-shl-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,10 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = shl <16 x i16> %a, %b
ret <16 x i16> %shift
Expand Down Expand Up @@ -585,8 +588,10 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1223,12 +1223,14 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; AVX1-LABEL: insert_reg_and_zero_v4f64:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX2-NEXT: retq
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT: retq
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
Expand All @@ -96,6 +97,7 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT: retq
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
Expand All @@ -121,6 +123,7 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
Expand All @@ -142,6 +145,7 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
Expand All @@ -165,6 +169,7 @@ define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
Expand All @@ -175,6 +180,7 @@ define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: retq
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
Expand Down
49 changes: 48 additions & 1 deletion llvm/test/CodeGen/X86/vector-shuffle-v1.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq| FileCheck %s --check-prefix=VL_BW_DQ
Expand Down Expand Up @@ -200,6 +199,7 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
Expand All @@ -212,6 +212,7 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector < 8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 10, i32 2, i32 9, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
Expand All @@ -228,6 +229,7 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
Expand All @@ -238,6 +240,7 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector < 8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
Expand All @@ -256,6 +259,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
Expand All @@ -268,6 +272,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector <8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
Expand All @@ -286,6 +291,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
Expand All @@ -298,6 +304,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector <8 x i1> zeroinitializer, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 10, i32 3, i32 7, i32 7, i32 0>
Expand All @@ -319,6 +326,7 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
Expand All @@ -333,6 +341,7 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 1>
Expand All @@ -353,6 +362,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
Expand All @@ -366,6 +376,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%c = shufflevector <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1> %a, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
%c1 = bitcast <8 x i1>%c to i8
Expand All @@ -382,6 +393,7 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
Expand All @@ -392,6 +404,7 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
; VL_BW_DQ-NEXT: vpslld $31, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovw %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i16 %a to <16 x i1>
%c = shufflevector < 16 x i1> %b, <16 x i1> undef, <16 x i32> zeroinitializer
Expand All @@ -400,6 +413,40 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
}

define i64 @shuf64i1_zero(i64 %a) {
; AVX512F-LABEL: shuf64i1_zero:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Ltmp0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Ltmp1:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Ltmp2:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $96, %rsp
; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %ecx
; AVX512F-NEXT: movq %rcx, %rax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf64i1_zero:
; VL_BW_DQ: # BB#0:
; VL_BW_DQ-NEXT: kmovq %rdi, %k0
Expand Down
78 changes: 78 additions & 0 deletions llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i16 %i1, i16 %i2, i16 %i3, i16 %i4, i16 %i5, i16 %i6, i16 %i7) nounwind {
; SSE2-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
; SSE2: # BB#0:
; SSE2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-NEXT: movswq %di, %rax
; SSE2-NEXT: movswq %si, %rsi
; SSE2-NEXT: movswq %dx, %rdx
Expand Down Expand Up @@ -234,6 +240,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
;
; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSSE3-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSSE3-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSSE3-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSSE3-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSSE3-NEXT: movswq %di, %rax
; SSSE3-NEXT: movswq %si, %rsi
; SSSE3-NEXT: movswq %dx, %rdx
Expand Down Expand Up @@ -271,6 +283,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
; SSE41-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
; SSE41: # BB#0:
; SSE41-NEXT: pushq %rbx
; SSE41-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE41-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE41-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE41-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE41-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE41-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE41-NEXT: movswq %di, %rax
; SSE41-NEXT: movswq %si, %rbx
; SSE41-NEXT: movswq %dx, %r11
Expand Down Expand Up @@ -298,6 +316,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
; AVX: # BB#0:
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
; AVX-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; AVX-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; AVX-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; AVX-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX-NEXT: movswq %di, %r10
; AVX-NEXT: movswq %si, %r11
; AVX-NEXT: movswq %dx, %r14
Expand Down Expand Up @@ -343,6 +367,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %i0, i8 %i1, i8 %i2, i8 %i3, i8 %i4, i8 %i5, i8 %i6, i8 %i7, i8 %i8, i8 %i9, i8 %i10, i8 %i11, i8 %i12, i8 %i13, i8 %i14, i8 %i15) nounwind {
; SSE2-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
; SSE2: # BB#0:
; SSE2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movsbq {{[0-9]+}}(%rsp), %r10
; SSE2-NEXT: leaq -{{[0-9]+}}(%rsp), %r11
Expand Down Expand Up @@ -412,6 +442,12 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
;
; SSSE3-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSSE3-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSSE3-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSSE3-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSSE3-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movsbq {{[0-9]+}}(%rsp), %r10
; SSSE3-NEXT: leaq -{{[0-9]+}}(%rsp), %r11
Expand Down Expand Up @@ -487,6 +523,12 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSE41-NEXT: pushq %r13
; SSE41-NEXT: pushq %r12
; SSE41-NEXT: pushq %rbx
; SSE41-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE41-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE41-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE41-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE41-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE41-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE41-NEXT: movsbq %dil, %r15
; SSE41-NEXT: movsbq %sil, %r14
; SSE41-NEXT: movsbq %dl, %r11
Expand Down Expand Up @@ -548,6 +590,12 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; AVX-NEXT: pushq %r13
; AVX-NEXT: pushq %r12
; AVX-NEXT: pushq %rbx
; AVX-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; AVX-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; AVX-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; AVX-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX-NEXT: movsbq %dil, %r10
; AVX-NEXT: movsbq %sil, %r11
; AVX-NEXT: movsbq %dl, %r14
Expand Down Expand Up @@ -1097,6 +1145,12 @@ define <4 x float> @var_shuffle_v4f32_v4f32_x0yx_i32(<4 x float> %x, <4 x float>
define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %y, i16 %i0, i16 %i1, i16 %i2, i16 %i3, i16 %i4, i16 %i5, i16 %i6, i16 %i7) nounwind {
; SSE2-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; SSE2: # BB#0:
; SSE2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-NEXT: movswq %di, %r10
; SSE2-NEXT: movswq %si, %rsi
; SSE2-NEXT: movswq %dx, %r11
Expand Down Expand Up @@ -1130,6 +1184,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
;
; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSSE3-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSSE3-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSSE3-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSSE3-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSSE3-NEXT: movswq %di, %r10
; SSSE3-NEXT: movswq %si, %rsi
; SSSE3-NEXT: movswq %dx, %r11
Expand Down Expand Up @@ -1163,6 +1223,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
;
; SSE41-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; SSE41: # BB#0:
; SSE41-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE41-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE41-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE41-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE41-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE41-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE41-NEXT: movswq %di, %rax
; SSE41-NEXT: movswq %si, %rsi
; SSE41-NEXT: movswq %dx, %rdx
Expand All @@ -1184,6 +1250,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
;
; AVX1-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; AVX1-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; AVX1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT: movswq %di, %r10
; AVX1-NEXT: movswq %si, %r11
; AVX1-NEXT: movswq %dx, %rdx
Expand All @@ -1205,6 +1277,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
;
; AVX2-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; AVX2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; AVX2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT: movswq %di, %r10
; AVX2-NEXT: movswq %si, %r11
; AVX2-NEXT: movswq %dx, %rdx
Expand Down
74 changes: 74 additions & 0 deletions llvm/test/CodeGen/X86/vector-trunc-math.ll

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions llvm/test/CodeGen/X86/vector-trunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
Expand Down Expand Up @@ -251,12 +252,15 @@ define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
entry:
%0 = trunc <8 x i32> %a to <8 x i16>
Expand Down Expand Up @@ -314,6 +318,7 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vmovq %xmm0, (%rax)
Expand Down Expand Up @@ -434,6 +439,8 @@ define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
Expand Down Expand Up @@ -534,6 +541,8 @@ define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
;
; AVX512BW-LABEL: trunc2x4i64_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/X86/widen_bitops-0.ll
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v3i8_as_i24:
Expand All @@ -155,6 +158,9 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
Expand All @@ -176,6 +182,9 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v3i8_as_i24:
Expand All @@ -190,6 +199,9 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
Expand All @@ -211,6 +223,9 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v3i8_as_i24:
Expand All @@ -225,6 +240,9 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ attributes #2 = { "no-frame-pointer-elim"="false" nounwind }
; CHECK-NEXT: je [[STRINGS_EQUAL]]
;
; CHECK: [[STRINGS_EQUAL]]
; CHECK-NEXT: popq
; CHECK: popq
define zeroext i1 @segmentedStack(i8* readonly %vk1, i8* readonly %vk2, i64 %key_size) #5 {
entry:
%cmp.i = icmp eq i8* %vk1, null
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/xaluo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,7 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill
; KNL-NEXT: retq
%mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160)
%mul.overflow = extractvalue { i64, i1 } %mul, 1
Expand Down