88 changes: 44 additions & 44 deletions llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -5478,19 +5478,19 @@ define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe9]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
@@ -5515,7 +5515,7 @@ define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X86-NEXT: vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
@@ -5525,18 +5525,18 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
@@ -5551,18 +5551,18 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
@@ -5597,19 +5597,19 @@ define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe9,0x06]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
@@ -5634,7 +5634,7 @@ define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X86-NEXT: vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
@@ -5644,18 +5644,18 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
@@ -5670,18 +5670,18 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X64-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
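Note: throughout this file the <8 x i8> compare results are now assembled with byte inserts (vpinsrb) instead of word inserts (vpinsrw), consistent with the return value now living as packed bytes in an xmm register, and the 32-bit path loads the i8 %mask argument with a plain movl rather than movzwl. A minimal sketch of the pattern exercised here, using a plain icmp/sext reduction instead of the upgraded intrinsic; the RUN line is an assumption, not copied from the test:

; RUN: llc -mtriple=x86_64-apple-darwin -mattr=+avx512f < %s
define <8 x i8> @cmp_q_sketch(<8 x i64> %a, <8 x i64> %b) {
  %c = icmp eq <8 x i64> %a, %b     ; vpcmpeqq %zmm1, %zmm0, %k0
  %r = sext <8 x i1> %c to <8 x i8> ; mask bits expand into byte lanes
  ret <8 x i8> %r
}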
32 changes: 18 additions & 14 deletions llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -2296,38 +2296,40 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL-LABEL: load_2i1:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: load_2i1:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: load_2i1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: kmovw (%rdi), %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: load_2i1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovb (%rdi), %k0
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: load_2i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovb (%eax), %k0
; X86-NEXT: vpmovm2q %k0, %xmm0
; X86-NEXT: vpmovm2w %k0, %xmm0
; X86-NEXT: retl
%b = load <2 x i1>, <2 x i1>* %a
%c = sext <2 x i1> %b to <2 x i16>
@@ -2339,20 +2341,21 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL: ## %bb.0:
; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: load_4i1:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: load_4i1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: kmovw (%rdi), %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -2361,15 +2364,16 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) {
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovb (%rdi), %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: load_4i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovb (%eax), %k0
; X86-NEXT: vpmovm2d %k0, %xmm0
; X86-NEXT: vpmovm2w %k0, %xmm0
; X86-NEXT: retl
%b = load <4 x i1>, <4 x i1>* %a
%c = sext <4 x i1> %b to <4 x i16>
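Note: load_2i1 and load_4i1 now sign-extend the loaded mask directly into i16 lanes (vpmovm2w on SKX; vpternlogd plus vpmovdw on KNL and AVX512DQ) instead of going through i64/i32 elements. The IR under test, copied from the bodies above, with the new SKX lowering in comments:

define <2 x i16> @load_2i1(<2 x i1>* %a) {
  %b = load <2 x i1>, <2 x i1>* %a   ; kmovb (%rdi), %k0
  %c = sext <2 x i1> %b to <2 x i16> ; vpmovm2w %k0, %xmm0
  ret <2 x i16> %c
}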
50 changes: 27 additions & 23 deletions llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -36,7 +36,7 @@ define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
; ALL-LABEL: trunc_qb_512:
; ALL: ## %bb.0:
; ALL-NEXT: vpmovqw %zmm0, %xmm0
; ALL-NEXT: vpmovqb %zmm0, %xmm0
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%x = trunc <8 x i64> %i to <8 x i8>
@@ -58,14 +58,13 @@ define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
; KNL-LABEL: trunc_qb_256:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT: vpmovqb %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_qb_256:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovqd %ymm0, %xmm0
; SKX-NEXT: vpmovqb %ymm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x = trunc <4 x i64> %i to <4 x i8>
@@ -76,8 +75,7 @@ define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
; KNL-LABEL: trunc_qb_256_mem:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; KNL-NEXT: vpmovqb %zmm0, %xmm0
; KNL-NEXT: vmovd %xmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -95,6 +93,7 @@ define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
; ALL-LABEL: trunc_qb_128:
; ALL: ## %bb.0:
; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
%x = trunc <2 x i64> %i to <2 x i8>
ret <2 x i8> %x
@@ -141,14 +140,13 @@ define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
; KNL-LABEL: trunc_qw_256:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_qw_256:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovqd %ymm0, %xmm0
; SKX-NEXT: vpmovqw %ymm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x = trunc <4 x i64> %i to <4 x i16>
@@ -159,8 +157,7 @@ define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
; KNL-LABEL: trunc_qw_256_mem:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; KNL-NEXT: vpmovqw %zmm0, %xmm0
; KNL-NEXT: vmovq %xmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
Expand All @@ -176,9 +173,16 @@ define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
}

define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
; ALL-LABEL: trunc_qw_128:
; ALL: ## %bb.0:
; ALL-NEXT: retq
; KNL-LABEL: trunc_qw_128:
; KNL: ## %bb.0:
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_qw_128:
; SKX: ## %bb.0:
; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; SKX-NEXT: retq
%x = trunc <2 x i64> %i to <2 x i16>
ret <2 x i16> %x
}
@@ -260,6 +264,7 @@ define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
; ALL-LABEL: trunc_qd_128:
; ALL: ## %bb.0:
; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; ALL-NEXT: retq
%x = trunc <2 x i64> %i to <2 x i32>
ret <2 x i32> %x
@@ -306,14 +311,13 @@ define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
; KNL-LABEL: trunc_db_256:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_db_256:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovdw %ymm0, %xmm0
; SKX-NEXT: vpmovdb %ymm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x = trunc <8 x i32> %i to <8 x i8>
@@ -324,8 +328,7 @@ define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
; KNL-LABEL: trunc_db_256_mem:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vmovq %xmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -343,6 +346,7 @@ define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
; ALL-LABEL: trunc_db_128:
; ALL: ## %bb.0:
; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
%x = trunc <4 x i32> %i to <4 x i8>
ret <4 x i8> %x
@@ -513,6 +517,7 @@ define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
; ALL-LABEL: trunc_wb_128:
; ALL: ## %bb.0:
; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
%x = trunc <8 x i16> %i to <8 x i8>
ret <8 x i8> %x
@@ -691,6 +696,7 @@ define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
; ALL-LABEL: usat_trunc_wb_128:
; ALL: ## %bb.0:
; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
; ALL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; ALL-NEXT: retq
%x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
@@ -716,16 +722,14 @@ define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
; KNL: ## %bb.0:
; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
; KNL-NEXT: vpminud %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: usat_trunc_db_256:
; SKX: ## %bb.0:
; SKX-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT: vpmovdw %ymm0, %xmm0
; SKX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; SKX-NEXT: vpmovdb %ymm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%tmp1 = icmp ult <8 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
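Note: truncates with sub-128-bit results now pick the vpmov form matching the destination element width (vpmovqb, vpmovqw, vpmovdb) instead of truncating to wider elements and shuffling, while the in-register 128-bit cases (trunc_qb_128, trunc_qw_128, trunc_qd_128, trunc_db_128, trunc_wb_128) now need an explicit shuffle because the narrow elements sit packed in the low lanes. A representative case, copied from the test, with the new lowering as a comment:

define <8 x i8> @trunc_qb_512(<8 x i64> %i) {
  %x = trunc <8 x i64> %i to <8 x i8> ; vpmovqb %zmm0, %xmm0
  ret <8 x i8> %x
}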
36 changes: 11 additions & 25 deletions llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -886,22 +886,14 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
; AVX512-LABEL: test44:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
; AVX512-NEXT: vpblendw $170, %xmm2, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xaa]
; AVX512-NEXT: ## xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX512-NEXT: vpblendw $170, %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc2,0xaa]
; AVX512-NEXT: ## xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x76,0xc1]
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
; AVX512-NEXT: vpmovsxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test44:
; SKX: ## %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; SKX-NEXT: vpblendw $170, %xmm2, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xaa]
; SKX-NEXT: ## xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SKX-NEXT: vpblendw $170, %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc2,0xaa]
; SKX-NEXT: ## xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x76,0xc1]
; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; SKX-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <4 x i16> %x, %y
%1 = sext <4 x i1> %mask to <4 x i32>
@@ -911,23 +903,17 @@ define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
; AVX512-LABEL: test45:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
; AVX512-NEXT: vpblendw $17, %xmm1, %xmm2, %xmm1 ## encoding: [0xc4,0xe3,0x69,0x0e,0xc9,0x11]
; AVX512-NEXT: ## xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; AVX512-NEXT: vpblendw $17, %xmm0, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x0e,0xc0,0x11]
; AVX512-NEXT: ## xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc1]
; AVX512-NEXT: vpsrlq $63, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
; AVX512-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX512-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI46_0-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test45:
; SKX: ## %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; SKX-NEXT: vpblendw $17, %xmm1, %xmm2, %xmm1 ## encoding: [0xc4,0xe3,0x69,0x0e,0xc9,0x11]
; SKX-NEXT: ## xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; SKX-NEXT: vpblendw $17, %xmm0, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x0e,0xc0,0x11]
; SKX-NEXT: ## xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc1]
; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <2 x i16> %x, %y
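Note: test44 and test45 no longer blend the i16 lanes with zero before comparing; a full-width vpcmpeqw feeds the mask expansion directly. The IR for test44, copied from above, with the new SKX lowering in comments:

define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) {
  %mask = icmp eq <4 x i16> %x, %y      ; vpcmpeqw %xmm1, %xmm0, %k0
  %1 = sext <4 x i1> %mask to <4 x i32> ; vpmovm2d %k0, %xmm0
  ret <4 x i32> %1
}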
18 changes: 7 additions & 11 deletions llvm/test/CodeGen/X86/avx512-vec3-crash.ll
@@ -6,19 +6,15 @@ define <3 x i8 > @foo(<3 x i8>%x, <3 x i8>%a, <3 x i8>%b) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; CHECK-NEXT: vpslld $24, %xmm0, %xmm0
; CHECK-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0
; CHECK-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm1
; CHECK-NEXT: vpinsrd $1, %r8d, %xmm1, %xmm1
; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $2, %r9d, %xmm1, %xmm1
; CHECK-NEXT: vpslld $24, %xmm1, %xmm1
; CHECK-NEXT: vpsrad $24, %xmm1, %xmm1
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1
; CHECK-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
; CHECK-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpextrb $0, %xmm0, %eax
; CHECK-NEXT: vpextrb $4, %xmm0, %edx
; CHECK-NEXT: vpextrb $8, %xmm0, %ecx
; CHECK-NEXT: vpextrb $1, %xmm0, %edx
; CHECK-NEXT: vpextrb $2, %xmm0, %ecx
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: # kill: def $dl killed $dl killed $edx
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
88 changes: 44 additions & 44 deletions llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
@@ -3326,6 +3326,8 @@ define <2 x i64> @test_mm256_cvtepi64_epi8(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi64_epi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpmovqb %ymm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
@@ -3339,6 +3341,7 @@ define <2 x i64> @test_mm256_cvtepi64_epi16(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi64_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpmovqw %ymm0, %xmm0
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
@@ -3352,6 +3355,7 @@ define <2 x i64> @test_mm256_cvtepi32_epi8(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi32_epi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpmovdb %ymm0, %xmm0
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
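Note: these intrinsics return a 128-bit vector whose tail must be zero, so with the truncated elements now kept packed as bytes/words an explicit zeroing step (vpblendw against zero, or vmovq) follows the vpmov. A sketch of what test_mm256_cvtepi64_epi8 reduces to; the shuffle-based zero fill is an assumption about the clang-emitted IR, not copied from the test:

define <2 x i64> @cvtepi64_epi8_sketch(<4 x i64> %a) {
  %t = trunc <4 x i64> %a to <4 x i8> ; vpmovqb %ymm0, %xmm0
  %z = shufflevector <4 x i8> %t, <4 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> ; zero the upper 12 bytes
  %r = bitcast <16 x i8> %z to <2 x i64>
  ret <2 x i64> %r
}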
340 changes: 170 additions & 170 deletions llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll

Large diffs are not rendered by default.

653 changes: 167 additions & 486 deletions llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll

Large diffs are not rendered by default.

339 changes: 84 additions & 255 deletions llvm/test/CodeGen/X86/bitcast-setcc-128.ll

Large diffs are not rendered by default.

155 changes: 52 additions & 103 deletions llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -151,27 +151,14 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
}

define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i8_to_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: retq
;
; SSSE3-LABEL: bitcast_v16i8_to_v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,u,u,u,u,u,u,u,1,u,u,u,u,u,u,u]
; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: retq
; SSE2-SSSE3-LABEL: bitcast_v16i8_to_v2i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_v16i8_to_v2i8:
; AVX12: # %bb.0:
@@ -187,7 +174,7 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovb2m %xmm0, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
@@ -318,29 +305,15 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
}

define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i16_to_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: packsswb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: retq
;
; SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: packsswb %xmm1, %xmm0
; SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,u,u,u,u,u,u,u,1,u,u,u,u,u,u,u]
; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: retq
; SSE2-SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1: # %bb.0:
@@ -374,7 +347,7 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovw2m %ymm0, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
@@ -392,12 +365,10 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT: shll $16, %ecx
; SSE2-SSSE3-NEXT: orl %eax, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: pextrw $0, %xmm0, %ecx
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax
; SSE2-SSSE3-NEXT: shll $16, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
@@ -411,7 +382,6 @@ define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vpextrw $0, %xmm0, %ecx
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
@@ -420,9 +390,8 @@ define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %ecx
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
@@ -437,8 +406,8 @@ define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; AVX512-NEXT: subq $32, %rsp
; AVX512-NEXT: vpmovb2m %ymm0, %k0
; AVX512-NEXT: kmovd %k0, (%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
; AVX512-NEXT: vmovdqa (%rsp), %xmm0
; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrw $1, %xmm0, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
@@ -579,33 +548,17 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
}

define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; SSE2-LABEL: bitcast_v16i32_to_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm2, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-NEXT: retq
;
; SSSE3-LABEL: bitcast_v16i32_to_v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSSE3-NEXT: packsswb %xmm2, %xmm0
; SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,u,u,u,u,u,u,u,1,u,u,u,u,u,u,u]
; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSSE3-NEXT: retq
; SSE2-SSSE3-LABEL: bitcast_v16i32_to_v2i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v16i32_to_v2i8:
; AVX1: # %bb.0:
Expand Down Expand Up @@ -646,7 +599,7 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %ecx
; AVX512-NEXT: vpextrb $1, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
@@ -665,13 +618,11 @@ define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %ecx
; SSE2-SSSE3-NEXT: shll $16, %ecx
; SSE2-SSSE3-NEXT: orl %eax, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: pextrw $0, %xmm0, %ecx
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
; SSE2-SSSE3-NEXT: shll $16, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
@@ -688,7 +639,6 @@ define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vpextrw $0, %xmm0, %ecx
; AVX1-NEXT: vpextrw $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
@@ -699,9 +649,8 @@ define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrw $0, %xmm0, %ecx
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
@@ -716,8 +665,8 @@ define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; AVX512-NEXT: subq $32, %rsp
; AVX512-NEXT: vpmovw2m %zmm0, %k0
; AVX512-NEXT: kmovd %k0, (%rsp)
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
; AVX512-NEXT: vmovdqa (%rsp), %xmm0
; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrw $1, %xmm0, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
@@ -984,9 +933,9 @@ define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; SSE2-SSSE3-NEXT: orl %ecx, %edx
; SSE2-SSSE3-NEXT: orl %eax, %edx
; SSE2-SSSE3-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-SSSE3-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0
; SSE2-SSSE3-NEXT: movd %xmm0, %ecx
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,0,1]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-SSSE3-NEXT: movd %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: retq
@@ -1246,7 +1195,7 @@ define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; AVX1-NEXT: orl %ecx, %edx
; AVX1-NEXT: orl %eax, %edx
; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX1-NEXT: vmovd %xmm0, %ecx
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
@@ -1506,7 +1455,7 @@ define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; AVX2-NEXT: orl %ecx, %edx
; AVX2-NEXT: orl %eax, %edx
; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX2-NEXT: vmovd %xmm0, %ecx
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
; AVX2-NEXT: addl %ecx, %eax
Expand All @@ -1517,7 +1466,7 @@ define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovb2m %zmm0, %k0
; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrd $1, %xmm0, %eax
; AVX512-NEXT: addl %ecx, %eax
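Note: bitcasting a compare mask to a short integer vector no longer widens each lane to i32/i64 first, so the pre-spill shuffles (punpcklbw, pshufb, pshufd) disappear and the reloads read adjacent bytes or words straight from the spill slot. The likely shape of bitcast_v16i8_to_v2i8, reconstructed from the visible CHECK lines (a sketch, not copied verbatim):

define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
  %1 = icmp slt <16 x i8> %a0, zeroinitializer ; pmovmskb %xmm0, %eax
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4                           ; addb of the two spilled bytes
  ret i8 %5
}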
13 changes: 5 additions & 8 deletions llvm/test/CodeGen/X86/bitreverse.ll
@@ -55,13 +55,11 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; X64-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; X64-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; X64-NEXT: packuswb %xmm2, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psllw $4, %xmm1
@@ -81,7 +79,6 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: psrlq $48, %xmm0
; X64-NEXT: retq
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
ret <2 x i16> %b
@@ -410,7 +407,7 @@ define <2 x i16> @fold_v2i16() {
;
; X64-LABEL: fold_v2i16:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [61440,240]
; X64-NEXT: movaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
; X64-NEXT: retq
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
ret <2 x i16> %b
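Note: test_bitreverse_v2i16 drops the final psrlq $48, and its byte swap now runs as the in-place word swap [1,0,3,2,...] across the whole register, consistent with the two i16 lanes living in the low words instead of being spread across i64 elements; fold_v2i16 likewise folds to an <8 x i16> constant with undef upper lanes. The IR under test, copied from above:

declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>)
define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  ret <2 x i16> %b
}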
13 changes: 6 additions & 7 deletions llvm/test/CodeGen/X86/bswap-vector.ll
@@ -291,23 +291,22 @@ define <4 x i16> @test7(<4 x i16> %v) {
; CHECK-NOSSSE3-NEXT: pxor %xmm1, %xmm1
; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm2
; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; CHECK-NOSSSE3-NEXT: packuswb %xmm2, %xmm0
; CHECK-NOSSSE3-NEXT: psrld $16, %xmm0
; CHECK-NOSSSE3-NEXT: retq
;
; CHECK-SSSE3-LABEL: test7:
; CHECK-SSSE3: # %bb.0: # %entry
; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0],zero,zero,xmm0[5,4],zero,zero,xmm0[9,8],zero,zero,xmm0[13,12],zero,zero
; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-SSSE3-NEXT: retq
;
; CHECK-AVX-LABEL: test7:
; CHECK-AVX: # %bb.0: # %entry
; CHECK-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0],zero,zero,xmm0[5,4],zero,zero,xmm0[9,8],zero,zero,xmm0[13,12],zero,zero
; CHECK-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-AVX-NEXT: retq
;
; CHECK-WIDE-AVX-LABEL: test7:
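Note: the same shift applies to bswap: the SSSE3/AVX pshufb mask becomes a plain within-word byte swap of all eight words rather than swapping four words and zeroing the rest. The IR for test7; the declare is an assumption matching the call:

declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
define <4 x i16> @test7(<4 x i16> %v) {
entry:
  %r = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v) ; pshufb [1,0,3,2,5,4,7,6,...]
  ret <4 x i16> %r
}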
25 changes: 16 additions & 9 deletions llvm/test/CodeGen/X86/buildvec-insertvec.ll
@@ -6,22 +6,29 @@ define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
; SSE2-LABEL: foo:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: movl $255, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl $65280, %eax # imm = 0xFF00
; SSE2-NEXT: orl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: pinsrw $1, %eax, %xmm0
; SSE2-NEXT: movd %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE41-LABEL: foo:
; SSE41: # %bb.0:
; SSE41-NEXT: cvttps2dq %xmm0, %xmm0
; SSE41-NEXT: pextrb $8, %xmm0, %eax
; SSE41-NEXT: pextrb $4, %xmm0, %ecx
; SSE41-NEXT: pextrb $0, %xmm0, %edx
; SSE41-NEXT: movd %edx, %xmm0
; SSE41-NEXT: pinsrb $1, %ecx, %xmm0
; SSE41-NEXT: pinsrb $2, %eax, %xmm0
; SSE41-NEXT: movl $255, %eax
; SSE41-NEXT: pinsrd $3, %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pinsrb $3, %eax, %xmm0
; SSE41-NEXT: movd %xmm0, (%rdi)
; SSE41-NEXT: retq
%t0 = fptoui <3 x float> %in to <3 x i8>
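Note: foo now assembles the <4 x i8> with byte inserts and a pinsrb of 255 instead of dword inserts plus a pshufb compaction. A sketch of the input pattern, assuming the constant lane comes from an insertelement of -1; only the fptoui line is visible in this hunk:

define void @foo_sketch(<3 x float> %in, <4 x i8>* %out) nounwind {
  %t0 = fptoui <3 x float> %in to <3 x i8> ; cvttps2dq plus per-lane extracts
  %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3 ; pinsrb $3 of 255
  store <4 x i8> %t2, <4 x i8>* %out
  ret void
}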
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/combine-64bit-vec-binop.ll
@@ -101,9 +101,9 @@ define double @test2_mul(double %A, double %B) {
define double @test3_mul(double %A, double %B) {
; SSE41-LABEL: test3_mul:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: pmullw %xmm2, %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pmullw %xmm1, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: retq
%1 = bitcast double %A to <8 x i8>
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/combine-or.ll
@@ -362,7 +362,7 @@ define <4 x float> @test25(<4 x float> %a0) {
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK: # %bb.0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/complex-fastmath.ll
@@ -39,7 +39,7 @@ define <2 x float> @complex_square_f32(<2 x float>) #0 {
; FMA-NEXT: vaddss %xmm0, %xmm0, %xmm2
; FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2
; FMA-NEXT: vmulss %xmm1, %xmm1, %xmm1
; FMA-NEXT: vfmsub231ss %xmm0, %xmm0, %xmm1
; FMA-NEXT: vfmsub231ss {{.*#+}} xmm1 = (xmm0 * xmm0) - xmm1
; FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm2[0],xmm1[2,3]
; FMA-NEXT: retq
%2 = extractelement <2 x float> %0, i32 0
Expand Down Expand Up @@ -85,7 +85,7 @@ define <2 x double> @complex_square_f64(<2 x double>) #0 {
; FMA-NEXT: vaddsd %xmm0, %xmm0, %xmm2
; FMA-NEXT: vmulsd %xmm2, %xmm1, %xmm2
; FMA-NEXT: vmulsd %xmm1, %xmm1, %xmm1
; FMA-NEXT: vfmsub231sd %xmm0, %xmm0, %xmm1
; FMA-NEXT: vfmsub231sd {{.*#+}} xmm1 = (xmm0 * xmm0) - xmm1
; FMA-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; FMA-NEXT: retq
%2 = extractelement <2 x double> %0, i32 0
@@ -137,9 +137,9 @@ define <2 x float> @complex_mul_f32(<2 x float>, <2 x float>) #0 {
; FMA-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; FMA-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; FMA-NEXT: vmulss %xmm2, %xmm1, %xmm4
; FMA-NEXT: vfmadd231ss %xmm0, %xmm3, %xmm4
; FMA-NEXT: vfmadd231ss {{.*#+}} xmm4 = (xmm3 * xmm0) + xmm4
; FMA-NEXT: vmulss %xmm2, %xmm3, %xmm2
; FMA-NEXT: vfmsub231ss %xmm0, %xmm1, %xmm2
; FMA-NEXT: vfmsub231ss {{.*#+}} xmm2 = (xmm1 * xmm0) - xmm2
; FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm4[0],xmm2[2,3]
; FMA-NEXT: retq
%3 = extractelement <2 x float> %0, i32 0
@@ -192,9 +192,9 @@ define <2 x double> @complex_mul_f64(<2 x double>, <2 x double>) #0 {
; FMA-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; FMA-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; FMA-NEXT: vmulsd %xmm2, %xmm1, %xmm4
; FMA-NEXT: vfmadd231sd %xmm0, %xmm3, %xmm4
; FMA-NEXT: vfmadd231sd {{.*#+}} xmm4 = (xmm3 * xmm0) + xmm4
; FMA-NEXT: vmulsd %xmm2, %xmm3, %xmm2
; FMA-NEXT: vfmsub231sd %xmm0, %xmm1, %xmm2
; FMA-NEXT: vfmsub231sd {{.*#+}} xmm2 = (xmm1 * xmm0) - xmm2
; FMA-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm4[0]
; FMA-NEXT: retq
%3 = extractelement <2 x double> %0, i32 0
44 changes: 20 additions & 24 deletions llvm/test/CodeGen/X86/cvtv2f32.ll
@@ -42,25 +42,23 @@ define <2 x float> @uitofp_2i32_cvt_buildvector(i32 %x, i32 %y, <2 x float> %v)
define <2 x float> @uitofp_2i32_buildvector_cvt(i32 %x, i32 %y, <2 x float> %v) {
; X32-LABEL: uitofp_2i32_buildvector_cvt:
; X32: # %bb.0:
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; X32-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; X32-NEXT: orpd %xmm1, %xmm2
; X32-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; X32-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; X32-NEXT: por %xmm1, %xmm2
; X32-NEXT: subpd %xmm1, %xmm2
; X32-NEXT: cvtpd2ps %xmm2, %xmm1
; X32-NEXT: mulps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: uitofp_2i32_buildvector_cvt:
; X64: # %bb.0:
; X64-NEXT: movd %esi, %xmm1
; X64-NEXT: movd %edi, %xmm2
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; X64-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; X64-NEXT: por %xmm1, %xmm2
; X64-NEXT: subpd %xmm1, %xmm2
; X64-NEXT: cvtpd2ps %xmm2, %xmm1
; X64-NEXT: movd %edi, %xmm1
; X64-NEXT: pinsrd $1, %esi, %xmm1
; X64-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X64-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
; X64-NEXT: por %xmm2, %xmm1
; X64-NEXT: subpd %xmm2, %xmm1
; X64-NEXT: cvtpd2ps %xmm1, %xmm1
; X64-NEXT: mulps %xmm1, %xmm0
; X64-NEXT: retq
%t1 = insertelement <2 x i32> undef, i32 %x, i32 0
@@ -73,23 +71,21 @@ define <2 x float> @uitofp_2i32_buildvector_cvt(i32 %x, i32 %y, <2 x float> %v)
define <2 x float> @uitofp_2i32_legalized(<2 x i32> %in, <2 x float> %v) {
; X32-LABEL: uitofp_2i32_legalized:
; X32: # %bb.0:
; X32-NEXT: xorps %xmm2, %xmm2
; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; X32-NEXT: movaps {{.*#+}} xmm0 = [4.503599627370496E+15,4.503599627370496E+15]
; X32-NEXT: orps %xmm0, %xmm2
; X32-NEXT: subpd %xmm0, %xmm2
; X32-NEXT: cvtpd2ps %xmm2, %xmm0
; X32-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X32-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
; X32-NEXT: por %xmm2, %xmm0
; X32-NEXT: subpd %xmm2, %xmm0
; X32-NEXT: cvtpd2ps %xmm0, %xmm0
; X32-NEXT: mulps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: uitofp_2i32_legalized:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm2, %xmm2
; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; X64-NEXT: movaps {{.*#+}} xmm0 = [4.503599627370496E+15,4.503599627370496E+15]
; X64-NEXT: orps %xmm0, %xmm2
; X64-NEXT: subpd %xmm0, %xmm2
; X64-NEXT: cvtpd2ps %xmm2, %xmm0
; X64-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
; X64-NEXT: por %xmm2, %xmm0
; X64-NEXT: subpd %xmm2, %xmm0
; X64-NEXT: cvtpd2ps %xmm0, %xmm0
; X64-NEXT: mulps %xmm1, %xmm0
; X64-NEXT: retq
%t1 = uitofp <2 x i32> %in to <2 x float>
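Note: the unsigned conversions now zero-extend the two i32s with pmovzxdq and then use the standard 2^52 trick: OR each value into the low mantissa bits of 4503599627370496.0 (2^52, bit pattern 0x4330000000000000), subtract the same constant so the i32 value survives exactly as a double, then round to float with cvtpd2ps. The IR for the legalized case; the uitofp line is copied from above and the fmul is inferred from the trailing mulps:

define <2 x float> @uitofp_2i32_legalized(<2 x i32> %in, <2 x float> %v) {
  %t1 = uitofp <2 x i32> %in to <2 x float> ; pmovzxdq + por/subpd 2^52 + cvtpd2ps
  %t2 = fmul <2 x float> %t1, %v            ; mulps
  ret <2 x float> %t2
}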
9 changes: 7 additions & 2 deletions llvm/test/CodeGen/X86/extract-concat.ll
@@ -5,9 +5,14 @@ define void @foo(<4 x float> %in, <4 x i8>* %out) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
; CHECK-NEXT: pextrb $8, %xmm0, %eax
; CHECK-NEXT: pextrb $4, %xmm0, %ecx
; CHECK-NEXT: pextrb $0, %xmm0, %edx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: pinsrb $1, %ecx, %xmm0
; CHECK-NEXT: pinsrb $2, %eax, %xmm0
; CHECK-NEXT: movl $255, %eax
; CHECK-NEXT: pinsrd $3, %eax, %xmm0
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-NEXT: pinsrb $3, %eax, %xmm0
; CHECK-NEXT: movd %xmm0, (%rdi)
; CHECK-NEXT: retq
%t0 = fptosi <4 x float> %in to <4 x i32>
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/X86/extract-insert.ll
@@ -31,12 +31,10 @@ define i8 @extractelt_bitcast(i32 %x) nounwind {
define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind {
; X86-LABEL: extractelt_bitcast_extra_use:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: popl %ecx
; X86-NEXT: retl
;
; X64-LABEL: extractelt_bitcast_extra_use:
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/f16c-intrinsics.ll
@@ -268,14 +268,12 @@ define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) n
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
; X32-AVX512VL: # %bb.0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
; X64-AVX512VL: # %bb.0: # %entry
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a, i32 3)
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
@@ -11,12 +11,12 @@
define <4 x i16> @test_sext_4i8_4i16() {
; X32-LABEL: test_sext_4i8_4i16:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X32-NEXT: vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
; X32-NEXT: retl
;
; X64-LABEL: test_sext_4i8_4i16:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X64-NEXT: vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
; X64-NEXT: retq
%1 = insertelement <4 x i8> undef, i8 0, i32 0
%2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -29,12 +29,12 @@ define <4 x i16> @test_sext_4i8_4i16() {
define <4 x i16> @test_sext_4i8_4i16_undef() {
; X32-LABEL: test_sext_4i8_4i16_undef:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
; X32-NEXT: vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
; X32-NEXT: retl
;
; X64-LABEL: test_sext_4i8_4i16_undef:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
; X64-NEXT: vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
; X64-NEXT: retq
%1 = insertelement <4 x i8> undef, i8 undef, i32 0
%2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -207,12 +207,12 @@ define <8 x i32> @test_sext_8i8_8i32_undef() {
define <4 x i16> @test_zext_4i8_4i16() {
; X32-LABEL: test_zext_4i8_4i16:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X32-NEXT: vmovaps {{.*#+}} xmm0 = <0,255,2,253,u,u,u,u>
; X32-NEXT: retl
;
; X64-LABEL: test_zext_4i8_4i16:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X64-NEXT: vmovaps {{.*#+}} xmm0 = <0,255,2,253,u,u,u,u>
; X64-NEXT: retq
%1 = insertelement <4 x i8> undef, i8 0, i32 0
%2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -261,12 +261,12 @@ define <4 x i64> @test_zext_4i8_4i64() {
define <4 x i16> @test_zext_4i8_4i16_undef() {
; X32-LABEL: test_zext_4i8_4i16_undef:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253]
; X32-NEXT: vmovaps {{.*#+}} xmm0 = <0,255,0,253,u,u,u,u>
; X32-NEXT: retl
;
; X64-LABEL: test_zext_4i8_4i16_undef:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253]
; X64-NEXT: vmovaps {{.*#+}} xmm0 = <0,255,0,253,u,u,u,u>
; X64-NEXT: retq
%1 = insertelement <4 x i8> undef, i8 undef, i32 0
%2 = insertelement <4 x i8> %1, i8 -1, i32 1
10 changes: 1 addition & 9 deletions llvm/test/CodeGen/X86/insertelement-shuffle.ll
@@ -30,18 +30,10 @@ define <8 x float> @insert_subvector_256(i16 %x0, i16 %x1, <8 x float> %v) nounw
define <8 x i64> @insert_subvector_512(i32 %x0, i32 %x1, <8 x i64> %v) nounwind {
; X86_AVX256-LABEL: insert_subvector_512:
; X86_AVX256: # %bb.0:
; X86_AVX256-NEXT: pushl %ebp
; X86_AVX256-NEXT: movl %esp, %ebp
; X86_AVX256-NEXT: andl $-8, %esp
; X86_AVX256-NEXT: subl $8, %esp
; X86_AVX256-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86_AVX256-NEXT: vmovlps %xmm2, (%esp)
; X86_AVX256-NEXT: vextracti128 $1, %ymm0, %xmm2
; X86_AVX256-NEXT: vpinsrd $0, (%esp), %xmm2, %xmm2
; X86_AVX256-NEXT: vpinsrd $0, {{[0-9]+}}(%esp), %xmm2, %xmm2
; X86_AVX256-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
; X86_AVX256-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; X86_AVX256-NEXT: movl %ebp, %esp
; X86_AVX256-NEXT: popl %ebp
; X86_AVX256-NEXT: retl
;
; X64_AVX256-LABEL: insert_subvector_512:
76 changes: 10 additions & 66 deletions llvm/test/CodeGen/X86/known-bits.ll
@@ -5,100 +5,44 @@
define void @knownbits_zext_in_reg(i8*) nounwind {
; X32-LABEL: knownbits_zext_in_reg:
; X32: # %bb.0: # %BB
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl (%eax), %ecx
; X32-NEXT: imull $101, %ecx, %eax
; X32-NEXT: shrl $14, %eax
; X32-NEXT: imull $177, %ecx, %ecx
; X32-NEXT: shrl $14, %ecx
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1
; X32-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT: vpand %xmm2, %xmm0, %xmm0
; X32-NEXT: vpextrd $1, %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: vpextrd $1, %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: vmovd %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: vmovd %xmm0, (%esp) # 4-byte Folded Spill
; X32-NEXT: vpextrd $2, %xmm1, %edi
; X32-NEXT: vpextrd $2, %xmm0, %esi
; X32-NEXT: vpextrd $3, %xmm1, %ebx
; X32-NEXT: vpextrd $3, %xmm0, %ebp
; X32-NEXT: imull $177, %ecx, %edx
; X32-NEXT: shrl $14, %edx
; X32-NEXT: movzbl %al, %ecx
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_1: # %CF
; X32-NEXT: # =>This Loop Header: Depth=1
; X32-NEXT: # Child Loop BB0_2 Depth 2
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: divl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: divl (%esp) # 4-byte Folded Reload
; X32-NEXT: movl %edi, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: divl %esi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: divl %ebp
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: divb %dl
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_2: # %CF237
; X32-NEXT: # Parent Loop BB0_1 Depth=1
; X32-NEXT: # => This Inner Loop Header: Depth=2
; X32-NEXT: testb %cl, %cl
; X32-NEXT: testb %bl, %bl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: jmp .LBB0_1
;
; X64-LABEL: knownbits_zext_in_reg:
; X64: # %bb.0: # %BB
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %rbx
; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: imull $101, %eax, %ecx
; X64-NEXT: shrl $14, %ecx
; X64-NEXT: imull $177, %eax, %eax
; X64-NEXT: shrl $14, %eax
; X64-NEXT: imull $177, %eax, %edx
; X64-NEXT: shrl $14, %edx
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X64-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm1
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
; X64-NEXT: vpextrd $1, %xmm1, %r8d
; X64-NEXT: vpextrd $1, %xmm0, %r9d
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: vmovd %xmm1, %r10d
; X64-NEXT: vmovd %xmm0, %r11d
; X64-NEXT: vpextrd $2, %xmm1, %edi
; X64-NEXT: vpextrd $2, %xmm0, %ebx
; X64-NEXT: vpextrd $3, %xmm1, %ecx
; X64-NEXT: vpextrd $3, %xmm0, %ebp
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %CF
; X64-NEXT: # =>This Loop Header: Depth=1
; X64-NEXT: # Child Loop BB0_2 Depth 2
; X64-NEXT: movl %r8d, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %r9d
; X64-NEXT: movl %r10d, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %r11d
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %ebx
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %ebp
; X64-NEXT: divb %dl
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_2: # %CF237
; X64-NEXT: # Parent Loop BB0_1 Depth=1
25 changes: 6 additions & 19 deletions llvm/test/CodeGen/X86/load-partial.ll
@@ -145,48 +145,35 @@ define i32 @load_partial_illegal_type() {
; SSE2: # %bb.0:
; SSE2-NEXT: movzwl {{.*}}(%rip), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,0,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movl $2, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_partial_illegal_type:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movzwl {{.*}}(%rip), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT: por {{.*}}(%rip), %xmm0
; SSSE3-NEXT: movd %xmm0, %eax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_partial_illegal_type:
; SSE41: # %bb.0:
; SSE41-NEXT: movzwl {{.*}}(%rip), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,1],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSE41-NEXT: movl $2, %eax
; SSE41-NEXT: pinsrd $2, %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pinsrb $2, %eax, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: retq
;
; AVX-LABEL: load_partial_illegal_type:
; AVX: # %bb.0:
; AVX-NEXT: movzwl {{.*}}(%rip), %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,1],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX-NEXT: movl $2, %eax
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
%1 = load <2 x i8>, <2 x i8>* bitcast (i8* @h to <2 x i8>*), align 1
8 changes: 0 additions & 8 deletions llvm/test/CodeGen/X86/lower-bitcast.ll
@@ -9,9 +9,7 @@
define double @test1(double %A) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: retq
;
; CHECK-WIDE-LABEL: test1:
@@ -68,9 +66,7 @@ define i64 @test4(i64 %A) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: retq
;
@@ -108,9 +104,7 @@ define double @test5(double %A) {
define double @test6(double %A) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT: paddw {{.*}}(%rip), %xmm0
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; CHECK-NEXT: retq
;
; CHECK-WIDE-LABEL: test6:
@@ -147,9 +141,7 @@ define double @test7(double %A, double %B) {
define double @test8(double %A) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0:
; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: paddb {{.*}}(%rip), %xmm0
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; CHECK-NEXT: retq
;
; CHECK-WIDE-LABEL: test8:
48 changes: 17 additions & 31 deletions llvm/test/CodeGen/X86/madd.ll
@@ -1876,26 +1876,12 @@ define <4 x i32> @larger_mul(<16 x i16> %A, <16 x i16> %B) {
;
; AVX1-LABEL: larger_mul:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: larger_mul:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vpmovsxwd %xmm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -2597,43 +2583,43 @@ define <4 x i32> @pmaddwd_bad_indices(<8 x i16>* %Aptr, <8 x i16>* %Bptr) {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: movdqa (%rsi), %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,6,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pmulhw %xmm2, %xmm4
; SSE2-NEXT: pmullw %xmm2, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,0,3,2,4,5,6,7]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pmulhw %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pmulhw %xmm3, %xmm4
; SSE2-NEXT: pmullw %xmm3, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pmulhw %xmm1, %xmm3
; SSE2-NEXT: pmullw %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: paddd %xmm3, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: pmaddwd_bad_indices:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vmovdqa (%rsi), %xmm1
; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3,4,5,10,11,12,13,12,13,10,11,12,13,14,15]
; AVX-NEXT: vpmovsxwd %xmm2, %xmm2
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,8,9,14,15,8,9,14,15,12,13,14,15]
; AVX-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; AVX-NEXT: vpmovsxwd %xmm2, %xmm2
; AVX-NEXT: vpmovsxwd %xmm3, %xmm3
; AVX-NEXT: vpmulld %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,8,9,14,15,8,9,14,15,12,13,14,15]
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; AVX-NEXT: vpmovsxwd %xmm1, %xmm1
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpaddd %xmm0, %xmm2, %xmm0
99 changes: 41 additions & 58 deletions llvm/test/CodeGen/X86/masked_compressstore.ll
@@ -603,11 +603,9 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
define void @compressstore_v2f32_v2i32(float* %base, <2 x float> %V, <2 x i32> %trigger) {
; SSE2-LABEL: compressstore_v2f32_v2i32:
; SSE2: ## %bb.0:
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,1,1]
; SSE2-NEXT: movmskpd %xmm1, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: jne LBB2_1
@@ -629,8 +627,8 @@ define void @compressstore_v2f32_v2i32(float* %base, <2 x float> %V, <2 x i32> %
; SSE42-LABEL: compressstore_v2f32_v2i32:
; SSE42: ## %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE42-NEXT: pcmpeqq %xmm2, %xmm1
; SSE42-NEXT: pcmpeqd %xmm1, %xmm2
; SSE42-NEXT: pmovsxdq %xmm2, %xmm1
; SSE42-NEXT: movmskpd %xmm1, %eax
; SSE42-NEXT: testb $1, %al
; SSE42-NEXT: jne LBB2_1
@@ -648,69 +646,54 @@ define void @compressstore_v2f32_v2i32(float* %base, <2 x float> %V, <2 x i32> %
; SSE42-NEXT: extractps $1, %xmm0, (%rdi)
; SSE42-NEXT: retq
;
; AVX1-LABEL: compressstore_v2f32_v2i32:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovmskpd %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: jne LBB2_1
; AVX1-NEXT: ## %bb.2: ## %else
; AVX1-NEXT: testb $2, %al
; AVX1-NEXT: jne LBB2_3
; AVX1-NEXT: LBB2_4: ## %else2
; AVX1-NEXT: retq
; AVX1-NEXT: LBB2_1: ## %cond.store
; AVX1-NEXT: vmovss %xmm0, (%rdi)
; AVX1-NEXT: addq $4, %rdi
; AVX1-NEXT: testb $2, %al
; AVX1-NEXT: je LBB2_4
; AVX1-NEXT: LBB2_3: ## %cond.store1
; AVX1-NEXT: vextractps $1, %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: compressstore_v2f32_v2i32:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vmovmskpd %xmm1, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: jne LBB2_1
; AVX2-NEXT: ## %bb.2: ## %else
; AVX2-NEXT: testb $2, %al
; AVX2-NEXT: jne LBB2_3
; AVX2-NEXT: LBB2_4: ## %else2
; AVX2-NEXT: retq
; AVX2-NEXT: LBB2_1: ## %cond.store
; AVX2-NEXT: vmovss %xmm0, (%rdi)
; AVX2-NEXT: addq $4, %rdi
; AVX2-NEXT: testb $2, %al
; AVX2-NEXT: je LBB2_4
; AVX2-NEXT: LBB2_3: ## %cond.store1
; AVX2-NEXT: vextractps $1, %xmm0, (%rdi)
; AVX2-NEXT: retq
; AVX1OR2-LABEL: compressstore_v2f32_v2i32:
; AVX1OR2: ## %bb.0:
; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1OR2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1OR2-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1OR2-NEXT: vmovmskpd %xmm1, %eax
; AVX1OR2-NEXT: testb $1, %al
; AVX1OR2-NEXT: jne LBB2_1
; AVX1OR2-NEXT: ## %bb.2: ## %else
; AVX1OR2-NEXT: testb $2, %al
; AVX1OR2-NEXT: jne LBB2_3
; AVX1OR2-NEXT: LBB2_4: ## %else2
; AVX1OR2-NEXT: retq
; AVX1OR2-NEXT: LBB2_1: ## %cond.store
; AVX1OR2-NEXT: vmovss %xmm0, (%rdi)
; AVX1OR2-NEXT: addq $4, %rdi
; AVX1OR2-NEXT: testb $2, %al
; AVX1OR2-NEXT: je LBB2_4
; AVX1OR2-NEXT: LBB2_3: ## %cond.store1
; AVX1OR2-NEXT: vextractps $1, %xmm0, (%rdi)
; AVX1OR2-NEXT: retq
;
; AVX512F-LABEL: compressstore_v2f32_v2i32:
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512F-NEXT: vptestnmq %zmm1, %zmm1, %k0
; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
; AVX512F-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: compressstore_v2f32_v2i32:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512VL-NEXT: vptestnmq %xmm1, %xmm1, %k1
; AVX512VL-NEXT: vcompressps %xmm0, (%rdi) {%k1}
; AVX512VL-NEXT: retq
; AVX512VLDQ-LABEL: compressstore_v2f32_v2i32:
; AVX512VLDQ: ## %bb.0:
; AVX512VLDQ-NEXT: vptestnmd %xmm1, %xmm1, %k0
; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0
; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1
; AVX512VLDQ-NEXT: vcompressps %xmm0, (%rdi) {%k1}
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: compressstore_v2f32_v2i32:
; AVX512VLBW: ## %bb.0:
; AVX512VLBW-NEXT: vptestnmd %xmm1, %xmm1, %k0
; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0
; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k1
; AVX512VLBW-NEXT: vcompressps %xmm0, (%rdi) {%k1}
; AVX512VLBW-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.compressstore.v2f32(<2 x float> %V, float* %base, <2 x i1> %mask)
ret void
102 changes: 42 additions & 60 deletions llvm/test/CodeGen/X86/masked_expandload.ll
@@ -1117,11 +1117,9 @@ define <16 x double> @expandload_v16f64_v16i32(double* %base, <16 x double> %src
define <2 x float> @expandload_v2f32_v2i1(float* %base, <2 x float> %src0, <2 x i32> %trigger) {
; SSE2-LABEL: expandload_v2f32_v2i1:
; SSE2: ## %bb.0:
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,1,1]
; SSE2-NEXT: movmskpd %xmm1, %eax
; SSE2-NEXT: testb $1, %al
; SSE2-NEXT: jne LBB4_1
@@ -1146,8 +1144,8 @@ define <2 x float> @expandload_v2f32_v2i1(float* %base, <2 x float> %src0, <2 x
; SSE42-LABEL: expandload_v2f32_v2i1:
; SSE42: ## %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE42-NEXT: pcmpeqq %xmm2, %xmm1
; SSE42-NEXT: pcmpeqd %xmm1, %xmm2
; SSE42-NEXT: pmovsxdq %xmm2, %xmm1
; SSE42-NEXT: movmskpd %xmm1, %eax
; SSE42-NEXT: testb $1, %al
; SSE42-NEXT: jne LBB4_1
@@ -1166,72 +1164,56 @@ define <2 x float> @expandload_v2f32_v2i1(float* %base, <2 x float> %src0, <2 x
; SSE42-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; SSE42-NEXT: retq
;
; AVX1-LABEL: expandload_v2f32_v2i1:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovmskpd %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: jne LBB4_1
; AVX1-NEXT: ## %bb.2: ## %else
; AVX1-NEXT: testb $2, %al
; AVX1-NEXT: jne LBB4_3
; AVX1-NEXT: LBB4_4: ## %else2
; AVX1-NEXT: retq
; AVX1-NEXT: LBB4_1: ## %cond.load
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: addq $4, %rdi
; AVX1-NEXT: testb $2, %al
; AVX1-NEXT: je LBB4_4
; AVX1-NEXT: LBB4_3: ## %cond.load1
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: expandload_v2f32_v2i1:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vmovmskpd %xmm1, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: jne LBB4_1
; AVX2-NEXT: ## %bb.2: ## %else
; AVX2-NEXT: testb $2, %al
; AVX2-NEXT: jne LBB4_3
; AVX2-NEXT: LBB4_4: ## %else2
; AVX2-NEXT: retq
; AVX2-NEXT: LBB4_1: ## %cond.load
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: addq $4, %rdi
; AVX2-NEXT: testb $2, %al
; AVX2-NEXT: je LBB4_4
; AVX2-NEXT: LBB4_3: ## %cond.load1
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX2-NEXT: retq
; AVX1OR2-LABEL: expandload_v2f32_v2i1:
; AVX1OR2: ## %bb.0:
; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1OR2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1OR2-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1OR2-NEXT: vmovmskpd %xmm1, %eax
; AVX1OR2-NEXT: testb $1, %al
; AVX1OR2-NEXT: jne LBB4_1
; AVX1OR2-NEXT: ## %bb.2: ## %else
; AVX1OR2-NEXT: testb $2, %al
; AVX1OR2-NEXT: jne LBB4_3
; AVX1OR2-NEXT: LBB4_4: ## %else2
; AVX1OR2-NEXT: retq
; AVX1OR2-NEXT: LBB4_1: ## %cond.load
; AVX1OR2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1OR2-NEXT: addq $4, %rdi
; AVX1OR2-NEXT: testb $2, %al
; AVX1OR2-NEXT: je LBB4_4
; AVX1OR2-NEXT: LBB4_3: ## %cond.load1
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX1OR2-NEXT: retq
;
; AVX512F-LABEL: expandload_v2f32_v2i1:
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512F-NEXT: vptestnmq %zmm1, %zmm1, %k0
; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1}
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: expandload_v2f32_v2i1:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512VL-NEXT: vptestnmq %xmm1, %xmm1, %k1
; AVX512VL-NEXT: vexpandps (%rdi), %xmm0 {%k1}
; AVX512VL-NEXT: retq
; AVX512VLDQ-LABEL: expandload_v2f32_v2i1:
; AVX512VLDQ: ## %bb.0:
; AVX512VLDQ-NEXT: vptestnmd %xmm1, %xmm1, %k0
; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0
; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1
; AVX512VLDQ-NEXT: vexpandps (%rdi), %xmm0 {%k1}
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: expandload_v2f32_v2i1:
; AVX512VLBW: ## %bb.0:
; AVX512VLBW-NEXT: vptestnmd %xmm1, %xmm1, %k0
; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0
; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k1
; AVX512VLBW-NEXT: vexpandps (%rdi), %xmm0 {%k1}
; AVX512VLBW-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x float> @llvm.masked.expandload.v2f32(float* %base, <2 x i1> %mask, <2 x float> %src0)
ret <2 x float> %res
203 changes: 77 additions & 126 deletions llvm/test/CodeGen/X86/masked_gather_scatter.ll

Large diffs are not rendered by default.

76 changes: 27 additions & 49 deletions llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
@@ -30,24 +30,21 @@ define <2 x double> @test_gather_v2i32_index(double* %base, <2 x i32> %ind, <2 x
;
; PROMOTE_SKX-LABEL: test_gather_v2i32_index:
; PROMOTE_SKX: # %bb.0:
; PROMOTE_SKX-NEXT: vpsllq $32, %xmm0, %xmm0
; PROMOTE_SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; PROMOTE_SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; PROMOTE_SKX-NEXT: vpmovq2m %xmm1, %k1
; PROMOTE_SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1}
; PROMOTE_SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %xmm2 {%k1}
; PROMOTE_SKX-NEXT: vmovapd %xmm2, %xmm0
; PROMOTE_SKX-NEXT: retq
;
; PROMOTE_KNL-LABEL: test_gather_v2i32_index:
; PROMOTE_KNL: # %bb.0:
; PROMOTE_KNL-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; PROMOTE_KNL-NEXT: vpsllq $32, %xmm0, %xmm0
; PROMOTE_KNL-NEXT: vpsraq $32, %zmm0, %zmm0
; PROMOTE_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; PROMOTE_KNL-NEXT: vpsllq $63, %xmm1, %xmm1
; PROMOTE_KNL-NEXT: vptestmq %zmm1, %zmm1, %k0
; PROMOTE_KNL-NEXT: kshiftlw $14, %k0, %k0
; PROMOTE_KNL-NEXT: kshiftrw $14, %k0, %k1
; PROMOTE_KNL-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
; PROMOTE_KNL-NEXT: vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k1}
; PROMOTE_KNL-NEXT: vmovapd %xmm2, %xmm0
; PROMOTE_KNL-NEXT: vzeroupper
; PROMOTE_KNL-NEXT: retq
Expand All @@ -61,11 +58,8 @@ define <2 x double> @test_gather_v2i32_index(double* %base, <2 x i32> %ind, <2 x
;
; PROMOTE_AVX2-LABEL: test_gather_v2i32_index:
; PROMOTE_AVX2: # %bb.0:
; PROMOTE_AVX2-NEXT: vpsllq $32, %xmm0, %xmm3
; PROMOTE_AVX2-NEXT: vpsrad $31, %xmm3, %xmm3
; PROMOTE_AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3]
; PROMOTE_AVX2-NEXT: vpsllq $63, %xmm1, %xmm1
; PROMOTE_AVX2-NEXT: vgatherqpd %xmm1, (%rdi,%xmm0,8), %xmm2
; PROMOTE_AVX2-NEXT: vgatherdpd %xmm1, (%rdi,%xmm0,8), %xmm2
; PROMOTE_AVX2-NEXT: vmovapd %xmm2, %xmm0
; PROMOTE_AVX2-NEXT: retq
%gep.random = getelementptr double, double* %base, <2 x i32> %ind
@@ -97,21 +91,18 @@ define void @test_scatter_v2i32_index(<2 x double> %a1, double* %base, <2 x i32>
; PROMOTE_SKX: # %bb.0:
; PROMOTE_SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_SKX-NEXT: vpmovq2m %xmm2, %k1
; PROMOTE_SKX-NEXT: vpsllq $32, %xmm1, %xmm1
; PROMOTE_SKX-NEXT: vpsraq $32, %xmm1, %xmm1
; PROMOTE_SKX-NEXT: vscatterqpd %xmm0, (%rdi,%xmm1,8) {%k1}
; PROMOTE_SKX-NEXT: vscatterdpd %xmm0, (%rdi,%xmm1,8) {%k1}
; PROMOTE_SKX-NEXT: retq
;
; PROMOTE_KNL-LABEL: test_scatter_v2i32_index:
; PROMOTE_KNL: # %bb.0:
; PROMOTE_KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; PROMOTE_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; PROMOTE_KNL-NEXT: vpsllq $32, %xmm1, %xmm1
; PROMOTE_KNL-NEXT: vpsraq $32, %zmm1, %zmm1
; PROMOTE_KNL-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_KNL-NEXT: vptestmq %zmm2, %zmm2, %k0
; PROMOTE_KNL-NEXT: kshiftlw $14, %k0, %k0
; PROMOTE_KNL-NEXT: kshiftrw $14, %k0, %k1
; PROMOTE_KNL-NEXT: vscatterqpd %zmm0, (%rdi,%zmm1,8) {%k1}
; PROMOTE_KNL-NEXT: vscatterdpd %zmm0, (%rdi,%ymm1,8) {%k1}
; PROMOTE_KNL-NEXT: vzeroupper
; PROMOTE_KNL-NEXT: retq
;
@@ -143,9 +134,7 @@ define void @test_scatter_v2i32_index(<2 x double> %a1, double* %base, <2 x i32>
;
; PROMOTE_AVX2-LABEL: test_scatter_v2i32_index:
; PROMOTE_AVX2: # %bb.0:
; PROMOTE_AVX2-NEXT: vpsllq $32, %xmm1, %xmm3
; PROMOTE_AVX2-NEXT: vpsrad $31, %xmm3, %xmm3
; PROMOTE_AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3]
; PROMOTE_AVX2-NEXT: vpmovsxdq %xmm1, %xmm1
; PROMOTE_AVX2-NEXT: vpsllq $3, %xmm1, %xmm1
; PROMOTE_AVX2-NEXT: vmovq %rdi, %xmm3
; PROMOTE_AVX2-NEXT: vpbroadcastq %xmm3, %xmm3
@@ -199,21 +188,20 @@ define <2 x i32> @test_gather_v2i32_data(<2 x i32*> %ptr, <2 x i1> %mask, <2 x i
; PROMOTE_SKX: # %bb.0:
; PROMOTE_SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; PROMOTE_SKX-NEXT: vpmovq2m %xmm1, %k1
; PROMOTE_SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; PROMOTE_SKX-NEXT: vpgatherqd (,%xmm0), %xmm1 {%k1}
; PROMOTE_SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; PROMOTE_SKX-NEXT: vpgatherqd (,%xmm0), %xmm2 {%k1}
; PROMOTE_SKX-NEXT: vmovdqa %xmm2, %xmm0
; PROMOTE_SKX-NEXT: retq
;
; PROMOTE_KNL-LABEL: test_gather_v2i32_data:
; PROMOTE_KNL: # %bb.0:
; PROMOTE_KNL-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; PROMOTE_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; PROMOTE_KNL-NEXT: vpsllq $63, %xmm1, %xmm1
; PROMOTE_KNL-NEXT: vptestmq %zmm1, %zmm1, %k0
; PROMOTE_KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; PROMOTE_KNL-NEXT: kshiftlw $14, %k0, %k0
; PROMOTE_KNL-NEXT: kshiftrw $14, %k0, %k1
; PROMOTE_KNL-NEXT: vpgatherqd (,%zmm0), %ymm1 {%k1}
; PROMOTE_KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; PROMOTE_KNL-NEXT: vpgatherqd (,%zmm0), %ymm2 {%k1}
; PROMOTE_KNL-NEXT: vmovdqa %xmm2, %xmm0
; PROMOTE_KNL-NEXT: vzeroupper
; PROMOTE_KNL-NEXT: retq
;
@@ -227,11 +215,10 @@ define <2 x i32> @test_gather_v2i32_data(<2 x i32*> %ptr, <2 x i1> %mask, <2 x i
;
; PROMOTE_AVX2-LABEL: test_gather_v2i32_data:
; PROMOTE_AVX2: # %bb.0:
; PROMOTE_AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; PROMOTE_AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; PROMOTE_AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; PROMOTE_AVX2-NEXT: vpgatherqd %xmm1, (,%xmm0), %xmm2
; PROMOTE_AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
; PROMOTE_AVX2-NEXT: vmovdqa %xmm2, %xmm0
; PROMOTE_AVX2-NEXT: retq
%res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %ptr, i32 4, <2 x i1> %mask, <2 x i32> %src0)
ret <2 x i32>%res
@@ -261,16 +248,15 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mas
; PROMOTE_SKX: # %bb.0:
; PROMOTE_SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_SKX-NEXT: vpmovq2m %xmm2, %k1
; PROMOTE_SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; PROMOTE_SKX-NEXT: vpscatterqd %xmm0, (,%xmm1) {%k1}
; PROMOTE_SKX-NEXT: retq
;
; PROMOTE_KNL-LABEL: test_scatter_v2i32_data:
; PROMOTE_KNL: # %bb.0:
; PROMOTE_KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; PROMOTE_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; PROMOTE_KNL-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_KNL-NEXT: vptestmq %zmm2, %zmm2, %k0
; PROMOTE_KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; PROMOTE_KNL-NEXT: kshiftlw $14, %k0, %k0
; PROMOTE_KNL-NEXT: kshiftrw $14, %k0, %k1
; PROMOTE_KNL-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
@@ -316,7 +302,7 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mas
; PROMOTE_AVX2-NEXT: je .LBB3_4
; PROMOTE_AVX2-NEXT: .LBB3_3: # %cond.store1
; PROMOTE_AVX2-NEXT: vpextrq $1, %xmm1, %rax
; PROMOTE_AVX2-NEXT: vextractps $2, %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: vextractps $1, %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: retq
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
ret void
@@ -348,22 +334,20 @@ define <2 x i32> @test_gather_v2i32_data_index(i32* %base, <2 x i32> %ind, <2 x
; PROMOTE_SKX: # %bb.0:
; PROMOTE_SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; PROMOTE_SKX-NEXT: vpmovq2m %xmm1, %k1
; PROMOTE_SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; PROMOTE_SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; PROMOTE_SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
; PROMOTE_SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; PROMOTE_SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm2 {%k1}
; PROMOTE_SKX-NEXT: vmovdqa %xmm2, %xmm0
; PROMOTE_SKX-NEXT: retq
;
; PROMOTE_KNL-LABEL: test_gather_v2i32_data_index:
; PROMOTE_KNL: # %bb.0:
; PROMOTE_KNL-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; PROMOTE_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; PROMOTE_KNL-NEXT: vpsllq $63, %xmm1, %xmm1
; PROMOTE_KNL-NEXT: vptestmq %zmm1, %zmm1, %k0
; PROMOTE_KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; PROMOTE_KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; PROMOTE_KNL-NEXT: kshiftlw $14, %k0, %k0
; PROMOTE_KNL-NEXT: kshiftrw $14, %k0, %k1
; PROMOTE_KNL-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
; PROMOTE_KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; PROMOTE_KNL-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; PROMOTE_KNL-NEXT: vmovdqa %xmm2, %xmm0
; PROMOTE_KNL-NEXT: vzeroupper
; PROMOTE_KNL-NEXT: retq
;
@@ -377,12 +361,10 @@ define <2 x i32> @test_gather_v2i32_data_index(i32* %base, <2 x i32> %ind, <2 x
;
; PROMOTE_AVX2-LABEL: test_gather_v2i32_data_index:
; PROMOTE_AVX2: # %bb.0:
; PROMOTE_AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; PROMOTE_AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; PROMOTE_AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; PROMOTE_AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; PROMOTE_AVX2-NEXT: vpgatherdd %xmm1, (%rdi,%xmm0,4), %xmm2
; PROMOTE_AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
; PROMOTE_AVX2-NEXT: vmovdqa %xmm2, %xmm0
; PROMOTE_AVX2-NEXT: retq
%gep.random = getelementptr i32, i32* %base, <2 x i32> %ind
%res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
@@ -413,17 +395,15 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
; PROMOTE_SKX: # %bb.0:
; PROMOTE_SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_SKX-NEXT: vpmovq2m %xmm2, %k1
; PROMOTE_SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; PROMOTE_SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; PROMOTE_SKX-NEXT: vpscatterdd %xmm0, (%rdi,%xmm1,4) {%k1}
; PROMOTE_SKX-NEXT: retq
;
; PROMOTE_KNL-LABEL: test_scatter_v2i32_data_index:
; PROMOTE_KNL: # %bb.0:
; PROMOTE_KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; PROMOTE_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; PROMOTE_KNL-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_KNL-NEXT: vptestmq %zmm2, %zmm2, %k0
; PROMOTE_KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; PROMOTE_KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; PROMOTE_KNL-NEXT: kshiftlw $14, %k0, %k0
; PROMOTE_KNL-NEXT: kshiftrw $14, %k0, %k1
; PROMOTE_KNL-NEXT: vpscatterdd %zmm0, (%rdi,%zmm1,4) {%k1}
@@ -458,9 +438,7 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
;
; PROMOTE_AVX2-LABEL: test_scatter_v2i32_data_index:
; PROMOTE_AVX2: # %bb.0:
; PROMOTE_AVX2-NEXT: vpsllq $32, %xmm1, %xmm3
; PROMOTE_AVX2-NEXT: vpsrad $31, %xmm3, %xmm3
; PROMOTE_AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3]
; PROMOTE_AVX2-NEXT: vpmovsxdq %xmm1, %xmm1
; PROMOTE_AVX2-NEXT: vpsllq $2, %xmm1, %xmm1
; PROMOTE_AVX2-NEXT: vmovq %rdi, %xmm3
; PROMOTE_AVX2-NEXT: vpbroadcastq %xmm3, %xmm3
Expand All @@ -481,7 +459,7 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
; PROMOTE_AVX2-NEXT: je .LBB5_4
; PROMOTE_AVX2-NEXT: .LBB5_3: # %cond.store1
; PROMOTE_AVX2-NEXT: vpextrq $1, %xmm1, %rax
; PROMOTE_AVX2-NEXT: vextractps $2, %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: vextractps $1, %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: retq
%gep = getelementptr i32, i32 *%base, <2 x i32> %ind
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %gep, i32 4, <2 x i1> %mask)