3,135 changes: 1,551 additions & 1,584 deletions llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll

Large diffs are not rendered by default.

177 changes: 118 additions & 59 deletions llvm/test/CodeGen/X86/vector-reduce-fmaximum.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX512,AVX512VL

Expand Down Expand Up @@ -816,63 +816,122 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE41-NEXT: orps %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32:
[Review thread on this line]
Collaborator: Looks like it's missing a non-common check-prefix.
Contributor (author): Regenerating; not sure what happened.
Collaborator: Still not fixed.

; AVX: # %bb.0:
; AVX-NEXT: vblendvps %ymm0, %ymm1, %ymm0, %ymm2
; AVX-NEXT: vblendvps %ymm0, %ymm0, %ymm1, %ymm0
; AVX-NEXT: vmaxps %ymm2, %ymm0, %ymm1
; AVX-NEXT: vcmpunordps %ymm0, %ymm0, %ymm2
; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
; AVX-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmaxps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: js .LBB4_1
; AVX-NEXT: # %bb.2:
; AVX-NEXT: vmovaps %xmm0, %xmm2
; AVX-NEXT: jmp .LBB4_3
; AVX-NEXT: .LBB4_1:
; AVX-NEXT: vmovaps %xmm1, %xmm2
; AVX-NEXT: vmovaps %xmm0, %xmm1
; AVX-NEXT: .LBB4_3:
; AVX-NEXT: vmaxss %xmm2, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX-NEXT: vmovd %xmm2, %eax
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: js .LBB4_4
; AVX-NEXT: # %bb.5:
; AVX-NEXT: vmovaps %xmm2, %xmm3
; AVX-NEXT: jmp .LBB4_6
; AVX-NEXT: .LBB4_4:
; AVX-NEXT: vmovapd %xmm1, %xmm3
; AVX-NEXT: vmovaps %xmm2, %xmm1
; AVX-NEXT: .LBB4_6:
; AVX-NEXT: vmaxss %xmm3, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1
; AVX-NEXT: vmovd %xmm1, %eax
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: js .LBB4_7
; AVX-NEXT: # %bb.8:
; AVX-NEXT: vmovaps %xmm1, %xmm2
; AVX-NEXT: jmp .LBB4_9
; AVX-NEXT: .LBB4_7:
; AVX-NEXT: vmovaps %xmm0, %xmm2
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: .LBB4_9:
; AVX-NEXT: vmaxss %xmm2, %xmm0, %xmm1
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
; AVX1-LABEL: test_v16f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm2
; AVX1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm3
; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmaxps %ymm3, %ymm0, %ymm1
; AVX1-NEXT: vcmpunordps %ymm0, %ymm0, %ymm2
; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmaxps %xmm2, %xmm0, %xmm1
; AVX1-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: js .LBB4_1
; AVX1-NEXT: # %bb.2:
; AVX1-NEXT: vmovaps %xmm0, %xmm2
; AVX1-NEXT: jmp .LBB4_3
; AVX1-NEXT: .LBB4_1:
; AVX1-NEXT: vmovaps %xmm1, %xmm2
; AVX1-NEXT: vmovaps %xmm0, %xmm1
; AVX1-NEXT: .LBB4_3:
; AVX1-NEXT: vmaxss %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vmovd %xmm2, %eax
; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: js .LBB4_4
; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vmovaps %xmm2, %xmm3
; AVX1-NEXT: jmp .LBB4_6
; AVX1-NEXT: .LBB4_4:
; AVX1-NEXT: vmovapd %xmm1, %xmm3
; AVX1-NEXT: vmovaps %xmm2, %xmm1
; AVX1-NEXT: .LBB4_6:
; AVX1-NEXT: vmaxss %xmm3, %xmm1, %xmm2
; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vmovd %xmm1, %eax
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: js .LBB4_7
; AVX1-NEXT: # %bb.8:
; AVX1-NEXT: vmovaps %xmm1, %xmm2
; AVX1-NEXT: jmp .LBB4_9
; AVX1-NEXT: .LBB4_7:
; AVX1-NEXT: vmovaps %xmm0, %xmm2
; AVX1-NEXT: vmovaps %xmm1, %xmm0
; AVX1-NEXT: .LBB4_9:
; AVX1-NEXT: vmaxss %xmm2, %xmm0, %xmm1
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vblendvps %ymm0, %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vblendvps %ymm0, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vmaxps %ymm2, %ymm0, %ymm1
; AVX2-NEXT: vcmpunordps %ymm0, %ymm0, %ymm2
; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmaxps %xmm2, %xmm0, %xmm1
; AVX2-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: js .LBB4_1
; AVX2-NEXT: # %bb.2:
; AVX2-NEXT: vmovaps %xmm0, %xmm2
; AVX2-NEXT: jmp .LBB4_3
; AVX2-NEXT: .LBB4_1:
; AVX2-NEXT: vmovaps %xmm1, %xmm2
; AVX2-NEXT: vmovaps %xmm0, %xmm1
; AVX2-NEXT: .LBB4_3:
; AVX2-NEXT: vmaxss %xmm2, %xmm1, %xmm2
; AVX2-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
; AVX2-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vmovd %xmm2, %eax
; AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: js .LBB4_4
; AVX2-NEXT: # %bb.5:
; AVX2-NEXT: vmovaps %xmm2, %xmm3
; AVX2-NEXT: jmp .LBB4_6
; AVX2-NEXT: .LBB4_4:
; AVX2-NEXT: vmovapd %xmm1, %xmm3
; AVX2-NEXT: vmovaps %xmm2, %xmm1
; AVX2-NEXT: .LBB4_6:
; AVX2-NEXT: vmaxss %xmm3, %xmm1, %xmm2
; AVX2-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3
; AVX2-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vmovd %xmm1, %eax
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: js .LBB4_7
; AVX2-NEXT: # %bb.8:
; AVX2-NEXT: vmovaps %xmm1, %xmm2
; AVX2-NEXT: jmp .LBB4_9
; AVX2-NEXT: .LBB4_7:
; AVX2-NEXT: vmovaps %xmm0, %xmm2
; AVX2-NEXT: vmovaps %xmm1, %xmm0
; AVX2-NEXT: .LBB4_9:
; AVX2-NEXT: vmaxss %xmm2, %xmm0, %xmm1
; AVX2-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
; AVX2-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: test_v16f32:
; AVX512BW: # %bb.0:
Expand Down
24 changes: 11 additions & 13 deletions llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -950,12 +950,10 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) nounwind {
;
; AVX1-LABEL: icmp0_v8i32_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vtestps %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vtestps %ymm0, %ymm0
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
Expand Down Expand Up @@ -1301,14 +1299,14 @@ define i1 @icmp0_v16i32_v16i1(<16 x i32>) nounwind {
;
; AVX1-LABEL: icmp0_v16i32_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vcmpeqps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vcmpeqps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1329,11 +1329,9 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) nounwind {
;
; AVX1-LABEL: icmp0_v8i32_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testb %al, %al
; AVX1-NEXT: setnp %al
Expand Down Expand Up @@ -1721,14 +1719,14 @@ define i1 @icmp0_v16i32_v16i1(<16 x i32>) nounwind {
;
; AVX1-LABEL: icmp0_v16i32_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vcmpeqps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vcmpeqps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
Expand Down
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/X86/vector-sext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2419,12 +2419,9 @@ define <8 x i32> @load_sext_8i1_to_8i32(ptr%ptr) {
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_sext_8i1_to_8i32:
Expand Down
17 changes: 9 additions & 8 deletions llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -679,15 +679,16 @@ define void @PR54171(ptr %mask0, ptr %mask1, i64 %i) {
; AVX1-NEXT: # %bb.1: # %if.then
; AVX1-NEXT: vmovd %edx, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
; AVX1-NEXT: vmovdqa %xmm1, 16(%rdi)
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmovdqa %xmm0, (%rsi)
; AVX1-NEXT: vmovdqa %xmm1, 16(%rsi)
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [0.0E+0,0.0E+0,1.0E+0,1.0E+0,2.0E+0,2.0E+0,3.0E+0,3.0E+0]
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm1, (%rdi)
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,4.0E+0,5.0E+0,5.0E+0,6.0E+0,6.0E+0,7.0E+0,7.0E+0]
; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovaps %ymm0, (%rsi)
; AVX1-NEXT: .LBB18_2: # %if.end
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR54171:
Expand Down