Skip to content

Commit

Permalink
[X86] Add broadcast load unfold support for smin/umin/smax/umax.
Browse files: browse the repository at this point in the history
llvm-svn: 371366
  • Loading branch information
topperc committed Sep 9, 2019
1 parent 68b2e19 commit 8c2ab1c
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 60 deletions.
24 changes: 24 additions & 0 deletions llvm/lib/Target/X86/X86InstrFoldTables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5306,6 +5306,30 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VPANDQZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
{ X86::VPANDQZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
{ X86::VPANDQZrr, X86::VPANDQZrmb, TB_BCAST_Q },
+ { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rmb, TB_BCAST_D },
+ { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rmb, TB_BCAST_D },
+ { X86::VPMAXSDZrr, X86::VPMAXSDZrmb, TB_BCAST_D },
+ { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rmb, TB_BCAST_Q },
+ { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rmb, TB_BCAST_Q },
+ { X86::VPMAXSQZrr, X86::VPMAXSQZrmb, TB_BCAST_Q },
+ { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rmb, TB_BCAST_D },
+ { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rmb, TB_BCAST_D },
+ { X86::VPMAXUDZrr, X86::VPMAXUDZrmb, TB_BCAST_D },
+ { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rmb, TB_BCAST_Q },
+ { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rmb, TB_BCAST_Q },
+ { X86::VPMAXUQZrr, X86::VPMAXUQZrmb, TB_BCAST_Q },
+ { X86::VPMINSDZ128rr, X86::VPMINSDZ128rmb, TB_BCAST_D },
+ { X86::VPMINSDZ256rr, X86::VPMINSDZ256rmb, TB_BCAST_D },
+ { X86::VPMINSDZrr, X86::VPMINSDZrmb, TB_BCAST_D },
+ { X86::VPMINSQZ128rr, X86::VPMINSQZ128rmb, TB_BCAST_Q },
+ { X86::VPMINSQZ256rr, X86::VPMINSQZ256rmb, TB_BCAST_Q },
+ { X86::VPMINSQZrr, X86::VPMINSQZrmb, TB_BCAST_Q },
+ { X86::VPMINUDZ128rr, X86::VPMINUDZ128rmb, TB_BCAST_D },
+ { X86::VPMINUDZ256rr, X86::VPMINUDZ256rmb, TB_BCAST_D },
+ { X86::VPMINUDZrr, X86::VPMINUDZrmb, TB_BCAST_D },
+ { X86::VPMINUQZ128rr, X86::VPMINUQZ128rmb, TB_BCAST_Q },
+ { X86::VPMINUQZ256rr, X86::VPMINUQZ256rmb, TB_BCAST_Q },
+ { X86::VPMINUQZrr, X86::VPMINUQZrmb, TB_BCAST_Q },
{ X86::VPMULLDZ128rr, X86::VPMULLDZ128rmb, TB_BCAST_D },
{ X86::VPMULLDZ256rr, X86::VPMULLDZ256rmb, TB_BCAST_D },
{ X86::VPMULLDZrr, X86::VPMULLDZrmb, TB_BCAST_D },
Expand Down
120 changes: 60 additions & 60 deletions llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2507,12 +2507,12 @@ define void @bcast_unfold_smin_v4i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_smin_v4i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB72_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
- ; CHECK-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
- ; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %xmm0, %xmm1
+ ; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB72_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2541,12 +2541,12 @@ define void @bcast_unfold_smin_v8i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_smin_v8i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB73_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
- ; CHECK-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
- ; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %ymm0, %ymm1
+ ; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB73_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2576,12 +2576,12 @@ define void @bcast_unfold_smin_v16i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_smin_v16i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB74_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
- ; CHECK-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
- ; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %zmm0, %zmm1
+ ; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB74_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2645,12 +2645,12 @@ define void @bcast_unfold_smin_v4i64(i64* %arg) {
; CHECK-LABEL: bcast_unfold_smin_v4i64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+ ; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB76_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0
- ; CHECK-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
- ; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax)
+ ; CHECK-NEXT: vpminsq 8192(%rdi,%rax), %ymm0, %ymm1
+ ; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB76_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2680,12 +2680,12 @@ define void @bcast_unfold_smin_v8i64(i64* %arg) {
; CHECK-LABEL: bcast_unfold_smin_v8i64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+ ; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB77_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0
- ; CHECK-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
- ; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax)
+ ; CHECK-NEXT: vpminsq 8192(%rdi,%rax), %zmm0, %zmm1
+ ; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB77_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2715,12 +2715,12 @@ define void @bcast_unfold_smax_v4i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_smax_v4i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB78_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
- ; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
- ; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %xmm0, %xmm1
+ ; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB78_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2749,12 +2749,12 @@ define void @bcast_unfold_smax_v8i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_smax_v8i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB79_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
- ; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
- ; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %ymm0, %ymm1
+ ; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB79_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2784,12 +2784,12 @@ define void @bcast_unfold_smax_v16i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_smax_v16i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB80_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
- ; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
- ; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %zmm0, %zmm1
+ ; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB80_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2853,12 +2853,12 @@ define void @bcast_unfold_smax_v4i64(i64* %arg) {
; CHECK-LABEL: bcast_unfold_smax_v4i64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+ ; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB82_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0
- ; CHECK-NEXT: vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
- ; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxsq 8192(%rdi,%rax), %ymm0, %ymm1
+ ; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB82_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2888,12 +2888,12 @@ define void @bcast_unfold_smax_v8i64(i64* %arg) {
; CHECK-LABEL: bcast_unfold_smax_v8i64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+ ; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB83_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0
- ; CHECK-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
- ; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxsq 8192(%rdi,%rax), %zmm0, %zmm1
+ ; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB83_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2923,12 +2923,12 @@ define void @bcast_unfold_umin_v4i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_umin_v4i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB84_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
- ; CHECK-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
- ; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpminud 4096(%rdi,%rax), %xmm0, %xmm1
+ ; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB84_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2957,12 +2957,12 @@ define void @bcast_unfold_umin_v8i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_umin_v8i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB85_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
- ; CHECK-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
- ; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpminud 4096(%rdi,%rax), %ymm0, %ymm1
+ ; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB85_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -2992,12 +2992,12 @@ define void @bcast_unfold_umin_v16i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_umin_v16i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB86_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
- ; CHECK-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
- ; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpminud 4096(%rdi,%rax), %zmm0, %zmm1
+ ; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB86_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -3061,12 +3061,12 @@ define void @bcast_unfold_umin_v4i64(i64* %arg) {
; CHECK-LABEL: bcast_unfold_umin_v4i64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+ ; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB88_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0
- ; CHECK-NEXT: vpminuq {{.*}}(%rip){1to4}, %ymm0, %ymm0
- ; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax)
+ ; CHECK-NEXT: vpminuq 8192(%rdi,%rax), %ymm0, %ymm1
+ ; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB88_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -3096,12 +3096,12 @@ define void @bcast_unfold_umin_v8i64(i64* %arg) {
; CHECK-LABEL: bcast_unfold_umin_v8i64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+ ; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB89_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0
- ; CHECK-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
- ; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax)
+ ; CHECK-NEXT: vpminuq 8192(%rdi,%rax), %zmm0, %zmm1
+ ; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB89_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -3131,12 +3131,12 @@ define void @bcast_unfold_umax_v4i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_umax_v4i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB90_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
- ; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to4}, %xmm0, %xmm0
- ; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %xmm0, %xmm1
+ ; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB90_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -3165,12 +3165,12 @@ define void @bcast_unfold_umax_v8i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_umax_v8i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB91_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
- ; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to8}, %ymm0, %ymm0
- ; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %ymm0, %ymm1
+ ; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB91_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -3200,12 +3200,12 @@ define void @bcast_unfold_umax_v16i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_umax_v16i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB92_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
- ; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to16}, %zmm0, %zmm0
- ; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %zmm0, %zmm1
+ ; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB92_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -3269,12 +3269,12 @@ define void @bcast_unfold_umax_v4i64(i64* %arg) {
; CHECK-LABEL: bcast_unfold_umax_v4i64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+ ; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB94_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0
- ; CHECK-NEXT: vpmaxuq {{.*}}(%rip){1to4}, %ymm0, %ymm0
- ; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxuq 8192(%rdi,%rax), %ymm0, %ymm1
+ ; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB94_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -3304,12 +3304,12 @@ define void @bcast_unfold_umax_v8i64(i64* %arg) {
; CHECK-LABEL: bcast_unfold_umax_v8i64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+ ; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB95_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
- ; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0
- ; CHECK-NEXT: vpmaxuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
- ; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax)
+ ; CHECK-NEXT: vpmaxuq 8192(%rdi,%rax), %zmm0, %zmm1
+ ; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB95_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down

0 comments on commit 8c2ab1c

Please sign in to comment.