
Commit

[DAG] isSplatValue - node is a splat if all demanded elts have the same whole constant value (#74443)
RKSimon committed Dec 8, 2023
1 parent bdacd56 commit faecc73
Showing 13 changed files with 2,325 additions and 2,351 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2881,6 +2881,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
     }
   }
 
+  // Fallback - this is a splat if all demanded elts are the same constant.
+  if (computeKnownBits(V, DemandedElts, Depth).isConstant()) {
+    UndefElts = ~DemandedElts;
+    return true;
+  }
+
   return false;
 }

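For context, the fallback added above reports a node as a splat whenever computeKnownBits proves that every demanded element holds the same constant value, and it marks the lanes that were not demanded as undef. Below is a minimal standalone sketch of that idea in plain C++, assuming a simplified per-lane model; isSplatOfDemandedConstant, KnownLane and the bitset masks are illustrative stand-ins for DemandedElts/UndefElts, not LLVM APIs.

#include <bitset>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Hypothetical stand-in for what computeKnownBits() tells us about one lane:
// nullopt means the lane's value is not a known constant.
using KnownLane = std::optional<uint64_t>;

// Reports a splat when every demanded lane holds the same known constant.
// On success, UndefElts becomes the complement of DemandedElts, mirroring the
// fallback above: lanes nobody asked about are treated as "don't care".
template <size_t N>
bool isSplatOfDemandedConstant(const std::vector<KnownLane> &Lanes,
                               const std::bitset<N> &DemandedElts,
                               std::bitset<N> &UndefElts) {
  std::optional<uint64_t> Splat;
  for (size_t I = 0; I < Lanes.size() && I < N; ++I) {
    if (!DemandedElts[I])
      continue;                 // lane not demanded - ignore it
    if (!Lanes[I])
      return false;             // demanded lane is not a known constant
    if (!Splat)
      Splat = *Lanes[I];        // first demanded constant seen
    else if (*Splat != *Lanes[I])
      return false;             // two demanded lanes disagree
  }
  UndefElts = ~DemandedElts;
  return true;
}

int main() {
  // <4 x i32> with known lanes {7, ?, 7, 7}; lanes 0, 2 and 3 are demanded,
  // so the unknown lane 1 does not block the splat.
  std::vector<KnownLane> Lanes = {7u, std::nullopt, 7u, 7u};
  std::bitset<4> Demanded("1101"), Undef;
  std::cout << std::boolalpha
            << isSplatOfDemandedConstant(Lanes, Demanded, Undef) << '\n'; // true
  std::cout << Undef << '\n';   // 0010 -> only the undemanded lane is undef
}

The test updates below fall out of this change: constant build vectors that previously escaped splat detection now fold earlier, which reshuffles the generated ARM, RISC-V and X86 code.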
17 changes: 7 additions & 10 deletions llvm/test/CodeGen/ARM/vector-store.ll
@@ -403,17 +403,14 @@ define void @v3i8store(ptr %p) {
; CHECK-LABEL: v3i8store:
; CHECK: @ %bb.0:
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vmov.i32 d16, #0xff
; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: vmov.i32 d17, #0x0
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vand d16, d17, d16
; CHECK-NEXT: vst1.32 {d16[0]}, [r1:32]
; CHECK-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: str r1, [sp]
; CHECK-NEXT: vld1.32 {d16[0]}, [r2:32]
; CHECK-NEXT: strb r1, [r0, #2]
; CHECK-NEXT: vmovl.u16 q8, d16
; CHECK-NEXT: strb r2, [r0, #2]
; CHECK-NEXT: vmov.32 r1, d16[0]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov.32 r2, d16[0]
; CHECK-NEXT: strh r2, [r0]
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: bx lr
store <3 x i8> zeroinitializer, ptr %p, align 4
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -244,14 +244,14 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT: vle16.v v16, (a0)
; RV32-NEXT: vrgatherei16.vv v12, v8, v16
; RV32-NEXT: vmv.v.i v20, 5
; RV32-NEXT: lui a0, %hi(.LCPI13_1)
; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1)
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: li a0, 140
; RV32-NEXT: vle16.v v17, (a0)
; RV32-NEXT: li a0, 115
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vmv.v.i v16, 5
; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
; RV32-NEXT: vrgatherei16.vv v12, v20, v16
; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
18 changes: 6 additions & 12 deletions llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -51,11 +51,6 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X86-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X86-NEXT: mulps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X86-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
@@ -64,8 +59,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X86-NEXT: cmpunordps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X86-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: minps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
@@ -135,11 +132,6 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; X64-NEXT: mulps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
@@ -148,8 +140,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; X64-NEXT: cmpunordps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: minps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: xorps %xmm3, %xmm3
166 changes: 90 additions & 76 deletions llvm/test/CodeGen/X86/var-permute-256.ll
@@ -25,18 +25,20 @@ define <4 x i64> @var_shuffle_v4i64(<4 x i64> %v, <4 x i64> %indices) nounwind {
;
; AVX1-LABEL: var_shuffle_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm4
; AVX1-NEXT: vpermilpd %ymm4, %ymm2, %ymm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vpermilpd %ymm1, %ymm4, %ymm2
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v4i64:
@@ -88,15 +90,16 @@ define <8 x i32> @var_shuffle_v8i32(<8 x i32> %v, <8 x i32> %indices) nounwind {
;
; AVX1-LABEL: var_shuffle_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
; AVX1-NEXT: vpermilps %ymm1, %ymm2, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
; AVX1-NEXT: vpermilps %ymm1, %ymm3, %ymm3
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; INT256-LABEL: var_shuffle_v8i32:
@@ -445,18 +448,20 @@ define <4 x double> @var_shuffle_v4f64(<4 x double> %v, <4 x i64> %indices) noun
;
; AVX1-LABEL: var_shuffle_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm4
; AVX1-NEXT: vpermilpd %ymm4, %ymm2, %ymm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vpermilpd %ymm1, %ymm4, %ymm2
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v4f64:
@@ -508,15 +513,16 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi
;
; AVX1-LABEL: var_shuffle_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
; AVX1-NEXT: vpermilps %ymm1, %ymm2, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
; AVX1-NEXT: vpermilps %ymm1, %ymm3, %ymm3
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; INT256-LABEL: var_shuffle_v8f32:
@@ -569,17 +575,19 @@ define <4 x i64> @var_shuffle_v4i64_from_v2i64(<2 x i64> %v, <4 x i64> %indices)
; AVX1-LABEL: var_shuffle_v4i64_from_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm3
; AVX1-NEXT: vpermilpd %ymm3, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vpermilpd %ymm3, %ymm0, %ymm2
; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v4i64_from_v2i64:
@@ -633,14 +641,15 @@ define <8 x i32> @var_shuffle_v8i32_from_v4i32(<4 x i32> %v, <8 x i32> %indices)
; AVX1-LABEL: var_shuffle_v8i32_from_v4i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm3
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; INT256-LABEL: var_shuffle_v8i32_from_v4i32:
@@ -990,17 +999,19 @@ define <4 x double> @var_shuffle_v4f64_from_v2f64(<2 x double> %v, <4 x i64> %in
; AVX1-LABEL: var_shuffle_v4f64_from_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm3
; AVX1-NEXT: vpermilpd %ymm3, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vpermilpd %ymm3, %ymm0, %ymm2
; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v4f64_from_v2f64:
@@ -1054,14 +1065,15 @@ define <8 x float> @var_shuffle_v8f32_from_v4f32(<4 x float> %v, <8 x i32> %indi
; AVX1-LABEL: var_shuffle_v8f32_from_v4f32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3,3,3,3]
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm3
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; INT256-LABEL: var_shuffle_v8f32_from_v4f32:
@@ -1271,20 +1283,22 @@ define <4 x i64> @var_shuffle_v4i64_with_v16i8_indices(<4 x i64> %v, <16 x i8> %
;
; AVX1-LABEL: var_shuffle_v4i64_with_v16i8_indices:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $16, %xmm1, %xmm2
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
; AVX1-NEXT: vpaddq %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [2,2]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpaddq %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4
; AVX1-NEXT: vpermilpd %ymm4, %ymm3, %ymm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vpermilpd %ymm1, %ymm4, %ymm2
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vblendvpd %ymm1, %ymm3, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vblendvpd %ymm3, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v4i64_with_v16i8_indices:
