-
Notifications
You must be signed in to change notification settings - Fork 15.2k
DAG: Use poison when widening build_vector #167631
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking.
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-selectiondag Author: Matt Arsenault (arsenm) Changes: DAG: Use poison when widening build_vector — regression. Patch is 724.38 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/167631.diff — 39 files affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index dd5c011bfe784..524fe2171d412 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6070,7 +6070,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
SmallVector<SDValue, 16> NewOps(N->ops());
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
- NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
+ NewOps.append(WidenNumElts - NumElts, DAG.getPOISON(EltVT));
return DAG.getBuildVector(WidenVT, dl, NewOps);
}
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index 7f07ef476b8aa..1db776ea6f616 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -3537,27 +3537,22 @@ define <7 x i32> @rotl_v7i32_c(<7 x i32> %a) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov s0, w0
; CHECK-SD-NEXT: fmov s1, w4
-; CHECK-SD-NEXT: adrp x8, .LCPI108_0
-; CHECK-SD-NEXT: adrp x9, .LCPI108_1
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI108_0]
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI108_1]
; CHECK-SD-NEXT: mov v0.s[1], w1
; CHECK-SD-NEXT: mov v1.s[1], w5
; CHECK-SD-NEXT: mov v0.s[2], w2
; CHECK-SD-NEXT: mov v1.s[2], w6
; CHECK-SD-NEXT: mov v0.s[3], w3
-; CHECK-SD-NEXT: ushl v2.4s, v1.4s, v2.4s
-; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v3.4s
-; CHECK-SD-NEXT: shl v4.4s, v0.4s, #3
-; CHECK-SD-NEXT: usra v4.4s, v0.4s, #29
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: mov w1, v4.s[1]
-; CHECK-SD-NEXT: mov w2, v4.s[2]
-; CHECK-SD-NEXT: mov w3, v4.s[3]
-; CHECK-SD-NEXT: mov w5, v0.s[1]
-; CHECK-SD-NEXT: mov w6, v0.s[2]
-; CHECK-SD-NEXT: fmov w0, s4
-; CHECK-SD-NEXT: fmov w4, s0
+; CHECK-SD-NEXT: shl v3.4s, v1.4s, #3
+; CHECK-SD-NEXT: usra v3.4s, v1.4s, #29
+; CHECK-SD-NEXT: shl v2.4s, v0.4s, #3
+; CHECK-SD-NEXT: mov w5, v3.s[1]
+; CHECK-SD-NEXT: mov w6, v3.s[2]
+; CHECK-SD-NEXT: fmov w4, s3
+; CHECK-SD-NEXT: usra v2.4s, v0.4s, #29
+; CHECK-SD-NEXT: mov w1, v2.s[1]
+; CHECK-SD-NEXT: mov w2, v2.s[2]
+; CHECK-SD-NEXT: mov w3, v2.s[3]
+; CHECK-SD-NEXT: fmov w0, s2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rotl_v7i32_c:
@@ -3614,27 +3609,22 @@ define <7 x i32> @rotr_v7i32_c(<7 x i32> %a) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov s0, w0
; CHECK-SD-NEXT: fmov s1, w4
-; CHECK-SD-NEXT: adrp x8, .LCPI109_0
-; CHECK-SD-NEXT: adrp x9, .LCPI109_1
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI109_0]
-; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI109_1]
; CHECK-SD-NEXT: mov v0.s[1], w1
; CHECK-SD-NEXT: mov v1.s[1], w5
; CHECK-SD-NEXT: mov v0.s[2], w2
; CHECK-SD-NEXT: mov v1.s[2], w6
; CHECK-SD-NEXT: mov v0.s[3], w3
-; CHECK-SD-NEXT: ushl v2.4s, v1.4s, v2.4s
-; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v3.4s
-; CHECK-SD-NEXT: shl v4.4s, v0.4s, #29
-; CHECK-SD-NEXT: usra v4.4s, v0.4s, #3
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: mov w1, v4.s[1]
-; CHECK-SD-NEXT: mov w2, v4.s[2]
-; CHECK-SD-NEXT: mov w3, v4.s[3]
-; CHECK-SD-NEXT: mov w5, v0.s[1]
-; CHECK-SD-NEXT: mov w6, v0.s[2]
-; CHECK-SD-NEXT: fmov w0, s4
-; CHECK-SD-NEXT: fmov w4, s0
+; CHECK-SD-NEXT: shl v3.4s, v1.4s, #29
+; CHECK-SD-NEXT: usra v3.4s, v1.4s, #3
+; CHECK-SD-NEXT: shl v2.4s, v0.4s, #29
+; CHECK-SD-NEXT: mov w5, v3.s[1]
+; CHECK-SD-NEXT: mov w6, v3.s[2]
+; CHECK-SD-NEXT: fmov w4, s3
+; CHECK-SD-NEXT: usra v2.4s, v0.4s, #3
+; CHECK-SD-NEXT: mov w1, v2.s[1]
+; CHECK-SD-NEXT: mov w2, v2.s[2]
+; CHECK-SD-NEXT: mov w3, v2.s[3]
+; CHECK-SD-NEXT: fmov w0, s2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rotr_v7i32_c:
@@ -4132,36 +4122,31 @@ define <7 x i32> @fshl_v7i32_c(<7 x i32> %a, <7 x i32> %b) {
; CHECK-SD-LABEL: fshl_v7i32_c:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: fmov s2, w4
-; CHECK-SD-NEXT: ldr s1, [sp, #24]
-; CHECK-SD-NEXT: fmov s3, w7
+; CHECK-SD-NEXT: fmov s1, w4
; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-NEXT: fmov s2, w7
+; CHECK-SD-NEXT: ldr s3, [sp, #24]
; CHECK-SD-NEXT: add x9, sp, #32
-; CHECK-SD-NEXT: ld1 { v1.s }[1], [x9]
-; CHECK-SD-NEXT: add x9, sp, #40
-; CHECK-SD-NEXT: adrp x10, .LCPI134_1
; CHECK-SD-NEXT: mov v0.s[1], w1
-; CHECK-SD-NEXT: mov v2.s[1], w5
-; CHECK-SD-NEXT: ldr q5, [x10, :lo12:.LCPI134_1]
-; CHECK-SD-NEXT: ld1 { v3.s }[1], [x8]
+; CHECK-SD-NEXT: mov v1.s[1], w5
+; CHECK-SD-NEXT: ld1 { v3.s }[1], [x9]
+; CHECK-SD-NEXT: ld1 { v2.s }[1], [x8]
; CHECK-SD-NEXT: add x8, sp, #8
-; CHECK-SD-NEXT: ld1 { v1.s }[2], [x9]
-; CHECK-SD-NEXT: add x9, sp, #16
+; CHECK-SD-NEXT: add x9, sp, #40
+; CHECK-SD-NEXT: ld1 { v3.s }[2], [x9]
; CHECK-SD-NEXT: mov v0.s[2], w2
-; CHECK-SD-NEXT: mov v2.s[2], w6
-; CHECK-SD-NEXT: ld1 { v3.s }[2], [x8]
-; CHECK-SD-NEXT: adrp x8, .LCPI134_0
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI134_0]
-; CHECK-SD-NEXT: ld1 { v3.s }[3], [x9]
+; CHECK-SD-NEXT: mov v1.s[2], w6
+; CHECK-SD-NEXT: ld1 { v2.s }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #16
+; CHECK-SD-NEXT: ld1 { v2.s }[3], [x8]
; CHECK-SD-NEXT: mov v0.s[3], w3
-; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: ushl v2.4s, v2.4s, v5.4s
-; CHECK-SD-NEXT: orr v1.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT: shl v1.4s, v1.4s, #3
+; CHECK-SD-NEXT: usra v1.4s, v3.4s, #29
; CHECK-SD-NEXT: shl v0.4s, v0.4s, #3
; CHECK-SD-NEXT: mov w5, v1.s[1]
; CHECK-SD-NEXT: mov w6, v1.s[2]
; CHECK-SD-NEXT: fmov w4, s1
-; CHECK-SD-NEXT: usra v0.4s, v3.4s, #29
+; CHECK-SD-NEXT: usra v0.4s, v2.4s, #29
; CHECK-SD-NEXT: mov w1, v0.s[1]
; CHECK-SD-NEXT: mov w2, v0.s[2]
; CHECK-SD-NEXT: mov w3, v0.s[3]
@@ -4225,36 +4210,31 @@ define <7 x i32> @fshr_v7i32_c(<7 x i32> %a, <7 x i32> %b) {
; CHECK-SD-LABEL: fshr_v7i32_c:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: fmov s2, w4
-; CHECK-SD-NEXT: ldr s1, [sp, #24]
-; CHECK-SD-NEXT: fmov s3, w7
+; CHECK-SD-NEXT: fmov s1, w4
; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-NEXT: fmov s2, w7
+; CHECK-SD-NEXT: ldr s3, [sp, #24]
; CHECK-SD-NEXT: add x9, sp, #32
-; CHECK-SD-NEXT: ld1 { v1.s }[1], [x9]
-; CHECK-SD-NEXT: add x9, sp, #40
-; CHECK-SD-NEXT: adrp x10, .LCPI135_1
; CHECK-SD-NEXT: mov v0.s[1], w1
-; CHECK-SD-NEXT: mov v2.s[1], w5
-; CHECK-SD-NEXT: ldr q5, [x10, :lo12:.LCPI135_1]
-; CHECK-SD-NEXT: ld1 { v3.s }[1], [x8]
+; CHECK-SD-NEXT: mov v1.s[1], w5
+; CHECK-SD-NEXT: ld1 { v3.s }[1], [x9]
+; CHECK-SD-NEXT: ld1 { v2.s }[1], [x8]
; CHECK-SD-NEXT: add x8, sp, #8
-; CHECK-SD-NEXT: ld1 { v1.s }[2], [x9]
-; CHECK-SD-NEXT: add x9, sp, #16
+; CHECK-SD-NEXT: add x9, sp, #40
+; CHECK-SD-NEXT: ld1 { v3.s }[2], [x9]
; CHECK-SD-NEXT: mov v0.s[2], w2
-; CHECK-SD-NEXT: mov v2.s[2], w6
-; CHECK-SD-NEXT: ld1 { v3.s }[2], [x8]
-; CHECK-SD-NEXT: adrp x8, .LCPI135_0
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI135_0]
-; CHECK-SD-NEXT: ld1 { v3.s }[3], [x9]
+; CHECK-SD-NEXT: mov v1.s[2], w6
+; CHECK-SD-NEXT: ld1 { v2.s }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #16
+; CHECK-SD-NEXT: ld1 { v2.s }[3], [x8]
; CHECK-SD-NEXT: mov v0.s[3], w3
-; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: ushl v2.4s, v2.4s, v5.4s
-; CHECK-SD-NEXT: orr v1.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT: shl v1.4s, v1.4s, #29
+; CHECK-SD-NEXT: usra v1.4s, v3.4s, #3
; CHECK-SD-NEXT: shl v0.4s, v0.4s, #29
; CHECK-SD-NEXT: mov w5, v1.s[1]
; CHECK-SD-NEXT: mov w6, v1.s[2]
; CHECK-SD-NEXT: fmov w4, s1
-; CHECK-SD-NEXT: usra v0.4s, v3.4s, #3
+; CHECK-SD-NEXT: usra v0.4s, v2.4s, #3
; CHECK-SD-NEXT: mov w1, v0.s[1]
; CHECK-SD-NEXT: mov w2, v0.s[2]
; CHECK-SD-NEXT: mov w3, v0.s[3]
diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
index b85cb3a4f191c..6fff0d9b155ef 100644
--- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
@@ -450,7 +450,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; ARM7-NEXT: .short 9 @ 0x9
; ARM7-NEXT: .short 10 @ 0xa
; ARM7-NEXT: .short 10 @ 0xa
-; ARM7-NEXT: .short 10 @ 0xa
+; ARM7-NEXT: .short 0 @ 0x0
; ARM7-NEXT: .LCPI4_4:
; ARM7-NEXT: .short 341 @ 0x155
; ARM7-NEXT: .short 292 @ 0x124
@@ -502,7 +502,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; ARM8-NEXT: .short 9 @ 0x9
; ARM8-NEXT: .short 10 @ 0xa
; ARM8-NEXT: .short 10 @ 0xa
-; ARM8-NEXT: .short 10 @ 0xa
+; ARM8-NEXT: .short 0 @ 0x0
; ARM8-NEXT: .LCPI4_4:
; ARM8-NEXT: .short 341 @ 0x155
; ARM8-NEXT: .short 292 @ 0x124
@@ -554,7 +554,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; NEON7-NEXT: .short 9 @ 0x9
; NEON7-NEXT: .short 10 @ 0xa
; NEON7-NEXT: .short 10 @ 0xa
-; NEON7-NEXT: .short 10 @ 0xa
+; NEON7-NEXT: .short 0 @ 0x0
; NEON7-NEXT: .LCPI4_4:
; NEON7-NEXT: .short 341 @ 0x155
; NEON7-NEXT: .short 292 @ 0x124
@@ -606,7 +606,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; NEON8-NEXT: .short 9 @ 0x9
; NEON8-NEXT: .short 10 @ 0xa
; NEON8-NEXT: .short 10 @ 0xa
-; NEON8-NEXT: .short 10 @ 0xa
+; NEON8-NEXT: .short 0 @ 0x0
; NEON8-NEXT: .LCPI4_4:
; NEON8-NEXT: .short 341 @ 0x155
; NEON8-NEXT: .short 292 @ 0x124
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index a2fcd7962b8b0..5567310bb2a61 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -8,25 +8,15 @@
; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3
define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
-; RV32-LABEL: load_factor2_v3:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: vle32.v v10, (a0)
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vnsrl.wi v8, v10, 0
-; RV32-NEXT: vnsrl.wx v9, v10, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: load_factor2_v3:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: vle32.v v10, (a0)
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vnsrl.wx v9, v10, a0
-; RV64-NEXT: vnsrl.wi v8, v10, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: load_factor2_v3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wx v9, v10, a0
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: ret
%interleaved.vec = load <6 x i32>, ptr %ptr
%v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
%v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 636fdfae68438..ba9c926c57152 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -579,7 +579,7 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32MV-NEXT: vmv.v.x v10, a3
; RV32MV-NEXT: srli a3, a1, 22
; RV32MV-NEXT: or a2, a3, a2
-; RV32MV-NEXT: lui a3, 41121
+; RV32MV-NEXT: lui a3, 161
; RV32MV-NEXT: slli a1, a1, 10
; RV32MV-NEXT: srli a1, a1, 21
; RV32MV-NEXT: vslide1down.vx v10, v10, a1
@@ -636,7 +636,7 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: lui a3, %hi(.LCPI4_0)
; RV64MV-NEXT: addi a3, a3, %lo(.LCPI4_0)
; RV64MV-NEXT: vle16.v v9, (a3)
-; RV64MV-NEXT: lui a3, 41121
+; RV64MV-NEXT: lui a3, 161
; RV64MV-NEXT: slli a2, a2, 32
; RV64MV-NEXT: or a1, a1, a2
; RV64MV-NEXT: andi a2, a1, 2047
diff --git a/llvm/test/CodeGen/Thumb2/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/urem-seteq-illegal-types.ll
index a0247c29f257f..e5350409cd6ba 100644
--- a/llvm/test/CodeGen/Thumb2/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb2/urem-seteq-illegal-types.ll
@@ -117,7 +117,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: .short 9 @ 0x9
; CHECK-NEXT: .short 10 @ 0xa
; CHECK-NEXT: .short 10 @ 0xa
-; CHECK-NEXT: .short 10 @ 0xa
+; CHECK-NEXT: .short 0 @ 0x0
; CHECK-NEXT: .LCPI4_4:
; CHECK-NEXT: .short 341 @ 0x155
; CHECK-NEXT: .short 292 @ 0x124
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index 81529aff39ff1..19c84d42a7ea6 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -1141,8 +1141,8 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: subq $56, %rsp
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-AVX2-NEXT: vpmovsxbd {{.*#+}} ymm1 = [2,2,0,0,0,0,0,0]
; CHECK-AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
; CHECK-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax
@@ -1171,8 +1171,8 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
;
; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; CHECK-ONLY-AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-ONLY-AVX512F-NEXT: vpmovsxbd {{.*#+}} ymm1 = [2,2,0,0,0,0,0,0]
; CHECK-ONLY-AVX512F-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
; CHECK-ONLY-AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; CHECK-ONLY-AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 4b0f75df83a76..ac4554176c3e7 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -679,39 +679,39 @@ define void @interleave_24i8_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,3,2,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,6,5,4,7]
; SSE2-NEXT: packuswb %xmm4, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,255,255,0,255,255,0,255,255,255,255,255,255,255,255,255]
-; SSE2-NEXT: movdqa %xmm0, %xmm6
-; SSE2-NEXT: pand %xmm5, %xmm6
-; SSE2-NEXT: pandn %xmm1, %xmm5
-; SSE2-NEXT: por %xmm6, %xmm5
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
-; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,0,65535,65535,0,65535,65535]
-; SSE2-NEXT: pand %xmm6, %xmm5
-; SSE2-NEXT: pandn %xmm3, %xmm6
-; SSE2-NEXT: por %xmm5, %xmm6
-; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm6[2,1,0,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,6,5,4,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,3,2,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[1,2,3,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,5,6,7,4]
-; SSE2-NEXT: packuswb %xmm5, %xmm5
-; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [255,0,255,255,0,255,255,0,255,255,255,255,255,255,255,255]
-; SSE2-NEXT: pand %xmm6, %xmm0
-; SSE2-NEXT: pandn %xmm1, %xmm6
-; SSE2-NEXT: por %xmm0, %xmm6
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
+; SSE2-NEXT: movq %xmm4, (%rsi)
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [0,255,255,0,255,255,0,255,255,255,255,255,255,255,255,255]
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,0,65535,65535,0,65535,65535]
+; SSE2-NEXT: pand %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[2,1,0,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,6,5,4,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,6,7,4]
+; SSE2-NEXT: packuswb %xmm4, %xmm4
+; SSE2-NEXT: movq %xmm4, (%rdx)
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,255,0,255,255,0,255,255,255,255,255,255,255,255]
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm0, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,65535,65535,0,65535,65535,0,65535]
-; SSE2-NEXT: pand %xmm0, %xmm6
+; SSE2-NEXT: pand %xmm0, %xmm4
; SSE2-NEXT: pandn %xmm3, %xmm0
-; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,5]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,0,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: movq %xmm4, (%rsi)
-; SSE2-NEXT: movq %xmm5, (%rdx)
; SSE2-NEXT: movq %xmm0, (%rcx)
; SSE2-NEXT: retq
;
@@ -724,16 +724,16 @@ define void @interleave_24i8_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; SSE42-NEXT: movdqa %xmm0, %xmm3
; SSE42-NEXT: pshufb {{.*#+}} xmm3 = xmm3[0,3,6,9,12,15],zero,zero,xmm3[u,u,u,u,u,u,u,u]
; SSE42-NEXT: por %xmm2, %xmm3
+; SSE42-NEXT: movq %xmm3, (%rsi)
; SSE42-NEXT: movdqa %xmm1, %xmm2
; SSE42-NEXT: pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,xmm2[0,3,6,u,u,u,u,u,u,u,u]
-; SSE42-NEXT: movdqa %xmm0, %xmm4
-; SSE42-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,4,7,10,13],zero,zero,zero,xmm4[u,u,u,u,u,u,u,u]
-; SSE42-NEXT: por %xmm2, %xmm4
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: pshufb {{.*#+}} xmm3 = xmm3[1,4,7,10,13],zero,zero,zero,xmm3[u,u,u,u,u,u,u,u]
+; SSE42-NEXT: por %xmm2, %xmm3
+; SSE42-NEXT: movq %xmm3, (%rdx)
; SSE42-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,xmm1[1,4,7,u,u,u,u,u,u,u,u]
; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,5,8,11,14],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSE42-NEXT: por %xmm1, %xmm0
-; SSE42-NEXT: movq %xmm3, (%rsi)
-; SSE42-NEXT: movq %xmm4, (%rdx)
; SSE42-NEXT: movq %xmm0, (%rcx)
; SSE42-NEXT: retq
;
@@ -744,14 +744,14 @@ define void @interleave_24i8_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm1[2,5,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm0[0,3,6,9,12,15],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,zero,zero,zero,xmm1[0,3,6,u,u,u,u,u,u,u,u]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm0[1,4,7,10,13],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vmovq %xmm2, (%rsi)
+; AVX1-NEXT: vpshufb {{.*#+}} x...
[truncated]
|
lukel97
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RISC-V changes LGTM
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
X86 LGTM (little bit of work to do for urem-seteq-illegal-types.ll regression but nothing of note)
ea2a849 to
45e4b3d
Compare
Test changes are mostly noise. There are a few improvements and a few regressions.

Test changes are mostly noise. There are a few improvements and a few regressions.