[TargetLowering] SimplifyDemandedBits - fix SCALAR_TO_VECTOR knownbits bug

We can only report the knownbits for a SCALAR_TO_VECTOR node if we demand only the 0th element - the upper elements are undefined and shouldn't be trusted.

This is causing a number of regressions that need addressing, but we need to get the bugfix in first.
RKSimon committed Feb 28, 2020
1 parent 2809abb commit 4bc6f63
Showing 13 changed files with 318 additions and 209 deletions.
6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -875,7 +875,11 @@ bool TargetLowering::SimplifyDemandedBits(
     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
       return true;
-    Known = SrcKnown.anyextOrTrunc(BitWidth);
+
+    // Upper elements are undef, so only get the knownbits if we just demand
+    // the bottom element.
+    if (DemandedElts == 1)
+      Known = SrcKnown.anyextOrTrunc(BitWidth);
     break;
   }
   case ISD::BUILD_VECTOR:
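For context, here is a minimal standalone C++ sketch of the rule the new DemandedElts == 1 guard enforces. It is not LLVM's APInt/KnownBits API; the KnownBits16 struct, the scalarToVectorKnownBits helper, and the 16-bit lane-mask model are invented for this illustration. The idea is that a SCALAR_TO_VECTOR node only defines element 0, so the source's known bits may be reported only when element 0 is the sole demanded element.

// Standalone illustration (simplified model, not LLVM's KnownBits classes).
#include <cassert>
#include <cstdint>
#include <optional>

struct KnownBits16 {
  uint16_t Zero = 0; // bits known to be zero
  uint16_t One = 0;  // bits known to be one
};

// Known bits of the demanded lanes of scalar_to_vector(Src).
// DemandedElts is a lane mask; only lane 0 holds a defined value.
std::optional<KnownBits16> scalarToVectorKnownBits(const KnownBits16 &SrcKnown,
                                                   unsigned DemandedElts) {
  if (DemandedElts == 0x1) // only the defined lane is demanded
    return SrcKnown;
  return std::nullopt;     // an undef upper lane is demanded: report nothing
}

int main() {
  KnownBits16 Src{/*Zero=*/0xFF00, /*One=*/0x0001};
  // Demanding just lane 0 preserves the scalar's known bits.
  assert(scalarToVectorKnownBits(Src, 0x1).has_value());
  // Demanding lane 1 as well must not report any known bits.
  assert(!scalarToVectorKnownBits(Src, 0x3).has_value());
  return 0;
}

Before the fix, the multi-lane case behaved like the single-lane one, i.e. undefined upper elements were assumed to carry the scalar's bits; the now-empty answer in that case is what produces the more conservative codegen in the test updates below.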
24 changes: 13 additions & 11 deletions llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -356,22 +356,24 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe
; CHECK-LABEL: test16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 1
; CHECK-NEXT: lxsihzx v2, r3, r4
; CHECK-NEXT: vsplth v2, v2, 3
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: vsplth v4, v3, 7
; CHECK-NEXT: add r6, r3, r4
; CHECK-NEXT: li r3, 16
; CHECK-NEXT: vmrglw v2, v2, v4
; CHECK-NEXT: lxsihzx v4, r6, r3
; CHECK-NEXT: li r7, 16
; CHECK-NEXT: lxsihzx v2, r6, r7
; CHECK-NEXT: lxsihzx v4, r3, r4
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: mtvsrd f0, r6
; CHECK-NEXT: vsplth v4, v4, 3
; CHECK-NEXT: xxswapd v3, vs0
; CHECK-NEXT: vsplth v2, v2, 3
; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l
; CHECK-NEXT: vsplth v4, v4, 3
; CHECK-NEXT: vmrglh v3, v3, v4
; CHECK-NEXT: vmrglh v4, v3, v4
; CHECK-NEXT: vmrglh v2, v3, v2
; CHECK-NEXT: vsplth v3, v3, 7
; CHECK-NEXT: vmrglw v3, v4, v3
; CHECK-NEXT: lxvx v4, 0, r3
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: vperm v2, v3, v2, v4
; CHECK-NEXT: vperm v2, v2, v3, v4
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: vextuwrx r3, r3, v2
96 changes: 52 additions & 44 deletions llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
@@ -1071,7 +1071,6 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; P9LE-NEXT: extsw r4, r4
; P9LE-NEXT: mulld r5, r4, r5
; P9LE-NEXT: rldicl r5, r5, 32, 32
; P9LE-NEXT: xxlxor v4, v4, v4
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: srwi r5, r4, 31
; P9LE-NEXT: srawi r4, r4, 9
@@ -1080,6 +1079,9 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; P9LE-NEXT: mulli r4, r4, 654
; P9LE-NEXT: subf r3, r4, r3
; P9LE-NEXT: mtvsrd f0, r3
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: mtvsrd f0, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: extsh r4, r3
@@ -1094,7 +1096,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; P9LE-NEXT: lis r5, 24749
; P9LE-NEXT: mulli r4, r4, 23
; P9LE-NEXT: subf r3, r4, r3
; P9LE-NEXT: xxswapd v3, vs0
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtvsrd f0, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
@@ -1179,7 +1181,6 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; P8LE-NEXT: lis r3, 24749
; P8LE-NEXT: lis r8, -19946
; P8LE-NEXT: lis r10, -14230
; P8LE-NEXT: xxlxor v5, v5, v5
; P8LE-NEXT: ori r3, r3, 47143
; P8LE-NEXT: ori r8, r8, 17097
; P8LE-NEXT: mfvsrd r4, f0
@@ -1212,18 +1213,21 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; P8LE-NEXT: mulli r3, r3, 5423
; P8LE-NEXT: add r7, r7, r9
; P8LE-NEXT: mulli r8, r8, 23
; P8LE-NEXT: li r9, 0
; P8LE-NEXT: mulli r7, r7, 654
; P8LE-NEXT: mtvsrd f0, r9
; P8LE-NEXT: subf r3, r3, r5
; P8LE-NEXT: mtvsrd f0, r3
; P8LE-NEXT: subf r3, r8, r6
; P8LE-NEXT: subf r4, r7, r4
; P8LE-NEXT: xxswapd v4, vs0
; P8LE-NEXT: subf r5, r8, r6
; P8LE-NEXT: mtvsrd f1, r3
; P8LE-NEXT: mtvsrd f2, r4
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: subf r3, r7, r4
; P8LE-NEXT: mtvsrd f2, r5
; P8LE-NEXT: mtvsrd f3, r3
; P8LE-NEXT: xxswapd v2, vs1
; P8LE-NEXT: xxswapd v3, vs2
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: vmrglh v2, v2, v3
; P8LE-NEXT: vmrglh v3, v4, v5
; P8LE-NEXT: vmrglh v3, v5, v4
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
;
@@ -1328,9 +1332,11 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
; P9LE-NEXT: subf r3, r4, r3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtvsrd f0, r3
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: mtvsrd f0, r3
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxlxor v4, v4, v4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: vmrglw v2, v3, v2
; P9LE-NEXT: blr
@@ -1388,47 +1394,49 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
; P8LE-LABEL: dont_fold_urem_i16_smax:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r6, 24749
; P8LE-NEXT: lis r7, -19946
; P8LE-NEXT: xxlxor v5, v5, v5
; P8LE-NEXT: ori r6, r6, 47143
; P8LE-NEXT: ori r7, r7, 17097
; P8LE-NEXT: mfvsrd r3, f0
; P8LE-NEXT: rldicl r4, r3, 16, 48
; P8LE-NEXT: rldicl r5, r3, 32, 48
; P8LE-NEXT: extsh r8, r4
; P8LE-NEXT: extsh r9, r5
; P8LE-NEXT: extsw r8, r8
; P8LE-NEXT: lis r3, 24749
; P8LE-NEXT: lis r8, -19946
; P8LE-NEXT: ori r3, r3, 47143
; P8LE-NEXT: ori r8, r8, 17097
; P8LE-NEXT: mfvsrd r4, f0
; P8LE-NEXT: rldicl r5, r4, 16, 48
; P8LE-NEXT: rldicl r6, r4, 32, 48
; P8LE-NEXT: extsh r7, r5
; P8LE-NEXT: extsh r9, r6
; P8LE-NEXT: extsw r7, r7
; P8LE-NEXT: extsw r9, r9
; P8LE-NEXT: mulld r6, r8, r6
; P8LE-NEXT: mulld r7, r9, r7
; P8LE-NEXT: rldicl r3, r3, 48, 48
; P8LE-NEXT: rldicl r8, r6, 32, 32
; P8LE-NEXT: mulld r3, r7, r3
; P8LE-NEXT: mulld r7, r9, r8
; P8LE-NEXT: rldicl r4, r4, 48, 48
; P8LE-NEXT: rldicl r8, r3, 1, 63
; P8LE-NEXT: rldicl r3, r3, 32, 32
; P8LE-NEXT: rldicl r7, r7, 32, 32
; P8LE-NEXT: rldicl r6, r6, 1, 63
; P8LE-NEXT: srawi r8, r8, 11
; P8LE-NEXT: srawi r3, r3, 11
; P8LE-NEXT: add r7, r7, r9
; P8LE-NEXT: add r6, r8, r6
; P8LE-NEXT: add r3, r3, r8
; P8LE-NEXT: li r9, 0
; P8LE-NEXT: srwi r8, r7, 31
; P8LE-NEXT: srawi r7, r7, 4
; P8LE-NEXT: mulli r6, r6, 5423
; P8LE-NEXT: mtvsrd f0, r9
; P8LE-NEXT: mulli r3, r3, 5423
; P8LE-NEXT: add r7, r7, r8
; P8LE-NEXT: extsh r8, r3
; P8LE-NEXT: extsh r8, r4
; P8LE-NEXT: mulli r7, r7, 23
; P8LE-NEXT: srawi r8, r8, 15
; P8LE-NEXT: subf r4, r6, r4
; P8LE-NEXT: addze r6, r8
; P8LE-NEXT: mtvsrd f0, r4
; P8LE-NEXT: slwi r4, r6, 15
; P8LE-NEXT: subf r5, r7, r5
; P8LE-NEXT: subf r3, r4, r3
; P8LE-NEXT: mtvsrd f1, r5
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtvsrd f2, r3
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: xxswapd v4, vs0
; P8LE-NEXT: subf r3, r3, r5
; P8LE-NEXT: addze r5, r8
; P8LE-NEXT: slwi r5, r5, 15
; P8LE-NEXT: subf r6, r7, r6
; P8LE-NEXT: mtvsrd f1, r3
; P8LE-NEXT: subf r3, r5, r4
; P8LE-NEXT: mtvsrd f2, r6
; P8LE-NEXT: mtvsrd f3, r3
; P8LE-NEXT: xxswapd v2, vs1
; P8LE-NEXT: xxswapd v3, vs2
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: vmrglh v2, v2, v3
; P8LE-NEXT: vmrglh v3, v4, v5
; P8LE-NEXT: vmrglh v3, v5, v4
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
;
60 changes: 32 additions & 28 deletions llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
@@ -1006,9 +1006,11 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; P9LE-NEXT: subf r3, r4, r3
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: mtvsrd f0, r3
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: xxswapd v2, vs0
; P9LE-NEXT: mtvsrd f0, r3
; P9LE-NEXT: vmrglh v3, v4, v3
; P9LE-NEXT: xxlxor v4, v4, v4
; P9LE-NEXT: xxswapd v4, vs0
; P9LE-NEXT: vmrglh v2, v2, v4
; P9LE-NEXT: vmrglw v2, v3, v2
; P9LE-NEXT: blr
@@ -1064,41 +1066,43 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: li r3, 0
; P8LE-NEXT: lis r8, 24749
; P8LE-NEXT: xxlxor v5, v5, v5
; P8LE-NEXT: oris r5, r3, 45590
; P8LE-NEXT: ori r8, r8, 47143
; P8LE-NEXT: oris r3, r3, 51306
; P8LE-NEXT: ori r5, r5, 17097
; P8LE-NEXT: ori r3, r3, 30865
; P8LE-NEXT: lis r9, 24749
; P8LE-NEXT: oris r5, r3, 51306
; P8LE-NEXT: oris r3, r3, 45590
; P8LE-NEXT: ori r9, r9, 47143
; P8LE-NEXT: ori r5, r5, 30865
; P8LE-NEXT: ori r3, r3, 17097
; P8LE-NEXT: mfvsrd r4, f0
; P8LE-NEXT: rldicl r6, r4, 32, 48
; P8LE-NEXT: rldicl r7, r4, 16, 48
; P8LE-NEXT: rlwinm r9, r6, 0, 16, 31
; P8LE-NEXT: rldicl r4, r4, 48, 48
; P8LE-NEXT: mulld r5, r9, r5
; P8LE-NEXT: rlwinm r9, r7, 0, 16, 31
; P8LE-NEXT: mulld r8, r9, r8
; P8LE-NEXT: rlwinm r9, r4, 31, 17, 31
; P8LE-NEXT: mulld r3, r9, r3
; P8LE-NEXT: rldicl r5, r5, 28, 36
; P8LE-NEXT: rldicl r6, r4, 48, 48
; P8LE-NEXT: rldicl r7, r4, 32, 48
; P8LE-NEXT: rlwinm r8, r6, 31, 17, 31
; P8LE-NEXT: rldicl r4, r4, 16, 48
; P8LE-NEXT: mulld r5, r8, r5
; P8LE-NEXT: rlwinm r8, r7, 0, 16, 31
; P8LE-NEXT: mulld r3, r8, r3
; P8LE-NEXT: rlwinm r8, r4, 0, 16, 31
; P8LE-NEXT: mulld r8, r8, r9
; P8LE-NEXT: li r9, 0
; P8LE-NEXT: mtvsrd f0, r9
; P8LE-NEXT: rldicl r5, r5, 24, 40
; P8LE-NEXT: rldicl r3, r3, 28, 36
; P8LE-NEXT: mulli r5, r5, 654
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: rldicl r8, r8, 21, 43
; P8LE-NEXT: mulli r5, r5, 23
; P8LE-NEXT: rldicl r3, r3, 24, 40
; P8LE-NEXT: mulli r3, r3, 23
; P8LE-NEXT: mulli r8, r8, 5423
; P8LE-NEXT: mulli r3, r3, 654
; P8LE-NEXT: subf r5, r5, r6
; P8LE-NEXT: subf r6, r8, r7
; P8LE-NEXT: mtvsrd f0, r5
; P8LE-NEXT: subf r3, r3, r4
; P8LE-NEXT: mtvsrd f1, r6
; P8LE-NEXT: subf r3, r3, r7
; P8LE-NEXT: mtvsrd f1, r5
; P8LE-NEXT: subf r4, r8, r4
; P8LE-NEXT: mtvsrd f2, r3
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: mtvsrd f3, r4
; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v4, vs2
; P8LE-NEXT: xxswapd v5, vs3
; P8LE-NEXT: vmrglh v2, v3, v2
; P8LE-NEXT: vmrglh v3, v4, v5
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: vmrglh v3, v5, v4
; P8LE-NEXT: vmrglw v2, v3, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: dont_fold_urem_one:
14 changes: 10 additions & 4 deletions llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
@@ -1400,7 +1400,9 @@ define <4 x i64> @f4xi64_i128(<4 x i64> %a) {
; AVX-64-LABEL: f4xi64_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; AVX-64-NEXT: movl $1, %eax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -1458,15 +1460,17 @@ define <8 x i64> @f8xi64_i128(<8 x i64> %a) {
; AVX-64-LABEL: f8xi64_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; AVX-64-NEXT: movl $1, %eax
; AVX-64-NEXT: vmovq %rax, %xmm3
; AVX-64-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddq %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddq %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,0,1]
; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
@@ -1535,7 +1539,9 @@ define <8 x i64> @f8xi64_i256(<8 x i64> %a) {
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3]
; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; AVX-64-NEXT: movl $1, %eax
; AVX-64-NEXT: vmovq %rax, %xmm4
; AVX-64-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vpaddq %xmm4, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
