diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 572cfdad3c93b..923af28c8cf34 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44964,7 +44964,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
   }
   case X86ISD::VPMADD52L:
   case X86ISD::VPMADD52H: {
-    KnownBits KnownOp0, KnownOp1;
+    KnownBits KnownOp0, KnownOp1, KnownOp2;
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
     SDValue Op2 = Op.getOperand(2);
@@ -44979,6 +44979,10 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
                              TLO, Depth + 1))
       return true;
 
+    if (SimplifyDemandedBits(Op2, APInt::getAllOnes(64), OriginalDemandedElts,
+                             KnownOp2, TLO, Depth + 1))
+      return true;
+
     KnownBits KnownMul;
     KnownOp0 = KnownOp0.trunc(52);
     KnownOp1 = KnownOp1.trunc(52);
@@ -44993,8 +44997,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ADD, DL, VT, C, Op2));
     }
 
-    // TODO: Compute the known bits for VPMADD52L/VPMADD52H.
-    break;
+    Known = KnownBits::add(KnownMul, KnownOp2);
+    return false;
   }
   }
 
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
index 9afc1119267ec..2cb060ea92b14 100644
--- a/llvm/test/CodeGen/X86/combine-vpmadd52.ll
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -290,3 +290,111 @@ define <2 x i64> @test_vpmadd52h_mul_hi52_negative(<2 x i64> %x0, <2 x i64> %x1,
   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2)
   ret <2 x i64> %1
 }
+
+define <2 x i64> @test1_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test1_knownbits_vpmadd52l:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 4)
+  %and2 = and <2 x i64> %x1, splat (i64 4)
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test1_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test1_knownbits_vpmadd52h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [3,3]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 3), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 3)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test2_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test2_knownbits_vpmadd52l:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1234,1234]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 67108864) ; 1LL << 26
+  %and2 = and <2 x i64> %x1, splat (i64 33554432) ; 1LL << 25
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1234), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1234)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test2_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test2_knownbits_vpmadd52h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  ; add (1LL << 20) + 1
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1025), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test3_knownbits_vpmadd52l_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_knownbits_vpmadd52l_negative:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX512-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX512-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX512-NEXT:    vpmadd52luq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test3_knownbits_vpmadd52l_negative:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX-NEXT:    {vex} vpmadd52luq %xmm1, %xmm0, %xmm3
+; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 67108865) ; (1LL << 26) + 1
+  %or = or <2 x i64> %x1, splat (i64 1)
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %or)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_knownbits_vpmadd52h_negative:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX512-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX512-NEXT:    vpmadd52huq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test3_knownbits_vpmadd52h_negative:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX-NEXT:    {vex} vpmadd52huq %xmm1, %xmm0, %xmm3
+; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 4194304) ; 1LL << 22
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  ; add (1LL << 20) + 1
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
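
For context (not part of the patch): below is a minimal standalone C++ sketch of the per-lane semantics of VPMADD52LUQ/VPMADD52HUQ that the tests above exercise and that the new `Known = KnownBits::add(KnownMul, KnownOp2)` propagation relies on. The helper names `vpmadd52l_lane`/`vpmadd52h_lane` are hypothetical (not LLVM or intrinsic APIs), and the sketch assumes a compiler with `unsigned __int128` (GCC/Clang on x86-64); it is a reference model, not an implementation of the lowering.

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical reference model of one 64-bit lane. Both instructions form the
// 104-bit product of the low 52 bits of a and b, then add part of that
// product to the 64-bit accumulator.
static uint64_t vpmadd52l_lane(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t M52 = (1ULL << 52) - 1;
  unsigned __int128 p = (unsigned __int128)(a & M52) * (b & M52);
  return acc + (uint64_t)(p & M52); // accumulate the low 52 bits of the product
}

static uint64_t vpmadd52h_lane(uint64_t acc, uint64_t a, uint64_t b) {
  const uint64_t M52 = (1ULL << 52) - 1;
  unsigned __int128 p = (unsigned __int128)(a & M52) * (b & M52);
  return acc + (uint64_t)(p >> 52); // accumulate the high 52 bits of the product
}

int main() {
  // test2_knownbits_vpmadd52l: (1<<26)*(1<<25) = 1<<51, so the low 52 bits of
  // the product never overlap the mask 1234 and the final AND yields 1234.
  printf("%llu\n",
         (unsigned long long)(vpmadd52l_lane(1234, 1ULL << 26, 1ULL << 25) & 1234));
  // test1_knownbits_vpmadd52h: (1<<30)*(1<<30) = 1<<60, whose high 52 bits are
  // 1<<8, so the low two bits of the result come from the accumulator 3.
  printf("%llu\n",
         (unsigned long long)(vpmadd52h_lane(3, 1ULL << 30, 1ULL << 30) & 3));
  return 0;
}
```

Because the accumulator (operand 2 of the X86ISD node) is simply added to a value derived only from the low 52 bits of the multiplicands, modeling the result as `KnownBits::add(KnownMul, KnownOp2)` matches these semantics, which is what lets the trailing ANDs in the non-negative tests fold to constants.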