diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b10d8a80c8c9b..819e8ccd5c33f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24555,6 +24555,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
       return R;
     return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
+  case AArch64ISD::BICi: {
+    APInt DemandedBits =
+        APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
+    APInt DemandedElts =
+        APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
+
+    if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(
+            SDValue(N, 0), DemandedBits, DemandedElts, DCI))
+      return SDValue();
+
+    break;
+  }
   case ISD::XOR:
     return performXorCombine(N, DAG, DCI, Subtarget);
   case ISD::MUL:
@@ -27595,6 +27607,24 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
     // used - simplify to just Val.
     return TLO.CombineTo(Op, ShiftR->getOperand(0));
   }
+  case AArch64ISD::BICi: {
+    // Fold BICi if all destination bits already known to be zeroed
+    SDValue Op0 = Op.getOperand(0);
+    KnownBits KnownOp0 =
+        TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
+    // Op0 &= ~(ConstantOperandVal(1) << ConstantOperandVal(2))
+    uint64_t BitsToClear = Op->getConstantOperandVal(1)
+                           << Op->getConstantOperandVal(2);
+    APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
+    if (APInt(Known.getBitWidth(), BitsToClear)
+            .isSubsetOf(AlreadyZeroedBitsToClear))
+      return TLO.CombineTo(Op, Op0);
+
+    Known = KnownOp0 &
+            KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear));
+
+    return false;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     if (auto ElementSize = IsSVECntIntrinsic(Op)) {
       unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index 017f382774892..f36b8440fe4bf 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -12,7 +12,6 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    bic v0.8h, #254, lsl #8
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
   %x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -27,7 +26,6 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    bic v0.8h, #254, lsl #8
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
   %x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -42,7 +40,6 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    bic v0.8h, #254, lsl #8
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
   %x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -57,7 +54,6 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    bic v0.8h, #254, lsl #8
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
   %x1 = zext <8 x i8> %a1 to <8 x i16>