Skip to content

Commit

Permalink
[AArch64] SimplifyDemandedBitsForTargetNode - add AArch64ISD::BICi ha…
Browse files Browse the repository at this point in the history
…ndling (#76644)

Fold BICi (return its input operand unchanged) if all the bits it would clear are already known to be zero

```llvm
define <8 x i16> @haddu_known(<8 x i8> %a0, <8 x i8> %a1) {
  %x0 = zext <8 x i8> %a0 to <8 x i16>
  %x1 = zext <8 x i8> %a1 to <8 x i16>
  %hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
```

```
haddu_known:                            // @haddu_known
        ushll   v0.8h, v0.8b, #0
        ushll   v1.8h, v1.8b, #0
        uhadd   v0.8h, v0.8h, v1.8h
        bic     v0.8h, #254, lsl #8 <-- this one will be removed, as the bits it clears (0xFE00) are already known to be zero after the zero-extended inputs are halving-added
        ret
```

Fixes #53881
Fixes #53622
  • Loading branch information
snikitav committed Apr 6, 2024
1 parent 4cb110a commit d38bff4
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24555,6 +24555,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
case AArch64ISD::BICi: {
APInt DemandedBits =
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
APInt DemandedElts =
APInt::getAllOnes(N->getValueType(0).getVectorNumElements());

if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(
SDValue(N, 0), DemandedBits, DemandedElts, DCI))
return SDValue();

break;
}
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
Expand Down Expand Up @@ -27595,6 +27607,24 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
case AArch64ISD::BICi: {
// Fold BICi if all destination bits already known to be zeroed
SDValue Op0 = Op.getOperand(0);
KnownBits KnownOp0 =
TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
// Op0 &= ~(ConstantOperandVal(1) << ConstantOperandVal(2))
uint64_t BitsToClear = Op->getConstantOperandVal(1)
<< Op->getConstantOperandVal(2);
APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
if (APInt(Known.getBitWidth(), BitsToClear)
.isSubsetOf(AlreadyZeroedBitsToClear))
return TLO.CombineTo(Op, Op0);

Known = KnownOp0 &
KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear));

return false;
}
case ISD::INTRINSIC_WO_CHAIN: {
if (auto ElementSize = IsSVECntIntrinsic(Op)) {
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
Expand All @@ -27,7 +26,6 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
Expand All @@ -42,7 +40,6 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
Expand All @@ -57,7 +54,6 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
Expand Down

0 comments on commit d38bff4

Please sign in to comment.