Skip to content

Commit

Permalink
[DAG] Attempt shl narrowing in SimplifyDemandedBits
Browse files Browse the repository at this point in the history
If the upper half of a shl node's result is known to be zero or is not demanded, see if we can profitably perform the shift as a half-width shl combined with a free trunc/zext.

Followup to D146121

Differential Revision: https://reviews.llvm.org/D155472
  • Loading branch information
RKSimon committed Oct 4, 2023
1 parent b30765c commit 7a8c04e
Show file tree
Hide file tree
Showing 41 changed files with 3,468 additions and 3,895 deletions.
32 changes: 32 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Expand Up @@ -1784,6 +1784,38 @@ bool TargetLowering::SimplifyDemandedBits(
}
}

// Narrow shift to lower half - similar to ShrinkDemandedOp.
// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
unsigned HalfWidth = BitWidth / 2;
if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth) {
EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
if (isNarrowingProfitable(VT, HalfVT) &&
isTypeDesirableForOp(ISD::SHL, HalfVT) &&
isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
(!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT))) {
// Unless we aren't demanding the upper bits at all, we must ensure
// that the upper bits of the shift result are known to be zero,
// which is equivalent to the narrow shift being NUW.
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, Depth + 1);
bool IsNUW = Known0.countMinLeadingZeros() >= (ShAmt + HalfWidth);
if (IsNUW || DemandedBits.countLeadingZeros() >= HalfWidth) {
unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0, Depth + 1);
bool IsNSW = NumSignBits > (ShAmt + HalfWidth);
SDNodeFlags Flags;
Flags.setNoSignedWrap(IsNSW);
Flags.setNoUnsignedWrap(IsNUW);
SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
ShAmt, HalfVT, dl, TLO.LegalTypes());
SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
NewShiftAmt, Flags);
SDValue NewExt =
TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
return TLO.CombineTo(Op, NewExt);
}
}
}

APInt InDemandedMask = DemandedBits.lshr(ShAmt);
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
Expand Up @@ -32,7 +32,8 @@ define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %res
; the base may be the RHS operand of the load in SDAG.
; GCN-LABEL: name: test_complex_reg_offset
; GCN-DAG: %[[BASE:.*]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @0 + 4,
; GCN-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32
; SDAG-DAG: %[[OFFSET:.*]]:sreg_32 = nuw nsw S_LSHL_B32
; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32
; SDAG: S_LOAD_DWORD_SGPR_IMM killed %[[BASE]], killed %[[OFFSET]], 0, 0
; GISEL: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 0, 0
define amdgpu_ps void @test_complex_reg_offset(ptr addrspace(1) %out) {
Expand Down

3 comments on commit 7a8c04e

@kstoimenov
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@RKSimon I bisected a sanitizer bot failure to this patch: https://lab.llvm.org/buildbot/#/builders/168/builds/16076

I've reverted the patch in 0a77699

@RKSimon
Copy link
Collaborator Author

@RKSimon RKSimon commented on 7a8c04e Oct 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kstoimenov Please can you provide a repro?

@RKSimon
Copy link
Collaborator Author

@RKSimon RKSimon commented on 7a8c04e Oct 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops - sorry, I didn't realise this was being discussed on https://reviews.llvm.org/D155472

Please sign in to comment.