Skip to content

Commit

Permalink
[SelectionDAG][DAGCombiner] Reuse existing node via reassociation
Browse files Browse the repository at this point in the history
When (op N0, N2) already exists, reassociate (op (op N0, N1), N2) to (op (op N0, N2), N1) so the existing (op N0, N2) node is reused.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D122539
  • Loading branch information
bcl5980 committed Jun 21, 2022
1 parent 44e8a20 commit 6c951c5
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 221 deletions.
89 changes: 64 additions & 25 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1008,47 +1008,72 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// (load/store (add, (add, x, offset1), offset2)) ->
// (load/store (add, x, offset1+offset2)).

if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;
// (load/store (add, (add, x, y), offset2)) ->
// (load/store (add, (add, x, offset2), y)).

if (N0.hasOneUse())
if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;

auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N1);
if (!C1 || !C2)
if (!C2)
return false;

const APInt &C1APIntVal = C1->getAPIntValue();
const APInt &C2APIntVal = C2->getAPIntValue();
if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
return false;
if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (N0.hasOneUse())
return false;

const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
if (CombinedValueIntVal.getBitWidth() > 64)
return false;
const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

for (SDNode *Node : N->uses()) {
auto LoadStore = dyn_cast<MemSDNode>(Node);
if (LoadStore) {
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
const APInt &C1APIntVal = C1->getAPIntValue();
if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
return false;

const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
if (CombinedValueIntVal.getBitWidth() > 64)
return false;
const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

for (SDNode *Node : N->uses()) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
EVT VT = LoadStore->getMemoryVT();
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
continue;

// Would x[offset1+offset2] still be a legal addressing mode?
AM.BaseOffs = CombinedValue;
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
return true;
}
}
} else {
if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
return false;

for (SDNode *Node : N->uses()) {
auto *LoadStore = dyn_cast<MemSDNode>(Node);
if (!LoadStore)
return false;

// Is x[offset2] a legal addressing mode? If so then
// reassociating the constants breaks address pattern
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
EVT VT = LoadStore->getMemoryVT();
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
continue;

// Would x[offset1+offset2] still be a legal addressing mode?
AM.BaseOffs = CombinedValue;
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
return true;
return false;
}
return true;
}

return false;
Expand Down Expand Up @@ -1099,6 +1124,20 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return N00;
}

if (TLI.isReassocProfitable(DAG, N0, N1)) {
// Reassociate if (op N00, N1) already exist
if (N1 != N01)
if (SDNode *ExistNode =
DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1}))
return DAG.getNode(Opc, DL, VT, SDValue(ExistNode, 0), N01);

// Reassociate if (op N01, N1) already exist
if (N1 != N00)
if (SDNode *ExistNode =
DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1}))
return DAG.getNode(Opc, DL, VT, SDValue(ExistNode, 0), N00);
}

return SDValue();
}

Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@
define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) {
; GCN-LABEL: xor3_i1_const:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
; GCN-NEXT: s_mov_b32 m0, s1
; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
; GCN-NEXT: v_cmp_nlt_f32_e64 s[2:3], s0, 0
; GCN-NEXT: v_interp_p2_f32 v0, v0, attr0.x
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v1
; GCN-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], -1
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], vcc
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1]
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
; GCN-NEXT: ; return to shader part epilog
Expand Down
Loading

0 comments on commit 6c951c5

Please sign in to comment.