Skip to content

Commit

Permalink
[SelectionDAG][DAGCombiner] Reuse existing node via reassociation
Browse files Browse the repository at this point in the history
When (op N0, N2) already exists, reassociate (op (op N0, N1), N2) to (op (op N0, N2), N1) so that the existing (op N0, N2) node is reused.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D122539
  • Loading branch information
bcl5980 committed Jun 24, 2022
1 parent 186bea3 commit 8c74205
Show file tree
Hide file tree
Showing 6 changed files with 321 additions and 220 deletions.
95 changes: 71 additions & 24 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1008,47 +1008,72 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// (load/store (add, (add, x, offset1), offset2)) ->
// (load/store (add, x, offset1+offset2)).

if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;
// (load/store (add, (add, x, y), offset2)) ->
// (load/store (add, (add, x, offset2), y)).

if (N0.hasOneUse())
if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;

auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N1);
if (!C1 || !C2)
if (!C2)
return false;

const APInt &C1APIntVal = C1->getAPIntValue();
const APInt &C2APIntVal = C2->getAPIntValue();
if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
if (C2APIntVal.getSignificantBits() > 64)
return false;

const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
if (CombinedValueIntVal.getBitWidth() > 64)
return false;
const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

for (SDNode *Node : N->uses()) {
auto LoadStore = dyn_cast<MemSDNode>(Node);
if (LoadStore) {
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (N0.hasOneUse())
return false;

const APInt &C1APIntVal = C1->getAPIntValue();
const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
if (CombinedValueIntVal.getSignificantBits() > 64)
return false;
const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

for (SDNode *Node : N->uses()) {
if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
EVT VT = LoadStore->getMemoryVT();
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
continue;

// Would x[offset1+offset2] still be a legal addressing mode?
AM.BaseOffs = CombinedValue;
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
return true;
}
}
} else {
if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
return false;

for (SDNode *Node : N->uses()) {
auto *LoadStore = dyn_cast<MemSDNode>(Node);
if (!LoadStore)
return false;

// Is x[offset2] a legal addressing mode? If so then
// reassociating the constants breaks address pattern
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
EVT VT = LoadStore->getMemoryVT();
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
continue;

// Would x[offset1+offset2] still be a legal addressing mode?
AM.BaseOffs = CombinedValue;
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
return true;
return false;
}
return true;
}

return false;
Expand Down Expand Up @@ -1099,6 +1124,28 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return N00;
}

if (TLI.isReassocProfitable(DAG, N0, N1)) {
if (N1 != N01) {
// Reassociate if (op N00, N1) already exist
if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
// if Op (Op N00, N1), N01 already exist
// we need to stop reassciate to avoid dead loop
if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
}
}

if (N1 != N00) {
// Reassociate if (op N01, N1) already exist
if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
// if Op (Op N01, N1), N00 already exist
// we need to stop reassciate to avoid dead loop
if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
}
}
}

return SDValue();
}

Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@
define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) {
; GCN-LABEL: xor3_i1_const:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
; GCN-NEXT: s_mov_b32 m0, s1
; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
; GCN-NEXT: v_cmp_nlt_f32_e64 s[2:3], s0, 0
; GCN-NEXT: v_interp_p2_f32 v0, v0, attr0.x
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v1
; GCN-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], -1
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], vcc
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1]
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
; GCN-NEXT: ; return to shader part epilog
Expand Down
41 changes: 41 additions & 0 deletions llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "wasm32-unknown-unknown"

define i32 @reassociate_xor(float %x, float %y) {
; CHECK-LABEL: reassociate_xor:
; CHECK: .functype reassociate_xor (f32, f32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: local.set 2
; CHECK-NEXT: block
; CHECK-NEXT: local.get 1
; CHECK-NEXT: f32.const 0x1p-23
; CHECK-NEXT: f32.le
; CHECK-NEXT: local.get 0
; CHECK-NEXT: f32.const 0x1p-23
; CHECK-NEXT: f32.gt
; CHECK-NEXT: i32.ne
; CHECK-NEXT: br_if 0 # 0: down to label0
; CHECK-NEXT: # %bb.1: # %if.then.i
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.set 2
; CHECK-NEXT: .LBB0_2: # %if.end.i
; CHECK-NEXT: end_block # label0:
; CHECK-NEXT: local.get 2
; CHECK-NEXT: # fallthrough-return
entry: ; preds = %if.then, %entry
%cmp0 = fcmp ule float %x, 0x3E80000000000000
%cmp1 = fcmp ugt float %y, 0x3E80000000000000
%cmp2 = xor i1 %cmp0, %cmp1
br i1 %cmp2, label %if.end.i, label %if.then.i

if.then.i: ; preds = %if.end
br label %if.end.i

if.end.i: ; preds = %if.then.i, %if.end
%s = phi i32 [ 1, %entry ], [ 0, %if.then.i ]
ret i32 %s
}
Loading

0 comments on commit 8c74205

Please sign in to comment.