Skip to content

Commit 42e229e

Browse files
committed
[AMDGPU] fix commuted case of sub combine
Differential Revision: https://reviews.llvm.org/D58481
llvm-svn: 354543
1 parent 500606f commit 42e229e

File tree

2 files changed

+29
-5
lines changed

2 files changed

+29
-5
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8616,14 +8616,10 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
86168616
SDValue LHS = N->getOperand(0);
86178617
SDValue RHS = N->getOperand(1);
86188618

8619-
unsigned Opc = LHS.getOpcode();
8620-
if (Opc != ISD::SUBCARRY)
8621-
std::swap(RHS, LHS);
8622-
86238619
if (LHS.getOpcode() == ISD::SUBCARRY) {
86248620
// sub (subcarry x, 0, cc), y => subcarry x, y, cc
86258621
auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
8626-
if (!C || C->getZExtValue() != 0)
8622+
if (!C || !C->isNullValue())
86278623
return SDValue();
86288624
SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
86298625
return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);

llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,34 @@ bb:
127127
ret void
128128
}
129129

130+
; GCN-LABEL: {{^}}sub_sube_commuted:
131+
; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
132+
; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
133+
; GCN: v_subbrev_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
134+
; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
135+
; GCN: v_add_i32_e32 {{.*}}, 0x64, [[SUB]]
136+
137+
; GFX9-LABEL: {{^}}sub_sube_commuted:
138+
; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
139+
; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
140+
; GFX9: v_subbrev_co_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
141+
; GFX9: v_sub_u32_e32 [[SUB:v[0-9]+]], s{{[0-9]+}}, [[SUBB]]
142+
; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[SUB]]
143+
define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
144+
bb:
145+
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
146+
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
147+
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
148+
%v = load i32, i32 addrspace(1)* %gep, align 4
149+
%cmp = icmp ugt i32 %x, %y
150+
%ext = sext i1 %cmp to i32
151+
%adde = add i32 %v, %ext
152+
%sub = sub i32 %adde, %a
153+
%sub2 = sub i32 100, %sub
154+
store i32 %sub2, i32 addrspace(1)* %gep, align 4
155+
ret void
156+
}
157+
130158
; GCN-LABEL: {{^}}sube_sub:
131159
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
132160
; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]

0 commit comments

Comments
 (0)