Skip to content

Commit

Permalink
[AMDGPU] Simplify setcc (sext from i1 b), -1|0, cc
Browse files Browse the repository at this point in the history
Depending on the compare code, the result is either the argument of the
sext or its negation. This helps to avoid a v_cndmask_b64 instruction
for the sext. The negated value can be further simplified and folded into
its parent comparison if possible.

Differential Revision: https://reviews.llvm.org/D34545

llvm-svn: 306446
  • Loading branch information
rampitec committed Jun 27, 2017
1 parent 3f91be5 commit c9bd53a
Show file tree
Hide file tree
Showing 2 changed files with 321 additions and 1 deletion.
30 changes: 29 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -5135,14 +5135,42 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = LHS.getValueType();
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();

auto CRHS = dyn_cast<ConstantSDNode>(RHS);
if (!CRHS) {
CRHS = dyn_cast<ConstantSDNode>(LHS);
if (CRHS) {
std::swap(LHS, RHS);
CC = getSetCCSwappedOperands(CC);
}
}

if (CRHS && VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
isBoolSGPR(LHS.getOperand(0))) {
// setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
// setcc (sext from i1 cc), -1, eq|sle|uge) => cc
// setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
// setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
if ((CRHS->isAllOnesValue() &&
(CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
(CRHS->isNullValue() &&
(CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(-1, SL, MVT::i1));
if ((CRHS->isAllOnesValue() &&
(CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
(CRHS->isNullValue() &&
(CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
return LHS.getOperand(0);
}

if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
VT != MVT::f16))
return SDValue();

// Match isinf pattern
// (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if (!CRHS)
Expand Down
292 changes: 292 additions & 0 deletions llvm/test/CodeGen/AMDGPU/setcc-sext.ll
@@ -0,0 +1,292 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}setcc_sgt_true_sext:
; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against -1 with a signed "greater than".
; %ext is either 0 or -1, so `sgt -1` holds exactly when %cmp is false; the
; branch condition is therefore the negation of the original `ugt` compare.
define amdgpu_kernel void @setcc_sgt_true_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), -1, sgt.
%cond = icmp sgt i32 %ext, -1
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_sgt_true_sext_swap:
; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Same condition as `sext > -1` (signed), but written with the constant on the
; left-hand side as `-1 < sext`. %ext is either 0 or -1, so the condition holds
; exactly when %cmp is false.
; NOTE(review): presumably intended to exercise the operand-swapping path of
; the combine -- confirm the constant is still on the LHS when the combine runs.
define amdgpu_kernel void @setcc_sgt_true_sext_swap(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc -1, (sext i1), slt.
%cond = icmp slt i32 -1, %ext
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_ne_true_sext:
; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against -1 with "not equal".
; %ext is either 0 or -1, so `ne -1` holds exactly when %cmp is false; the
; branch condition is therefore the negation of the original `ugt` compare.
define amdgpu_kernel void @setcc_ne_true_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), -1, ne.
%cond = icmp ne i32 %ext, -1
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_ult_true_sext:
; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against -1 with an unsigned "less than".
; As an unsigned value -1 is the maximum i32, so `ult -1` holds only for
; %ext == 0, i.e. exactly when %cmp is false.
define amdgpu_kernel void @setcc_ult_true_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), -1, ult.
%cond = icmp ult i32 %ext, -1
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_eq_true_sext:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against -1 with "equal".
; %ext is either 0 or -1, so `eq -1` holds exactly when %cmp is true; the
; branch condition is the original `ugt` compare itself.
define amdgpu_kernel void @setcc_eq_true_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), -1, eq.
%cond = icmp eq i32 %ext, -1
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_sle_true_sext:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against -1 with a signed "less or equal".
; %ext is either 0 or -1, so `sle -1` holds only for %ext == -1, i.e. exactly
; when %cmp is true.
define amdgpu_kernel void @setcc_sle_true_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), -1, sle.
%cond = icmp sle i32 %ext, -1
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_uge_true_sext:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against -1 with an unsigned
; "greater or equal". As an unsigned value -1 is the maximum i32, so `uge -1`
; holds only for %ext == -1, i.e. exactly when %cmp is true.
define amdgpu_kernel void @setcc_uge_true_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), -1, uge.
%cond = icmp uge i32 %ext, -1
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_eq_false_sext:
; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against 0 with "equal".
; %ext is either 0 or -1, so `eq 0` holds exactly when %cmp is false; the
; branch condition is therefore the negation of the original `ugt` compare.
define amdgpu_kernel void @setcc_eq_false_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), 0, eq.
%cond = icmp eq i32 %ext, 0
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_sge_false_sext:
; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against 0 with a signed
; "greater or equal". %ext is either 0 or -1, so `sge 0` holds only for
; %ext == 0, i.e. exactly when %cmp is false.
define amdgpu_kernel void @setcc_sge_false_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), 0, sge.
%cond = icmp sge i32 %ext, 0
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_ule_false_sext:
; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against 0 with an unsigned
; "less or equal". Only 0 is unsigned-less-or-equal to 0, so `ule 0` holds
; only for %ext == 0, i.e. exactly when %cmp is false.
define amdgpu_kernel void @setcc_ule_false_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), 0, ule.
%cond = icmp ule i32 %ext, 0
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}

; GCN-LABEL: {{^}}setcc_ne_false_sext:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against 0 with "not equal".
; %ext is either 0 or -1, so `ne 0` holds exactly when %cmp is true; the
; branch condition is the original `ugt` compare itself.
define amdgpu_kernel void @setcc_ne_false_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), 0, ne.
%cond = icmp ne i32 %ext, 0
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}
; GCN-LABEL: {{^}}setcc_ugt_false_sext:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against 0 with an unsigned
; "greater than". Every non-zero value is unsigned-greater than 0, so `ugt 0`
; holds only for %ext == -1, i.e. exactly when %cmp is true.
define amdgpu_kernel void @setcc_ugt_false_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), 0, ugt.
%cond = icmp ugt i32 %ext, 0
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}
; GCN-LABEL: {{^}}setcc_slt_false_sext:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_

; Kernel comparing a sign-extended i1 against 0 with a signed "less than".
; %ext is either 0 or -1, so `slt 0` holds only for %ext == -1, i.e. exactly
; when %cmp is true.
define amdgpu_kernel void @setcc_slt_false_sext(i32 addrspace(1)* nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
; Pattern under test: setcc (sext i1), 0, slt.
%cond = icmp slt i32 %ext, 0
br i1 %cond, label %then, label %endif

then:
; Store guarded by %cond so the compare result is actually used.
store i32 1, i32 addrspace(1)* %arg, align 4
br label %endif

endif:
ret void
}


declare i32 @llvm.amdgcn.workitem.id.x() #0

declare i32 @llvm.amdgcn.workitem.id.y() #0

attributes #0 = { nounwind readnone speculatable }

0 comments on commit c9bd53a

Please sign in to comment.