diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 48d6b9996e572..6fec4677f796e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5125,6 +5125,33 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
     }
 
+    // fold (setcc (trunc x) c) -> (setcc x c)
+    //
+    // A nuw truncate only drops zero bits and a nsw truncate only drops copies
+    // of the sign bit, so the compare can be done on the wide value as long as
+    // the predicate does not rely on the other signedness: nuw covers unsigned
+    // and equality predicates, nsw covers signed and equality predicates.
+    if (N0.getOpcode() == ISD::TRUNCATE &&
+        ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
+         (N0->getFlags().hasNoSignedWrap() &&
+          !ISD::isUnsignedIntSetCC(Cond))) &&
+        isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
+      EVT NewVT = N0.getOperand(0).getValueType();
+      // Widen the constant the same way the flag says x was narrowed. Picking
+      // the extension from the predicate alone is wrong for SETEQ/SETNE: with
+      // only nsw, x == -1 truncates to all-ones and must be compared against
+      // sext(c), not zext(c). If both flags are set, equality predicates give
+      // the same result with either extension, so sext is used whenever nsw
+      // justifies the fold.
+      const bool UseSExt =
+          N0->getFlags().hasNoSignedWrap() && !ISD::isUnsignedIntSetCC(Cond);
+      SDValue NewConst = DAG.getConstant(UseSExt
+                                             ? C1.sext(NewVT.getSizeInBits())
+                                             : C1.zext(NewVT.getSizeInBits()),
+                                         dl, NewVT);
+      return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
+    }
+
     if (SDValue V =
             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
       return V;
@@ -5646,6 +5673,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     return N0;
   }
 
+  // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
+  //
+  // Both truncates must carry the same no-wrap flag so that x and y are the
+  // same kind of extension of their narrow values: nuw for unsigned and
+  // equality predicates, nsw for signed and equality predicates.
+  if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+      ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
+        N1->getFlags().hasNoUnsignedWrap()) ||
+       (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
+        N1->getFlags().hasNoSignedWrap())) &&
+      isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
+    return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+  }
+
   // Could not fold it.
   return SDValue();
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6000b40694763..fc3498d7445ce 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1673,18 +1673,6 @@ def : Pat<(setcc (i16 (sext_inreg (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel
                       (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
                       (cond2cc $cc))>;
 
-// A 16-bit comparison of truncated byte extracts can be be converted to 32-bit
-// comparison because we know that the truncate is just trancating off zeros
-// and that the most-significant byte is also zeros so the meaning of signed and
-// unsigned comparisons will not be changed.
-def : Pat<(setcc (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))),
-                 (i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))),
-                 cond:$cc),
-          (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
-                      (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
-                      (cond2cc $cc))>;
-
-
 def SDTDeclareArrayParam :
   SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
 def SDTDeclareScalarParam :
diff --git a/llvm/test/CodeGen/NVPTX/sext-setcc.ll b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
index 9a67bdfeb067b..97918a6f26cdf 100644
--- a/llvm/test/CodeGen/NVPTX/sext-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
@@ -29,7 +29,6 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
 ; CHECK-LABEL: sext_setcc_v4i1_to_v4i8(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .pred %p<5>;
-; CHECK-NEXT:    .reg .b16 %rs<5>;
 ; CHECK-NEXT:    .reg .b32 %r<13>;
 ; CHECK-NEXT:    .reg .b64 %rd<2>;
 ; CHECK-EMPTY:
@@ -37,17 +36,13 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
 ; CHECK-NEXT:    ld.param.b64 %rd1, [sext_setcc_v4i1_to_v4i8_param_0];
 ; CHECK-NEXT:    ld.b32 %r1, [%rd1];
 ; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 0x7770U;
-; CHECK-NEXT:    cvt.u16.u32 %rs1, %r2;
-; CHECK-NEXT:    setp.eq.b16 %p1, %rs1, 0;
+; CHECK-NEXT:    setp.eq.b32 %p1, %r2, 0;
 ; CHECK-NEXT:    prmt.b32 %r3, %r1, 0, 0x7771U;
-; CHECK-NEXT:    cvt.u16.u32 %rs2, %r3;
-; CHECK-NEXT:    setp.eq.b16 %p2, %rs2, 0;
+; CHECK-NEXT:    setp.eq.b32 %p2, %r3, 0;
 ; CHECK-NEXT:    prmt.b32 %r4, %r1, 0, 0x7772U;
-; CHECK-NEXT:    cvt.u16.u32 %rs3, %r4;
-; CHECK-NEXT:    setp.eq.b16 %p3, %rs3, 0;
+; CHECK-NEXT:    setp.eq.b32 %p3, %r4, 0;
 ; CHECK-NEXT:    prmt.b32 %r5, %r1, 0, 0x7773U;
-; CHECK-NEXT:    cvt.u16.u32 %rs4, %r5;
-; CHECK-NEXT:    setp.eq.b16 %p4, %rs4, 0;
+; CHECK-NEXT:    setp.eq.b32 %p4, %r5, 0;
 ; CHECK-NEXT:    selp.b32 %r6, -1, 0, %p4;
 ; CHECK-NEXT:    selp.b32 %r7, -1, 0, %p3;
 ; CHECK-NEXT:    prmt.b32 %r8, %r7, %r6, 0x3340U;
diff --git a/llvm/test/CodeGen/NVPTX/trunc-setcc.ll b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
new file mode 100644
index 0000000000000..f22e37e203966
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @trunc_nsw_singed_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_singed_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [trunc_nsw_singed_const_param_0];
+; CHECK-NEXT:    add.s32 %r2, %r1, 1;
+; CHECK-NEXT:    setp.gt.s32 %p1, %r2, -1;
+; CHECK-NEXT:    selp.b32 %r3, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %a2 = add i32 %a, 1
+  %b = trunc nsw i32 %a2 to i8
+  %c = icmp sgt i8 %b, -1
+  ret i1 %c
+}
+
+define i1 @trunc_nuw_singed_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_singed_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b8 %rs1, [trunc_nuw_singed_const_param_0];
+; CHECK-NEXT:    add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT:    cvt.s16.s8 %rs3, %rs2;
+; CHECK-NEXT:    setp.lt.s16 %p1, %rs3, 100;
+; CHECK-NEXT:    selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %a2 = add i32 %a, 1
+  %b = trunc nuw i32 %a2 to i8
+  %c = icmp slt i8 %b, 100
+  ret i1 %c
+}
+
+define i1 @trunc_nsw_unsinged_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_unsinged_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b8 %rs1, [trunc_nsw_unsinged_const_param_0];
+; CHECK-NEXT:    add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT:    and.b16 %rs3, %rs2, 255;
+; CHECK-NEXT:    setp.lt.u16 %p1, %rs3, 236;
+; CHECK-NEXT:    selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %a2 = add i32 %a, 1
+  %b = trunc nsw i32 %a2 to i8
+  %c = icmp ult i8 %b, -20
+  ret i1 %c
+}
+
+define i1 @trunc_nuw_unsinged_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_unsinged_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [trunc_nuw_unsinged_const_param_0];
+; CHECK-NEXT:    add.s32 %r2, %r1, 1;
+; CHECK-NEXT:    setp.gt.u32 %p1, %r2, 100;
+; CHECK-NEXT:    selp.b32 %r3, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %a2 = add i32 %a, 1
+  %b = trunc nuw i32 %a2 to i8
+  %c = icmp ugt i8 %b, 100
+  ret i1 %c
+}
+
+
+define i1 @trunc_nsw_eq_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_eq_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [trunc_nsw_eq_const_param_0];
+; CHECK-NEXT:    setp.eq.b32 %p1, %r1, 99;
+; CHECK-NEXT:    selp.b32 %r2, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT:    ret;
+  %a2 = add i32 %a, 1
+  %b = trunc nsw i32 %a2 to i8
+  %c = icmp eq i8 %b, 100
+  ret i1 %c
+}
+
+define i1 @trunc_nuw_eq_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_eq_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [trunc_nuw_eq_const_param_0];
+; CHECK-NEXT:    setp.eq.b32 %p1, %r1, 99;
+; CHECK-NEXT:    selp.b32 %r2, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT:    ret;
+  %a2 = add i32 %a, 1
+  %b = trunc nuw i32 %a2 to i8
+  %c = icmp eq i8 %b, 100
+  ret i1 %c
+}
+
+;;;
+
+define i1 @trunc_nsw_singed(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nsw_singed(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [trunc_nsw_singed_param_0];
+; CHECK-NEXT:    add.s32 %r2, %r1, 1;
+; CHECK-NEXT:    ld.param.b32 %r3, [trunc_nsw_singed_param_1];
+; CHECK-NEXT:    add.s32 %r4, %r3, 7;
+; CHECK-NEXT:    setp.gt.s32 %p1, %r2, %r4;
+; CHECK-NEXT:    selp.b32 %r5, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT:    ret;
+  %b1 = add i32 %a1, 1
+  %b2 = add i32 %a2, 7
+  %c1 = trunc nsw i32 %b1 to i8
+  %c2 = trunc nsw i32 %b2 to i8
+  %c = icmp sgt i8 %c1, %c2
+  ret i1 %c
+}
+
+define i1 @trunc_nuw_singed(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nuw_singed(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b16 %rs<7>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b8 %rs1, [trunc_nuw_singed_param_0];
+; CHECK-NEXT:    ld.param.b8 %rs2, [trunc_nuw_singed_param_1];
+; CHECK-NEXT:    add.s16 %rs3, %rs1, 1;
+; CHECK-NEXT:    cvt.s16.s8 %rs4, %rs3;
+; CHECK-NEXT:    add.s16 %rs5, %rs2, 6;
+; CHECK-NEXT:    cvt.s16.s8 %rs6, %rs5;
+; CHECK-NEXT:    setp.lt.s16 %p1, %rs4, %rs6;
+; CHECK-NEXT:    selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %b1 = add i32 %a1, 1
+  %b2 = add i32 %a2, 6
+  %c1 = trunc nuw i32 %b1 to i8
+  %c2 = trunc nuw i32 %b2 to i8
+  %c = icmp slt i8 %c1, %c2
+  ret i1 %c
+}
+
+define i1 @trunc_nsw_unsinged(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nsw_unsinged(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b16 %rs<7>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b8 %rs1, [trunc_nsw_unsinged_param_0];
+; CHECK-NEXT:    ld.param.b8 %rs2, [trunc_nsw_unsinged_param_1];
+; CHECK-NEXT:    add.s16 %rs3, %rs1, 1;
+; CHECK-NEXT:    and.b16 %rs4, %rs3, 255;
+; CHECK-NEXT:    add.s16 %rs5, %rs2, 4;
+; CHECK-NEXT:    and.b16 %rs6, %rs5, 255;
+; CHECK-NEXT:    setp.lt.u16 %p1, %rs4, %rs6;
+; CHECK-NEXT:    selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %b1 = add i32 %a1, 1
+  %b2 = add i32 %a2, 4
+  %c1 = trunc nsw i32 %b1 to i8
+  %c2 = trunc nsw i32 %b2 to i8
+  %c = icmp ult i8 %c1, %c2
+  ret i1 %c
+}
+
+define i1 @trunc_nuw_unsinged(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nuw_unsinged(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [trunc_nuw_unsinged_param_0];
+; CHECK-NEXT:    add.s32 %r2, %r1, 1;
+; CHECK-NEXT:    ld.param.b32 %r3, [trunc_nuw_unsinged_param_1];
+; CHECK-NEXT:    add.s32 %r4, %r3, 5;
+; CHECK-NEXT:    setp.gt.u32 %p1, %r2, %r4;
+; CHECK-NEXT:    selp.b32 %r5, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT:    ret;
+  %b1 = add i32 %a1, 1
+  %b2 = add i32 %a2, 5
+  %c1 = trunc nuw i32 %b1 to i8
+  %c2 = trunc nuw i32 %b2 to i8
+  %c = icmp ugt i8 %c1, %c2
+  ret i1 %c
+}
+
+
+define i1 @trunc_nsw_eq(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nsw_eq(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [trunc_nsw_eq_param_0];
+; CHECK-NEXT:    add.s32 %r2, %r1, 1;
+; CHECK-NEXT:    ld.param.b32 %r3, [trunc_nsw_eq_param_1];
+; CHECK-NEXT:    add.s32 %r4, %r3, 3;
+; CHECK-NEXT:    setp.eq.b32 %p1, %r2, %r4;
+; CHECK-NEXT:    selp.b32 %r5, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT:    ret;
+  %b1 = add i32 %a1, 1
+  %b2 = add i32 %a2, 3
+  %c1 = trunc nsw i32 %b1 to i8
+  %c2 = trunc nsw i32 %b2 to i8
+  %c = icmp eq i8 %c1, %c2
+  ret i1 %c
+}
+
+define i1 @trunc_nuw_eq(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nuw_eq(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [trunc_nuw_eq_param_0];
+; CHECK-NEXT:    add.s32 %r2, %r1, 2;
+; CHECK-NEXT:    ld.param.b32 %r3, [trunc_nuw_eq_param_1];
+; CHECK-NEXT:    add.s32 %r4, %r3, 1;
+; CHECK-NEXT:    setp.eq.b32 %p1, %r2, %r4;
+; CHECK-NEXT:    selp.b32 %r5, -1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT:    ret;
+  %b1 = add i32 %a1, 2
+  %b2 = add i32 %a2, 1
+  %c1 = trunc nuw i32 %b1 to i8
+  %c2 = trunc nuw i32 %b2 to i8
+  %c = icmp eq i8 %c1, %c2
+  ret i1 %c
+}