[X86][CodeGen] Support lowering for CCMP/CTEST #91747

KanRobert · 2024-05-10T13:57:29Z

DAG combine for CCMP and CTESTrr:

and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y)))
->
setcc(cc1, ccmp(X, Y, ~cflags/cflags, cc0/~cc0, flag0))

and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0)))
 ->
setcc(cc1, ctest(X, X, ~cflags/cflags, cc0/~cc0, flag0))

where cflags is determined by cc1.

Generic DAG combine:

cmp(setcc(cc, X), 0)
brcond ne
->
X
brcond cc

sub(setcc(cc, X), 1)
brcond ne
->
X
brcond ~cc

Post DAG transform: ANDrr/rm + CTESTrr -> CTESTrr/CTESTmr

Pattern match for CTESTri:

X= and A, B
ctest(X, X, cflags, cc0/, flag0)
->
ctest(A, B, cflags, cc0/, flag0)

CTESTmi is already handled by the memory folding mechanism in MIR.

llvmbot · 2024-05-10T13:58:00Z

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: Shengchen Kan (KanRobert)

Changes

Patch is 57.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91747.diff

6 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+2-2)
(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+161-3)
(modified) llvm/lib/Target/X86/X86ISelLowering.h (+4)
(modified) llvm/lib/Target/X86/X86InstrConditionalCompare.td (+21)
(modified) llvm/lib/Target/X86/X86InstrFragments.td (+6)
(added) llvm/test/CodeGen/X86/apx/ccmp.ll (+1116)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 05ab6e2e48206..d5f8235d3582a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1765,8 +1765,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
     if (N->getNumValues() == RV->getNumValues())
       DAG.ReplaceAllUsesWith(N, RV.getNode());
     else {
-      assert(N->getValueType(0) == RV.getValueType() &&
-             N->getNumValues() == 1 && "Type mismatch");
+      //assert(N->getValueType(0) == RV.getValueType() &&
+      //       N->getNumValues() == 1 && "Type mismatch");
       DAG.ReplaceAllUsesWith(N, &RV);
     }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4638f7b70358b..c9df98ffbee04 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33881,6 +33881,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(TESTUI)
   NODE_NAME_CASE(FP80_ADD)
   NODE_NAME_CASE(STRICT_FP80_ADD)
+  NODE_NAME_CASE(CCMP)
+  NODE_NAME_CASE(CTEST)
   }
   return nullptr;
 #undef NODE_NAME_CASE
@@ -54508,7 +54510,156 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
   return true;
 }
 
+
+static int getCondFlagsFromCondCode(X86::CondCode CC) {
+  // CCMP/CTEST has two conditional operands:
+  // - SCC: source conditonal code (same as CMOV)
+  // - DCF: destination conditional flags, which has 4 valid bits
+  //
+  // +----+----+----+----+
+  // | OF | SF | ZF | CF |
+  // +----+----+----+----+
+  //
+  // If SCC(source conditional code) evaluates to false, CCMP/CTEST will updates
+  // the conditional flags by as follows:
+  //
+  // OF = DCF.OF
+  // SF = DCF.SF
+  // ZF = DCF.ZF
+  // CF = DCF.CF
+  // PF = DCF.CF
+  // AF = 0 (Auxiliary Carry Flag)
+  //
+  // Otherwise, the CMP or TEST is executed and it updates the
+  // CSPAZO flags normally.
+  //
+  // NOTE:
+  // If SCC = P, then SCC evaluates to true regardless of the CSPAZO value.
+  // If SCC = NP, then SCC evaluates to false regardless of the CSPAZO value.
+
+  enum { CF = 1, ZF = 2, SF = 4, OF = 8, PF = CF };
+
+
+  switch (CC) {
+  default:
+    llvm_unreachable("Illegal condition code!");
+  case X86::COND_NO:
+  case X86::COND_NE:
+  case X86::COND_GE:
+  case X86::COND_G:
+  case X86::COND_AE:
+  case X86::COND_A:
+  case X86::COND_NS:
+  case X86::COND_NP:
+    return 0;
+  case X86::COND_O:
+    return OF;
+  case X86::COND_B:
+  case X86::COND_BE:
+    return CF;
+    break;
+  case X86::COND_E:
+  case X86::COND_LE:
+    return ZF;
+  case X86::COND_S:
+  case X86::COND_L:
+    return SF;
+  case X86::COND_P:
+    return PF;
+  }
+}
+
+static SDValue
+combineX86SubCmpToCcmpHelper(SDNode *N, SDValue Flag, SelectionDAG &DAG,
+                             TargetLowering::DAGCombinerInfo &DCI,
+                             const X86Subtarget &ST) {
+  SDValue LHS = N->getOperand(0);
+
+  if (!ST.hasCCMP() || LHS.getOpcode() != ISD::AND || !Flag.hasOneUse())
+    return SDValue();
+
+  SDValue SetCC0 = LHS.getOperand(0);
+  SDValue SetCC1 = LHS.getOperand(1);
+  if (SetCC0.getOpcode() != X86ISD::SETCC &&
+      SetCC1.getOpcode() != X86ISD::SETCC)
+    return SDValue();
+
+  SDValue Sub = SetCC1.getOperand(1);
+  // and is commutable. Try to commute the operands and then test again.
+  if (Sub.getOpcode() != X86ISD::SUB)
+    return SDValue();
+
+  SDNode *BrCond = *Flag->uses().begin();
+  if (BrCond->getOpcode() != X86ISD::BRCOND)
+    return SDValue();
+
+  X86::CondCode CC0 =
+      static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
+  if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
+    return SDValue();
+
+  SDValue CFlags = DAG.getTargetConstant(
+      getCondFlagsFromCondCode(X86::GetOppositeBranchCondition(
+          static_cast<X86::CondCode>(SetCC1.getConstantOperandVal(0)))),
+      SDLoc(BrCond), MVT::i8);
+  SDValue CCMP = DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
+                             {Sub.getOperand(0), Sub.getOperand(1), CFlags,
+                              SetCC0.getOperand(0), SetCC0.getOperand(1)});
+  DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
+
+  SmallVector<SDValue> Ops(BrCond->op_values());
+  unsigned CondNo = 2;
+  X86::CondCode OldCC =
+      static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo));
+  assert(OldCC == X86::COND_NE && "Unexpected CC");
+  if (Ops[CondNo] != SetCC1.getOperand(0)) {
+    Ops[CondNo] = SetCC1.getOperand(0);
+    SDValue NewBrCond =
+        DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
+    DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
+    DCI.recursivelyDeleteUnusedNodes(BrCond);
+  }
+  return CCMP;
+}
+
+static SDValue combineX86CmpToCcmp(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const X86Subtarget &ST) {
+  // cmp(and(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
+  // brcond ne
+  //
+  //  ->
+  //
+  //  ccmp(X, Y, cflags, cc0, flag0)
+  //  brcond cc1
+  //
+  //  where cflags is determined by cc1.
+
+  return combineX86SubCmpToCcmpHelper(N, SDValue(N, 0), DAG, DCI, ST);
+}
+
+static SDValue combineX86SubToCcmp(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const X86Subtarget &ST) {
+  // sub(and(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 1)
+  // brcond ne
+  //
+  //  ->
+  //
+  //  ccmp(X, Y, cflags, cc0, flag0)
+  //  brcond cc1
+  //
+  //  if only flag has users, where cflags is determined by cc1.
+
+  if (N->getOpcode() != X86ISD::SUB || !isOneConstant(N->getOperand(1)) ||
+      N->hasAnyUseOfValue(0))
+    return SDValue();
+
+  return combineX86SubCmpToCcmpHelper(N, SDValue(N, 1), DAG, DCI, ST);
+}
+
 static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
+                          TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
   // Only handle test patterns.
   if (!isNullConstant(N->getOperand(1)))
@@ -54523,6 +54674,9 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
   EVT VT = Op.getValueType();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
+  if (SDValue CCMP = combineX86CmpToCcmp(N, DAG, DCI, Subtarget))
+    return CCMP;
+
   // If we have a constant logical shift that's only used in a comparison
   // against zero turn it into an equivalent AND. This allows turning it into
   // a TEST instruction later.
@@ -54651,7 +54805,8 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
 }
 
 static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
-                                TargetLowering::DAGCombinerInfo &DCI) {
+                                TargetLowering::DAGCombinerInfo &DCI,
+                                const X86Subtarget &ST) {
   assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
          "Expected X86ISD::ADD or X86ISD::SUB");
 
@@ -54662,6 +54817,9 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
   bool IsSub = X86ISD::SUB == N->getOpcode();
   unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
 
+  if (SDValue CCMP = combineX86SubToCcmp(N, DAG, DCI, ST))
+    return CCMP;
+
   // If we don't use the flag result, simplify back to a generic ADD/SUB.
   if (!N->hasAnyUseOfValue(1)) {
     SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
@@ -56960,11 +57118,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::BLENDV:      return combineSelect(N, DAG, DCI, Subtarget);
   case ISD::BITCAST:        return combineBitcast(N, DAG, DCI, Subtarget);
   case X86ISD::CMOV:        return combineCMov(N, DAG, DCI, Subtarget);
-  case X86ISD::CMP:         return combineCMP(N, DAG, Subtarget);
+  case X86ISD::CMP:         return combineCMP(N, DAG, DCI, Subtarget);
   case ISD::ADD:            return combineAdd(N, DAG, DCI, Subtarget);
   case ISD::SUB:            return combineSub(N, DAG, DCI, Subtarget);
   case X86ISD::ADD:
-  case X86ISD::SUB:         return combineX86AddSub(N, DAG, DCI);
+  case X86ISD::SUB:         return combineX86AddSub(N, DAG, DCI, Subtarget);
   case X86ISD::SBB:         return combineSBB(N, DAG);
   case X86ISD::ADC:         return combineADC(N, DAG, DCI);
   case ISD::MUL:            return combineMul(N, DAG, DCI, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index ade54f73bff09..d8596dbdddcb7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -789,6 +789,10 @@ namespace llvm {
     // Perform an FP80 add after changing precision control in FPCW.
     STRICT_FP80_ADD,
 
+    // Conditional compare instructions
+    CCMP,
+    CTEST,
+
     // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
     // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
 
diff --git a/llvm/lib/Target/X86/X86InstrConditionalCompare.td b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
index e5c1143eba87f..ea558dfcb5ab7 100644
--- a/llvm/lib/Target/X86/X86InstrConditionalCompare.td
+++ b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
@@ -78,6 +78,24 @@ let mayLoad = 1 in {
   }
 }
 
+def : Pat<(X86ccmp GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
+          (CCMP8rr GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
+          (CCMP16rr GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
+          (CCMP32rr GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
+          (CCMP64rr GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
+
+def : Pat<(X86ccmp GR8:$src1, (i8 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+          (CCMP8ri GR8:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR16:$src1, (i16 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+          (CCMP16ri GR16:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR32:$src1, (i32 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+          (CCMP32ri GR32:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR64:$src1, (i64 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+          (CCMP64ri32 GR64:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+
 //===----------------------------------------------------------------------===//
 // CTEST Instructions
 //
@@ -108,3 +126,6 @@ let mayLoad = 1 in {
     def CTEST64mr: Ctest<0x85, MRMDestMem, Xi64, i64mem, GR64>;
   }
 }
+
+def : Pat<(X86ctest GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
+          (CTEST32rr GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index f14c7200af968..664998e76353b 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -12,6 +12,9 @@ def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
 def SDTX86FCmp    : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisFP<1>,
                                          SDTCisSameAs<1, 2>]>;
 
+def SDTX86Ccmp    : SDTypeProfile<1, 5,
+                                  [SDTCisVT<3, i8>, SDTCisVT<4, i8>, SDTCisVT<5, i32>]>;
+
 def SDTX86Cmov    : SDTypeProfile<1, 4,
                                   [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
                                    SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
@@ -138,6 +141,9 @@ def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
 def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
 def X86bt      : SDNode<"X86ISD::BT",       SDTX86CmpTest>;
 
+def X86ccmp    : SDNode<"X86ISD::CCMP",     SDTX86Ccmp>;
+def X86ctest   : SDNode<"X86ISD::CTEST",    SDTX86Ccmp>;
+
 def X86cmov    : SDNode<"X86ISD::CMOV",     SDTX86Cmov>;
 def X86brcond  : SDNode<"X86ISD::BRCOND",   SDTX86BrCond,
                         [SDNPHasChain]>;
diff --git a/llvm/test/CodeGen/X86/apx/ccmp.ll b/llvm/test/CodeGen/X86/apx/ccmp.ll
new file mode 100644
index 0000000000000..5d6c281404cb5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/ccmp.ll
@@ -0,0 +1,1116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ccmp -verify-machineinstrs -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ccmp,+ndd -verify-machineinstrs -show-mc-encoding | FileCheck %s --check-prefix=NDD
+
+define void @ccmp8rr_zf(i8 noundef %a, i8 noundef %b, i8 noundef %c) {
+; CHECK-LABEL: ccmp8rr_zf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpb %dl, %dil # encoding: [0x40,0x38,0xd7]
+; CHECK-NEXT:    ccmpneb {dfv=zf} %dl, %sil # encoding: [0x62,0xf4,0x14,0x05,0x38,0xd6]
+; CHECK-NEXT:    jne .LBB0_1 # encoding: [0x75,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1
+; CHECK-NEXT:  # %bb.2: # %if.then
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    jmp foo # TAILCALL
+; CHECK-NEXT:    # encoding: [0xeb,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK-NEXT:  .LBB0_1: # %if.end
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp8rr_zf:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpb %dl, %dil # encoding: [0x40,0x38,0xd7]
+; NDD-NEXT:    ccmpneb {dfv=zf} %dl, %sil # encoding: [0x62,0xf4,0x14,0x05,0x38,0xd6]
+; NDD-NEXT:    jne .LBB0_1 # encoding: [0x75,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.2: # %if.then
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    jmp foo # TAILCALL
+; NDD-NEXT:    # encoding: [0xeb,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; NDD-NEXT:  .LBB0_1: # %if.end
+; NDD-NEXT:    retq # encoding: [0xc3]
+entry:
+  %cmp = icmp eq i8 %a, %c
+  %cmp1 = icmp eq i8 %b, %c
+  %or.cond = or i1 %cmp, %cmp1
+  br i1 %or.cond, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void (...) @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
+define void @ccmp16rr_sf(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+; CHECK-LABEL: ccmp16rr_sf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpw %dx, %di # encoding: [0x66,0x39,0xd7]
+; CHECK-NEXT:    ccmplew {dfv=sf} %dx, %si # encoding: [0x62,0xf4,0x25,0x0e,0x39,0xd6]
+; CHECK-NEXT:    jge .LBB1_1 # encoding: [0x7d,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1
+; CHECK-NEXT:  # %bb.2: # %if.then
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    jmp foo # TAILCALL
+; CHECK-NEXT:    # encoding: [0xeb,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK-NEXT:  .LBB1_1: # %if.end
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp16rr_sf:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpw %dx, %di # encoding: [0x66,0x39,0xd7]
+; NDD-NEXT:    ccmplew {dfv=sf} %dx, %si # encoding: [0x62,0xf4,0x25,0x0e,0x39,0xd6]
+; NDD-NEXT:    jge .LBB1_1 # encoding: [0x7d,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.2: # %if.then
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    jmp foo # TAILCALL
+; NDD-NEXT:    # encoding: [0xeb,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; NDD-NEXT:  .LBB1_1: # %if.end
+; NDD-NEXT:    retq # encoding: [0xc3]
+entry:
+  %cmp = icmp sgt i16 %a, %c
+  %cmp1 = icmp slt i16 %b, %c
+  %or.cond = or i1 %cmp, %cmp1
+  br i1 %or.cond, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void (...) @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
+define void @ccmp32rr_cf(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+; CHECK-LABEL: ccmp32rr_cf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl %edx, %edi # encoding: [0x39,0xd7]
+; CHECK-NEXT:    ccmpbl {dfv=cf} %edx, %esi # encoding: [0x62,0xf4,0x0c,0x02,0x39,0xd6]
+; CHECK-NEXT:    ja .LBB2_1 # encoding: [0x77,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; CHECK-NEXT:  # %bb.2: # %if.then
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    jmp foo # TAILCALL
+; CHECK-NEXT:    # encoding: [0xeb,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK-NEXT:  .LBB2_1: # %if.end
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp32rr_cf:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpl %edx, %edi # encoding: [0x39,0xd7]
+; NDD-NEXT:    ccmpbl {dfv=cf} %edx, %esi # encoding: [0x62,0xf4,0x0c,0x02,0x39,0xd6]
+; NDD-NEXT:    ja .LBB2_1 # encoding: [0x77,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.2: # %if.then
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    jmp foo # TAILCALL
+; NDD-NEXT:    # encoding: [0xeb,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; NDD-NEXT:  .LBB2_1: # %if.end
+; NDD-NEXT:    retq # encoding: [0xc3]
+entry:
+  %cmp = icmp uge i32 %a, %c
+  %cmp1 = icmp ule i32 %b, %c
+  %or.cond = or i1 %cmp, %cmp1
+  br i1 %or.cond, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void (...) @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
+define void @ccmp64rr_of(i64 %a, i64 %b) {
+; CHECK-LABEL: ccmp64rr_of:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; CHECK-NEXT:    ccmpneq {dfv=of} %rsi, %rdi # encoding: [0x62,0xf4,0xc4,0x05,0x39,0xf7]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp64rr_of:
+; NDD:       # %bb.0: # %bb
+; NDD-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; NDD-NEXT:    ccmpneq {dfv=of} %rsi, %rdi # encoding: [0x62,0xf4,0xc4,0x05,0x39,0xf7]
+; NDD-NEXT:    retq # encoding: [0xc3]
+bb:
+  %cond1 = icmp eq i64 %a, 0
+  br i1 %cond1, label %bb3, label %bb1
+
+bb1:                                              ; preds = %bb
+  %smul = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+  %obit = extractvalue {i64, i1} %smul, 1
+  br i1 %obit, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %tmp = ptrtoint ptr null to i64
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb1, %bb
+  ret void
+}
+
+define void @ccmp8ri_zf(i8 noundef %a, i8 noundef %b, i8 noundef %c) {
+; CHECK-LABEL: ccmp8ri_zf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpb %dl, %dil # encoding: [0x40,0x38,0xd7]
+; CHECK-NEXT:    ccmpleb {dfv=zf} $123, %sil # encoding: [0x62,0xf4,0x14,0x0e,0x80,0xfe,0x7b]
+; CHECK-NEXT:    jne .LBB4_1 # encoding: [0x75,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB4_1-1, kind: FK_PCRel_1
+; CHECK-NEXT:  # %bb.2: # %if.then
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    jmp foo # TAILCALL
+; CHECK-NEXT:    # encoding: [0xeb,A]
+; CHECK-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK-NEXT:  .LBB4_1: # %if.end
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: ccmp8ri_zf:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpb %dl, %dil # encoding: [0x40,0x38,0xd7]
+; NDD-NEXT:    ccmpleb {dfv=zf} $123, %sil # encoding: [0x62,0xf4,0x14,0x0e,0x80,0xfe,0x7b]
+; NDD-NEXT:    jne .LBB4_1 # encoding: [0x75,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB4_1-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.2: # %if.then
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    jmp foo # TAILCALL
+; NDD-NEXT:    # encoding: [0xeb,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; NDD-NEXT:  .LBB4_1: # %if.end
+; NDD-NEXT:    retq # encoding: [0xc3]
+entry:
+  %cmp = icmp sgt i8 %a, %c
+  %cmp1 = icmp eq ...
[truncated]

github-actions · 2024-05-10T14:00:26Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/test/CodeGen/X86/apx/ccmp.ll

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/Target/X86/X86ISelLowering.cpp

…processISelDAG, NFCI This is to simplify code for #91747

KanRobert · 2024-05-18T14:21:46Z

llvm/test/CodeGen/X86/apx/ctest.ll

+; CHECK-NEXT:    movzwl (%rsi), %eax
+; CHECK-NEXT:    andl $12345, %eax # imm = 0x3039
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    ctestnew {dfv=zf} %ax, %ax


This is sub-optimal b/c #92649

…Drr/rm -> CTESTrr/CTESTmr

llvm/lib/Target/X86/X86InstrInfo.cpp

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/test/CodeGen/X86/apx/ctest.ll

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

llvm/lib/Target/X86/X86ISelLowering.cpp

goldsteinn · 2024-05-24T16:39:45Z

LGTM. Please wait for some additional approvals to push this.

llvm/lib/Target/X86/X86InstrConditionalCompare.td

llvm/lib/Target/X86/X86ISelLowering.cpp

phoebewang · 2024-05-26T03:04:04Z

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

-      assert(N->getValueType(0) == RV.getValueType() &&
-             N->getNumValues() == 1 && "Type mismatch");


This can help to catch unexpected nodes. Do we need to remove both?

Yes, we need. After changing SUB to CCMP, neither of them satisfy. For this case, N->getValueType(1) == RV.getValueType() && N->getNumValues() == 2

Yes, we need. After changing SUB to CCMP, neither of them satisfy.

This means that there is a problem in combining SUB to CCMP. The assert here is to catch such kind of errors.
See my other comment.

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Target/X86/X86ISelLowering.h

phoebewang

LGTM.

KanRobert · 2024-05-26T10:28:54Z

Thanks for the review @phoebewang @goldsteinn @e-kud !

…91747 In llvm#91747, we change the SDNode from `X86ISD::SUB` (FROM) to `X86ISD::CCMP` (TO) in the DAGCombine. The value type of X86ISD::SUB can be `i8, i32` while the value type of X86ISD::CCMP is `i32`. That means the `SDValue(FROM, 0)` is unused and may be removed. However, `transferDbgValues` assumes the value is not null, which is called by `ReplaceAllUsesWith(SDNode *, const SDValue *)`. So we need to check if the value has any use before calling the function. Note: We already have same check in `ReplaceAllUsesWith(SDNode *, SDNode *)`. This fix the error ``` SelectionDAG.cpp:10942: void llvm::SelectionDAG::transferDbgValues(llvm::SDValue, llvm::SDValue, unsigned int, unsigned int, bool): Assertion `FromNode && ToNode && "Can't modify dbg values"' failed. ``` for tests llvm/test/CodeGen/X86/apx/ccmp.ll llvm/test/CodeGen/X86/apx/ctest.ll in Release build when LLVM_ENABLE_ASSERTIONS is on.

s-barannikov · 2024-05-26T18:22:13Z

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

-      assert(N->getValueType(0) == RV.getValueType() &&
-             N->getNumValues() == 1 && "Type mismatch");


Yes, we need. After changing SUB to CCMP, neither of them satisfy.

This means that there is a problem in combining SUB to CCMP. The assert here is to catch such kind of errors.
See my other comment.

s-barannikov · 2024-05-26T18:24:30Z

llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -54759,6 +54923,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
  bool IsSub = X86ISD::SUB == N->getOpcode();
  unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;

+  if (IsSub && isOneConstant(N->getOperand(1)) && !N->hasAnyUseOfValue(0))
+    if (SDValue CMP = combineX86SubCmpForFlags(N, SDValue(N, 1), DAG, DCI, ST))
+      return CMP;


This should use DCI.CombineTo accepting two SDValues, the first one of which should be the first (unused) result of SUB. The SUB will be removed later on.

What does this DCI.CombineTo look like? I am trying replace the second result of SUB (old flag) with the result of CCMP (new flag). Not understand why you suggest the first parameter is the first (unused) result of SUB.

SUB has two results, CMP has one result, which corresponds to the second result of the SUB.
Something like return DCI.CombineTo(N, SDValue(N, 0), CMP) would make the removed assert not trigger.
But I see you've found another way of silencing it.

s-barannikov · 2024-05-26T18:27:26Z

llvm/lib/Target/X86/X86ISelLowering.cpp

+  // Check the only user of flag is `brcond ne`.
+  SDNode *BrCond = *Flag->uses().begin();
+  if (BrCond->getOpcode() != X86ISD::BRCOND)
+    return SDValue();


Instead of checking the users of the node, you should call this function from combineBrCond.

Is there such a limitation? Starting from the SUB/CMP seems more intuitive to me.

No, but it is the usual approach.

…lvm#91747 In llvm#91747, we changed the SDNode from `X86ISD::SUB` (FROM) to `X86ISD::CCMP` (TO) in the DAGCombine. The value type of `X86ISD::SUB` can be `i8, i32` while the value type of `X86ISD::CCMP` is i32. This breaks the assumption that the value type should match after the combine and triggers the error ``` SelectionDAG.cpp:10942: void llvm::SelectionDAG::transferDbgValues(llvm::SDValue, llvm::SDValue, unsigned int, unsigned int, bool): Assertion `FromNode && ToNode && "Can't modify dbg values"' failed. ``` when running tests llvm/test/CodeGen/X86/apx/ccmp.ll llvm/test/CodeGen/X86/apx/ctest.ll in Release build when LLVM_ENABLE_ASSERTIONS is on. In this patch, we fix it by creating a merged value.

…91747 (#93434) In #91747, we changed the SDNode from `X86ISD::SUB` (FROM) to `X86ISD::CCMP` (TO) in the DAGCombine. The value type of `X86ISD::SUB` can be `i8, i32` while the value type of `X86ISD::CCMP` is i32. This breaks the assumption that the value type should match after the combine and triggers the error ``` SelectionDAG.cpp:10942: void llvm::SelectionDAG::transferDbgValues(llvm::SDValue, llvm::SDValue, unsigned int, unsigned int, bool): Assertion `FromNode && ToNode && "Can't modify dbg values"' failed. ``` when running tests llvm/test/CodeGen/X86/apx/ccmp.ll llvm/test/CodeGen/X86/apx/ctest.ll in Release build when LLVM_ENABLE_ASSERTIONS is on. In this patch, we fix it by creating a merged value.

…orFlags, NFCI No update for non-APX tests. This patch resolves TODO in #91747.

llvmbot added backend:X86 llvm:SelectionDAG SelectionDAGISel as well labels May 10, 2024

KanRobert marked this pull request as draft May 10, 2024 14:50

KanRobert force-pushed the ccmp-solution2 branch from 538c360 to 6fa2c9c Compare May 10, 2024 18:18

KanRobert commented May 10, 2024

View reviewed changes

llvm/test/CodeGen/X86/apx/ccmp.ll Show resolved Hide resolved

KanRobert commented May 10, 2024

View reviewed changes

llvm/test/CodeGen/X86/apx/ccmp.ll Show resolved Hide resolved

KanRobert requested review from RKSimon, phoebewang and goldsteinn May 10, 2024 18:28

KanRobert mentioned this pull request May 10, 2024

[X86] Set up the framework for optimization of CCMP/CTEST #84603

Closed

KanRobert requested a review from e-kud May 12, 2024 03:19

This comment was marked as resolved.

Sign in to view

goldsteinn reviewed May 12, 2024

View reviewed changes

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Outdated Show resolved Hide resolved

goldsteinn reviewed May 12, 2024

View reviewed changes

llvm/lib/Target/X86/X86ISelLowering.cpp Outdated Show resolved Hide resolved

e-kud reviewed May 13, 2024

View reviewed changes

llvm/lib/Target/X86/X86ISelLowering.cpp Outdated Show resolved Hide resolved

llvm/lib/Target/X86/X86ISelLowering.cpp Outdated Show resolved Hide resolved

This comment was marked as resolved.

Sign in to view

KanRobert force-pushed the ccmp-solution2 branch from 23f1c88 to 6637ab5 Compare May 17, 2024 07:28

KanRobert marked this pull request as ready for review May 17, 2024 07:30

KanRobert added a commit that referenced this pull request May 17, 2024

[X86][CodeGen] Use switch-case for transform in X86DAGToDAGISel::Post…

98deeda

…processISelDAG, NFCI This is to simplify code for #91747

KanRobert force-pushed the ccmp-solution2 branch from 6637ab5 to 2210ff4 Compare May 17, 2024 12:12

KanRobert commented May 18, 2024

View reviewed changes

[X86][CodeGen] Support lowering for CCMP/CTEST + transform CTESTrr+AN…

5759bb6

…Drr/rm -> CTESTrr/CTESTmr

KanRobert force-pushed the ccmp-solution2 branch from 3242aac to 5759bb6 Compare May 18, 2024 14:50

goldsteinn reviewed May 18, 2024

View reviewed changes

KanRobert added 3 commits May 19, 2024 00:41

add more pattern for ctest

55121ff

rename function

9cfd8e1

simplify code

e598224

goldsteinn reviewed May 20, 2024

View reviewed changes

llvm/lib/Target/X86/X86ISelLowering.cpp Outdated Show resolved Hide resolved

support continuous ccmp/ctest

13cb41e

goldsteinn reviewed May 23, 2024

View reviewed changes

llvm/test/CodeGen/X86/apx/ctest.ll Show resolved Hide resolved

KanRobert added 2 commits May 24, 2024 08:29

add new flag

aa30fef

add more test

bef4b3b

goldsteinn reviewed May 24, 2024

View reviewed changes

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Show resolved Hide resolved

goldsteinn reviewed May 24, 2024

View reviewed changes

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Outdated Show resolved Hide resolved

goldsteinn reviewed May 24, 2024

View reviewed changes

llvm/lib/Target/X86/X86ISelLowering.cpp Show resolved Hide resolved

KanRobert added 2 commits May 26, 2024 10:29

Remove variable NumOps

a619c95

correct comments

d5e565d

phoebewang reviewed May 26, 2024

View reviewed changes

KanRobert added 5 commits May 26, 2024 12:53

address review comment

3dd3f6d

change order for ISD

1ab3ba2

address review comment

d3412f8

add assertion

5aca1ab

address review comment

db14c94

phoebewang approved these changes May 26, 2024

View reviewed changes

KanRobert merged commit 331eb8a into llvm:main May 26, 2024
5 of 7 checks passed

KanRobert mentioned this pull request May 26, 2024

[SelectionDAG] Fix the assertion failure in Release build after #91747 #93416

Closed

s-barannikov reviewed May 26, 2024

View reviewed changes

KanRobert mentioned this pull request May 27, 2024

[SelectionDAG][X86] Fix the assertion failure in Release build after #91747 #93434

Merged

KanRobert added a commit that referenced this pull request May 27, 2024

[X86][DAGCombine] Remove the predicate hasCCMP() in combineX86SubCmpF…

5b14624

…orFlags, NFCI No update for non-APX tests. This patch resolves TODO in #91747.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[X86][CodeGen] Support lowering for CCMP/CTEST #91747

[X86][CodeGen] Support lowering for CCMP/CTEST #91747

KanRobert commented May 10, 2024 •

edited

Loading

llvmbot commented May 10, 2024 •

edited

Loading

github-actions bot commented May 10, 2024 •

edited

Loading

This comment was marked as resolved.

This comment was marked as resolved.

KanRobert May 18, 2024

goldsteinn commented May 24, 2024

phoebewang May 26, 2024

KanRobert May 26, 2024

s-barannikov May 26, 2024

phoebewang left a comment

KanRobert commented May 26, 2024

s-barannikov May 26, 2024

s-barannikov May 26, 2024

KanRobert May 27, 2024

s-barannikov May 27, 2024

s-barannikov May 26, 2024

KanRobert May 27, 2024

s-barannikov May 27, 2024

		assert(N->getValueType(0) == RV.getValueType() &&
		N->getNumValues() == 1 && "Type mismatch");

[X86][CodeGen] Support lowering for CCMP/CTEST #91747

[X86][CodeGen] Support lowering for CCMP/CTEST #91747

Conversation

KanRobert commented May 10, 2024 • edited Loading

llvmbot commented May 10, 2024 • edited Loading

github-actions bot commented May 10, 2024 • edited Loading

This comment was marked as resolved.

This comment was marked as resolved.

Choose a reason for hiding this comment

goldsteinn commented May 24, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

phoebewang left a comment

Choose a reason for hiding this comment

KanRobert commented May 26, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

KanRobert commented May 10, 2024 •

edited

Loading

llvmbot commented May 10, 2024 •

edited

Loading

github-actions bot commented May 10, 2024 •

edited

Loading