[GlobalIsel] Combine G_SELECT #74116

tschuett · 2023-12-01T17:40:20Z

At least AArch64 and X86 use flag setting instructions for the implementation. Inspired by visitSELECT, as the first step, this implements combines based on conditions def`d by logical binary operations aka chaining.

On Neoverse V2, sub and flagset subs have different costs and pipelines.

test plan: ninja check-llvm-codegen

At least AArch64 and X86 use flag setting instructions for the implementation. Inspired by visitSELECT, as the first step, this implements combines based on conditions def`d by logical binary operations aka chaining. On Neoverse V2, sub and flagset subs have different costs and pipelines. test plan: ninja check-llvm-codegen

llvmbot · 2023-12-01T17:40:55Z

@llvm/pr-subscribers-backend-amdgpu

Author: Thorsten Schütt (tschuett)

Changes

At least AArch64 and X86 use flag setting instructions for the implementation. Inspired by visitSELECT, as the first step, this implements combines based on conditions def`d by logical binary operations aka chaining.

On Neoverse V2, sub and flagset subs have different costs and pipelines.

test plan: ninja check-llvm-codegen

Patch is 48.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74116.diff

8 Files Affected:

(modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+7)
(modified) llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h (+41)
(modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+8-1)
(modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+75)
(modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir (+95)
(modified) llvm/test/CodeGen/AArch64/arm64-ccmp.ll (+401-214)
(modified) llvm/test/CodeGen/AArch64/dag-combine-select.ll (+12-32)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll (+10-4)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index ba72a3b71ffd70b..e938e2267fdd6b7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -812,6 +812,9 @@ class CombinerHelper {
   // Given a binop \p MI, commute operands 1 and 2.
   void applyCommuteBinOpOperands(MachineInstr &MI);
 
+  // combine selects.
+  bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -902,6 +905,10 @@ class CombinerHelper {
   /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0
   bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal,
                              Register FalseVal, BuildFnTy &MatchInfo);
+
+  /// Try to combine selects where the condition is def'd by logical binary
+  /// operators.
+  bool tryCombineSelectLogical(GSelect *Select, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6ab1d4550c51ca9..9641ad6bfd919d3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -558,6 +558,47 @@ class GVecReduce : public GenericMachineInstr {
   }
 };
 
+/// Represents a logical binary operator.
+class GLogicalBinOp : public GenericMachineInstr {
+public:
+  Register getLHSReg() const { return getOperand(1).getReg(); }
+  Register getRHSReg() const { return getOperand(2).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_AND:
+    case TargetOpcode::G_OR:
+    case TargetOpcode::G_XOR:
+      return true;
+    default:
+      return false;
+    };
+  }
+};
+
+/// Represents a G_AND.
+class GAnd : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_AND;
+  }
+};
+
+/// Represents a G_OR.
+class GOr : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_OR;
+  }
+};
+
+/// Represents a G_XOR.
+class GXor : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_XOR;
+  }
+};
 
 } // namespace llvm
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9a84ab80157f35f..7d27482c319d6fa 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1236,6 +1236,13 @@ def select_to_minmax: GICombineRule<
          [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def match_selects : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SELECT):$root,
+        [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1309,7 +1316,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
-    fsub_to_fneg, commute_constant_to_rhs]>;
+    fsub_to_fneg, commute_constant_to_rhs, match_selects]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c2a7c2d01188129..d83215dcc83b405 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6224,3 +6224,78 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
   MI.getOperand(2).setReg(LHSReg);
   Observer.changedInstr(MI);
 }
+
+bool CombinerHelper::tryCombineSelectLogical(GSelect *Select,
+                                             BuildFnTy &MatchInfo) {
+  MachineInstr *CondDef = MRI.getVRegDef(Select->getCondReg());
+  LLT DstTy = MRI.getType(Select->getReg(0));
+  uint32_t Flags = Select->getFlags();
+  Register DstReg = Select->getReg(0);
+
+  if (isa<GLogicalBinOp>(CondDef)) {
+    // The chaining might look like a pessimization.
+    // In fact, it is an optimization. One instruction def`ing
+    // the condiiton is
+    // removed and the conditions of the selects get simplified.
+    // In the next iteration, the combiner has to do less work to
+    // further simplify them.
+    if (isa<GAnd>(CondDef)) {
+      GAnd *And = cast<GAnd>(CondDef);
+      // transform: select (and (Cond0, Cond1)), X, Y
+      // to:        select Cond0, (select Cond1, X, Y), Y
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.setInstrAndDebugLoc(*Select);
+        Register Inner = MRI.createGenericVirtualRegister(DstTy);
+        B.buildSelect(Inner, And->getRHSReg(), Select->getTrueReg(),
+                      Select->getFalseReg(), Flags);
+        B.buildSelect(DstReg, And->getLHSReg(), Inner, Select->getFalseReg(),
+                      Flags);
+      };
+      return true;
+    } else if (isa<GOr>(CondDef)) {
+      GOr *Or = cast<GOr>(CondDef);
+      // transform: select (or (Cond0, Cond1)), X, Y
+      // to:        select Cond0, X, (select Cond1, X, Y)
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.setInstrAndDebugLoc(*Select);
+        Register Inner = MRI.createGenericVirtualRegister(DstTy);
+        B.buildSelect(Inner, Or->getRHSReg(), Select->getTrueReg(),
+                      Select->getFalseReg(), Flags);
+        B.buildSelect(DstReg, Or->getLHSReg(), Select->getTrueReg(), Inner,
+                      Flags);
+      };
+      return true;
+    } else if (isa<GXor>(CondDef)) {
+      GXor *Xor = cast<GXor>(CondDef);
+      auto IConstant =
+          getIConstantVRegValWithLookThrough(Xor->getRHSReg(), MRI);
+      if (IConstant && IConstant->Value == 1) {
+        // transform: select (xor Cond0, 1), X, Y
+        // to:        select Cond0 Y, X
+        // a xor 1 reads like not a.
+        MatchInfo = [=](MachineIRBuilder &B) {
+          B.setInstrAndDebugLoc(*Select);
+          B.buildSelect(DstReg, Xor->getLHSReg(), Select->getFalseReg(),
+                        Select->getTrueReg(), Flags);
+        };
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  GSelect *Select = cast<GSelect>(&MI);
+
+  // FIXME: support vector conditions
+  if (MRI.getType(Select->getCondReg()).isVector())
+    return false;
+
+  // combine selects where the condition is def'd by logical binary operations.
+  if (tryCombineSelectLogical(Select, MatchInfo))
+    return true;
+
+  return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 81d38a5b080470d..a60148ffe57162a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -298,3 +298,98 @@ body:             |
     %ext:_(s32) = G_ANYEXT %sel
     $w0 = COPY %ext(s32)
 ...
+---
+name:            and_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: and_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %y:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT %y(s1), %t, %f
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), [[SELECT]], %f
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %x:_(s1) = G_TRUNC %0
+    %y:_(s1) = G_TRUNC %1
+    %t:_(s1) = G_TRUNC %2
+    %f:_(s1) = G_TRUNC %3
+    %cond:_(s1) = G_AND %x, %y
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+name:            or_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: or_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %y:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT %y(s1), %t, %f
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %t, [[SELECT]]
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %x:_(s1) = G_TRUNC %0
+    %y:_(s1) = G_TRUNC %1
+    %t:_(s1) = G_TRUNC %2
+    %f:_(s1) = G_TRUNC %3
+    %cond:_(s1) = G_OR %x, %y
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+name:            xor_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: xor_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %f, %t
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %x:_(s1) = G_TRUNC %0
+    %t:_(s1) = G_TRUNC %1
+    %f:_(s1) = G_TRUNC %2
+    %one:_(s1) = G_CONSTANT i1 1
+    %cond:_(s1) = G_XOR %x, %one
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 821f6e403a27133..7bf5ac22d0ae3b0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -14,7 +14,7 @@ define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB0_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -42,7 +42,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT:  LBB1_2: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_different:
@@ -55,7 +55,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT:  LBB1_2: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sle i32 %a, 5
@@ -88,7 +88,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT:  LBB2_3: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_flagclobber:
@@ -106,7 +106,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT:  LBB2_3: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -144,7 +144,7 @@ define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB3_3: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -178,13 +178,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    ccmp w8, #16, #0, ge
 ; SDISEL-NEXT:    b.le LBB4_2
 ; SDISEL-NEXT:  ; %bb.1: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ; SDISEL-NEXT:  LBB4_2: ; %if.then
 ; SDISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: speculate_division:
@@ -194,13 +194,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    ccmp w8, #17, #0, gt
 ; GISEL-NEXT:    b.lt LBB4_2
 ; GISEL-NEXT:  ; %bb.1: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  LBB4_2: ; %if.then
 ; GISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %a, 0
@@ -230,13 +230,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
 ; SDISEL-NEXT:    fccmp s0, s1, #8, ge
 ; SDISEL-NEXT:    b.ge LBB5_2
 ; SDISEL-NEXT:  ; %bb.1: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ; SDISEL-NEXT:  LBB5_2: ; %if.then
 ; SDISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_fcmp:
@@ -248,13 +248,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
 ; GISEL-NEXT:    fccmp s0, s1, #8, gt
 ; GISEL-NEXT:    b.ge LBB5_2
 ; GISEL-NEXT:  ; %bb.1: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  LBB5_2: ; %if.then
 ; GISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %a, 0
@@ -318,7 +318,7 @@ define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB7_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -346,13 +346,13 @@ define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    cmp w1, #32
 ; CHECK-NEXT:    b.eq LBB8_3
 ; CHECK-NEXT:  ; %bb.2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB8_3: ; %if.then
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -380,7 +380,7 @@ define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB9_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -408,7 +408,7 @@ define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB10_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -466,10 +466,11 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_and:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #0, ne
-; GISEL-NEXT:    csel x0, x2, x3, lt
+; GISEL-NEXT:    csel x8, x2, x3, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel x0, x8, x3, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -488,10 +489,11 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_or:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
-; GISEL-NEXT:    csel x0, x2, x3, lt
+; GISEL-NEXT:    csel x8, x2, x3, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel x0, x2, x8, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -510,9 +512,10 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
 ;
 ; GISEL-LABEL: select_or_float:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
+; GISEL-NEXT:    fcsel s1, s0, s1, ne
+; GISEL-NEXT:    cmp w0, w1
 ; GISEL-NEXT:    fcsel s0, s0, s1, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
@@ -528,17 +531,19 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 ; SDISEL-NEXT:    cmp x0, #2
 ; SDISEL-NEXT:    ccmp x0, #4, #4, ne
 ; SDISEL-NEXT:    ccmp x1, #0, #0, eq
-; SDISEL-NEXT:    mov w8, #1
+; SDISEL-NEXT:    mov w8, #1 ; =0x1
 ; SDISEL-NEXT:    cinc x0, x8, eq
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: gccbug:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #2
+; GISEL-NEXT:    mov w8, #2 ; =0x2
+; GISEL-NEXT:    cmp x1, #0
+; GISEL-NEXT:    csinc x8, x8, xzr, eq
 ; GISEL-NEXT:    cmp x0, #2
-; GISEL-NEXT:    ccmp x0, #4, #4, ne
-; GISEL-NEXT:    ccmp x1, #0, #0, eq
-; GISEL-NEXT:    csinc x0, x8, xzr, eq
+; GISEL-NEXT:    csinc x9, x8, xzr, eq
+; GISEL-NEXT:    cmp x0, #4
+; GISEL-NEXT:    csel x0, x8, x9, eq
 ; GISEL-NEXT:    ret
   %cmp0 = icmp eq i64 %x1, 0
   %cmp1 = icmp eq i64 %x0, 2
@@ -552,14 +557,26 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 }
 
 define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
-; CHECK-LABEL: select_ororand:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmp w3, #4
-; CHECK-NEXT:    ccmp w2, #2, #0, gt
-; CHECK-NEXT:    ccmp w1, #13, #2, ge
-; CHECK-NEXT:    ccmp w0, #0, #4, ls
-; CHECK-NEXT:    csel w0, w3, wzr, eq
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_ororand:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    cmp w3, #4
+; SDISEL-NEXT:    ccmp w2, #2, #0, gt
+; SDISEL-NEXT:    ccmp w1, #13, #2, ge
+; SDISEL-NEXT:    ccmp w0, #0, #4, ls
+; SDISEL-NEXT:    csel w0, w3, wzr, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_ororand:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w3, #4
+; GISEL-NEXT:    csel w8, w3, wzr, gt
+; GISEL-NEXT:    cmp w2, #2
+; GISEL-NEXT:    csel w8, w8, wzr, lt
+; GISEL-NEXT:    cmp w1, #13
+; GISEL-NEXT:    cse...
[truncated]

llvmbot · 2023-12-01T17:40:55Z

@llvm/pr-subscribers-llvm-globalisel

Author: Thorsten Schütt (tschuett)

Changes

At least AArch64 and X86 use flag setting instructions for the implementation. Inspired by visitSELECT, as the first step, this implements combines based on conditions def`d by logical binary operations aka chaining.

On Neoverse V2, sub and flagset subs have different costs and pipelines.

test plan: ninja check-llvm-codegen

Patch is 48.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74116.diff

8 Files Affected:

(modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+7)
(modified) llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h (+41)
(modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+8-1)
(modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+75)
(modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir (+95)
(modified) llvm/test/CodeGen/AArch64/arm64-ccmp.ll (+401-214)
(modified) llvm/test/CodeGen/AArch64/dag-combine-select.ll (+12-32)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll (+10-4)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index ba72a3b71ffd70b..e938e2267fdd6b7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -812,6 +812,9 @@ class CombinerHelper {
   // Given a binop \p MI, commute operands 1 and 2.
   void applyCommuteBinOpOperands(MachineInstr &MI);
 
+  // combine selects.
+  bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -902,6 +905,10 @@ class CombinerHelper {
   /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0
   bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal,
                              Register FalseVal, BuildFnTy &MatchInfo);
+
+  /// Try to combine selects where the condition is def'd by logical binary
+  /// operators.
+  bool tryCombineSelectLogical(GSelect *Select, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6ab1d4550c51ca9..9641ad6bfd919d3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -558,6 +558,47 @@ class GVecReduce : public GenericMachineInstr {
   }
 };
 
+/// Represents a logical binary operator.
+class GLogicalBinOp : public GenericMachineInstr {
+public:
+  Register getLHSReg() const { return getOperand(1).getReg(); }
+  Register getRHSReg() const { return getOperand(2).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_AND:
+    case TargetOpcode::G_OR:
+    case TargetOpcode::G_XOR:
+      return true;
+    default:
+      return false;
+    };
+  }
+};
+
+/// Represents a G_AND.
+class GAnd : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_AND;
+  }
+};
+
+/// Represents a G_OR.
+class GOr : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_OR;
+  }
+};
+
+/// Represents a G_XOR.
+class GXor : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_XOR;
+  }
+};
 
 } // namespace llvm
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9a84ab80157f35f..7d27482c319d6fa 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1236,6 +1236,13 @@ def select_to_minmax: GICombineRule<
          [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def match_selects : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SELECT):$root,
+        [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1309,7 +1316,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
-    fsub_to_fneg, commute_constant_to_rhs]>;
+    fsub_to_fneg, commute_constant_to_rhs, match_selects]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c2a7c2d01188129..d83215dcc83b405 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6224,3 +6224,78 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
   MI.getOperand(2).setReg(LHSReg);
   Observer.changedInstr(MI);
 }
+
+bool CombinerHelper::tryCombineSelectLogical(GSelect *Select,
+                                             BuildFnTy &MatchInfo) {
+  MachineInstr *CondDef = MRI.getVRegDef(Select->getCondReg());
+  LLT DstTy = MRI.getType(Select->getReg(0));
+  uint32_t Flags = Select->getFlags();
+  Register DstReg = Select->getReg(0);
+
+  if (isa<GLogicalBinOp>(CondDef)) {
+    // The chaining might look like a pessimization.
+    // In fact, it is an optimization. One instruction def`ing
+    // the condiiton is
+    // removed and the conditions of the selects get simplified.
+    // In the next iteration, the combiner has to do less work to
+    // further simplify them.
+    if (isa<GAnd>(CondDef)) {
+      GAnd *And = cast<GAnd>(CondDef);
+      // transform: select (and (Cond0, Cond1)), X, Y
+      // to:        select Cond0, (select Cond1, X, Y), Y
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.setInstrAndDebugLoc(*Select);
+        Register Inner = MRI.createGenericVirtualRegister(DstTy);
+        B.buildSelect(Inner, And->getRHSReg(), Select->getTrueReg(),
+                      Select->getFalseReg(), Flags);
+        B.buildSelect(DstReg, And->getLHSReg(), Inner, Select->getFalseReg(),
+                      Flags);
+      };
+      return true;
+    } else if (isa<GOr>(CondDef)) {
+      GOr *Or = cast<GOr>(CondDef);
+      // transform: select (or (Cond0, Cond1)), X, Y
+      // to:        select Cond0, X, (select Cond1, X, Y)
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.setInstrAndDebugLoc(*Select);
+        Register Inner = MRI.createGenericVirtualRegister(DstTy);
+        B.buildSelect(Inner, Or->getRHSReg(), Select->getTrueReg(),
+                      Select->getFalseReg(), Flags);
+        B.buildSelect(DstReg, Or->getLHSReg(), Select->getTrueReg(), Inner,
+                      Flags);
+      };
+      return true;
+    } else if (isa<GXor>(CondDef)) {
+      GXor *Xor = cast<GXor>(CondDef);
+      auto IConstant =
+          getIConstantVRegValWithLookThrough(Xor->getRHSReg(), MRI);
+      if (IConstant && IConstant->Value == 1) {
+        // transform: select (xor Cond0, 1), X, Y
+        // to:        select Cond0 Y, X
+        // a xor 1 reads like not a.
+        MatchInfo = [=](MachineIRBuilder &B) {
+          B.setInstrAndDebugLoc(*Select);
+          B.buildSelect(DstReg, Xor->getLHSReg(), Select->getFalseReg(),
+                        Select->getTrueReg(), Flags);
+        };
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  GSelect *Select = cast<GSelect>(&MI);
+
+  // FIXME: support vector conditions
+  if (MRI.getType(Select->getCondReg()).isVector())
+    return false;
+
+  // combine selects where the condition is def'd by logical binary operations.
+  if (tryCombineSelectLogical(Select, MatchInfo))
+    return true;
+
+  return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 81d38a5b080470d..a60148ffe57162a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -298,3 +298,98 @@ body:             |
     %ext:_(s32) = G_ANYEXT %sel
     $w0 = COPY %ext(s32)
 ...
+---
+name:            and_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: and_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %y:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT %y(s1), %t, %f
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), [[SELECT]], %f
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %x:_(s1) = G_TRUNC %0
+    %y:_(s1) = G_TRUNC %1
+    %t:_(s1) = G_TRUNC %2
+    %f:_(s1) = G_TRUNC %3
+    %cond:_(s1) = G_AND %x, %y
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+name:            or_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: or_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %y:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT %y(s1), %t, %f
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %t, [[SELECT]]
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %x:_(s1) = G_TRUNC %0
+    %y:_(s1) = G_TRUNC %1
+    %t:_(s1) = G_TRUNC %2
+    %f:_(s1) = G_TRUNC %3
+    %cond:_(s1) = G_OR %x, %y
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+name:            xor_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: xor_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %f, %t
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %x:_(s1) = G_TRUNC %0
+    %t:_(s1) = G_TRUNC %1
+    %f:_(s1) = G_TRUNC %2
+    %one:_(s1) = G_CONSTANT i1 1
+    %cond:_(s1) = G_XOR %x, %one
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 821f6e403a27133..7bf5ac22d0ae3b0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -14,7 +14,7 @@ define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB0_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -42,7 +42,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT:  LBB1_2: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_different:
@@ -55,7 +55,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT:  LBB1_2: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sle i32 %a, 5
@@ -88,7 +88,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT:  LBB2_3: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_flagclobber:
@@ -106,7 +106,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT:  LBB2_3: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -144,7 +144,7 @@ define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB3_3: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -178,13 +178,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    ccmp w8, #16, #0, ge
 ; SDISEL-NEXT:    b.le LBB4_2
 ; SDISEL-NEXT:  ; %bb.1: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ; SDISEL-NEXT:  LBB4_2: ; %if.then
 ; SDISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: speculate_division:
@@ -194,13 +194,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    ccmp w8, #17, #0, gt
 ; GISEL-NEXT:    b.lt LBB4_2
 ; GISEL-NEXT:  ; %bb.1: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  LBB4_2: ; %if.then
 ; GISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %a, 0
@@ -230,13 +230,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
 ; SDISEL-NEXT:    fccmp s0, s1, #8, ge
 ; SDISEL-NEXT:    b.ge LBB5_2
 ; SDISEL-NEXT:  ; %bb.1: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ; SDISEL-NEXT:  LBB5_2: ; %if.then
 ; SDISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_fcmp:
@@ -248,13 +248,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
 ; GISEL-NEXT:    fccmp s0, s1, #8, gt
 ; GISEL-NEXT:    b.ge LBB5_2
 ; GISEL-NEXT:  ; %bb.1: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  LBB5_2: ; %if.then
 ; GISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %a, 0
@@ -318,7 +318,7 @@ define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB7_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -346,13 +346,13 @@ define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    cmp w1, #32
 ; CHECK-NEXT:    b.eq LBB8_3
 ; CHECK-NEXT:  ; %bb.2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB8_3: ; %if.then
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -380,7 +380,7 @@ define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB9_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -408,7 +408,7 @@ define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB10_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -466,10 +466,11 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_and:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #0, ne
-; GISEL-NEXT:    csel x0, x2, x3, lt
+; GISEL-NEXT:    csel x8, x2, x3, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel x0, x8, x3, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -488,10 +489,11 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_or:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
-; GISEL-NEXT:    csel x0, x2, x3, lt
+; GISEL-NEXT:    csel x8, x2, x3, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel x0, x2, x8, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -510,9 +512,10 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
 ;
 ; GISEL-LABEL: select_or_float:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
+; GISEL-NEXT:    fcsel s1, s0, s1, ne
+; GISEL-NEXT:    cmp w0, w1
 ; GISEL-NEXT:    fcsel s0, s0, s1, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
@@ -528,17 +531,19 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 ; SDISEL-NEXT:    cmp x0, #2
 ; SDISEL-NEXT:    ccmp x0, #4, #4, ne
 ; SDISEL-NEXT:    ccmp x1, #0, #0, eq
-; SDISEL-NEXT:    mov w8, #1
+; SDISEL-NEXT:    mov w8, #1 ; =0x1
 ; SDISEL-NEXT:    cinc x0, x8, eq
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: gccbug:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #2
+; GISEL-NEXT:    mov w8, #2 ; =0x2
+; GISEL-NEXT:    cmp x1, #0
+; GISEL-NEXT:    csinc x8, x8, xzr, eq
 ; GISEL-NEXT:    cmp x0, #2
-; GISEL-NEXT:    ccmp x0, #4, #4, ne
-; GISEL-NEXT:    ccmp x1, #0, #0, eq
-; GISEL-NEXT:    csinc x0, x8, xzr, eq
+; GISEL-NEXT:    csinc x9, x8, xzr, eq
+; GISEL-NEXT:    cmp x0, #4
+; GISEL-NEXT:    csel x0, x8, x9, eq
 ; GISEL-NEXT:    ret
   %cmp0 = icmp eq i64 %x1, 0
   %cmp1 = icmp eq i64 %x0, 2
@@ -552,14 +557,26 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 }
 
 define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
-; CHECK-LABEL: select_ororand:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmp w3, #4
-; CHECK-NEXT:    ccmp w2, #2, #0, gt
-; CHECK-NEXT:    ccmp w1, #13, #2, ge
-; CHECK-NEXT:    ccmp w0, #0, #4, ls
-; CHECK-NEXT:    csel w0, w3, wzr, eq
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_ororand:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    cmp w3, #4
+; SDISEL-NEXT:    ccmp w2, #2, #0, gt
+; SDISEL-NEXT:    ccmp w1, #13, #2, ge
+; SDISEL-NEXT:    ccmp w0, #0, #4, ls
+; SDISEL-NEXT:    csel w0, w3, wzr, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_ororand:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w3, #4
+; GISEL-NEXT:    csel w8, w3, wzr, gt
+; GISEL-NEXT:    cmp w2, #2
+; GISEL-NEXT:    csel w8, w8, wzr, lt
+; GISEL-NEXT:    cmp w1, #13
+; GISEL-NEXT:    cse...
[truncated]

llvmbot · 2023-12-01T17:40:55Z

@llvm/pr-subscribers-backend-aarch64

Author: Thorsten Schütt (tschuett)

Changes

At least AArch64 and X86 use flag setting instructions for the implementation. Inspired by visitSELECT, as the first step, this implements combines based on conditions def`d by logical binary operations aka chaining.

On Neoverse V2, sub and flagset subs have different costs and pipelines.

test plan: ninja check-llvm-codegen

Patch is 48.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74116.diff

8 Files Affected:

(modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+7)
(modified) llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h (+41)
(modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+8-1)
(modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+75)
(modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir (+95)
(modified) llvm/test/CodeGen/AArch64/arm64-ccmp.ll (+401-214)
(modified) llvm/test/CodeGen/AArch64/dag-combine-select.ll (+12-32)
(modified) llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll (+10-4)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index ba72a3b71ffd70b..e938e2267fdd6b7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -812,6 +812,9 @@ class CombinerHelper {
   // Given a binop \p MI, commute operands 1 and 2.
   void applyCommuteBinOpOperands(MachineInstr &MI);
 
+  // combine selects.
+  bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -902,6 +905,10 @@ class CombinerHelper {
   /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0
   bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal,
                              Register FalseVal, BuildFnTy &MatchInfo);
+
+  /// Try to combine selects where the condition is def'd by logical binary
+  /// operators.
+  bool tryCombineSelectLogical(GSelect *Select, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6ab1d4550c51ca9..9641ad6bfd919d3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -558,6 +558,47 @@ class GVecReduce : public GenericMachineInstr {
   }
 };
 
+/// Represents a logical binary operator.
+class GLogicalBinOp : public GenericMachineInstr {
+public:
+  Register getLHSReg() const { return getOperand(1).getReg(); }
+  Register getRHSReg() const { return getOperand(2).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_AND:
+    case TargetOpcode::G_OR:
+    case TargetOpcode::G_XOR:
+      return true;
+    default:
+      return false;
+    };
+  }
+};
+
+/// Represents a G_AND.
+class GAnd : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_AND;
+  }
+};
+
+/// Represents a G_OR.
+class GOr : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_OR;
+  }
+};
+
+/// Represents a G_XOR.
+class GXor : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_XOR;
+  }
+};
 
 } // namespace llvm
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9a84ab80157f35f..7d27482c319d6fa 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1236,6 +1236,13 @@ def select_to_minmax: GICombineRule<
          [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def match_selects : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SELECT):$root,
+        [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1309,7 +1316,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
-    fsub_to_fneg, commute_constant_to_rhs]>;
+    fsub_to_fneg, commute_constant_to_rhs, match_selects]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c2a7c2d01188129..d83215dcc83b405 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6224,3 +6224,78 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
   MI.getOperand(2).setReg(LHSReg);
   Observer.changedInstr(MI);
 }
+
+bool CombinerHelper::tryCombineSelectLogical(GSelect *Select,
+                                             BuildFnTy &MatchInfo) {
+  MachineInstr *CondDef = MRI.getVRegDef(Select->getCondReg());
+  LLT DstTy = MRI.getType(Select->getReg(0));
+  uint32_t Flags = Select->getFlags();
+  Register DstReg = Select->getReg(0);
+
+  if (isa<GLogicalBinOp>(CondDef)) {
+    // The chaining might look like a pessimization.
+    // In fact, it is an optimization. One instruction def`ing
+    // the condiiton is
+    // removed and the conditions of the selects get simplified.
+    // In the next iteration, the combiner has to do less work to
+    // further simplify them.
+    if (isa<GAnd>(CondDef)) {
+      GAnd *And = cast<GAnd>(CondDef);
+      // transform: select (and (Cond0, Cond1)), X, Y
+      // to:        select Cond0, (select Cond1, X, Y), Y
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.setInstrAndDebugLoc(*Select);
+        Register Inner = MRI.createGenericVirtualRegister(DstTy);
+        B.buildSelect(Inner, And->getRHSReg(), Select->getTrueReg(),
+                      Select->getFalseReg(), Flags);
+        B.buildSelect(DstReg, And->getLHSReg(), Inner, Select->getFalseReg(),
+                      Flags);
+      };
+      return true;
+    } else if (isa<GOr>(CondDef)) {
+      GOr *Or = cast<GOr>(CondDef);
+      // transform: select (or (Cond0, Cond1)), X, Y
+      // to:        select Cond0, X, (select Cond1, X, Y)
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.setInstrAndDebugLoc(*Select);
+        Register Inner = MRI.createGenericVirtualRegister(DstTy);
+        B.buildSelect(Inner, Or->getRHSReg(), Select->getTrueReg(),
+                      Select->getFalseReg(), Flags);
+        B.buildSelect(DstReg, Or->getLHSReg(), Select->getTrueReg(), Inner,
+                      Flags);
+      };
+      return true;
+    } else if (isa<GXor>(CondDef)) {
+      GXor *Xor = cast<GXor>(CondDef);
+      auto IConstant =
+          getIConstantVRegValWithLookThrough(Xor->getRHSReg(), MRI);
+      if (IConstant && IConstant->Value == 1) {
+        // transform: select (xor Cond0, 1), X, Y
+        // to:        select Cond0 Y, X
+        // a xor 1 reads like not a.
+        MatchInfo = [=](MachineIRBuilder &B) {
+          B.setInstrAndDebugLoc(*Select);
+          B.buildSelect(DstReg, Xor->getLHSReg(), Select->getFalseReg(),
+                        Select->getTrueReg(), Flags);
+        };
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  GSelect *Select = cast<GSelect>(&MI);
+
+  // FIXME: support vector conditions
+  if (MRI.getType(Select->getCondReg()).isVector())
+    return false;
+
+  // combine selects where the condition is def'd by logical binary operations.
+  if (tryCombineSelectLogical(Select, MatchInfo))
+    return true;
+
+  return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 81d38a5b080470d..a60148ffe57162a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -298,3 +298,98 @@ body:             |
     %ext:_(s32) = G_ANYEXT %sel
     $w0 = COPY %ext(s32)
 ...
+---
+name:            and_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: and_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %y:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT %y(s1), %t, %f
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), [[SELECT]], %f
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %x:_(s1) = G_TRUNC %0
+    %y:_(s1) = G_TRUNC %1
+    %t:_(s1) = G_TRUNC %2
+    %f:_(s1) = G_TRUNC %3
+    %cond:_(s1) = G_AND %x, %y
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+name:            or_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: or_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %y:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT %y(s1), %t, %f
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %t, [[SELECT]]
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %x:_(s1) = G_TRUNC %0
+    %y:_(s1) = G_TRUNC %1
+    %t:_(s1) = G_TRUNC %2
+    %f:_(s1) = G_TRUNC %3
+    %cond:_(s1) = G_OR %x, %y
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+name:            xor_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: xor_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %f, %t
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %x:_(s1) = G_TRUNC %0
+    %t:_(s1) = G_TRUNC %1
+    %f:_(s1) = G_TRUNC %2
+    %one:_(s1) = G_CONSTANT i1 1
+    %cond:_(s1) = G_XOR %x, %one
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 821f6e403a27133..7bf5ac22d0ae3b0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -14,7 +14,7 @@ define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB0_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -42,7 +42,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT:  LBB1_2: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_different:
@@ -55,7 +55,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT:  LBB1_2: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sle i32 %a, 5
@@ -88,7 +88,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT:  LBB2_3: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_flagclobber:
@@ -106,7 +106,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT:  LBB2_3: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -144,7 +144,7 @@ define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB3_3: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -178,13 +178,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    ccmp w8, #16, #0, ge
 ; SDISEL-NEXT:    b.le LBB4_2
 ; SDISEL-NEXT:  ; %bb.1: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ; SDISEL-NEXT:  LBB4_2: ; %if.then
 ; SDISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: speculate_division:
@@ -194,13 +194,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    ccmp w8, #17, #0, gt
 ; GISEL-NEXT:    b.lt LBB4_2
 ; GISEL-NEXT:  ; %bb.1: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  LBB4_2: ; %if.then
 ; GISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %a, 0
@@ -230,13 +230,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
 ; SDISEL-NEXT:    fccmp s0, s1, #8, ge
 ; SDISEL-NEXT:    b.ge LBB5_2
 ; SDISEL-NEXT:  ; %bb.1: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ; SDISEL-NEXT:  LBB5_2: ; %if.then
 ; SDISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_fcmp:
@@ -248,13 +248,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
 ; GISEL-NEXT:    fccmp s0, s1, #8, gt
 ; GISEL-NEXT:    b.ge LBB5_2
 ; GISEL-NEXT:  ; %bb.1: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  LBB5_2: ; %if.then
 ; GISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %a, 0
@@ -318,7 +318,7 @@ define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB7_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -346,13 +346,13 @@ define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    cmp w1, #32
 ; CHECK-NEXT:    b.eq LBB8_3
 ; CHECK-NEXT:  ; %bb.2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB8_3: ; %if.then
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -380,7 +380,7 @@ define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB9_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -408,7 +408,7 @@ define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB10_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -466,10 +466,11 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_and:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #0, ne
-; GISEL-NEXT:    csel x0, x2, x3, lt
+; GISEL-NEXT:    csel x8, x2, x3, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel x0, x8, x3, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -488,10 +489,11 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_or:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
-; GISEL-NEXT:    csel x0, x2, x3, lt
+; GISEL-NEXT:    csel x8, x2, x3, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel x0, x2, x8, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -510,9 +512,10 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
 ;
 ; GISEL-LABEL: select_or_float:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
+; GISEL-NEXT:    fcsel s1, s0, s1, ne
+; GISEL-NEXT:    cmp w0, w1
 ; GISEL-NEXT:    fcsel s0, s0, s1, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
@@ -528,17 +531,19 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 ; SDISEL-NEXT:    cmp x0, #2
 ; SDISEL-NEXT:    ccmp x0, #4, #4, ne
 ; SDISEL-NEXT:    ccmp x1, #0, #0, eq
-; SDISEL-NEXT:    mov w8, #1
+; SDISEL-NEXT:    mov w8, #1 ; =0x1
 ; SDISEL-NEXT:    cinc x0, x8, eq
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: gccbug:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #2
+; GISEL-NEXT:    mov w8, #2 ; =0x2
+; GISEL-NEXT:    cmp x1, #0
+; GISEL-NEXT:    csinc x8, x8, xzr, eq
 ; GISEL-NEXT:    cmp x0, #2
-; GISEL-NEXT:    ccmp x0, #4, #4, ne
-; GISEL-NEXT:    ccmp x1, #0, #0, eq
-; GISEL-NEXT:    csinc x0, x8, xzr, eq
+; GISEL-NEXT:    csinc x9, x8, xzr, eq
+; GISEL-NEXT:    cmp x0, #4
+; GISEL-NEXT:    csel x0, x8, x9, eq
 ; GISEL-NEXT:    ret
   %cmp0 = icmp eq i64 %x1, 0
   %cmp1 = icmp eq i64 %x0, 2
@@ -552,14 +557,26 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 }
 
 define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
-; CHECK-LABEL: select_ororand:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmp w3, #4
-; CHECK-NEXT:    ccmp w2, #2, #0, gt
-; CHECK-NEXT:    ccmp w1, #13, #2, ge
-; CHECK-NEXT:    ccmp w0, #0, #4, ls
-; CHECK-NEXT:    csel w0, w3, wzr, eq
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_ororand:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    cmp w3, #4
+; SDISEL-NEXT:    ccmp w2, #2, #0, gt
+; SDISEL-NEXT:    ccmp w1, #13, #2, ge
+; SDISEL-NEXT:    ccmp w0, #0, #4, ls
+; SDISEL-NEXT:    csel w0, w3, wzr, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_ororand:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w3, #4
+; GISEL-NEXT:    csel w8, w3, wzr, gt
+; GISEL-NEXT:    cmp w2, #2
+; GISEL-NEXT:    csel w8, w8, wzr, lt
+; GISEL-NEXT:    cmp w1, #13
+; GISEL-NEXT:    cse...
[truncated]

tschuett · 2023-12-01T19:48:16Z

llvm/test/CodeGen/AArch64/dag-combine-select.ll

+; CHECK-NEXT:    csel w10, w1, w8, eq
+; CHECK-NEXT:    str w8, [x9, :lo12:out]
+; CHECK-NEXT:    str w10, [x9, :lo12:out]
+; CHECK-NEXT:    ret


tschuett · 2023-12-01T19:48:48Z