[GlobalIsel] Combine ADDO #82927
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel

Author: Thorsten Schütt (tschuett)

Changes: Perform the requested arithmetic and produce a carry output in addition to the normal result. Clang has these as builtins (__builtin_add_overflow_p). The middle end has intrinsics for them (sadd_with_overflow). AArch64: ADDS (add and set flags). On Neoverse V2, they run at half the throughput of basic arithmetic and are limited to a subset of the pipelines.

Patch is 145.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/82927.diff

13 Files Affected:
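For reference, a minimal standalone example of the builtin family the description mentions; Clang lowers these to the llvm.[su]add.with.overflow intrinsics. This uses __builtin_add_overflow, the store-and-report variant:

```c++
#include <climits>
#include <cstdio>

int main() {
  int Sum = 0;
  // INT_MAX + 1 overflows a signed int: the builtin returns true and
  // stores the wrapped result in Sum.
  bool Overflowed = __builtin_add_overflow(INT_MAX, 1, &Sum);
  printf("sum=%d overflowed=%d\n", Sum, Overflowed);
}
```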
For @uaddo.select.i64, known bits seems to fail.
For AMDGPU, there seems to be a lot of noise and some size improvements.
For AArch64, when the condition register of a scalar select is def'd by an overflow op, we could select fewer instructions.
/// Represents overflowing add operations.
/// G_UADDO, G_SADDO
class GAddCarryOut : public GBinOpCarryOut {
There is an odd interference with the more general GAddSubCarryOut. Do you really need this class?
The common pattern is to assert that only the expected opcode is in MI. I use cast<GAddCarryOut>; I don't want an unnoticed sub to slip in.
GAddSubCarryOut has an isSub() method for that purpose.
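For illustration, a minimal sketch of the classof gating being discussed, assuming the usual GenericMachineInstrs.h conventions (not the verbatim upstream definition):

```c++
// Sketch: classof restricts which opcodes cast<GAddCarryOut> accepts,
// so an unexpected G_USUBO/G_SSUBO asserts instead of slipping through.
// (Assumes the llvm/CodeGen/GlobalISel/GenericMachineInstrs.h context.)
class GAddCarryOut : public GBinOpCarryOut {
public:
  static bool classof(const MachineInstr *MI) {
    switch (MI->getOpcode()) {
    case TargetOpcode::G_UADDO:
    case TargetOpcode::G_SADDO:
      return true;
    default:
      return false;
    }
  }
};
// By contrast, cast<GAddSubCarryOut>(&MI) also accepts the sub opcodes
// and would require a separate Add->isSub() check.
```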
// We want do fold the [u|s]addo.
if (!MRI.hasOneNonDBGUse(Carry))
It is not obvious why multiple uses prevent folding / make it unprofitable. Could you clarify the comment?
Same for Dst above.
Sure, will do. If there is more than one use, then we would be generating new instructions while keeping the old ones.
(match (wip_match_opcode G_SADDO, G_UADDO):$root,
   [{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
I don't know about the current direction, but if we're going to tablegen as much as possible, then we would need separate rules for each of the transformations (constant folding / swapping constant to RHS / replacing with G_ADD etc.).
early exit after RHS, then LHS
We can still try the known bits optimizations, even when both are std::nullopt.
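A rough sketch of the control flow under discussion; getIConstantVRegVal and the variable names are assumptions based on the surrounding snippets:

```c++
// Hypothetical shape of the operand probing: no early exit when both
// operands are non-constant, because the known-bits folds can still apply.
std::optional<APInt> MaybeLHS = getIConstantVRegVal(LHS, MRI);
std::optional<APInt> MaybeRHS = getIConstantVRegVal(RHS, MRI);
if (MaybeLHS && MaybeRHS) {
  // Constant-fold both the result and the carry.
} else if (MaybeLHS) {
  // Canonicalize: move the constant to the RHS.
}
// Fall through: known-bits reasoning handles the std::nullopt cases.
```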
Replacing this line with:

GAddSubCarryOut *Add = cast<GAddSubCarryOut>(&MI);

seems dangerous and defeats the purpose of the assert in the cast.
could be moved to pure tablegen as a separate pattern
I removed bit width and another else-after-return.
LGTM. I think it would be better to split these different combines into separate tablegen defined roots
Maybe try some non-s1 typed cases
LLT DstTy = MRI.getType(Dst);
LLT CarryTy = MRI.getType(Carry);
// We want do fold the [u|s]addo. |
Typo "want to", but the comment seems unrelated to the code. Why would multiple uses of the top level instruction prevent combining?
If the result Dst has multiple uses, then we cannot replace it.
Yes we can and we should. The combine will still be correct and beneficial. You can add tests for the multiple-use case.
As a general rule, if you are matching a pattern, you only need one-use checks for the inner nodes in the pattern, not the outer node. The reason for the checks is to ensure that when you remove the outer node, the inner nodes also get removed.
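A hypothetical two-node match makes the rule concrete (InnerDstReg is an invented placeholder):

```c++
// Matching Outer(Inner(x, C1), C2): only Inner's result needs a
// one-use check, so that rewriting Outer actually lets Inner die.
if (!MRI.hasOneNonDBGUse(InnerDstReg))
  return false; // folding would keep the inner node alive
// No use check on Outer itself: replacing its defs is correct and
// profitable no matter how many users they have.
```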
return true;
}
// We want do fold the [u|s]addo. |
Likewise.
B.buildConstant(Dst, Result);
B.buildConstant(Carry, Overflow);
In the vector-typed case don't you need to build new splat constants here? The patch is missing vector-typed tests for all these folds.
AArch64 does not support vectorized overflow ops. Under the hood, buildConstant builds scalars or build vectors. Support for G_SPLAT_VECTOR is still missing.
> AArch64 does not support vectorized overflow ops.

You should still add the tests. Your combine-overflow.mir test runs pre-legalization, so any MIR should be allowed there.

> buildConstant builds under the hood scalars or build vectors.

Ah! I didn't know that, thanks.
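A simplified sketch of the splatting behavior just described; the real MachineIRBuilder::buildConstant logic differs in its details:

```c++
// Sketch: for a fixed-vector destination, emit one scalar G_CONSTANT
// and splat it with G_BUILD_VECTOR; scalars go straight through.
LLT DstTy = MRI.getType(Dst);
if (DstTy.isFixedVector()) {
  auto Elt = B.buildConstant(DstTy.getElementType(), Result);
  SmallVector<Register> Ops(DstTy.getNumElements(), Elt.getReg(0));
  B.buildBuildVector(Dst, Ops);
} else {
  B.buildConstant(Dst, Result);
}
```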
return true;
}
// Fold (addo x, 0) -> x, no borrow |
Nit: "carry" not "borrow"
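For context, the fold being commented on has roughly this shape (hand-simplified; names follow the surrounding snippets):

```c++
// addo(x, 0) can never set the carry/overflow bit: forward x to Dst
// and materialize a false carry.
if (MaybeRHS && MaybeRHS->isZero()) {
  MatchInfo = [=](MachineIRBuilder &B) {
    B.buildCopy(Dst, LHS);
    B.buildConstant(Carry, 0);
  };
  return true;
}
```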
((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
Suggested change:
- ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
-  (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
+ AddLHS->getFlag(IsSigned ? MachineInstr::MIFlag::NoSWrap : MachineInstr::MIFlag::NoUWrap)) {
bool Overflow;
APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
                      : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
I don't think you need to check isConstantLegalOrBeforeLegalizer(DstTy), because it's the same type as MaybeRHS and MaybeAddRHS, which we already know are constants.
MatchInfo = [=](MachineIRBuilder &B) {
  B.buildSAddo(Dst, Carry, RHS, LHS);
};
return true;
Nit: personally I think early return hurts symmetry here. I would prefer if/else followed by a single "return".
The anti-symmetry is a result of another else-after-return violation.
Ping @tschuett, please address post-commit review. At least: