
[GlobalIsel] Combine G_EXTRACT_VECTOR_ELT #85321

Merged · 9 commits · Apr 2, 2024

Conversation

tschuett (Member) commented:

preliminary steps

llvmbot (Collaborator) commented Mar 14, 2024:

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: Thorsten Schütt (tschuett)

Changes

preliminary steps


Patch is 20.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85321.diff

8 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3)
  • (modified) llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h (+33)
  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+8-1)
  • (modified) llvm/lib/CodeGen/GlobalISel/CMakeLists.txt (+1)
  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+3-3)
  • (added) llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp (+174)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir (+212-3)
  • (modified) llvm/test/CodeGen/AArch64/extract-vector-elt.ll (+3-15)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9e8fc5d635c50a..d2f9d74bf7d61a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -815,6 +815,9 @@ class CombinerHelper {
   /// Combine addos.
   bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Combine extract vector element.
+  bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6b03703192df91..11ec60fac3afb3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -739,6 +739,39 @@ class GOr : public GLogicalBinOp {
   };
 };
 
+/// Represents an extract vector element.
+class GExtractVectorElement : public GenericMachineInstr {
+public:
+  Register getVectorReg() const { return getOperand(1).getReg(); }
+  Register getIndexReg() const { return getOperand(2).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT;
+  }
+};
+
+/// Represents an insert vector element.
+class GInsertVectorElement : public GenericMachineInstr {
+public:
+  Register getVectorReg() const { return getOperand(1).getReg(); }
+  Register getElementReg() const { return getOperand(2).getReg(); }
+  Register getIndexReg() const { return getOperand(3).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+  }
+};
+
+/// Represents a freeze.
+class GFreeze : public GenericMachineInstr {
+public:
+  Register getSourceReg() const { return getOperand(1).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_FREEZE;
+  }
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 6980cbd04aeb1c..1c71e6b80db051 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1291,6 +1291,12 @@ def match_addos : GICombineRule<
         [{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
 
+def match_extract_of_element : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+        [{ return Helper.matchExtractVectorElement(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
 // Combines concat operations
 def concat_matchinfo : GIDefMatchData<"SmallVector<Register>">;
 def combine_concat_vector : GICombineRule<
@@ -1374,7 +1380,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos]>;
+    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+    match_extract_of_element]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index 46e6c6df5998e5..54ac7f72011a6e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMGlobalISel
   GlobalISel.cpp
   Combiner.cpp
   CombinerHelper.cpp
+  CombinerHelperVectorOps.cpp
   GIMatchTableExecutor.cpp
   GISelChangeObserver.cpp
   IRTranslator.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index bee49dbd0f8380..bff18a57164b34 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6943,7 +6943,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
   LLT DstTy = MRI.getType(Dst);
   LLT CarryTy = MRI.getType(Carry);
 
-  // We want do fold the [u|s]addo.
+  // We want to fold the [u|s]addo.
   if (!MRI.hasOneNonDBGUse(Dst))
     return false;
 
@@ -6957,7 +6957,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
     return true;
   }
 
-  // We want do fold the [u|s]addo.
+  // We want to fold the [u|s]addo.
   if (!MRI.hasOneNonDBGUse(Carry))
     return false;
 
@@ -6992,7 +6992,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
     return true;
   }
 
-  // Fold (addo x, 0) -> x, no borrow
+  // Fold (addo x, 0) -> x, no carry
   if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
     MatchInfo = [=](MachineIRBuilder &B) {
       B.buildCopy(Dst, LHS);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
new file mode 100644
index 00000000000000..f1b42ed549636a
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -0,0 +1,174 @@
+//===- CombinerHelperVectorOps.cpp-----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include <optional>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
+                                               BuildFnTy &MatchInfo) {
+
+  GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI);
+
+  Register Dst = Extract->getReg(0);
+  Register Vector = Extract->getVectorReg();
+  Register Index = Extract->getIndexReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT VectorTy = MRI.getType(Vector);
+
+  // The vector register can be def'd by various ops that
+  // have vector as its type. They can all be used for
+  // constant folding, scalarizing, canonicalization, or
+  // combining based on symmetry.
+  //
+  // vector like ops
+  // * build vector
+  // * build vector trunc
+  // * shuffle vector
+  // * splat vector
+  // * concat vectors
+  // * insert/extract vector element
+  // * insert/extract subvector
+  // * vector loads
+  // * scalable vector loads
+  //
+  // compute like ops
+  // * binary ops
+  // * unary ops
+  //  * exts and truncs
+  //  * casts
+  //  * fneg
+  // * select
+  // * phis
+  // * cmps
+  // * freeze
+  // * bitcast
+  // * undef
+
+  // Fold extractVectorElement(undef, undef) -> undef
+  if ((getOpcodeDef<GImplicitDef>(Vector, MRI) ||
+       getOpcodeDef<GImplicitDef>(Index, MRI)) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+    // If the Vector register is undef, then we cannot extract an element from
+    // it. An undef extract Index can be arbitrarily chosen to be an
+    // out-of-range index value, which would result in the instruction being
+    // poison.
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+    return true;
+  }
+
+  // We try to get the value of the Index register.
+  std::optional<ValueAndVReg> MaybeIndex =
+      getIConstantVRegValWithLookThrough(Index, MRI);
+  std::optional<APInt> IndexC = std::nullopt;
+
+  if (MaybeIndex)
+    IndexC = MaybeIndex->Value;
+
+  // Fold extractVectorElement(Vector, TOOLARGE) -> undef
+  if (IndexC && VectorTy.isFixedVector() &&
+      IndexC->uge(VectorTy.getNumElements()) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+    // For fixed-length vectors, it's invalid to extract out-of-range elements.
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+    return true;
+  }
+
+  // Fold extractVectorElement(freeze(FV), Index) ->
+  //     freeze(extractVectorElement(FV, Index))
+  if (auto *Freeze = getOpcodeDef<GFreeze>(Vector, MRI)) {
+    if (MRI.hasOneNonDBGUse(Freeze->getReg(0)) &&
+        isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}})) {
+      // For G_FREEZE, the input and the output types are identical.
+      // Moving the freeze from the Vector into the front of the extract
+      // preserves the freeze semantics. We check above that
+      // the Index register is not undef.
+      // Furthermore, the Vector register
+      // becomes easier to analyze. A build vector
+      // could have been hidden behind the freeze.
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto Extract =
+            B.buildExtractVectorElement(DstTy, Freeze->getSourceReg(), Index);
+        B.buildFreeze(Dst, Extract);
+      };
+      return true;
+    }
+  }
+
+  // Fold extractVectorElement(insertVectorElement(_, Value, Index), Index) ->
+  // Value
+  if (auto *Insert = getOpcodeDef<GInsertVectorElement>(Vector, MRI)) {
+    if (Insert->getIndexReg() == Index) {
+      // There is no one-use check. We have to keep the insert.
+      // We only check for equality of the Index registers.
+      // The combine is independent of their constness.
+      // We try to insert Value and then immediately extract
+      // it from the same Index.
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildCopy(Dst, Insert->getElementReg());
+      };
+      return true;
+    }
+  }
+
+  // Fold extractVectorElement(insertVectorElement(Vector, _, C1), C2),
+  // where C1 != C2
+  // -> extractVectorElement(Vector, C2)
+  if (IndexC) {
+    if (auto *Insert = getOpcodeDef<GInsertVectorElement>(Vector, MRI)) {
+      std::optional<ValueAndVReg> MaybeIndex =
+          getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI);
+      if (MaybeIndex && MaybeIndex->Value != *IndexC) {
+        // There is no one-use check. We have to keep the insert.
+        // When both Index registers are constants and not equal,
+        // we can look into the Vector register of the insert.
+        MatchInfo = [=](MachineIRBuilder &B) {
+          B.buildExtractVectorElement(Dst, Insert->getVectorReg(), Index);
+        };
+        return true;
+      }
+    }
+  }
+
+  // Fold extractVectorElement(BuildVector(.., V, ...), IndexOfV) -> V
+  if (IndexC) {
+    if (auto *Build = getOpcodeDef<GBuildVector>(Vector, MRI)) {
+      EVT Ty(getMVTForLLT(VectorTy));
+      if (MRI.hasOneNonDBGUse(Build->getReg(0)) ||
+          getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) {
+        // There is a one-use check. There are more combines on build vectors.
+        // If the Index is constant, then we can extract the element from the
+        // given offset.
+        MatchInfo = [=](MachineIRBuilder &B) {
+          B.buildCopy(Dst, Build->getSourceReg(IndexC->getLimitedValue()));
+        };
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index a2116ccc767112..37dc33330196a8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -192,8 +192,8 @@ body:             |
 
 ...
 ---
+# This test checks that this combine runs after the insertvec->build_vector
 name:            extract_from_insert
-alignment:       4
 tracksRegLiveness: true
 liveins:
   - { reg: '$x0' }
@@ -203,8 +203,6 @@ frameInfo:
 body:             |
   bb.1:
     liveins: $x0, $x1
-    ; This test checks that this combine runs after the insertvec->build_vector
-    ; combine.
     ; CHECK-LABEL: name: extract_from_insert
     ; CHECK: liveins: $x0, $x1
     ; CHECK-NEXT: {{  $}}
@@ -247,3 +245,214 @@ body:             |
     RET_ReallyLR implicit $x0
 
 ...
+---
+name:            extract_from_vector_undef
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_vector_undef
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %extract:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = G_IMPLICIT_DEF
+    %idx:_(s32) = G_CONSTANT i32 -2
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_index_undef
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: extract_from_index_undef
+    ; CHECK: %extract:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_IMPLICIT_DEF
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_index_too_large
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_index_too_large
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %extract:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_CONSTANT i32 3000
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_with_freeze
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_with_freeze
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: %idx:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+    ; CHECK-NEXT: %extract:_(s64) = G_FREEZE [[EVEC]]
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = COPY $w1
+    %fvec:_(<2 x s64>) = G_FREEZE %vec
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %fvec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_insert_symmetry
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_insert_symmetry
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %element:_(s64) = COPY $x1
+    ; CHECK-NEXT: $x0 = COPY %element(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = COPY $w1
+    %element:_(s64) = COPY $x1
+    %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_insert_with_different_consts
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_insert_with_different_consts
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: %idx2:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx2(s32)
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_CONSTANT i32 0
+    %idx2:_(s32) = G_CONSTANT i32 1
+    %element:_(s64) = COPY $x1
+    %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx2(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_build_vector_non_const
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_build_vector_non_const
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %idx:_(s32) = COPY $w0
+    ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
+    ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = COPY $w0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_build_vector_const
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_build_vector_const
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: $x0 = COPY %arg1(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_CONSTANT i32 0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index c5c525a15ad9be..504222e0036e22 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -25,20 +25,9 @@ entry:
 }
 
 define i64 @extract_v2i64_undef_vector(<2 x i64> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v2i64_undef_vector:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: extract_v2i64_undef_vector:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sub sp, sp, #16
-; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT:    mov w9, w0
-; CHECK-GI-NEXT:    mov x8, sp
-; CHECK-GI-NEXT:    and x9, x9, #0x1
-; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
-; CHECK-GI-NEXT:    add sp, sp, #16
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: extract_v2i64_undef...
[truncated]

@tschuett
Copy link
Member Author

I will add the complex combines as separate patterns after this combine.

aemerson (Contributor) left a comment:

We should avoid unnecessarily calling getOpcodeDef() on a register. Instead do a lookup of the def once with getDefIgnoringCopies() and then use dyn_cast<GWhatever>
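The pattern the reviewer is suggesting can be sketched as follows. This is an illustrative mock with stand-in types, not the real LLVM headers: the `MachineInstr`, `classof()`, and `dyn_cast` below are simplified stand-ins for LLVM's RTTI machinery, and the opcodes are arbitrary. The point it demonstrates is resolving the defining instruction once (as `getDefIgnoringCopies` would) and then classifying the cached pointer with cheap casts, instead of re-walking the def chain via `getOpcodeDef<>()` for each candidate class.

```cpp
#include <cstdint>

// Stand-in for llvm::MachineInstr; only the opcode matters for this sketch.
struct MachineInstr {
  unsigned Opcode = 0;
};

// LLVM-style classof() hooks drive the cast; opcodes here are arbitrary.
struct GInsertVectorElement : MachineInstr {
  static bool classof(const MachineInstr *MI) { return MI->Opcode == 1; }
};
struct GBuildVector : MachineInstr {
  static bool classof(const MachineInstr *MI) { return MI->Opcode == 2; }
};

// Minimal dyn_cast: returns the derived pointer if classof() accepts, else null.
template <typename T> T *dyn_cast(MachineInstr *MI) {
  return T::classof(MI) ? static_cast<T *>(MI) : nullptr;
}

// One def lookup up front (done by the caller), then a chain of cheap casts
// on the cached pointer -- no repeated walks over the def chain.
int classifyDef(MachineInstr *Def) {
  if (dyn_cast<GInsertVectorElement>(Def))
    return 1; // insert-vector-element fold candidates
  if (dyn_cast<GBuildVector>(Def))
    return 2; // build-vector fold candidates
  return 0;   // no combine applies
}
```

In the real combiner the equivalent would be a single `getDefIgnoringCopies(Vector, MRI)` followed by `dyn_cast<GInsertVectorElement>`, `dyn_cast<GBuildVector>`, etc., on the result.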

(Two review threads on llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp were marked resolved.)
tschuett (Member, Author) commented:

We now have named opcodes for extract/insert subvector, but we fail to select the intrinsics:

%vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)

tschuett (Member, Author) commented:

As I already said: extractVectorElement(fcmp, index) will have a low hit rate and can/will be a unique pattern. extractVectorElement(freeze, index) might be a borderline case. I don't want to have two patterns for extractVectorElement(insertVectorElement, index).

tschuett force-pushed the gisel-extract-vector-element0 branch from 6c23ad3 to e4ae6eb on March 21, 2024, 07:36.
tschuett (Member, Author) commented:

I reworked the combines.

aemerson (Contributor) left a comment:

I feel that it's a bit unnecessary to split the helper into vector and non-vector combines. If we do so, which sorts of combines are we envisaging being here? Maybe we should split them as part of a series of changes to reorganize the entire Helper and keep this change to just add the new functionality?

tschuett (Member, Author) commented:

There is precedent in InstCombine:
https://github.com/llvm/llvm-project/tree/main/llvm/lib/Transforms/InstCombine
I could also envision CombinerHelperInteger, CombinerHelperPhi, ...
The motivation was modularity; as the combiner grows, we can still use GitHub for git blame.

I also want to add combines for at least G_INSERT_VECTOR_ELT, G_INSERT_SUBVECTOR, G_EXTRACT_SUBVECTOR.

tschuett merged commit 8bb9443 into llvm:main on Apr 2, 2024 (4 checks passed).
tschuett deleted the gisel-extract-vector-element0 branch on April 2, 2024, 07:01.
void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
                                    BuildFnTy &MatchInfo) {
  MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
  Builder.setInstrAndDebugLoc(*Root);
A Contributor commented on this hunk:

Setting the insert point should now be unnecessary in apply functions

tschuett (Member, Author) replied:
Rebasing moved it into #87115.
