-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GlobalIsel] Combine G_EXTRACT_VECTOR_ELT #85321
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: Thorsten Schütt (tschuett) Changespreliminary steps Patch is 20.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85321.diff 8 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9e8fc5d635c50a..d2f9d74bf7d61a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -815,6 +815,9 @@ class CombinerHelper {
/// Combine addos.
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Combine extract vector element.
+ bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6b03703192df91..11ec60fac3afb3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -739,6 +739,39 @@ class GOr : public GLogicalBinOp {
};
};
+/// Represents an extract vector element.
+class GExtractVectorElement : public GenericMachineInstr {
+public:
+ Register getVectorReg() const { return getOperand(1).getReg(); }
+ Register getIndexReg() const { return getOperand(2).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT;
+ }
+};
+
+/// Represents an insert vector element.
+class GInsertVectorElement : public GenericMachineInstr {
+public:
+ Register getVectorReg() const { return getOperand(1).getReg(); }
+ Register getElementReg() const { return getOperand(2).getReg(); }
+ Register getIndexReg() const { return getOperand(3).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+ }
+};
+
+/// Represents a freeze.
+class GFreeze : public GenericMachineInstr {
+public:
+ Register getSourceReg() const { return getOperand(1).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_FREEZE;
+ }
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 6980cbd04aeb1c..1c71e6b80db051 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1291,6 +1291,12 @@ def match_addos : GICombineRule<
[{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+def match_extract_of_element : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+ [{ return Helper.matchExtractVectorElement(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
// Combines concat operations
def concat_matchinfo : GIDefMatchData<"SmallVector<Register>">;
def combine_concat_vector : GICombineRule<
@@ -1374,7 +1380,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector, double_icmp_zero_and_or_combine, match_addos]>;
+ combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+ match_extract_of_element]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index 46e6c6df5998e5..54ac7f72011a6e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMGlobalISel
GlobalISel.cpp
Combiner.cpp
CombinerHelper.cpp
+ CombinerHelperVectorOps.cpp
GIMatchTableExecutor.cpp
GISelChangeObserver.cpp
IRTranslator.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index bee49dbd0f8380..bff18a57164b34 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6943,7 +6943,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
LLT DstTy = MRI.getType(Dst);
LLT CarryTy = MRI.getType(Carry);
- // We want do fold the [u|s]addo.
+ // We want to fold the [u|s]addo.
if (!MRI.hasOneNonDBGUse(Dst))
return false;
@@ -6957,7 +6957,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
- // We want do fold the [u|s]addo.
+ // We want to fold the [u|s]addo.
if (!MRI.hasOneNonDBGUse(Carry))
return false;
@@ -6992,7 +6992,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
- // Fold (addo x, 0) -> x, no borrow
+ // Fold (addo x, 0) -> x, no carry
if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
MatchInfo = [=](MachineIRBuilder &B) {
B.buildCopy(Dst, LHS);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
new file mode 100644
index 00000000000000..f1b42ed549636a
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -0,0 +1,174 @@
+//===- CombinerHelperVectorOps.cpp-----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include <optional>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+
+ GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI);
+
+ Register Dst = Extract->getReg(0);
+ Register Vector = Extract->getVectorReg();
+ Register Index = Extract->getIndexReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT VectorTy = MRI.getType(Vector);
+
+ // The vector register can be def'd by various ops that
+ // have vector as its type. They can all be used for
+ // constant folding, scalarizing, canonicalization, or
+ // combining based on symmetry.
+ //
+ // vector like ops
+ // * build vector
+ // * build vector trunc
+ // * shuffle vector
+ // * splat vector
+ // * concat vectors
+ // * insert/extract vector element
+ // * insert/extract subvector
+ // * vector loads
+ // * scalable vector loads
+ //
+ // compute like ops
+ // * binary ops
+ // * unary ops
+ // * exts and truncs
+ // * casts
+ // * fneg
+ // * select
+ // * phis
+ // * cmps
+ // * freeze
+ // * bitcast
+ // * undef
+
+ // Fold extractVectorElement(undef, undef) -> undef
+ if ((getOpcodeDef<GImplicitDef>(Vector, MRI) ||
+ getOpcodeDef<GImplicitDef>(Index, MRI)) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+ // If the Vector register is undef, then we cannot extract an element from
+ // it. An undef extract Index can be arbitrarily chosen to be an
+ // out-of-range index value, which would result in the instruction being
+ // poison.
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+ return true;
+ }
+
+ // We try to get the value of the Index register.
+ std::optional<ValueAndVReg> MaybeIndex =
+ getIConstantVRegValWithLookThrough(Index, MRI);
+ std::optional<APInt> IndexC = std::nullopt;
+
+ if (MaybeIndex)
+ IndexC = MaybeIndex->Value;
+
+ // Fold extractVectorElement(Vector, TOOLARGE) -> undef
+ if (IndexC && VectorTy.isFixedVector() &&
+ IndexC->uge(VectorTy.getNumElements()) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+ // For fixed-length vectors, it's invalid to extract out-of-range elements.
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+ return true;
+ }
+
+ // Fold extractVectorElement(freeze(FV), Index) ->
+ // freeze(extractVectorElement(FV, Index))
+ if (auto *Freeze = getOpcodeDef<GFreeze>(Vector, MRI)) {
+ if (MRI.hasOneNonDBGUse(Freeze->getReg(0)) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}})) {
+ // For G_FREEZE, the input and the output types are identical.
+ // Moving the freeze from the Vector into the front of the extract
+ // preserves the freeze semantics. We check above that
+ // the Index register is not undef.
+ // Furthermore, the Vector register
+ // becomes easier to analyze. A build vector
+ // could have been hidden behind the freeze.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Extract =
+ B.buildExtractVectorElement(DstTy, Freeze->getSourceReg(), Index);
+ B.buildFreeze(Dst, Extract);
+ };
+ return true;
+ }
+ }
+
+ // Fold extractVectorElement(insertVectorElement(_, Value, Index), Index) ->
+ // Value
+ if (auto *Insert = getOpcodeDef<GInsertVectorElement>(Vector, MRI)) {
+ if (Insert->getIndexReg() == Index) {
+ // There is no one-use check. We have to keep the insert.
+ // We only check for equality of the Index registers.
+ // The combine is independent of their constness.
+ // We try to insert Value and then immediately extract
+ // it from the same Index.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildCopy(Dst, Insert->getElementReg());
+ };
+ return true;
+ }
+ }
+
+ // Fold extractVectorElement(insertVectorElement(Vector, _, C1), C2),
+ // where C1 != C2
+ // -> extractVectorElement(Vector, C2)
+ if (IndexC) {
+ if (auto *Insert = getOpcodeDef<GInsertVectorElement>(Vector, MRI)) {
+ std::optional<ValueAndVReg> MaybeIndex =
+ getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI);
+ if (MaybeIndex && MaybeIndex->Value != *IndexC) {
+ // There is no one-use check. We have to keep the insert.
+ // When both Index registers are constants and not equal,
+ // we can look into the Vector register of the insert.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildExtractVectorElement(Dst, Insert->getVectorReg(), Index);
+ };
+ return true;
+ }
+ }
+ }
+
+ // Fold extractVectorElement(BuildVector(.., V, ...), IndexOfV) -> V
+ if (IndexC) {
+ if (auto *Build = getOpcodeDef<GBuildVector>(Vector, MRI)) {
+ EVT Ty(getMVTForLLT(VectorTy));
+ if (MRI.hasOneNonDBGUse(Build->getReg(0)) ||
+ getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) {
+ // There is a one-use check. There are more combines on build vectors.
+ // If the Index is constant, then we can extract the element from the
+ // given offset.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildCopy(Dst, Build->getSourceReg(IndexC->getLimitedValue()));
+ };
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index a2116ccc767112..37dc33330196a8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -192,8 +192,8 @@ body: |
...
---
+# This test checks that this combine runs after the insertvec->build_vector
name: extract_from_insert
-alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
@@ -203,8 +203,6 @@ frameInfo:
body: |
bb.1:
liveins: $x0, $x1
- ; This test checks that this combine runs after the insertvec->build_vector
- ; combine.
; CHECK-LABEL: name: extract_from_insert
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
@@ -247,3 +245,214 @@ body: |
RET_ReallyLR implicit $x0
...
+---
+name: extract_from_vector_undef
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: extract_from_vector_undef
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %extract:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $x0 = COPY %extract(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %vec:_(<2 x s64>) = G_IMPLICIT_DEF
+ %idx:_(s32) = G_CONSTANT i32 -2
+ %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+ $x0 = COPY %extract(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: extract_from_index_undef
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: extract_from_index_undef
+ ; CHECK: %extract:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $x0 = COPY %extract(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %vec:_(<2 x s64>) = COPY $q0
+ %idx:_(s32) = G_IMPLICIT_DEF
+ %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+ $x0 = COPY %extract(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: extract_from_index_too_large
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: extract_from_index_too_large
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %extract:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $x0 = COPY %extract(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %vec:_(<2 x s64>) = COPY $q0
+ %idx:_(s32) = G_CONSTANT i32 3000
+ %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+ $x0 = COPY %extract(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: extract_with_freeze
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: extract_with_freeze
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: %idx:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+ ; CHECK-NEXT: %extract:_(s64) = G_FREEZE [[EVEC]]
+ ; CHECK-NEXT: $x0 = COPY %extract(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %vec:_(<2 x s64>) = COPY $q0
+ %idx:_(s32) = COPY $w1
+ %fvec:_(<2 x s64>) = G_FREEZE %vec
+ %extract:_(s64) = G_EXTRACT_VECTOR_ELT %fvec(<2 x s64>), %idx(s32)
+ $x0 = COPY %extract(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: extract_from_insert_symmetry
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: extract_from_insert_symmetry
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %element:_(s64) = COPY $x1
+ ; CHECK-NEXT: $x0 = COPY %element(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %vec:_(<2 x s64>) = COPY $q0
+ %idx:_(s32) = COPY $w1
+ %element:_(s64) = COPY $x1
+ %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32)
+ %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx(s32)
+ $x0 = COPY %extract(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: extract_from_insert_with_different_consts
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: extract_from_insert_with_different_consts
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: %idx2:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx2(s32)
+ ; CHECK-NEXT: $x0 = COPY %extract(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %vec:_(<2 x s64>) = COPY $q0
+ %idx:_(s32) = G_CONSTANT i32 0
+ %idx2:_(s32) = G_CONSTANT i32 1
+ %element:_(s64) = COPY $x1
+ %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32)
+ %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx2(s32)
+ $x0 = COPY %extract(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: extract_from_build_vector_non_const
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: extract_from_build_vector_non_const
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %idx:_(s32) = COPY $w0
+ ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
+ ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+ ; CHECK-NEXT: $x0 = COPY %extract(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %vec:_(<2 x s64>) = COPY $q0
+ %idx:_(s32) = COPY $w0
+ %arg1:_(s64) = COPY $x0
+ %arg2:_(s64) = COPY $x1
+ %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+ $x0 = COPY %extract(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: extract_from_build_vector_const
+alignment: 4
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: extract_from_build_vector_const
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+ ; CHECK-NEXT: $x0 = COPY %arg1(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %vec:_(<2 x s64>) = COPY $q0
+ %idx:_(s32) = G_CONSTANT i32 0
+ %arg1:_(s64) = COPY $x0
+ %arg2:_(s64) = COPY $x1
+ %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+ $x0 = COPY %extract(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index c5c525a15ad9be..504222e0036e22 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -25,20 +25,9 @@ entry:
}
define i64 @extract_v2i64_undef_vector(<2 x i64> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v2i64_undef_vector:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: extract_v2i64_undef_vector:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub sp, sp, #16
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: mov w9, w0
-; CHECK-GI-NEXT: mov x8, sp
-; CHECK-GI-NEXT: and x9, x9, #0x1
-; CHECK-GI-NEXT: ldr x0, [x8, x9, lsl #3]
-; CHECK-GI-NEXT: add sp, sp, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: extract_v2i64_undef...
[truncated]
|
I will add the complex combines as separate patterns after this combine. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should avoid unnecessarily calling getOpcodeDef()
on a register. Instead do a lookup of the def once with getDefIgnoringCopies()
and then use dyn_cast<GWhatever>
We have now named opcodes for extract/insert subvector, but we fail to select the intrinsics:
|
As I already said: extractVectorElement(fcmp, index) will have a low hitrate and can/will be a unique pattern. extractVectorElement(freeze, index) might be a border line case. I don't want to have two patterns for extractVectorElement(insertVectorElement, index). |
6c23ad3
to
e4ae6eb
Compare
I reworked the combines. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel that it's a bit unnecessary to split the helper into vector and non-vector combines. If we do so, which sorts of combines are we envisaging being here? Maybe we should split them as part of a series of changes to reorganize the entire Helper and keep this change to just add the new functionality?
There is precedence in InstCombine. I also want to add combines for at least G_INSERT_VECTOR_ELT, G_INSERT_SUBVECTOR, G_EXTRACT_SUBVECTOR. |
void CombinerHelper::applyBuildFnMO(const MachineOperand &MO, | ||
BuildFnTy &MatchInfo) { | ||
MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); | ||
Builder.setInstrAndDebugLoc(*Root); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Setting the insert point should now be unnecessary in apply functions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rebasing made it end in #87115
preliminary steps