-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[InstCombine] Constant fold binops through vector.insert
#164624
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This patch improves constant folding through `llvm.vector.insert`. It does not change anything for fixed-length vectors (which can already be folded to ConstantVectors for these cases), but folds scalable vectors that otherwise would not be folded. These folds preserve the destination vector (which could be undef or poison), giving targets more freedom in lowering the operations.
@llvm/pr-subscribers-llvm-transforms Author: Benjamin Maxwell (MacDue) ChangesThis patch improves constant folding through These folds preserve the destination vector (which could be undef or poison), giving targets more freedom in lowering the operations. Full diff: https://github.com/llvm/llvm-project/pull/164624.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3f11cae143b81..05e8673b25433 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2323,6 +2323,32 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
return ConstantVector::get(NewVecC);
}
+// Match a vector.insert where both the destination and subvector are constant.
+static bool matchConstantSubVector(Value *V, Constant *&Dest,
+ Constant *&SubVector, Value *&Idx) {
+ return match(V, m_Intrinsic<Intrinsic::vector_insert>(
+ m_Constant(Dest), m_Constant(SubVector), m_Value(Idx)));
+}
+
+static Constant *matchConstantSplat(Value *V) {
+ Constant *C;
+ if (match(V, m_Constant(C)))
+ return C->getSplatValue();
+ return nullptr;
+}
+
+// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
+static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
+ Constant *Splat, bool SplatLHS,
+ const DataLayout &DL) {
+ ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
+ Constant *LHS = ConstantVector::getSplat(EC, Splat);
+ Constant *RHS = Vector;
+ if (!SplatLHS)
+ std::swap(LHS, RHS);
+ return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
+}
+
Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
if (!isa<VectorType>(Inst.getType()))
return nullptr;
@@ -2334,6 +2360,36 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
assert(cast<VectorType>(RHS->getType())->getElementCount() ==
cast<VectorType>(Inst.getType())->getElementCount());
+ auto foldConstantsThroughSubVectorInsert =
+ [&](Constant *Dest, Value *DestIdx, Type *SubVecType, Constant *SubVector,
+ Constant *Splat, bool SplatLHS) -> Instruction * {
+ SubVector =
+ constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
+ Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
+ if (!SubVector || !Dest)
+ return nullptr;
+ auto *InsertVector =
+ Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, DestIdx);
+ InsertVector->removeFromParent();
+ return InsertVector;
+ };
+
+ // If one operand is a constant splat and the other operand is a
+ // `vector.insert` where both the destination and subvector are constant,
+ // apply the operation to both the destination and subvector, returning a new
+ // constant `vector.insert`. This helps constant folding for scalable vectors.
+ for (bool SwapOperands : {false, true}) {
+ Value *Idx, *MaybeSubVector = LHS, *MaybeSplat = RHS;
+ if (SwapOperands)
+ std::swap(MaybeSplat, MaybeSubVector);
+ Constant *SubVector, *Dest, *Splat;
+ if (matchConstantSubVector(MaybeSubVector, Dest, SubVector, Idx) &&
+ (Splat = matchConstantSplat(MaybeSplat)))
+ return foldConstantsThroughSubVectorInsert(
+ Dest, Idx, SubVector->getType(), SubVector, Splat,
+ /*SplatLHS=*/SwapOperands);
+ }
+
// If both operands of the binop are vector concatenations, then perform the
// narrow binop on each pair of the source operands followed by concatenation
// of the results.
diff --git a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
new file mode 100644
index 0000000000000..aa1ac1eac3ba0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine %s | FileCheck %s
+
+define <vscale x 4 x i32> @insert_div() {
+; CHECK-LABEL: @insert_div(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 3), i64 0)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
+ %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+ ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_div_splat_lhs() {
+; CHECK-LABEL: @insert_div_splat_lhs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 0)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
+ %div = udiv <vscale x 4 x i32> splat (i32 3), %0
+ ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_div_mixed_splat() {
+; CHECK-LABEL: @insert_div_mixed_splat(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 6), <4 x i32> splat (i32 3), i64 0)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 18), <4 x i32> splat (i32 9), i64 0)
+ %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+ ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_mul() {
+; CHECK-LABEL: @insert_mul(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 7), i64 4)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[MUL]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 4)
+ %mul = mul <vscale x 4 x i32> %0, splat (i32 7)
+ ret <vscale x 4 x i32> %mul
+}
+
+define <vscale x 4 x i32> @insert_add() {
+; CHECK-LABEL: @insert_add(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 16), i64 0)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0)
+ %add = add <vscale x 4 x i32> %0, splat (i32 11)
+ ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 8 x i32> @insert_add_scalable_subvector() {
+; CHECK-LABEL: @insert_add_scalable_subvector(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 20), <vscale x 4 x i32> splat (i32 -4), i64 0)
+; CHECK-NEXT: ret <vscale x 8 x i32> [[ADD]]
+;
+entry:
+ %0 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat(i32 16), <vscale x 4 x i32> splat (i32 -8), i64 0)
+ %add = add <vscale x 8 x i32> %0, splat (i32 4)
+ ret <vscale x 8 x i32> %add
+}
+
+define <vscale x 4 x i32> @insert_sub() {
+; CHECK-LABEL: @insert_sub(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 8)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[SUB]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 8)
+ %sub = add <vscale x 4 x i32> %0, splat (i32 -11)
+ ret <vscale x 4 x i32> %sub
+}
+
+define <vscale x 4 x i32> @insert_and_partially_undef() {
+; CHECK-LABEL: @insert_and_partially_undef(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> zeroinitializer, <4 x i32> splat (i32 4), i64 0)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[AND]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 6), i64 0)
+ %and = and <vscale x 4 x i32> %0, splat (i32 4)
+ ret <vscale x 4 x i32> %and
+}
+
+define <vscale x 4 x i32> @insert_fold_chain() {
+; CHECK-LABEL: @insert_fold_chain(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 11), <4 x i32> splat (i32 8), i64 0)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 21), <4 x i32> splat (i32 12), i64 0)
+ %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+ %add = add <vscale x 4 x i32> %div, splat (i32 4)
+ ret <vscale x 4 x i32> %add
+}
+
+; TODO: This could be folded more.
+define <vscale x 4 x i32> @insert_add_both_insert_vector() {
+; CHECK-LABEL: @insert_add_both_insert_vector(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10), <4 x i32> splat (i32 5), i64 0)
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 -1), <4 x i32> splat (i32 2), i64 0)
+; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+ %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 10), <4 x i32> splat (i32 5), i64 0)
+ %1 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 -1), <4 x i32> splat (i32 2), i64 0)
+ %add = add <vscale x 4 x i32> %0, %1
+ ret <vscale x 4 x i32> %add
+}
|
This comment was marked as off-topic.
This comment was marked as off-topic.
Note: I'm choosing to ignore the "undef depreciation" as I wanted to test a case with |
This patch improves constant folding through
llvm.vector.insert
. It does not change anything for fixed-length vectors (which can already be folded to ConstantVectors for these cases), but folds scalable vectors that otherwise would not be folded.These folds preserve the destination vector (which could be undef or poison), giving targets more freedom in lowering the operations.