55 changes: 55 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2323,6 +2323,32 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
  return ConstantVector::get(NewVecC);
}

// Match a vector.insert where both the destination and subvector are constant.
static bool matchConstantSubVector(Value *V, Constant *&Dest,
                                   Constant *&SubVector, Value *&Idx) {
  return match(V, m_Intrinsic<Intrinsic::vector_insert>(
                      m_Constant(Dest), m_Constant(SubVector), m_Value(Idx)));
}
Review comment from a Collaborator on lines +2327 to +2331:
Can you add something like m_VectorInsert to PatternMatch instead, and use that inline?
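A minimal sketch of what such a matcher could look like in llvm/include/llvm/IR/PatternMatch.h, assuming a three-operand wrapper in the style of the existing intrinsic matchers (the name m_VectorInsert and its placement are illustrative only, not part of this patch):

// Matches a call to llvm.vector.insert with the given destination,
// subvector and index matchers (sketch, inside namespace PatternMatch).
template <typename Opnd0, typename Opnd1, typename Opnd2>
inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
m_VectorInsert(const Opnd0 &Dest, const Opnd1 &Sub, const Opnd2 &Idx) {
  return m_Intrinsic<Intrinsic::vector_insert>(Dest, Sub, Idx);
}

With that in place, matchConstantSubVector above would reduce to a single inline call:
match(V, m_VectorInsert(m_Constant(Dest), m_Constant(SubVector), m_Value(Idx)))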


static Constant *matchConstantSplat(Value *V) {
  Constant *C;
  if (match(V, m_Constant(C)))
    return C->getSplatValue();
Review comment from a Collaborator on lines +2334 to +2336:
Suggested change:
-  Constant *C;
-  if (match(V, m_Constant(C)))
-    return C->getSplatValue();
+  if (auto *C = dyn_cast<Constant>(V))
+    return C->getSplatValue();

  return nullptr;
}

// Get the result of `Vector Op Splat` (or `Splat Op Vector` if \p SplatLHS).
static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
                                            Constant *Splat, bool SplatLHS,
                                            const DataLayout &DL) {
  ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
  Constant *LHS = ConstantVector::getSplat(EC, Splat);
  Constant *RHS = Vector;
  if (!SplatLHS)
    std::swap(LHS, RHS);
  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
}

Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
  if (!isa<VectorType>(Inst.getType()))
    return nullptr;
@@ -2334,6 +2360,35 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
  assert(cast<VectorType>(RHS->getType())->getElementCount() ==
         cast<VectorType>(Inst.getType())->getElementCount());

  auto foldConstantsThroughSubVectorInsertSplat =
      [&](Value *MaybeSubVector, Value *MaybeSplat,
          bool SplatLHS) -> Instruction * {
    Value *Idx;
    Constant *SubVector, *Dest, *Splat;
    Splat = matchConstantSplat(MaybeSplat);
    if (!Splat || !matchConstantSubVector(MaybeSubVector, Dest, SubVector, Idx))
      return nullptr;
    SubVector =
        constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
    Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
    if (!SubVector || !Dest)
      return nullptr;
    auto *InsertVector =
        Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
    return replaceInstUsesWith(Inst, InsertVector);
  };

  // If one operand is a constant splat and the other operand is a
  // `vector.insert` where both the destination and subvector are constant,
  // apply the operation to both the destination and subvector, returning a new
  // constant `vector.insert`. This helps constant folding for scalable vectors.
  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
          /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
    return Folded;
  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
          /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
    return Folded;

  // If both operands of the binop are vector concatenations, then perform the
  // narrow binop on each pair of the source operands followed by concatenation
  // of the results.
139 changes: 139 additions & 0 deletions llvm/test/Transforms/InstCombine/constant-vector-insert.ll
@@ -0,0 +1,139 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=instcombine %s | FileCheck %s
Review comment from a Collaborator:
Please add a RUN line containing the relevant -use-constant-*-splat flags. Ideally the behaviour will be the same but that's not required.

Perhaps worth adding a floating-point test?
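A floating-point variant could mirror the integer tests below; a minimal sketch (the function name and constants are made up, and CHECK lines would need regenerating with update_test_checks.py):

define <vscale x 4 x float> @insert_fadd() {
entry:
  %0 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> splat (float 1.0), i64 0)
  %fadd = fadd <vscale x 4 x float> %0, splat (float 2.0)
  ret <vscale x 4 x float> %fadd
}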


define <vscale x 4 x i32> @insert_div() {
; CHECK-LABEL: @insert_div(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 3), i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
%div = udiv <vscale x 4 x i32> %0, splat (i32 3)
ret <vscale x 4 x i32> %div
}

define <vscale x 4 x i32> @insert_div_splat_lhs() {
; CHECK-LABEL: @insert_div_splat_lhs(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 5), <4 x i32> splat (i32 2), i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 2), <4 x i32> splat (i32 5), i64 0)
%div = udiv <vscale x 4 x i32> splat (i32 10), %0
ret <vscale x 4 x i32> %div
}

define <vscale x 4 x i32> @insert_div_mixed_splat() {
; CHECK-LABEL: @insert_div_mixed_splat(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 6), <4 x i32> splat (i32 3), i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[DIV]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 18), <4 x i32> splat (i32 9), i64 0)
%div = udiv <vscale x 4 x i32> %0, splat (i32 3)
ret <vscale x 4 x i32> %div
}

define <vscale x 4 x i32> @insert_mul() {
; CHECK-LABEL: @insert_mul(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 7), i64 4)
; CHECK-NEXT: ret <vscale x 4 x i32> [[MUL]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 4)
%mul = mul <vscale x 4 x i32> %0, splat (i32 7)
ret <vscale x 4 x i32> %mul
}

define <vscale x 4 x i32> @insert_add() {
; CHECK-LABEL: @insert_add(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 16), i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0)
%add = add <vscale x 4 x i32> %0, splat (i32 11)
ret <vscale x 4 x i32> %add
}

define <vscale x 4 x i32> @insert_add_non_splat_subvector() {
; CHECK-LABEL: @insert_add_non_splat_subvector(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 101, i32 102, i32 103, i32 104>, i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0)
%add = add <vscale x 4 x i32> %0, splat (i32 100)
ret <vscale x 4 x i32> %add
}

define <vscale x 8 x i32> @insert_add_scalable_subvector() {
; CHECK-LABEL: @insert_add_scalable_subvector(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 20), <vscale x 4 x i32> splat (i32 -4), i64 0)
; CHECK-NEXT: ret <vscale x 8 x i32> [[ADD]]
;
entry:
%0 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat(i32 16), <vscale x 4 x i32> splat (i32 -8), i64 0)
%add = add <vscale x 8 x i32> %0, splat (i32 4)
ret <vscale x 8 x i32> %add
}

define <vscale x 4 x i32> @insert_sub() {
; CHECK-LABEL: @insert_sub(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SUB:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 8)
; CHECK-NEXT: ret <vscale x 4 x i32> [[SUB]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 8)
%sub = add <vscale x 4 x i32> %0, splat (i32 -11)
ret <vscale x 4 x i32> %sub
}

define <vscale x 4 x i32> @insert_and_partially_undef() {
; CHECK-LABEL: @insert_and_partially_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> zeroinitializer, <4 x i32> splat (i32 4), i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[AND]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 6), i64 0)
%and = and <vscale x 4 x i32> %0, splat (i32 4)
ret <vscale x 4 x i32> %and
}

define <vscale x 4 x i32> @insert_fold_chain() {
; CHECK-LABEL: @insert_fold_chain(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 11), <4 x i32> splat (i32 8), i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 21), <4 x i32> splat (i32 12), i64 0)
%div = udiv <vscale x 4 x i32> %0, splat (i32 3)
%add = add <vscale x 4 x i32> %div, splat (i32 4)
ret <vscale x 4 x i32> %add
}

; TODO: This could be folded more.
define <vscale x 4 x i32> @insert_add_both_insert_vector() {
; CHECK-LABEL: @insert_add_both_insert_vector(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10), <4 x i32> splat (i32 5), i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 -1), <4 x i32> splat (i32 2), i64 0)
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 4 x i32> [[TMP0]], [[TMP1]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
entry:
%0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 10), <4 x i32> splat (i32 5), i64 0)
%1 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 -1), <4 x i32> splat (i32 2), i64 0)
%add = add <vscale x 4 x i32> %0, %1
ret <vscale x 4 x i32> %add
}