[InstCombine] Constant fold binops through vector.insert
#164624
@@ -2323,6 +2323,32 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
  return ConstantVector::get(NewVecC);
}

// Match a vector.insert where both the destination and subvector are constant.
static bool matchConstantSubVector(Value *V, Constant *&Dest,
                                   Constant *&SubVector, Value *&Idx) {
  return match(V, m_Intrinsic<Intrinsic::vector_insert>(
                      m_Constant(Dest), m_Constant(SubVector), m_Value(Idx)));
}

static Constant *matchConstantSplat(Value *V) {
  Constant *C;
  if (match(V, m_Constant(C)))
    return C->getSplatValue();
  return nullptr;
}

// Get the result of `Vector Op Splat` (or `Splat Op Vector` if \p SplatLHS).
static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
                                            Constant *Splat, bool SplatLHS,
                                            const DataLayout &DL) {
  ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
  Constant *LHS = ConstantVector::getSplat(EC, Splat);
  Constant *RHS = Vector;
  if (!SplatLHS)
    std::swap(LHS, RHS);
  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
}

Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
  if (!isa<VectorType>(Inst.getType()))
    return nullptr;

@@ -2334,6 +2360,35 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
  assert(cast<VectorType>(RHS->getType())->getElementCount() ==
         cast<VectorType>(Inst.getType())->getElementCount());

  auto foldConstantsThroughSubVectorInsertSplat =
      [&](Value *MaybeSubVector, Value *MaybeSplat,
          bool SplatLHS) -> Instruction * {
    Value *Idx;
    Constant *SubVector, *Dest, *Splat;
    Splat = matchConstantSplat(MaybeSplat);
    if (!Splat || !matchConstantSubVector(MaybeSubVector, Dest, SubVector, Idx))
      return nullptr;
    SubVector =
        constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
    Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
    if (!SubVector || !Dest)
      return nullptr;
    auto *InsertVector =
        Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
    return replaceInstUsesWith(Inst, InsertVector);
  };

  // If one operand is a constant splat and the other operand is a
  // `vector.insert` where both the destination and subvector are constant,
  // apply the operation to both the destination and subvector, returning a new
  // constant `vector.insert`. This helps constant folding for scalable vectors.
  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
          /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
    return Folded;
  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
          /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
    return Folded;

  // If both operands of the binop are vector concatenations, then perform the
  // narrow binop on each pair of the source operands followed by concatenation
  // of the results.
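As a concrete illustration of what the new constantFoldBinOpWithSplat helper computes, here is a hedged, standalone C++ sketch (not part of the patch; the function name foldSubVectorExample and the caller-supplied Ctx/DL are assumptions) that folds the fixed-width subvector operand from the insert_div test below:

// Hypothetical, standalone illustration (not part of the patch) of the fold
// applied to the subvector operand: <4 x i32> splat (i32 9) udiv'd by a splat
// value of 3.
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static Constant *foldSubVectorExample(LLVMContext &Ctx, const DataLayout &DL) {
  Type *I32 = Type::getInt32Ty(Ctx);
  ElementCount EC = ElementCount::getFixed(4);
  // The constant subvector operand of the vector.insert: <4 x i32> splat (i32 9).
  Constant *SubVector = ConstantVector::getSplat(EC, ConstantInt::get(I32, 9));
  // The scalar splat value taken from the other binop operand.
  Constant *Splat = ConstantInt::get(I32, 3);
  // Re-splat the scalar to this operand's own element count, then fold
  // element-wise.
  Constant *SplatVec = ConstantVector::getSplat(EC, Splat);
  // Returns <4 x i32> splat (i32 3), which becomes the new subvector.
  return ConstantFoldBinaryOpOperands(Instruction::UDiv, SubVector, SplatVec, DL);
}

The destination operand is folded the same way, just with its (possibly scalable) element count, which is why the result can stay a constant vector.insert.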
@@ -0,0 +1,139 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=instcombine %s | FileCheck %s
Review comment: Please add a RUN line containing the relevant …. Perhaps worth adding a floating-point test?

define <vscale x 4 x i32> @insert_div() {
; CHECK-LABEL: @insert_div(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 3), i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
  ret <vscale x 4 x i32> %div
}

define <vscale x 4 x i32> @insert_div_splat_lhs() {
; CHECK-LABEL: @insert_div_splat_lhs(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 5), <4 x i32> splat (i32 2), i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 2), <4 x i32> splat (i32 5), i64 0)
  %div = udiv <vscale x 4 x i32> splat (i32 10), %0
  ret <vscale x 4 x i32> %div
}

define <vscale x 4 x i32> @insert_div_mixed_splat() {
; CHECK-LABEL: @insert_div_mixed_splat(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 6), <4 x i32> splat (i32 3), i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 18), <4 x i32> splat (i32 9), i64 0)
  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
  ret <vscale x 4 x i32> %div
}

define <vscale x 4 x i32> @insert_mul() {
; CHECK-LABEL: @insert_mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 7), i64 4)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 4)
  %mul = mul <vscale x 4 x i32> %0, splat (i32 7)
  ret <vscale x 4 x i32> %mul
}

define <vscale x 4 x i32> @insert_add() {
; CHECK-LABEL: @insert_add(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 16), i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0)
  %add = add <vscale x 4 x i32> %0, splat (i32 11)
  ret <vscale x 4 x i32> %add
}

define <vscale x 4 x i32> @insert_add_non_splat_subvector() {
; CHECK-LABEL: @insert_add_non_splat_subvector(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 101, i32 102, i32 103, i32 104>, i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0)
  %add = add <vscale x 4 x i32> %0, splat (i32 100)
  ret <vscale x 4 x i32> %add
}

define <vscale x 8 x i32> @insert_add_scalable_subvector() {
; CHECK-LABEL: @insert_add_scalable_subvector(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 20), <vscale x 4 x i32> splat (i32 -4), i64 0)
; CHECK-NEXT:    ret <vscale x 8 x i32> [[ADD]]
;
entry:
  %0 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 16), <vscale x 4 x i32> splat (i32 -8), i64 0)
  %add = add <vscale x 8 x i32> %0, splat (i32 4)
  ret <vscale x 8 x i32> %add
}

define <vscale x 4 x i32> @insert_sub() {
; CHECK-LABEL: @insert_sub(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SUB:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 8)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 8)
  %sub = add <vscale x 4 x i32> %0, splat (i32 -11)
  ret <vscale x 4 x i32> %sub
}

define <vscale x 4 x i32> @insert_and_partially_undef() {
; CHECK-LABEL: @insert_and_partially_undef(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[AND:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> zeroinitializer, <4 x i32> splat (i32 4), i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[AND]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 6), i64 0)
  %and = and <vscale x 4 x i32> %0, splat (i32 4)
  ret <vscale x 4 x i32> %and
}

define <vscale x 4 x i32> @insert_fold_chain() {
; CHECK-LABEL: @insert_fold_chain(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 11), <4 x i32> splat (i32 8), i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 21), <4 x i32> splat (i32 12), i64 0)
  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
  %add = add <vscale x 4 x i32> %div, splat (i32 4)
  ret <vscale x 4 x i32> %add
}

; TODO: This could be folded more.
define <vscale x 4 x i32> @insert_add_both_insert_vector() {
; CHECK-LABEL: @insert_add_both_insert_vector(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10), <4 x i32> splat (i32 5), i64 0)
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 -1), <4 x i32> splat (i32 2), i64 0)
; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
;
entry:
  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10), <4 x i32> splat (i32 5), i64 0)
  %1 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 -1), <4 x i32> splat (i32 2), i64 0)
  %add = add <vscale x 4 x i32> %0, %1
  ret <vscale x 4 x i32> %add
}
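The TODO above notes that this last case could be folded further. A hedged sketch of one possible follow-up, not part of this PR, assuming it sits next to the new helpers in the same InstCombine file (the function name and exact plumbing are assumptions):

// Hypothetical follow-up fold (not in this patch): if both binop operands are
// vector.inserts of constants at the same index, fold the destinations and
// subvectors pairwise and emit a single constant vector.insert.
static Instruction *foldBinOpOfConstantInserts(InstCombinerImpl &IC,
                                               BinaryOperator &Inst,
                                               const DataLayout &DL) {
  Value *IdxL, *IdxR;
  Constant *DestL, *SubL, *DestR, *SubR;
  if (!matchConstantSubVector(Inst.getOperand(0), DestL, SubL, IdxL) ||
      !matchConstantSubVector(Inst.getOperand(1), DestR, SubR, IdxR) ||
      IdxL != IdxR || SubL->getType() != SubR->getType())
    return nullptr;
  unsigned Opcode = Inst.getOpcode();
  // Fold destination with destination and subvector with subvector; the binop
  // is lane-wise, so the inserted lanes and the surrounding lanes fold
  // independently.
  Constant *Dest = ConstantFoldBinaryOpOperands(Opcode, DestL, DestR, DL);
  Constant *Sub = ConstantFoldBinaryOpOperands(Opcode, SubL, SubR, DL);
  if (!Dest || !Sub)
    return nullptr;
  Value *Insert =
      IC.Builder.CreateInsertVector(Dest->getType(), Dest, Sub, IdxL);
  return IC.replaceInstUsesWith(Inst, Insert);
}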
Review comment: Can you add something like m_VectorInsert to PatternMatch instead, and use that inline?
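For illustration, one possible shape for such a matcher, hedged as a sketch rather than anything PatternMatch currently provides: a thin wrapper over the existing m_Intrinsic matcher that names the vector.insert operands.

// Hypothetical sketch of the suggested m_VectorInsert matcher; it is not in
// PatternMatch.h today and would live there if adopted.
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"

namespace llvm {
namespace PatternMatch {

template <typename DestTy, typename SubVecTy, typename IdxTy>
inline auto m_VectorInsert(const DestTy &Dest, const SubVecTy &SubVec,
                           const IdxTy &Idx) {
  // vector.insert takes (destination vector, subvector, index).
  return m_Intrinsic<Intrinsic::vector_insert>(Dest, SubVec, Idx);
}

} // namespace PatternMatch
} // namespace llvm

With that, matchConstantSubVector above could collapse to an inline match(V, m_VectorInsert(m_Constant(Dest), m_Constant(SubVector), m_Value(Idx))).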