-
Notifications
You must be signed in to change notification settings - Fork 10.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Scalarize (vec_ops (insert ?, X, Idx))
when only one element is demanded
#84645
Conversation
(vec_ops (insert ?, X, Idx))
when only one element is demanded
@llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) Changes
Patch is 49.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84645.diff 11 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 3c4c0f35eb6d48..bf46359accaaa0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2579,6 +2579,97 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
return new ShuffleVectorInst(X, Y, NewMask);
}
+/// Extract `(scalar_ops... x)` from `(vector_ops... (insert ?, x, C))`.
+///
+/// Walks from \p V towards an `insertelement` with a constant index and, if
+/// lane \p ReqIndexC of \p V is fully determined by the inserted scalar,
+/// rebuilds the intervening vector operations as scalar operations on it.
+///
+/// Handled on the way down:
+///  - `insertelement` chains with constant indices,
+///  - splat shuffles (followed into whichever operand is being splatted),
+///  - single-use unary ops, non-bitcast casts, speculatable binops with an
+///    immediate-constant operand, and compares against an immediate constant.
+///
+/// \param V          Vector value whose lane is requested.
+/// \param ReqIndexC  Lane of \p V that is demanded.
+/// \param Builder    Used to emit the scalar replacements at its current
+///                   insertion point.
+/// \returns The scalar equivalent of lane \p ReqIndexC, or nullptr if \p V
+///          could not be scalarized.
+static Value *
+getScalarizationOfInsertElement(Value *V, int ReqIndexC,
+                                InstCombiner::BuilderTy &Builder) {
+  Value *X, *Base;
+  ConstantInt *IndexC;
+  // Found an insertelement with a constant index.
+  if (match(V, m_InsertElt(m_Value(Base), m_Value(X), m_ConstantInt(IndexC)))) {
+    // See if it writes the lane we need.
+    if (match(IndexC, m_SpecificInt(ReqIndexC)))
+      return X;
+    // Otherwise continue searching. This is necessary for finding both elements
+    // in the common pattern:
+    //    V0 = (insert poison x, 0)
+    //    V1 = (insert V0, y, 1)
+    return getScalarizationOfInsertElement(Base, ReqIndexC, Builder);
+  }
+
+  // We can search through a splat of a single element for an insert.
+  int SplatIndex;
+  if (match(V, m_Shuffle(m_Value(Base), m_Value(X),
+                         m_SplatOrUndefMask(SplatIndex))) &&
+      SplatIndex >= 0) {
+    // Mask indices address the concatenation of the two *source* operands, so
+    // the split point is the source element count. The shuffle result may have
+    // a different number of elements than its operands.
+    auto *SrcTy = dyn_cast<FixedVectorType>(Base->getType());
+    if (!SrcTy)
+      return nullptr;
+    int NumSrcElts = static_cast<int>(SrcTy->getNumElements());
+    // Chase whichever vector (Base/X) we are splatting from. The new index we
+    // need to find is the index we are splatting from.
+    if (SplatIndex >= NumSrcElts)
+      return getScalarizationOfInsertElement(X, SplatIndex - NumSrcElts,
+                                             Builder);
+    return getScalarizationOfInsertElement(Base, SplatIndex, Builder);
+  }
+
+  // We don't want to duplicate `vector_ops...` if they have multiple uses.
+  if (!V->hasOneUse())
+    return nullptr;
+
+  Value *R = nullptr;
+  Constant *C;
+  CmpInst::Predicate Pred;
+  if (match(V, m_UnOp(m_Value(X)))) {
+    // Scalarize any unary op.
+    if (auto *Scalar = getScalarizationOfInsertElement(X, ReqIndexC, Builder))
+      R = Builder.CreateUnOp(cast<UnaryOperator>(V)->getOpcode(), Scalar);
+  } else if (isa<CastInst>(V) && !match(V, m_BitCast(m_Value()))) {
+    // Scalarize any cast but bitcast.
+    // TODO: We skip bitcasts, but they would be okay if they are elementwise.
+    X = cast<CastInst>(V)->getOperand(0);
+    if (auto *Scalar = getScalarizationOfInsertElement(X, ReqIndexC, Builder))
+      R = Builder.CreateCast(cast<CastInst>(V)->getOpcode(), Scalar,
+                             V->getType()->getScalarType());
+  } else if (match(V, m_c_BinOp(m_Value(X), m_ImmConstant(C)))) {
+    // Binop with a constant on either side.
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (isSafeToSpeculativelyExecute(BO)) {
+      if (auto *Scalar =
+              getScalarizationOfInsertElement(X, ReqIndexC, Builder)) {
+        auto *ScalarC =
+            ConstantExpr::getExtractElement(C, Builder.getInt64(ReqIndexC));
+
+        // The commutative matcher may have bound X to either operand. Keep
+        // the original operand order so that non-commutative ops such as
+        // `sub C, X` or `fdiv C, X` are not silently swapped.
+        BinaryOperator::BinaryOps Opc = BO->getOpcode();
+        if (BO->getOperand(0) == X)
+          R = Builder.CreateBinOp(Opc, Scalar, ScalarC);
+        else
+          R = Builder.CreateBinOp(Opc, ScalarC, Scalar);
+      }
+    }
+  } else if (match(V, m_Cmp(Pred, m_Value(X), m_ImmConstant(C)))) {
+    // Cmp with a constant.
+    if (auto *Scalar = getScalarizationOfInsertElement(X, ReqIndexC, Builder)) {
+      auto *ScalarC =
+          ConstantExpr::getExtractElement(C, Builder.getInt64(ReqIndexC));
+      R = Builder.CreateCmp(Pred, Scalar, ScalarC);
+    }
+  }
+  // TODO: Intrinsics
+
+  // If we created a new scalar instruction, copy flags from the vec version.
+  // The builder may fold the operation and hand back a non-Instruction (e.g. a
+  // Constant), so do not assume the result is an Instruction.
+  // NOTE(review): if the builder's folder can return a pre-existing
+  // instruction, copying flags onto it may be too strong - confirm.
+  if (auto *NewI = dyn_cast_or_null<Instruction>(R))
+    NewI->copyIRFlags(V);
+
+  return R;
+}
+
/// Try to replace a shuffle with an insertelement or try to replace a shuffle
/// operand with the operand of an insertelement.
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
@@ -2616,13 +2707,11 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
if (NumElts != InpNumElts)
return nullptr;
- // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
- auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
- // We need an insertelement with a constant index.
- if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
- m_ConstantInt(IndexC))))
- return false;
+ // (shuffle (vec_ops... (insert ?, Scalar, IndexC)), V1, Mask)
+ // --> insert V1, (scalar_ops... Scalar), IndexC'
+ auto GetScalarizationOfInsertEle =
+ [&Mask, &NumElts, &IC](Value *V) -> std::pair<Value *, int> {
// Test the shuffle mask to see if it splices the inserted scalar into the
// operand 1 vector of the shuffle.
int NewInsIndex = -1;
@@ -2631,40 +2720,45 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
if (Mask[i] == -1)
continue;
- // The shuffle takes elements of operand 1 without lane changes.
- if (Mask[i] == NumElts + i)
+ // The shuffle takes elements of operand 1.
+ if (Mask[i] >= NumElts)
continue;
// The shuffle must choose the inserted scalar exactly once.
- if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
- return false;
+ if (NewInsIndex != -1)
+ return {nullptr, -1};
- // The shuffle is placing the inserted scalar into element i.
+ // The shuffle is placing the inserted scalar into element i from operand
+ // 0.
NewInsIndex = i;
}
- assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
+ // Operand is unused.
+ if (NewInsIndex < 0)
+ return {nullptr, -1};
- // Index is updated to the potentially translated insertion lane.
- IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
- return true;
- };
+ Value *Scalar =
+ getScalarizationOfInsertElement(V, Mask[NewInsIndex], IC.Builder);
- // If the shuffle is unnecessary, insert the scalar operand directly into
- // operand 1 of the shuffle. Example:
- // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
- Value *Scalar;
- ConstantInt *IndexC;
- if (isShufflingScalarIntoOp1(Scalar, IndexC))
- return InsertElementInst::Create(V1, Scalar, IndexC);
+ return {Scalar, NewInsIndex};
+ };
- // Try again after commuting shuffle. Example:
- // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
- // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
- std::swap(V0, V1);
+ auto [V0Scalar, V0NewInsertIdx] = GetScalarizationOfInsertEle(V0);
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
- if (isShufflingScalarIntoOp1(Scalar, IndexC))
- return InsertElementInst::Create(V1, Scalar, IndexC);
+ auto [V1Scalar, V1NewInsertIdx] = GetScalarizationOfInsertEle(V1);
+
+ if (V0Scalar != nullptr && V1Scalar != nullptr) {
+ Value *R = IC.Builder.CreateInsertElement(Shuf.getType(), V0Scalar,
+ V0NewInsertIdx);
+ return InsertElementInst::Create(R, V1Scalar,
+ IC.Builder.getInt64(V1NewInsertIdx));
+ } else if (V0Scalar != nullptr) {
+ return InsertElementInst::Create(V1, V0Scalar,
+ IC.Builder.getInt64(V0NewInsertIdx));
+ } else if (V1Scalar != nullptr) {
+ return InsertElementInst::Create(V0, V1Scalar,
+ IC.Builder.getInt64(V1NewInsertIdx));
+ }
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
index 6cbb2a246f5a4a..4bc3bd7bd9c203 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
@@ -547,8 +547,7 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) {
define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_in_nonsplat(
-; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
-; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 poison, i32 0, i32 4, i32 poison>
+; CHECK-NEXT: [[SPLAT:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i64 1
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i64 3
; CHECK-NEXT: ret <4 x float> [[R]]
;
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
index c87e2e8596c62d..97fcc7a4b43707 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -547,8 +547,7 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) {
define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_in_nonsplat(
-; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
-; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 poison, i32 0, i32 4, i32 poison>
+; CHECK-NEXT: [[SPLAT:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i64 1
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i64 3
; CHECK-NEXT: ret <4 x float> [[R]]
;
diff --git a/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll
index cc1d23943b09c8..2392bcac6eb7ec 100644
--- a/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll
@@ -88,9 +88,9 @@ define <2 x i16> @test_udiv(i16 %a, i1 %cmp) {
; shufflevector is eliminated here.
define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_fdiv(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i64 1
-; CHECK-NEXT: [[SPLAT_OP:%.*]] = fdiv <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[SPLAT_OP]]
+; CHECK-NEXT: [[A:%.*]] = fdiv float [[A1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i64 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> poison, float %a, i32 0
@@ -105,9 +105,9 @@ define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; shufflevector is eliminated here.
define <2 x float> @test_frem(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_frem(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i64 1
-; CHECK-NEXT: [[SPLAT_OP:%.*]] = frem <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[SPLAT_OP]]
+; CHECK-NEXT: [[A:%.*]] = frem float [[A1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i64 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> poison, float %a, i32 0
diff --git a/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll b/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll
index 1699418dcc28b3..f445a0134b3589 100644
--- a/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll
+++ b/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll
@@ -88,9 +88,9 @@ define <2 x i16> @test_udiv(i16 %a, i1 %cmp) {
; shufflevector is eliminated here.
define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_fdiv(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i64 1
-; CHECK-NEXT: [[SPLAT_OP:%.*]] = fdiv <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[SPLAT_OP]]
+; CHECK-NEXT: [[A:%.*]] = fdiv float [[A1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i64 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> undef, float %a, i32 0
@@ -105,9 +105,9 @@ define <2 x float> @test_fdiv(float %a, float %b, i1 %cmp) {
; shufflevector is eliminated here.
define <2 x float> @test_frem(float %a, float %b, i1 %cmp) {
; CHECK-LABEL: @test_frem(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i64 1
-; CHECK-NEXT: [[SPLAT_OP:%.*]] = frem <2 x float> [[TMP1]], <float undef, float 3.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[SPLAT_OP]]
+; CHECK-NEXT: [[A:%.*]] = frem float [[A1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i64 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP1]]
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%splatinsert = insertelement <2 x float> undef, float %a, i32 0
diff --git a/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll b/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll
new file mode 100644
index 00000000000000..dae4ca1d8eba79
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shufflevector-inselt.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='instcombine' -S %s | FileCheck %s
+
+declare void @use.v2.float(<2 x float>)
+define <2 x float> @replace_through_casts(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[INP]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP2]], i64 1
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %inp, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %add, i64 1
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %r = shufflevector <2 x float> %ui_v, <2 x float> %si_v, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_and_binop(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts_and_binop(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[INP]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[MUL]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %mul = mul nsw i16 %inp, 5
+ %v0 = insertelement <2 x i16> poison, i16 %mul, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %add, i64 1
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_and_binop_and_unop(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts_and_binop_and_unop(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 0
+ %v = insertelement <2 x i16> %v0, i16 %inp, i64 1
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v_fneg, <2 x i32> <i32 0, i32 2>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_through_splat(i16 %inp) {
+; CHECK-LABEL: @replace_through_casts_through_splat(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP:%.*]], -10
+; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
+; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 0
+ %v = shufflevector <2 x i16> %v0, <2 x i16> poison, <2 x i32> zeroinitializer
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v_fneg, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_through_splat2(i16 %inp, <2 x i16> %any) {
+; CHECK-LABEL: @replace_through_casts_through_splat2(
+; CHECK-NEXT: ret <2 x float> poison
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 0
+ %v = shufflevector <2 x i16> %v0, <2 x i16> %any, <2 x i32> <i32 1, i32 1>
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ %r = shufflevector <2 x float> %ui_v_add, <2 x float> %si_v_fneg, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %r
+}
+
+define <2 x float> @replace_through_casts_through_splat_fail(i16 %inp, <2 x i16> %any) {
+; CHECK-LABEL: @replace_through_casts_through_splat_fail(
+; CHECK-NEXT: [[V:%.*]] = shufflevector <2 x i16> [[V0:%.*]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[UI_V:%.*]] = uitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[SI_V:%.*]] = sitofp <2 x i16> [[V]] to <2 x float>
+; CHECK-NEXT: [[UI_V_ADD:%.*]] = fadd <2 x float> [[UI_V]], <float 2.000000e+00, float poison>
+; CHECK-NEXT: [[SI_V_FNEG:%.*]] = fneg <2 x float> [[SI_V]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[UI_V_ADD]], <2 x float> [[SI_V_FNEG]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %add = add nsw i16 %inp, -10
+ %v0 = insertelement <2 x i16> poison, i16 %add, i64 1
+ %v = shufflevector <2 x i16> %v0, <2 x i16> %any, <2 x i32> <i32 2, i32 2>
+ %ui_v = uitofp <2 x i16> %v to <2 x float>
+ %si_v = sitofp <2 x i16> %v to <2 x float>
+ %ui_v_add = fadd <2 x float> %ui_v, <float 2.0, float 3.0>
+ %si_v_fneg = fneg <2 x float> %si_v
+ %r = shufflevector <2 x...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@@ -547,8 +547,7 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) { | |||
|
|||
define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) { | |||
; CHECK-LABEL: @insert_in_nonsplat( | |||
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 | |||
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 poison, i32 0, i32 4, i32 poison> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is buggy, fixing.
Note, the new function |
…element is demanded This came as a result of PR llvm#84389. SLP vectorizer can vectorize in a pattern like: ``` (blend (vec_ops0... (insert ?,X,0)), (vec_ops1... (insert ?,Y,1)) ) ``` In this case, `vec_ops0...` and `vec_ops1...` are essentially doing scalar transforms. We previously handled things like: `(blend (insert ?,X,0), (insert ?,Y,0))` This patch extends that to look through `vec_ops...` that can be scalarized, and if it's possible to scalarize all ops, it transforms the input to: ``` (blend (insert ?,(scalar_ops0... X), 0), (insert ?,(scalar_ops1... Y), 0) ) ```
b56bb3a
to
c3ef185
Compare
@dtcxzyw can you test this? IMO it's not worth the extra complexity unless it shows up in real-world code. |
Are you saying that SLP is creating these vectorizations but shouldn't? Isn't that a cost issue with the alt opcode cases? |
See the test |
Does not look like cost issue, looks like a corner case that should be handled separately in SLP. We have some kind of it already, but not all. There is something similar with sext/zext/trunc, I'm going to fix later, since it requires some extra stuff in minbitwidth analysis. This one should not require extra work, just need to automatically create gather node for 2-element altshuffle, if we see that the operand is going to be gather/buildvector node. Can do it tomorrow. |
Thanks :) I'll close this once your patch goes up. |
Ping @alexey-bataev Any progress? |
This came as a result of PR #84389 (see: dtcxzyw/llvm-opt-benchmark#336 (comment)). SLP vectorizer can vectorize in a
pattern like:
(blend (vec_ops0... (insert ?,X,0)), (vec_ops1... (insert ?,Y,1)))
In this case, `vec_ops0...` and `vec_ops1...` are essentially doing scalar transforms.
We previously handled things like:
(blend (insert ?,X,0), (insert ?,Y,0))
This patch extends that to look through `vec_ops...` that can be scalarized, and if it's possible to scalarize all ops, it transforms
the input to: