Skip to content

Commit

Permalink
[InstCombine] allow undef elements when forming splat from chain of insertelements
Browse files Browse the repository at this point in the history

We allow forming a splat (broadcast) shuffle, but we were conservatively limiting
that to cases where all elements of the vector are specified. It should be safe
from a codegen perspective to allow undefined lanes of the vector because the
expansion of a splat shuffle would become the chain of inserts again.

Forming splat shuffles can reduce IR and help enable further IR transforms.
Motivating bugs:
https://bugs.llvm.org/show_bug.cgi?id=42174
https://bugs.llvm.org/show_bug.cgi?id=16739

Differential Revision: https://reviews.llvm.org/D63848

llvm-svn: 365147
  • Loading branch information
rotateright committed Jul 4, 2019
1 parent 0cd50b2 commit 75b5edf
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 16 deletions.
21 changes: 17 additions & 4 deletions llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -704,19 +704,32 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
CurrIE = NextIE;
}

// Make sure we've seen an insert into every element.
if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
// If this is just a single insertelement (not a sequence), we are done.
if (FirstIE == &InsElt)
return nullptr;

// If we are not inserting into an undef vector, make sure we've seen an
// insert into every element.
// TODO: If the base vector is not undef, it might be better to create a splat
// and then a select-shuffle (blend) with the base vector.
if (!isa<UndefValue>(FirstIE->getOperand(0)))
if (any_of(ElementPresent, [](bool Present) { return !Present; }))
return nullptr;

// Create the insert + shuffle.
Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
UndefValue *UndefVec = UndefValue::get(VecTy);
Constant *Zero = ConstantInt::get(Int32Ty, 0);
if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);

Constant *ZeroMask = ConstantVector::getSplat(NumElements, Zero);
return new ShuffleVectorInst(FirstIE, UndefVec, ZeroMask);
// Splat from element 0, but replace absent elements with undef in the mask.
SmallVector<Constant *, 16> Mask(NumElements, Zero);
for (unsigned i = 0; i != NumElements; ++i)
if (!ElementPresent[i])
Mask[i] = UndefValue::get(Int32Ty);

return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
}

/// If we have an insertelement instruction feeding into another insertelement
Expand Down
29 changes: 17 additions & 12 deletions llvm/test/Transforms/InstCombine/broadcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,12 @@ define <4 x float> @good5(float %v) {
ret <4 x float> %res
}

define <4 x float> @bad1(float %arg) {
; CHECK-LABEL: @bad1(
; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 1
; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
; The insert is changed to allow the canonical shuffle-splat pattern from element 0.

define <4 x float> @splat_undef1(float %arg) {
; CHECK-LABEL: @splat_undef1(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> undef, float %arg, i32 1
Expand All @@ -86,11 +87,12 @@ define <4 x float> @bad1(float %arg) {
ret <4 x float> %t6
}

define <4 x float> @bad2(float %arg) {
; CHECK-LABEL: @bad2(
; Re-uses the existing first insertelement.

define <4 x float> @splat_undef2(float %arg) {
; CHECK-LABEL: @splat_undef2(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 2
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> undef, float %arg, i32 0
Expand Down Expand Up @@ -123,10 +125,13 @@ define <1 x float> @bad4(float %arg) {
ret <1 x float> %t
}

define <4 x float> @bad5(float %arg) {
; CHECK-LABEL: @bad5(
; Multiple undef elements are ok.
; TODO: Multiple uses trigger the transform at %t4, but we could form another splat from %t6 and simplify?

define <4 x float> @splat_undef3(float %arg) {
; CHECK-LABEL: @splat_undef3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 1
; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]]
Expand Down

0 comments on commit 75b5edf

Please sign in to comment.