diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1e95f235b6533b..da5a910ca9fa18 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1657,6 +1657,47 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
   return NewBO;
 }
 
+/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
+/// Example (little endian):
+/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
+static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
+                                     bool IsBigEndian) {
+  // This must be a bitcasted shuffle of 1 vector integer operand.
+  Type *DestType = Shuf.getType();
+  Value *X;
+  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
+      !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
+    return nullptr;
+
+  // The source type must have the same number of elements as the shuffle,
+  // and the source element type must be larger than the shuffle element type.
+  Type *SrcType = X->getType();
+  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
+      SrcType->getVectorNumElements() != DestType->getVectorNumElements() ||
+      SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
+    return nullptr;
+
+  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
+         "Expected a shuffle that decreases length");
+
+  // Last, check that the mask chooses the correct low bits for each narrow
+  // element in the result.
+  uint64_t TruncRatio =
+      SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
+  ArrayRef<int> Mask = Shuf.getShuffleMask();
+  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] == UndefMaskElem)
+      continue;
+    uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
+    assert(LSBIndex <= std::numeric_limits<int32_t>::max() &&
+           "Overflowed 32-bits");
+    if (Mask[i] != (int)LSBIndex)
+      return nullptr;
+  }
+
+  return new TruncInst(X, DestType);
+}
+
 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
 /// narrowing (concatenating with undef and extracting back to the original
 /// length). This allows replacing the wide select with a narrow select.
@@ -1951,6 +1992,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
     return I;
 
+  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
+    return I;
+
   if (Instruction *I = narrowVectorSelect(SVI, Builder))
     return I;
 
diff --git a/llvm/test/Transforms/InstCombine/shuffle-cast.ll b/llvm/test/Transforms/InstCombine/shuffle-cast.ll
index e4b21ff9e11181..fc3b4c1241a011 100644
--- a/llvm/test/Transforms/InstCombine/shuffle-cast.ll
+++ b/llvm/test/Transforms/InstCombine/shuffle-cast.ll
@@ -3,10 +3,14 @@
 ; RUN: opt < %s -instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ANY,BE
 
 define <4 x i16> @trunc_little_endian(<4 x i32> %x) {
-; ANY-LABEL: @trunc_little_endian(
-; ANY-NEXT:    [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16>
-; ANY-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; ANY-NEXT:    ret <4 x i16> [[R]]
+; LE-LABEL: @trunc_little_endian(
+; LE-NEXT:    [[R:%.*]] = trunc <4 x i32> [[X:%.*]] to <4 x i16>
+; LE-NEXT:    ret <4 x i16> [[R]]
+;
+; BE-LABEL: @trunc_little_endian(
+; BE-NEXT:    [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16>
+; BE-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; BE-NEXT:    ret <4 x i16> [[R]]
 ;
   %b = bitcast <4 x i32> %x to <8 x i16>
   %r = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -14,10 +18,14 @@ define <4 x i16> @trunc_little_endian(<4 x i32> %x) {
 }
 
 define <4 x i16> @trunc_big_endian(<4 x i32> %x) {
-; ANY-LABEL: @trunc_big_endian(
-; ANY-NEXT:    [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16>
-; ANY-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; ANY-NEXT:    ret <4 x i16> [[R]]
+; LE-LABEL: @trunc_big_endian(
+; LE-NEXT:    [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16>
+; LE-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; LE-NEXT:    ret <4 x i16> [[R]]
+;
+; BE-LABEL: @trunc_big_endian(
+; BE-NEXT:    [[R:%.*]] = trunc <4 x i32> [[X:%.*]] to <4 x i16>
+; BE-NEXT:    ret <4 x i16> [[R]]
 ;
   %b = bitcast <4 x i32> %x to <8 x i16>
   %r = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -26,12 +34,20 @@ define <4 x i16> @trunc_big_endian(<4 x i32> %x) {
 
 declare void @use_v8i16(<8 x i16>)
 
+; Extra use is ok.
+
 define <2 x i16> @trunc_little_endian_extra_use(<2 x i64> %x) {
-; ANY-LABEL: @trunc_little_endian_extra_use(
-; ANY-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[X:%.*]] to <8 x i16>
-; ANY-NEXT:    call void @use_v8i16(<8 x i16> [[B]])
-; ANY-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <2 x i32> <i32 0, i32 4>
-; ANY-NEXT:    ret <2 x i16> [[R]]
+; LE-LABEL: @trunc_little_endian_extra_use(
+; LE-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[X:%.*]] to <8 x i16>
+; LE-NEXT:    call void @use_v8i16(<8 x i16> [[B]])
+; LE-NEXT:    [[R:%.*]] = trunc <2 x i64> [[X]] to <2 x i16>
+; LE-NEXT:    ret <2 x i16> [[R]]
+;
+; BE-LABEL: @trunc_little_endian_extra_use(
+; BE-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[X:%.*]] to <8 x i16>
+; BE-NEXT:    call void @use_v8i16(<8 x i16> [[B]])
+; BE-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <2 x i32> <i32 0, i32 4>
+; BE-NEXT:    ret <2 x i16> [[R]]
 ;
   %b = bitcast <2 x i64> %x to <8 x i16>
   call void @use_v8i16(<8 x i16> %b)
@@ -41,12 +57,20 @@ define <2 x i16> @trunc_little_endian_extra_use(<2 x i64> %x) {
 
 declare void @use_v12i11(<12 x i11>)
 
+; Weird types are ok.
+
 define <4 x i11> @trunc_big_endian_extra_use(<4 x i33> %x) {
-; ANY-LABEL: @trunc_big_endian_extra_use(
-; ANY-NEXT:    [[B:%.*]] = bitcast <4 x i33> [[X:%.*]] to <12 x i11>
-; ANY-NEXT:    call void @use_v12i11(<12 x i11> [[B]])
-; ANY-NEXT:    [[R:%.*]] = shufflevector <12 x i11> [[B]], <12 x i11> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; ANY-NEXT:    ret <4 x i11> [[R]]
+; LE-LABEL: @trunc_big_endian_extra_use(
+; LE-NEXT:    [[B:%.*]] = bitcast <4 x i33> [[X:%.*]] to <12 x i11>
+; LE-NEXT:    call void @use_v12i11(<12 x i11> [[B]])
+; LE-NEXT:    [[R:%.*]] = shufflevector <12 x i11> [[B]], <12 x i11> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; LE-NEXT:    ret <4 x i11> [[R]]
+;
+; BE-LABEL: @trunc_big_endian_extra_use(
+; BE-NEXT:    [[B:%.*]] = bitcast <4 x i33> [[X:%.*]] to <12 x i11>
+; BE-NEXT:    call void @use_v12i11(<12 x i11> [[B]])
+; BE-NEXT:    [[R:%.*]] = trunc <4 x i33> [[X]] to <4 x i11>
+; BE-NEXT:    ret <4 x i11> [[R]]
 ;
   %b = bitcast <4 x i33> %x to <12 x i11>
   call void @use_v12i11(<12 x i11> %b)
@@ -54,3 +78,46 @@ define <4 x i11> @trunc_big_endian_extra_use(<4 x i33> %x) {
   ret <4 x i11> %r
 }
 
+define <4 x i16> @wrong_cast1(i128 %x) {
+; ANY-LABEL: @wrong_cast1(
+; ANY-NEXT:    [[B:%.*]] = bitcast i128 [[X:%.*]] to <8 x i16>
+; ANY-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; ANY-NEXT:    ret <4 x i16> [[R]]
+;
+  %b = bitcast i128 %x to <8 x i16>
+  %r = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %r
+}
+
+define <4 x i16> @wrong_cast2(<4 x float> %x) {
+; ANY-LABEL: @wrong_cast2(
+; ANY-NEXT:    [[B:%.*]] = bitcast <4 x float> [[X:%.*]] to <8 x i16>
+; ANY-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; ANY-NEXT:    ret <4 x i16> [[R]]
+;
+  %b = bitcast <4 x float> %x to <8 x i16>
+  %r = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %r
+}
+
+define <4 x half> @wrong_cast3(<4 x i32> %x) {
+; ANY-LABEL: @wrong_cast3(
+; ANY-NEXT:    [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x half>
+; ANY-NEXT:    [[R:%.*]] = shufflevector <8 x half> [[B]], <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; ANY-NEXT:    ret <4 x half> [[R]]
+;
+  %b = bitcast <4 x i32> %x to <8 x half>
+  %r = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x half> %r
+}
+
+define <2 x i16> @wrong_shuffle(<4 x i32> %x) {
+; ANY-LABEL: @wrong_shuffle(
+; ANY-NEXT:    [[B:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16>
+; ANY-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <2 x i32> <i32 0, i32 2>
+; ANY-NEXT:    ret <2 x i16> [[R]]
+;
+  %b = bitcast <4 x i32> %x to <8 x i16>
+  %r = shufflevector <8 x i16> %b, <8 x i16> undef, <2 x i32> <i32 0, i32 2>
+  ret <2 x i16> %r
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/vector-trunc.ll b/llvm/test/Transforms/PhaseOrdering/vector-trunc.ll
index 494b9a7a6c0bca..52a1fe7d897dcc 100644
--- a/llvm/test/Transforms/PhaseOrdering/vector-trunc.ll
+++ b/llvm/test/Transforms/PhaseOrdering/vector-trunc.ll
@@ -4,8 +4,7 @@
 
 define <4 x i16> @truncate(<4 x i32> %x) {
 ; ANY-LABEL: @truncate(
-; ANY-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16>
-; ANY-NEXT:    [[V3:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; ANY-NEXT:    [[V3:%.*]] = trunc <4 x i32> [[X:%.*]] to <4 x i16>
 ; ANY-NEXT:    ret <4 x i16> [[V3]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0