diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index afdddad10cea2..58f9dcedb15f0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -690,6 +690,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(), *SrcTy = Src->getType(); + ConstantInt *Cst; // Attempt to truncate the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -758,7 +759,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // more efficiently. Support vector types. Cleanup code by using m_OneUse. // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. - Value *A = nullptr; ConstantInt *Cst = nullptr; + Value *A = nullptr; if (Src->hasOneUse() && match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { // We have three types to worry about here, the type of A, the source of @@ -843,6 +844,38 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *I = foldVecTruncToExtElt(CI, *this)) return I; + // Whenever an element is extracted from a vector, and then truncated, + // canonicalize by converting it to a bitcast followed by an + // extractelement. + // + // Example (little endian): + // trunc (extractelement <4 x i64> %X, 0) to i32 + // ---> + // extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0 + Value *VecOp; + if (match(Src, + m_OneUse(m_ExtractElement(m_Value(VecOp), m_ConstantInt(Cst))))) { + Type *VecOpTy = VecOp->getType(); + unsigned DestScalarSize = DestTy->getScalarSizeInBits(); + unsigned VecOpScalarSize = VecOpTy->getScalarSizeInBits(); + unsigned VecNumElts = VecOpTy->getVectorNumElements(); + + // A badly fit destination size would result in an invalid cast. 
+ if (VecOpScalarSize % DestScalarSize == 0) { + uint64_t TruncRatio = VecOpScalarSize / DestScalarSize; + uint64_t BitCastNumElts = VecNumElts * TruncRatio; + uint64_t VecOpIdx = Cst->getZExtValue(); + uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1 + : VecOpIdx * TruncRatio; + assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() && + "overflow 32-bits"); + + Type *BitCastTo = VectorType::get(DestTy, BitCastNumElts); + Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo); + return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx)); + } + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/ExtractCast.ll b/llvm/test/Transforms/InstCombine/ExtractCast.ll index 91df10241d448..9dc9396837c2c 100644 --- a/llvm/test/Transforms/InstCombine/ExtractCast.ll +++ b/llvm/test/Transforms/InstCombine/ExtractCast.ll @@ -3,8 +3,8 @@ define i32 @a(<4 x i64> %I) { ; CHECK-LABEL: @a( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[I:%.*]], i32 3 -; CHECK-NEXT: [[K:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[I:%.*]] to <8 x i32> +; CHECK-NEXT: [[K:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 ; CHECK-NEXT: ret i32 [[K]] ; %J = trunc <4 x i64> %I to <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/trunc-extractelement.ll b/llvm/test/Transforms/InstCombine/trunc-extractelement.ll index 9f4ff5d9ce22e..c44577d1f1d1b 100644 --- a/llvm/test/Transforms/InstCombine/trunc-extractelement.ll +++ b/llvm/test/Transforms/InstCombine/trunc-extractelement.ll @@ -3,10 +3,15 @@ ; RUN: opt < %s -instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ANY,BE define i32 @shrinkExtractElt_i64_to_i32_0(<3 x i64> %x) { -; ANY-LABEL: @shrinkExtractElt_i64_to_i32_0( -; ANY-NEXT: [[E:%.*]] = extractelement <3 x i64> [[X:%.*]], i32 0 -; ANY-NEXT: [[T:%.*]] = trunc i64 [[E]] to i32 -; ANY-NEXT: ret i32 [[T]] +; LE-LABEL: @shrinkExtractElt_i64_to_i32_0( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> 
[[X:%.*]] to <6 x i32> +; LE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 0 +; LE-NEXT: ret i32 [[T]] +; +; BE-LABEL: @shrinkExtractElt_i64_to_i32_0( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32> +; BE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 1 +; BE-NEXT: ret i32 [[T]] ; %e = extractelement <3 x i64> %x, i32 0 %t = trunc i64 %e to i32 @@ -14,10 +19,15 @@ define i32 @shrinkExtractElt_i64_to_i32_0(<3 x i64> %x) { } define i32 @shrinkExtractElt_i64_to_i32_1(<3 x i64> %x) { -; ANY-LABEL: @shrinkExtractElt_i64_to_i32_1( -; ANY-NEXT: [[E:%.*]] = extractelement <3 x i64> [[X:%.*]], i32 1 -; ANY-NEXT: [[T:%.*]] = trunc i64 [[E]] to i32 -; ANY-NEXT: ret i32 [[T]] +; LE-LABEL: @shrinkExtractElt_i64_to_i32_1( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32> +; LE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 2 +; LE-NEXT: ret i32 [[T]] +; +; BE-LABEL: @shrinkExtractElt_i64_to_i32_1( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32> +; BE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 3 +; BE-NEXT: ret i32 [[T]] ; %e = extractelement <3 x i64> %x, i32 1 %t = trunc i64 %e to i32 @@ -25,10 +35,15 @@ define i32 @shrinkExtractElt_i64_to_i32_1(<3 x i64> %x) { } define i32 @shrinkExtractElt_i64_to_i32_2(<3 x i64> %x) { -; ANY-LABEL: @shrinkExtractElt_i64_to_i32_2( -; ANY-NEXT: [[E:%.*]] = extractelement <3 x i64> [[X:%.*]], i32 2 -; ANY-NEXT: [[T:%.*]] = trunc i64 [[E]] to i32 -; ANY-NEXT: ret i32 [[T]] +; LE-LABEL: @shrinkExtractElt_i64_to_i32_2( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32> +; LE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 4 +; LE-NEXT: ret i32 [[T]] +; +; BE-LABEL: @shrinkExtractElt_i64_to_i32_2( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32> +; BE-NEXT: [[T:%.*]] = extractelement <6 x i32> [[TMP1]], i32 5 +; BE-NEXT: ret i32 [[T]] ; %e = extractelement <3 x i64> %x, i32 2 %t = trunc i64 %e to i32 @@ 
-36,10 +51,15 @@ define i32 @shrinkExtractElt_i64_to_i32_2(<3 x i64> %x) { } define i16 @shrinkExtractElt_i64_to_i16_0(<3 x i64> %x) { -; ANY-LABEL: @shrinkExtractElt_i64_to_i16_0( -; ANY-NEXT: [[E:%.*]] = extractelement <3 x i64> [[X:%.*]], i16 0 -; ANY-NEXT: [[T:%.*]] = trunc i64 [[E]] to i16 -; ANY-NEXT: ret i16 [[T]] +; LE-LABEL: @shrinkExtractElt_i64_to_i16_0( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16> +; LE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 0 +; LE-NEXT: ret i16 [[T]] +; +; BE-LABEL: @shrinkExtractElt_i64_to_i16_0( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16> +; BE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 3 +; BE-NEXT: ret i16 [[T]] ; %e = extractelement <3 x i64> %x, i16 0 %t = trunc i64 %e to i16 @@ -47,10 +67,15 @@ define i16 @shrinkExtractElt_i64_to_i16_0(<3 x i64> %x) { } define i16 @shrinkExtractElt_i64_to_i16_1(<3 x i64> %x) { -; ANY-LABEL: @shrinkExtractElt_i64_to_i16_1( -; ANY-NEXT: [[E:%.*]] = extractelement <3 x i64> [[X:%.*]], i16 1 -; ANY-NEXT: [[T:%.*]] = trunc i64 [[E]] to i16 -; ANY-NEXT: ret i16 [[T]] +; LE-LABEL: @shrinkExtractElt_i64_to_i16_1( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16> +; LE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 4 +; LE-NEXT: ret i16 [[T]] +; +; BE-LABEL: @shrinkExtractElt_i64_to_i16_1( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16> +; BE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 7 +; BE-NEXT: ret i16 [[T]] ; %e = extractelement <3 x i64> %x, i16 1 %t = trunc i64 %e to i16 @@ -58,10 +83,15 @@ define i16 @shrinkExtractElt_i64_to_i16_1(<3 x i64> %x) { } define i16 @shrinkExtractElt_i64_to_i16_2(<3 x i64> %x) { -; ANY-LABEL: @shrinkExtractElt_i64_to_i16_2( -; ANY-NEXT: [[E:%.*]] = extractelement <3 x i64> [[X:%.*]], i16 2 -; ANY-NEXT: [[T:%.*]] = trunc i64 [[E]] to i16 -; ANY-NEXT: ret i16 [[T]] +; LE-LABEL: @shrinkExtractElt_i64_to_i16_2( +; LE-NEXT: 
[[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16> +; LE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 8 +; LE-NEXT: ret i16 [[T]] +; +; BE-LABEL: @shrinkExtractElt_i64_to_i16_2( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <12 x i16> +; BE-NEXT: [[T:%.*]] = extractelement <12 x i16> [[TMP1]], i32 11 +; BE-NEXT: ret i16 [[T]] ; %e = extractelement <3 x i64> %x, i16 2 %t = trunc i64 %e to i16 @@ -70,10 +100,15 @@ define i16 @shrinkExtractElt_i64_to_i16_2(<3 x i64> %x) { ; Crazy types may be ok. define i11 @shrinkExtractElt_i33_to_11_2(<3 x i33> %x) { -; ANY-LABEL: @shrinkExtractElt_i33_to_11_2( -; ANY-NEXT: [[E:%.*]] = extractelement <3 x i33> [[X:%.*]], i16 2 -; ANY-NEXT: [[T:%.*]] = trunc i33 [[E]] to i11 -; ANY-NEXT: ret i11 [[T]] +; LE-LABEL: @shrinkExtractElt_i33_to_11_2( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i33> [[X:%.*]] to <9 x i11> +; LE-NEXT: [[T:%.*]] = extractelement <9 x i11> [[TMP1]], i32 6 +; LE-NEXT: ret i11 [[T]] +; +; BE-LABEL: @shrinkExtractElt_i33_to_11_2( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x i33> [[X:%.*]] to <9 x i11> +; BE-NEXT: [[T:%.*]] = extractelement <9 x i11> [[TMP1]], i32 8 +; BE-NEXT: ret i11 [[T]] ; %e = extractelement <3 x i33> %x, i16 2 %t = trunc i33 %e to i11 @@ -122,13 +157,17 @@ define i16 @shrinkExtractElt_i64_to_i16_2_extra_use(<3 x i64> %x) { ; Check to ensure PR45314 remains fixed. 
define <4 x i64> @PR45314(<4 x i64> %x) { -; ANY-LABEL: @PR45314( -; ANY-NEXT: [[E:%.*]] = extractelement <4 x i64> [[X:%.*]], i32 0 -; ANY-NEXT: [[T:%.*]] = trunc i64 [[E]] to i32 -; ANY-NEXT: [[I:%.*]] = insertelement <8 x i32> undef, i32 [[T]], i32 0 -; ANY-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[I]], <8 x i32> undef, <8 x i32> zeroinitializer -; ANY-NEXT: [[B:%.*]] = bitcast <8 x i32> [[S]] to <4 x i64> -; ANY-NEXT: ret <4 x i64> [[B]] +; LE-LABEL: @PR45314( +; LE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32> +; LE-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer +; LE-NEXT: [[B:%.*]] = bitcast <8 x i32> [[S]] to <4 x i64> +; LE-NEXT: ret <4 x i64> [[B]] +; +; BE-LABEL: @PR45314( +; BE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32> +; BE-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; BE-NEXT: [[B:%.*]] = bitcast <8 x i32> [[S]] to <4 x i64> +; BE-NEXT: ret <4 x i64> [[B]] ; %e = extractelement <4 x i64> %x, i32 0 %t = trunc i64 %e to i32