diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 286120ed534b7..ca7bb69b17f7c 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -689,15 +689,18 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) { // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for // scalable type is unknown; Second, we cannot reason if the narrowed shuffle // mask for scalable type is a splat or not. - // 2) Disallow non-vector casts and length-changing shuffles. + // 2) Disallow non-vector casts. // TODO: We could allow any shuffle. + auto *DestTy = dyn_cast(I.getType()); auto *SrcTy = dyn_cast(V->getType()); - if (!SrcTy || I.getOperand(0)->getType() != SrcTy) + if (!DestTy || !SrcTy) return false; - auto *DestTy = cast(I.getType()); unsigned DestEltSize = DestTy->getScalarSizeInBits(); unsigned SrcEltSize = SrcTy->getScalarSizeInBits(); + if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0) + return false; + SmallVector NewMask; if (DestEltSize <= SrcEltSize) { // The bitcast is from wide to narrow/equal elements. The shuffle mask can @@ -714,10 +717,15 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) { return false; } + // Bitcast the shuffle src - keep its original width but using the destination + // scalar type. + unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize; + auto *ShuffleTy = FixedVectorType::get(DestTy->getScalarType(), NumSrcElts); + // The new shuffle must not cost more than the old shuffle. The bitcast is // moved ahead of the shuffle, so assume that it has the same cost as before. InstructionCost DestCost = TTI.getShuffleCost( - TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask); + TargetTransformInfo::SK_PermuteSingleSrc, ShuffleTy, NewMask); InstructionCost SrcCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask); if (DestCost > SrcCost || !DestCost.isValid()) @@ -725,7 +733,7 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) { // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC' ++NumShufOfBitcast; - Value *CastV = Builder.CreateBitCast(V, DestTy); + Value *CastV = Builder.CreateBitCast(V, ShuffleTy); Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask); replaceValue(I, *Shuf); return true; diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll index 318aa33d6b11c..471424dfaca2d 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll @@ -33,13 +33,18 @@ define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { ret <4 x float> %r } -; TODO - length-changing shuffle +; Length-changing shuffles define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_narrow_element_subvector( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> -; CHECK-NEXT: ret <16 x i8> [[R]] +; SSE-LABEL: @bitcast_shuf_narrow_element_subvector( +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> +; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> +; SSE-NEXT: ret <16 x i8> [[R]] +; +; AVX-LABEL: @bitcast_shuf_narrow_element_subvector( +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> +; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <16 x i32> +; AVX-NEXT: ret <16 x i8> [[R]] ; %shuf = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> %r = bitcast <4 x i32> %shuf to <16 x i8> @@ -47,10 +52,15 @@ define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) { } define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) { -; CHECK-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16> -; CHECK-NEXT: ret <16 x i16> [[R]] +; SSE-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> +; SSE-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16> +; SSE-NEXT: ret <16 x i16> [[R]] +; +; AVX-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> +; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> +; AVX-NEXT: ret <16 x i16> [[R]] ; %shuf = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> %r = bitcast <4 x i64> %shuf to <16 x i16> @@ -58,10 +68,15 @@ define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) { } define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_extract_subvector( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> -; CHECK-NEXT: ret <16 x i8> [[R]] +; SSE-LABEL: @bitcast_shuf_extract_subvector( +; SSE-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> [[V:%.*]] to <32 x i8> +; SSE-NEXT: [[R:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <16 x i32> +; SSE-NEXT: ret <16 x i8> [[R]] +; +; AVX-LABEL: @bitcast_shuf_extract_subvector( +; AVX-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> +; AVX-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> +; AVX-NEXT: ret <16 x i8> [[R]] ; %shuf = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> %r = bitcast <4 x i32> %shuf to <16 x i8> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index e094d62c80a5e..1e0a5ec187e55 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -33,13 +33,18 @@ define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { ret <4 x float> %r } -; TODO - Length-changing shuffle +; Length-changing shuffles define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_narrow_element_subvector( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> -; CHECK-NEXT: ret <16 x i8> [[R]] +; SSE-LABEL: @bitcast_shuf_narrow_element_subvector( +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> +; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> +; SSE-NEXT: ret <16 x i8> [[R]] +; +; AVX-LABEL: @bitcast_shuf_narrow_element_subvector( +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> +; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <16 x i32> +; AVX-NEXT: ret <16 x i8> [[R]] ; %shuf = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> %r = bitcast <4 x i32> %shuf to <16 x i8> @@ -47,10 +52,15 @@ define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) { } define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) { -; CHECK-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16> -; CHECK-NEXT: ret <16 x i16> [[R]] +; SSE-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> +; SSE-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16> +; SSE-NEXT: ret <16 x i16> [[R]] +; +; AVX-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> +; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> +; AVX-NEXT: ret <16 x i16> [[R]] ; %shuf = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> %r = bitcast <4 x i64> %shuf to <16 x i16> @@ -58,10 +68,15 @@ define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) { } define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_extract_subvector( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> -; CHECK-NEXT: ret <16 x i8> [[R]] +; SSE-LABEL: @bitcast_shuf_extract_subvector( +; SSE-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> [[V:%.*]] to <32 x i8> +; SSE-NEXT: [[R:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <16 x i32> +; SSE-NEXT: ret <16 x i8> [[R]] +; +; AVX-LABEL: @bitcast_shuf_extract_subvector( +; AVX-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> +; AVX-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> +; AVX-NEXT: ret <16 x i8> [[R]] ; %shuf = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> %r = bitcast <4 x i32> %shuf to <16 x i8>