diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 5c60fad6f91aa..a4b77dad6891b 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2178,35 +2178,6 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, return true; } -/// Test whether a vector type is viable for promotion. -/// -/// This implements the necessary checking for \c checkVectorTypesForPromotion -/// (and thus isVectorPromotionViable) over all slices of the alloca for the -/// given VectorType. -static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, - const DataLayout &DL, unsigned VScale) { - uint64_t ElementSize = - DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue(); - - // While the definition of LLVM vectors is bitpacked, we don't support sizes - // that aren't byte sized. - if (ElementSize % 8) - return false; - assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 && - "vector size not a multiple of element size?"); - ElementSize /= 8; - - for (const Slice &S : P) - if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale)) - return false; - - for (const Slice *S : P.splitSliceTails()) - if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale)) - return false; - - return true; -} - /// Test whether any vector type in \p CandidateTys is viable for promotion. /// /// This implements the necessary checking for \c isVectorPromotionViable over @@ -2291,8 +2262,31 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, std::numeric_limits::max(); }); + auto CheckVectorTypeForPromotion = [&](VectorType *VTy) -> bool { + uint64_t ElementSize = + DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue(); + + // While the definition of LLVM vectors is bitpacked, we don't support sizes + // that aren't byte sized. 
+ if (ElementSize % 8) + return false; + assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 && + "vector size not a multiple of element size?"); + ElementSize /= 8; + + for (const Slice &S : P) + if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale)) + return false; + + for (const Slice *S : P.splitSliceTails()) + if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale)) + return false; + + return true; + }; + // Try each vector type, and return the one which works. for (VectorType *VTy : CandidateTys) - if (checkVectorTypeForPromotion(P, VTy, DL, VScale)) + if (CheckVectorTypeForPromotion(VTy)) return VTy; return nullptr; @@ -5209,63 +5203,69 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { /// promoted. AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P) { - // Try to compute a friendly type for this partition of the alloca. This - // won't always succeed, in which case we fall back to a legal integer type - // or an i8 array of an appropriate size. - Type *SliceTy = nullptr; - VectorType *SliceVecTy = nullptr; + // Try to compute a friendly type for `PartitionTy`, which is the type of the + // new alloca for this partition. This won't always succeed, in which case we + // fall back to a legal integer type or an i8 array of an appropriate size. + Type *PartitionTy = nullptr; + IntegerType *LargestIntegerUsedTy = nullptr; + bool IsVectorPromotable = false; const DataLayout &DL = AI.getDataLayout(); unsigned VScale = AI.getFunction()->getVScaleValue(); - std::pair CommonUseTy = - findCommonType(P.begin(), P.end(), P.endOffset()); - // Do all uses operate on the same type? 
- if (CommonUseTy.first) { - TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first); - if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) { - SliceTy = CommonUseTy.first; - SliceVecTy = dyn_cast(SliceTy); - } - } - // If not, can we find an appropriate subtype in the original allocated type? - if (!SliceTy) - if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), - P.beginOffset(), P.size())) - SliceTy = TypePartitionTy; - - // If still not, can we use the largest bitwidth integer type used? - if (!SliceTy && CommonUseTy.second) - if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) { - SliceTy = CommonUseTy.second; - SliceVecTy = dyn_cast(SliceTy); + // First check if the partition can be promoted to a vector. If it can, we are + // done. + VectorType *VecTy = isVectorPromotionViable(P, DL, VScale); + if (VecTy) { + PartitionTy = VecTy; + IsVectorPromotable = true; + } + + if (!PartitionTy) { + // Otherwise, check if there is a common type that all slices of the + // partition use. Collect the largest integer type used as a backup. + auto CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()); + LargestIntegerUsedTy = CommonUseTy.second; + // If there is a common type that spans the partition, use it. + if (CommonUseTy.first) { + TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first); + if (CommonUseSize.isFixed() && + CommonUseSize.getFixedValue() >= P.size()) { + PartitionTy = CommonUseTy.first; + } } - if ((!SliceTy || (SliceTy->isArrayTy() && - SliceTy->getArrayElementType()->isIntegerTy())) && - DL.isLegalInteger(P.size() * 8)) { - SliceTy = Type::getIntNTy(*C, P.size() * 8); } - // If the common use types are not viable for promotion then attempt to find - // another type that is viable. 
- if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL, VScale)) + if (!PartitionTy) + // Otherwise, check if there is an appropriate subtype of the original + // alloca type to use. if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), - P.beginOffset(), P.size())) { - VectorType *TypePartitionVecTy = dyn_cast(TypePartitionTy); - if (TypePartitionVecTy && - checkVectorTypeForPromotion(P, TypePartitionVecTy, DL, VScale)) - SliceTy = TypePartitionTy; - } - - if (!SliceTy) - SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size()); - assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size()); - - bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL); - - VectorType *VecTy = - IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL, VScale); - if (VecTy) - SliceTy = VecTy; + P.beginOffset(), P.size())) + PartitionTy = TypePartitionTy; + + // If the type has not been selected yet OR if the type selected is a + // non-promotable aggregate + if ((!PartitionTy || + (!IsVectorPromotable && !PartitionTy->isSingleValueType()))) + // If the largest integer type used spans the partition, use it. + if (LargestIntegerUsedTy && + DL.getTypeAllocSize(LargestIntegerUsedTy).getFixedValue() >= P.size()) + PartitionTy = LargestIntegerUsedTy; + + // If the type has not been selected yet OR if the type selected is a + // non-promotable array (of any element type), try to select a legal integer + // type of the same size as the partition. + if ((!PartitionTy || (PartitionTy->isArrayTy() && !IsVectorPromotable)) && + DL.isLegalInteger(P.size() * 8)) + PartitionTy = Type::getIntNTy(*C, P.size() * 8); + + // Finally, if nothing worked, fall back to an i8 array of the appropriate + // size. + if (!PartitionTy) + PartitionTy = ArrayType::get(Type::getInt8Ty(*C), P.size()); + assert(DL.getTypeAllocSize(PartitionTy).getFixedValue() >= P.size()); + + bool IsIntegerPromotable = + IsVectorPromotable ? 
false : isIntegerWideningViable(P, PartitionTy, DL); // Check for the case where we're going to rewrite to a new alloca of the // exact same type as the original, and with the same access offsets. In that @@ -5274,7 +5274,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // P.beginOffset() can be non-zero even with the same type in a case with // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll). AllocaInst *NewAI; - if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) { + if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) { NewAI = &AI; // FIXME: We should be able to bail at this point with "nothing changed". // FIXME: We might want to defer PHI speculation until after here. @@ -5284,10 +5284,10 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset()); // If we will get at least this much alignment from the type alone, leave // the alloca's alignment unconstrained. - const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy); + const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy); NewAI = new AllocaInst( - SliceTy, AI.getAddressSpace(), nullptr, - IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment, + PartitionTy, AI.getAddressSpace(), nullptr, + IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment, AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), AI.getIterator()); // Copy the old AI debug location over to the new one. 
diff --git a/llvm/test/Transforms/SROA/prefer-integer-partition.ll b/llvm/test/Transforms/SROA/prefer-integer-partition.ll
new file mode 100644
index 0000000000000..e4c7633a2aa2e
--- /dev/null
+++ b/llvm/test/Transforms/SROA/prefer-integer-partition.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=sroa -S | FileCheck %s
+
+; Test that SROA converts array types to integer types for promotion.
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32-ni:10:11:12:13"
+
+; Two 8-byte [2 x float] allocas that are touched only by memset, memcpy and
+; lifetime intrinsics. The checks expect both allocas and every intrinsic call
+; to be removed, leaving just the return.
+define void @test_float_array_only_intrinsics() {
+; CHECK-LABEL: @test_float_array_only_intrinsics(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src = alloca [2 x float], align 4
+  %dst = alloca [2 x float], align 4
+
+  call void @llvm.lifetime.start.p0(i64 8, ptr %src)
+  call void @llvm.lifetime.start.p0(i64 8, ptr %dst)
+
+  ; Only intrinsic uses - no scalar loads/stores to establish common type
+  call void @llvm.memset.p0.i64(ptr %src, i8 42, i64 8, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 8, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr %src, ptr %dst, i64 8, i1 false)
+
+  call void @llvm.lifetime.end.p0(i64 8, ptr %dst)
+  call void @llvm.lifetime.end.p0(i64 8, ptr %src)
+  ret void
+}
+
+; A 4-byte [2 x half] alloca that is written as i32 and read back as float.
+; The checks expect the alloca to be removed and the stored constant to reach
+; the load through a bitcast.
+define void @test_mixed_types() {
+; CHECK-LABEL: @test_mixed_types(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 42 to float
+; CHECK-NEXT:    ret void
+;
+entry:
+  %alloca = alloca [2 x half]
+  store i32 42, ptr %alloca
+  %val = load float, ptr %alloca
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)