// Patch summary (SROA vector promotion):
//  * Moves the "consider additional vector types" loop out of
//    isVectorPromotionViable into the new static helper
//    createAndCheckVectorTypesForPromotion, which derives extra candidate
//    vector types from a set of element types and then calls
//    checkVectorTypesForPromotion.
//  * isVectorPromotionViable now puts loads/stores whose scalar type is a
//    pointer and whose slice does not span the whole partition into
//    DeferredTys instead of LoadStoreTys. The helper is run first with
//    LoadStoreTys; only if that returns null is CandidateTys cleared and the
//    helper run again with DeferredTys.
//  * The snapshot CandidateTysCopy is taken once by the caller and reused for
//    both passes.
//  * Adds the tests @validLoadStoreTy and @CandidateTysRealloc.
// NOTE(review): unlike the removed code, the first pass no longer calls
//   checkVectorTypesForPromotion on the unmodified candidate list, and it
//   does not clear CandidateTys before adding derived candidates -- confirm
//   that this is intended.
// NOTE(review): text inside some angle brackets looks stripped in this copy
//   (e.g. `SetVector &OtherTys`, `function_ref CheckCandidateType`,
//   `store <4 x i32> , ptr %alloca`) -- restore it from the upstream change
//   before trying to apply this patch.
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index cd81fb702f4d4..a575dc74d8db5 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2257,6 +2257,41 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
   return nullptr;
 }
 
+static VectorType *createAndCheckVectorTypesForPromotion(
+    SetVector &OtherTys, ArrayRef CandidateTysCopy,
+    function_ref CheckCandidateType, Partition &P,
+    const DataLayout &DL, SmallVectorImpl &CandidateTys,
+    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
+    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
+  [[maybe_unused]] VectorType *OriginalElt =
+      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
+  // Consider additional vector types where the element type size is a
+  // multiple of load/store element size.
+  for (Type *Ty : OtherTys) {
+    if (!VectorType::isValidElementType(Ty))
+      continue;
+    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+    // Iterate over the caller-supplied copy rather than CandidateTys itself,
+    // because CheckCandidateType might append to CandidateTys in the loop.
+    for (VectorType *const VTy : CandidateTysCopy) {
+      // The copy's elements must stay invariant throughout the loop.
+      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
+      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+      unsigned ElementSize =
+          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+      if (TypeSize != VectorSize && TypeSize != ElementSize &&
+          VectorSize % TypeSize == 0) {
+        VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
+        CheckCandidateType(NewVTy);
+      }
+    }
+  }
+
+  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
+                                      CommonEltTy, HaveVecPtrTy,
+                                      HaveCommonVecPtrTy, CommonVecPtrTy);
+}
+
 /// Test whether the given alloca partitioning and range of slices can be
 /// promoted to a vector.
 ///
@@ -2271,6 +2306,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
   // we have different element types.
   SmallVector CandidateTys;
   SetVector LoadStoreTys;
+  SetVector DeferredTys;
   Type *CommonEltTy = nullptr;
   VectorType *CommonVecPtrTy = nullptr;
   bool HaveVecPtrTy = false;
@@ -2314,42 +2350,32 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       Ty = SI->getValueOperand()->getType();
     else
       continue;
+
+    auto CandTy = Ty->getScalarType();
+    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
+                                  S.endOffset() != P.endOffset())) {
+      DeferredTys.insert(Ty);
+      continue;
+    }
+
     LoadStoreTys.insert(Ty);
     // Consider any loads or stores that are the exact size of the slice.
     if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
       CheckCandidateType(Ty);
   }
 
-  if (auto *VTy = checkVectorTypesForPromotion(
-          P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+  SmallVector CandidateTysCopy = CandidateTys;
+  if (auto *VTy = createAndCheckVectorTypesForPromotion(
+          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
+          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
           HaveCommonVecPtrTy, CommonVecPtrTy))
     return VTy;
 
-  // Consider additional vector types where the element type size is a
-  // multiple of load/store element size.
-  for (Type *Ty : LoadStoreTys) {
-    if (!VectorType::isValidElementType(Ty))
-      continue;
-    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
-    // Make a copy of CandidateTys and iterate through it, because we might
-    // append to CandidateTys in the loop.
-    SmallVector CandidateTysCopy = CandidateTys;
-    CandidateTys.clear();
-    for (VectorType *&VTy : CandidateTysCopy) {
-      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
-      unsigned ElementSize =
-          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
-      if (TypeSize != VectorSize && TypeSize != ElementSize &&
-          VectorSize % TypeSize == 0) {
-        VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
-        CheckCandidateType(NewVTy);
-      }
-    }
-  }
-
-  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
-                                      CommonEltTy, HaveVecPtrTy,
-                                      HaveCommonVecPtrTy, CommonVecPtrTy);
+  CandidateTys.clear();
+  return createAndCheckVectorTypesForPromotion(
+      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
+      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
+      CommonVecPtrTy);
 }
 
 /// Test whether a slice of an alloca is valid for integer widening.
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index 1691f7733acea..9170067122588 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -1392,6 +1392,68 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) {
   ret <4 x ptr> %sroaval
 }
 
+define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) {
+; CHECK-LABEL: @validLoadStoreTy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0
+; CHECK-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0
+; CHECK-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1
+; CHECK-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP0]]
+;
+; DEBUG-LABEL: @validLoadStoreTy(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]]
+; DEBUG-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]]
+; DEBUG-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]]
+; DEBUG-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]]
+; DEBUG-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]]
+; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]]
+; DEBUG-NEXT: ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]]
+;
+entry:
+  %cond = alloca <4 x i32>, align 8
+  %coerce.dive2 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+  store [2 x i64] %cond.coerce, ptr %coerce.dive2, align 8
+  %m5 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+  %0 = load <4 x i32>, ptr %m5, align 8
+  ret <4 x i32> %0
+}
+
+; The following test should not crash the compiler: calls to CheckCandidateType
+; from createAndCheckVectorTypesForPromotion may reallocate CandidateTys.data().
+define noundef zeroext i1 @CandidateTysRealloc() personality ptr null {
+entry:
+  %alloca = alloca <4x i32>, align 16
+  store <4 x i32> , ptr %alloca, align 16
+  br label %bb.1
+
+bb.1:
+  br label %bb.1
+
+bb.2:
+  %Load0 = load <4 x i32>, ptr %alloca, align 16
+  store <4 x i32> zeroinitializer, ptr %alloca, align 16
+  %Load1 = load <4 x i32>, ptr %alloca, align 16
+  br label %bb.3
+
+bb.3:
+  br label %bb.3
+
+bb.4:
+  %Load2 = load i64, ptr %alloca, align 16
+  %Load3 = load <4 x i32>, ptr %alloca, align 16
+  store <4 x i32> zeroinitializer, ptr %alloca, align 16
+  br label %bb.5
+
+bb.5:
+  br label %bb.5
+}
+
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
 declare void @llvm.lifetime.end.p0(i64, ptr)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: