Skip to content

Commit

Permalink
[SROA]: Only defer trying partial sized ptr or ptr vector types
Browse files Browse the repository at this point in the history
Change-Id: Ic77f87290905addadd5819dff2d0c62f031022ab
(cherry picked from commit 1e828f8)
  • Loading branch information
jrbyrnes authored and llvmbot committed Mar 23, 2024
1 parent 0e16af8 commit 368dc85
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 27 deletions.
80 changes: 53 additions & 27 deletions llvm/lib/Transforms/Scalar/SROA.cpp
Expand Up @@ -2257,6 +2257,41 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
return nullptr;
}

/// Propose extra vector candidates built from the types in \p OtherTys, then
/// run the promotion check over \p CandidateTys.
///
/// Every type in \p OtherTys that is a valid vector element type is paired
/// with every vector type in \p CandidateTysCopy. When the element type's bit
/// size evenly divides the vector's bit size, and matches neither the vector's
/// size nor the size of its current element type, a vector of
/// (vector size / element size) such elements is handed to
/// \p CheckCandidateType.
///
/// \param OtherTys            Types to try as replacement element types.
/// \param CandidateTysCopy    Snapshot of candidate vector types to iterate.
/// \param CheckCandidateType  Callback invoked on each newly formed type.
/// \returns whatever checkVectorTypesForPromotion yields for \p CandidateTys
/// and the remaining arguments, which are forwarded unchanged.
static VectorType *createAndCheckVectorTypesForPromotion(
    SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
    function_ref<void(Type *)> CheckCandidateType, Partition &P,
    const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
  // Remember the first snapshot entry so the assertion in the inner loop can
  // detect the snapshot changing while it is being walked.
  [[maybe_unused]] VectorType *FirstSnapshotTy =
      CandidateTysCopy.empty() ? nullptr : CandidateTysCopy[0];

  for (Type *EltTy : OtherTys) {
    // Only types that may legally be vector elements are of interest.
    if (!VectorType::isValidElementType(EltTy))
      continue;

    const unsigned EltBits = DL.getTypeSizeInBits(EltTy).getFixedValue();

    // Walk the snapshot rather than CandidateTys itself: the callback may
    // modify CandidateTys while this loop is running.
    for (VectorType *const SnapshotTy : CandidateTysCopy) {
      // The snapshot must stay untouched for the whole traversal.
      assert(CandidateTysCopy[0] == FirstSnapshotTy && "Different Element");

      const unsigned VecBits =
          DL.getTypeSizeInBits(SnapshotTy).getFixedValue();
      const unsigned CurEltBits =
          DL.getTypeSizeInBits(SnapshotTy->getElementType()).getFixedValue();

      // Skip when the type already spans the whole vector, already has the
      // vector's element size, or does not divide the vector evenly.
      if (EltBits == VecBits || EltBits == CurEltBits ||
          VecBits % EltBits != 0)
        continue;

      const unsigned NumElts = VecBits / EltBits;
      CheckCandidateType(VectorType::get(EltTy, NumElts, false));
    }
  }

  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
                                      CommonEltTy, HaveVecPtrTy,
                                      HaveCommonVecPtrTy, CommonVecPtrTy);
}

/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
Expand All @@ -2271,6 +2306,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
SetVector<Type *> LoadStoreTys;
SetVector<Type *> DeferredTys;
Type *CommonEltTy = nullptr;
VectorType *CommonVecPtrTy = nullptr;
bool HaveVecPtrTy = false;
Expand Down Expand Up @@ -2314,42 +2350,32 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
Ty = SI->getValueOperand()->getType();
else
continue;

auto CandTy = Ty->getScalarType();
if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
S.endOffset() != P.endOffset())) {
DeferredTys.insert(Ty);
continue;
}

LoadStoreTys.insert(Ty);
// Consider any loads or stores that are the exact size of the slice.
if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
CheckCandidateType(Ty);
}

if (auto *VTy = checkVectorTypesForPromotion(
P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
if (auto *VTy = createAndCheckVectorTypesForPromotion(
LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
HaveCommonVecPtrTy, CommonVecPtrTy))
return VTy;

// Consider additional vector types where the element type size is a
// multiple of load/store element size.
for (Type *Ty : LoadStoreTys) {
if (!VectorType::isValidElementType(Ty))
continue;
unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
// Make a copy of CandidateTys and iterate through it, because we might
// append to CandidateTys in the loop.
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
CandidateTys.clear();
for (VectorType *&VTy : CandidateTysCopy) {
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
unsigned ElementSize =
DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
if (TypeSize != VectorSize && TypeSize != ElementSize &&
VectorSize % TypeSize == 0) {
VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
CheckCandidateType(NewVTy);
}
}
}

return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
CommonEltTy, HaveVecPtrTy,
HaveCommonVecPtrTy, CommonVecPtrTy);
CandidateTys.clear();
return createAndCheckVectorTypesForPromotion(
DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
CommonVecPtrTy);
}

/// Test whether a slice of an alloca is valid for integer widening.
Expand Down
62 changes: 62 additions & 0 deletions llvm/test/Transforms/SROA/vector-promotion.ll
Expand Up @@ -1388,6 +1388,68 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) {
ret <4 x ptr> %sroaval
}

; A [2 x i64] aggregate is stored into a <4 x i32> alloca, and the alloca is
; then reloaded as a whole <4 x i32>. The expected output below contains no
; alloca: the two i64 fields are extracted, inserted into a <2 x i64> vector,
; and that vector is bitcast to <4 x i32> for the return.
define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) {
; CHECK-LABEL: @validLoadStoreTy(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0
; CHECK-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0
; CHECK-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1
; CHECK-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP0]]
;
; DEBUG-LABEL: @validLoadStoreTy(
; DEBUG-NEXT: entry:
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]]
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]]
; DEBUG-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]]
; DEBUG-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]]
; DEBUG-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]]
; DEBUG-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]]
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]]
; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]]
; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]]
; DEBUG-NEXT: ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]]
;
entry:
%cond = alloca <4 x i32>, align 8
; Both GEPs below use all-zero indices, so the store and the load address the
; start of %cond.
%coerce.dive2 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
; The stored value is an array of two i64, not a vector.
store [2 x i64] %cond.coerce, ptr %coerce.dive2, align 8
%m5 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
%0 = load <4 x i32>, ptr %m5, align 8
ret <4 x i32> %0
}

; Crash regression test: the compiler must not crash on the function below.
; Calls to CheckCandidateType made from createAndCheckVectorTypesForPromotion
; may change the memory that holds CandidateTys.data(), so that helper must not
; keep iterating over CandidateTys storage while making those calls.
; No expected-output lines are attached to this function.
define noundef zeroext i1 @CandidateTysRealloc() personality ptr null {
entry:
%alloca = alloca <4x i32>, align 16
store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %alloca, align 16
br label %bb.1

; bb.1 branches only to itself. Nothing in this function branches to bb.2 or
; bb.4, so the loads and stores in those blocks are not reachable from entry.
bb.1:
br label %bb.1

bb.2:
%Load0 = load <4 x i32>, ptr %alloca, align 16
store <4 x i32> zeroinitializer, ptr %alloca, align 16
%Load1 = load <4 x i32>, ptr %alloca, align 16
br label %bb.3

bb.3:
br label %bb.3

bb.4:
; An i64 load of the same alloca, alongside the <4 x i32> accesses.
%Load2 = load i64, ptr %alloca, align 16
%Load3 = load <4 x i32>, ptr %alloca, align 16
store <4 x i32> zeroinitializer, ptr %alloca, align 16
br label %bb.5

bb.5:
br label %bb.5
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
declare void @llvm.lifetime.end.p0(i64, ptr)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
Expand Down

0 comments on commit 368dc85

Please sign in to comment.