Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to double
// CHECK-NEXT: [[TMP1:%.*]] = fmul double [[TMP0]], [[B]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to <1 x double>
// CHECK-NEXT: ret <1 x double> [[TMP2]]
// CHECK-NEXT: [[REF_TMP_I_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double [[TMP1]], i32 0
// CHECK-NEXT: ret <1 x double> [[REF_TMP_I_0_VEC_INSERT]]
//
float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
return vmul_n_f64(a, b);
Expand Down Expand Up @@ -552,12 +552,12 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_lane_f64_0(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK-NEXT: [[__PROMOTE_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FD6304BC43AB5C2, i32 0
// CHECK-NEXT: [[__PROMOTE2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FEE211E215AEEF3, i32 0
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <1 x double> [[__PROMOTE2_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE9]])
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], double [[VMULXD_F64_I]], i32 0
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
//
float64x1_t test_vmulx_lane_f64_0() {
Expand All @@ -574,13 +574,13 @@ float64x1_t test_vmulx_lane_f64_0() {
// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_laneq_f64_2(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
// CHECK-NEXT: [[__PROMOTE_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FD6304BC43AB5C2, i32 0
// CHECK-NEXT: [[__PROMOTE2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FEE211E215AEEF3, i32 0
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], <1 x double> [[__PROMOTE2_SROA_0_0_VEC_INSERT]], <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], double [[VMULXD_F64_I]], i32 0
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
//
float64x1_t test_vmulx_laneq_f64_2() {
Expand Down
42 changes: 20 additions & 22 deletions clang/test/CodeGen/arm-bf16-convert-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,35 +196,33 @@ bfloat16x4_t test_vcvt_bf16_f32(float32x4_t a) {
//
// CHECK-A32-HARDFP-LABEL: @test_vcvtq_low_bf16_f32(
// CHECK-A32-HARDFP-NEXT: entry:
// CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = bitcast i64 0 to <4 x bfloat>
// CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
// CHECK-A32-HARDFP-NEXT: [[VCVTFP2BF_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
// CHECK-A32-HARDFP-NEXT: [[VCVTFP2BF_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK-A32-HARDFP-NEXT: [[VCVTFP2BF1_I:%.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> [[VCVTFP2BF_I]])
// CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[TMP0]], <4 x bfloat> [[VCVTFP2BF1_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> zeroinitializer, <4 x bfloat> [[VCVTFP2BF1_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-HARDFP-NEXT: ret <8 x bfloat> [[SHUFFLE_I]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_low_bf16_f32(
// CHECK-A32-SOFTFP-NEXT: entry:
// CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = bitcast i64 0 to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
// CHECK-A32-SOFTFP-NEXT: [[VCVTFP2BF_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
// CHECK-A32-SOFTFP-NEXT: [[VCVTFP2BF_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK-A32-SOFTFP-NEXT: [[VCVTFP2BF1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> [[VCVTFP2BF_I]])
// CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[VCVTFP2BF1_I]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <2 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP0]] to <2 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = bitcast <4 x bfloat> [[TMP5]] to <2 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VCVTFP2BF1_I]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[TMP2]] to <2 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> zeroinitializer to <2 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP4]] to <2 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP5]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP6]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP7]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[TMP8]], <4 x bfloat> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-SOFTFP-NEXT: [[TMP10:%.*]] = bitcast <8 x bfloat> [[SHUFFLE_I]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <8 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP12:%.*]] = bitcast <8 x bfloat> [[TMP11]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP12]] to <8 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP14:%.*]] = bitcast <8 x bfloat> [[TMP13]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: ret <4 x i32> [[TMP14]]
// CHECK-A32-SOFTFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[TMP7]], <4 x bfloat> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-SOFTFP-NEXT: [[TMP9:%.*]] = bitcast <8 x bfloat> [[SHUFFLE_I]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <8 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP11:%.*]] = bitcast <8 x bfloat> [[TMP10]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <8 x bfloat>
// CHECK-A32-SOFTFP-NEXT: [[TMP13:%.*]] = bitcast <8 x bfloat> [[TMP12]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: ret <4 x i32> [[TMP13]]
//
bfloat16x8_t test_vcvtq_low_bf16_f32(float32x4_t a) {
return vcvtq_low_bf16_f32(a);
Expand Down
137 changes: 85 additions & 52 deletions llvm/lib/Transforms/Scalar/SROA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1482,19 +1482,23 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }

/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
static std::pair<Type *, IntegerType *>
/// Returns: {CommonType, LargestIntegerType, OnlyIntrinsicUsers}
static std::tuple<Type *, IntegerType *, bool>
findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
uint64_t EndOffset) {
Type *Ty = nullptr;
bool TyIsCommon = true;
IntegerType *ITy = nullptr;
bool OnlyIntrinsicUsers = true;

// Note that we need to look at *every* alloca slice's Use to ensure we
// always get consistent results regardless of the order of slices.
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
continue;
// We found a non-intrinsic user
OnlyIntrinsicUsers = false;
if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
continue;

Expand Down Expand Up @@ -1528,7 +1532,7 @@ findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
Ty = UserTy;
}

return {TyIsCommon ? Ty : nullptr, ITy};
return {TyIsCommon ? Ty : nullptr, ITy, OnlyIntrinsicUsers};
}

/// PHI instructions that use an alloca and are subsequently loaded can be
Expand Down Expand Up @@ -5209,63 +5213,92 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// promoted.
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
Partition &P) {
const DataLayout &DL = AI.getDataLayout();
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
VectorType *SliceVecTy = nullptr;
const DataLayout &DL = AI.getDataLayout();
unsigned VScale = AI.getFunction()->getVScaleValue();

std::pair<Type *, IntegerType *> CommonUseTy =
findCommonType(P.begin(), P.end(), P.endOffset());
// Do all uses operate on the same type?
if (CommonUseTy.first) {
TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) {
SliceTy = CommonUseTy.first;
SliceVecTy = dyn_cast<VectorType>(SliceTy);
}
}
// If not, can we find an appropriate subtype in the original allocated type?
if (!SliceTy)
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;

// If still not, can we use the largest bitwidth integer type used?
if (!SliceTy && CommonUseTy.second)
if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
SliceTy = CommonUseTy.second;
SliceVecTy = dyn_cast<VectorType>(SliceTy);
auto SelectPartitionTy = [&]() -> std::tuple<Type *, bool, VectorType *> {
// First check if the partition is viable for vector promotion.
//
// We prefer vector promotion over integer widening promotion when:
// - The vector element type is a floating-point type.
// - All the loads/stores to the alloca are vector loads/stores to the
// entire alloca.
//
// Otherwise when there is an integer vector with mixed
// loads/stores we prefer integer widening promotion because it's more
// likely the user is doing bitwise arithmetic and we generate better code.
VectorType *VecTy =
isVectorPromotionViable(P, DL, AI.getFunction()->getVScaleValue());
// If the vector element type is a floating-point type, we prefer vector
// promotion.
if (VecTy && VecTy->getElementType()->isFloatingPointTy())
return {VecTy, false, VecTy};

// Check if there is a common type that all slices of the partition use that
// spans the partition.
auto [CommonUseTy, LargestIntTy, OnlyIntrinsicUsers] =
findCommonType(P.begin(), P.end(), P.endOffset());
if (CommonUseTy) {
TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy);
if (CommonUseSize.isFixed() &&
CommonUseSize.getFixedValue() >= P.size()) {
// We prefer vector promotion here because if vector promotion is viable
// and there is a common type used, then it implies the second listed
// condition for preferring vector promotion is true.
if (VecTy)
return {VecTy, false, VecTy};
return {CommonUseTy, isIntegerWideningViable(P, CommonUseTy, DL),
nullptr};
}
}
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
DL.isLegalInteger(P.size() * 8)) {
SliceTy = Type::getIntNTy(*C, P.size() * 8);
}

// If the common use types are not viable for promotion then attempt to find
// another type that is viable.
if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL, VScale))
// If there are only intrinsic users, try to represent as a legal integer
// type because we are probably just copying data around and the integer can
// be promoted.
if (OnlyIntrinsicUsers && DL.isLegalInteger(P.size() * 8))
return {Type::getIntNTy(*C, P.size() * 8), false, nullptr};

// Can we find an appropriate subtype in the original allocated
// type?
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size())) {
VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
if (TypePartitionVecTy &&
checkVectorTypeForPromotion(P, TypePartitionVecTy, DL, VScale))
SliceTy = TypePartitionTy;
// If the partition is an integer array that can be spanned by a legal
// integer type, prefer to represent it as a legal integer type because
// it's more likely to be promotable.
if (TypePartitionTy->isArrayTy() &&
TypePartitionTy->getArrayElementType()->isIntegerTy() &&
DL.isLegalInteger(P.size() * 8))
TypePartitionTy = Type::getIntNTy(*C, P.size() * 8);
// There was no common type used, so we prefer integer widening promotion.
if (isIntegerWideningViable(P, TypePartitionTy, DL))
return {TypePartitionTy, true, nullptr};
if (VecTy)
return {VecTy, false, VecTy};
// If we couldn't promote with TypePartitionTy, try with the largest
// integer type used.
if (LargestIntTy &&
DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size() &&
isIntegerWideningViable(P, LargestIntTy, DL))
return {LargestIntTy, true, nullptr};
// Fallback to TypePartitionTy and we probably won't promote.
return {TypePartitionTy, false, nullptr};
}

if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
// Select the largest integer type used if it spans the partition.
if (LargestIntTy &&
DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size())
return {LargestIntTy, false, nullptr};

bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
// Select a legal integer type if it spans the partition.
if (DL.isLegalInteger(P.size() * 8))
return {Type::getIntNTy(*C, P.size() * 8), false, nullptr};

// Fallback to an i8 array.
return {ArrayType::get(Type::getInt8Ty(*C), P.size()), false, nullptr};
};

VectorType *VecTy =
IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL, VScale);
if (VecTy)
SliceTy = VecTy;
auto [PartitionTy, IsIntegerPromotable, VecTy] = SelectPartitionTy();

// Check for the case where we're going to rewrite to a new alloca of the
// exact same type as the original, and with the same access offsets. In that
Expand All @@ -5274,7 +5307,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// P.beginOffset() can be non-zero even with the same type in a case with
// out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
AllocaInst *NewAI;
if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) {
NewAI = &AI;
// FIXME: We should be able to bail at this point with "nothing changed".
// FIXME: We might want to defer PHI speculation until after here.
Expand All @@ -5284,10 +5317,10 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy);
NewAI = new AllocaInst(
SliceTy, AI.getAddressSpace(), nullptr,
IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
PartitionTy, AI.getAddressSpace(), nullptr,
IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
AI.getIterator());
// Copy the old AI debug location over to the new one.
Expand Down
Loading