diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 6adf99531e30e..d2c4c7ee50bc8 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -5292,8 +5292,8 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
     } else {
       // Otherwise this is array-like indexing. The local offset is the index
       // multiplied by the element size.
-      auto *ElementSize = llvm::ConstantInt::get(
-          IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType()));
+      auto *ElementSize =
+          llvm::ConstantInt::get(IntPtrTy, GTI.getSequentialElementStride(DL));
       auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true);
       LocalOffset = eval(BO_Mul, ElementSize, IndexS);
     }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7ad3ce512a355..2be7256423e42 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1048,7 +1048,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
         if (TargetType->isScalableTy())
           return TTI::TCC_Basic;
         int64_t ElementSize =
-            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
+            GTI.getSequentialElementStride(DL).getFixedValue();
         if (ConstIdx) {
           BaseOffset +=
               ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
diff --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
index f3272327c3f8b..1092b636e023a 100644
--- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/User.h"
@@ -30,7 +31,39 @@ template <typename ItTy = User::const_op_iterator>
 class generic_gep_type_iterator {
   ItTy OpIt;
-  PointerUnion<StructType *, Type *> CurTy;
+  // We use two different mechanisms to store the type a GEP index applies to.
+  // In some cases, we need to know the outer aggregate type the index is
+  // applied within, e.g. a struct. In such cases, we store the aggregate type
+  // in the iterator, and derive the element type on the fly.
+  //
+  // However, this is not always possible, because for the outermost index
+  // there is no containing type. In such cases, or if the containing type is
+  // not relevant, e.g. for arrays, the element type is stored as Type* in
+  // CurTy.
+  //
+  // If CurTy contains a Type* value, this does not imply anything about the
+  // type itself, because it is the element type and not the outer type.
+  // In particular, Type* can be a struct type.
+  //
+  // Consider this example:
+  //
+  //    %my.struct = type { i32, [ 4 x float ] }
+  //    [...]
+  //    %gep = getelementptr %my.struct, ptr %ptr, i32 10, i32 1, i32 3
+  //
+  // Iterating over the indices of this GEP, CurTy will contain the following
+  // values:
+  //    * i32 10: The outer index always operates on the GEP value type.
+  //              CurTy contains a Type* pointing at `%my.struct`.
+  //    * i32 1:  This index is within a struct.
+  //              CurTy contains a StructType* pointing at `%my.struct`.
+  //    * i32 3:  This index is within an array. We reuse the "flat" indexing
+  //              for arrays which is also used in the top level GEP index.
+  //              CurTy contains a Type* pointing at `float`.
+  //
+  // Vectors are handled separately because the layout of vectors is different
+  // for overaligned elements: Vectors are always bit-packed, whereas arrays
+  // respect ABI alignment of the elements.
+  PointerUnion<StructType *, VectorType *, Type *> CurTy;

   generic_gep_type_iterator() = default;
@@ -69,6 +102,8 @@ class generic_gep_type_iterator {
   Type *getIndexedType() const {
     if (auto *T = dyn_cast_if_present<Type *>(CurTy))
       return T;
+    if (auto *VT = dyn_cast_if_present<VectorType *>(CurTy))
+      return VT->getElementType();
     return cast<StructType *>(CurTy)->getTypeAtIndex(getOperand());
   }
@@ -79,7 +114,7 @@ class generic_gep_type_iterator {
     if (auto *ATy = dyn_cast<ArrayType>(Ty))
       CurTy = ATy->getElementType();
     else if (auto *VTy = dyn_cast<VectorType>(Ty))
-      CurTy = VTy->getElementType();
+      CurTy = VTy;
     else
       CurTy = dyn_cast<StructType>(Ty);
     ++OpIt;
@@ -108,7 +143,23 @@ class generic_gep_type_iterator {
   // that.
   bool isStruct() const { return isa<StructType *>(CurTy); }
-  bool isSequential() const { return isa<Type *>(CurTy); }
+  bool isVector() const { return isa<VectorType *>(CurTy); }
+  bool isSequential() const { return !isStruct(); }
+
+  // For sequential GEP indices (all except those into structs), the index
+  // value can be translated into a byte offset by multiplying with an element
+  // stride. This function returns this stride, which depends both on the
+  // element type and on the containing aggregate type, as vectors always
+  // tightly bit-pack their elements.
+  TypeSize getSequentialElementStride(const DataLayout &DL) const {
+    assert(isSequential());
+    Type *ElemTy = getIndexedType();
+    if (isVector()) {
+      assert(DL.typeSizeEqualsStoreSize(ElemTy) && "Not byte-addressable");
+      return DL.getTypeStoreSize(ElemTy);
+    }
+    return DL.getTypeAllocSize(ElemTy);
+  }

   StructType *getStructType() const { return cast<StructType *>(CurTy); }
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 97f60d28e4991..9eb7e914687ce 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -639,7 +639,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;

       // Don't attempt to analyze GEPs if the scalable index is not zero.
-      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
       if (AllocTypeSize.isScalable()) {
         Decomposed.Base = V;
         return Decomposed;
@@ -650,7 +650,7 @@ BASicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       continue;
     }

-    TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+    TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
     if (AllocTypeSize.isScalable()) {
       Decomposed.Base = V;
       return Decomposed;
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 7096e06d925ad..1fa7badaa4fa0 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1429,7 +1429,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
       continue;
     }

-    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+    APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));
     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
   }
   return true;
diff --git a/llvm/lib/Analysis/Local.cpp b/llvm/lib/Analysis/Local.cpp
index 30757abeb0980..f5e080d2c78e6 100644
--- a/llvm/lib/Analysis/Local.cpp
+++ b/llvm/lib/Analysis/Local.cpp
@@ -64,7 +64,7 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
       // Convert to correct type.
       if (Op->getType() != IntIdxTy)
         Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
-      TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
+      TypeSize TSize = GTI.getSequentialElementStride(DL);
       if (TSize != TypeSize::getFixed(1)) {
         Value *Scale = Builder->CreateTypeSize(IntIdxTy->getScalarType(), TSize);
         if (IntIdxTy->isVectorTy())
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 89666018d9251..aed60cc5a3f5e 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2703,7 +2703,10 @@ static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
     // If it's a type with the same allocation size as the result of the GEP we
     // can peel off the zero index.
-    if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
+    TypeSize ElemSize = GEPTI.isStruct()
+                            ? DL.getTypeAllocSize(GEPTI.getIndexedType())
+                            : GEPTI.getSequentialElementStride(DL);
+    if (ElemSize != GEPAllocSize)
       break;
     --LastOperand;
   }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 16d78c1ded6d7..439127e5c9540 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1196,7 +1196,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
         KnownBits IndexBits(IndexBitWidth);
         computeKnownBits(Index, IndexBits, Depth + 1, Q);
-        TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
+        TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
         uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
         KnownBits ScalingFactor(IndexBitWidth);
         // Multiply by current sizeof type.
@@ -2128,7 +2128,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
     }

     // If we have a zero-sized type, the index doesn't matter. Keep looping.
-    if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).isZero())
+    if (GTI.getSequentialElementStride(Q.DL).isZero())
       continue;

     // Fast path the constant operand case both for efficiency and so we don't
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6e99fb133e26a..5bd4c6b067d79 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4776,7 +4776,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
             cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
         ConstantOffset += SL->getElementOffset(Idx);
       } else {
-        TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
+        TypeSize TS = GTI.getSequentialElementStride(DL);
         if (TS.isNonZero()) {
           // The optimisations below currently only work for fixed offsets.
           if (TS.isScalable())
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index bea29642cd003..9c11113902a24 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1545,7 +1545,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
       Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
       continue;
     } else {
-      uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+      uint64_t ElementSize = GTI.getSequentialElementStride(*DL);

       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index f3d8edb8926b6..09e260394262b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -560,15 +560,13 @@ bool FastISel::selectGetElementPtr(const User *I) {
         }
       }
     } else {
-      Type *Ty = GTI.getIndexedType();
-
       // If this is a constant subscript, handle it quickly.
       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->isZero())
           continue;
         // N = N + Offset
         uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
-        TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
+        TotalOffs += GTI.getSequentialElementStride(DL) * IdxN;
         if (TotalOffs >= MaxOffs) {
           N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
           if (!N) // Unhandled operand. Halt "fast" selection and bail.
@@ -585,7 +583,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
       }

       // N = N + Idx * ElementSize;
-      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
       Register IdxN = getRegForGEPIndex(Idx);
       if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
         return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 192f7bc8d2aa1..78ebd2d33459a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4114,7 +4114,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
       MVT IdxTy = MVT::getIntegerVT(IdxSize);
       TypeSize ElementSize =
-          DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
+          GTI.getSequentialElementStride(DAG.getDataLayout());
       // We intentionally mask away the high bits here; ElementSize may not
       // fit in IdxTy.
       APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 770fc93490835..ae978070ac9f9 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1074,7 +1074,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
         assert(BitWidth == 64 && "Invalid index type for getelementptr");
         Idx = (int64_t)IdxGV.IntVal.getZExtValue();
       }
-      Total += getDataLayout().getTypeAllocSize(I.getIndexedType()) * Idx;
+      Total += I.getSequentialElementStride(getDataLayout()) * Idx;
     }
   }
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index e28f043cf9e0d..a2f5714c70687 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -936,9 +936,8 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
       // Add in the offset, as calculated by the structure layout info...
       Result += Layout->getElementOffset(FieldNo);
     } else {
-      // Get the array index and the size of each array element.
-      if (int64_t arrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
-        Result += arrayIdx * getTypeAllocSize(GTI.getIndexedType());
+      if (int64_t ArrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
+        Result += ArrayIdx * GTI.getSequentialElementStride(*this);
     }
   }
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index cd982c7da102a..16a89534b4b3e 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -87,7 +87,7 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
       /// If the index isn't known, we take 1 because it is the index that will
       /// give the worse alignment of the offset.
       const uint64_t ElemCount = OpC ? OpC->getZExtValue() : 1;
-      Offset = DL.getTypeAllocSize(GTI.getIndexedType()) * ElemCount;
+      Offset = GTI.getSequentialElementStride(DL) * ElemCount;
     }
     Result = Align(MinAlign(Offset, Result.value()));
   }
@@ -157,7 +157,7 @@ bool GEPOperator::accumulateConstantOffset(
       continue;
     }
     if (!AccumulateOffset(ConstOffset->getValue(),
-                          DL.getTypeAllocSize(GTI.getIndexedType())))
+                          GTI.getSequentialElementStride(DL)))
       return false;
     continue;
   }
@@ -170,8 +170,7 @@ bool GEPOperator::accumulateConstantOffset(
     if (!ExternalAnalysis(*V, AnalysisIndex))
       return false;
     UsedExternalAnalysis = true;
-    if (!AccumulateOffset(AnalysisIndex,
-                          DL.getTypeAllocSize(GTI.getIndexedType())))
+    if (!AccumulateOffset(AnalysisIndex, GTI.getSequentialElementStride(DL)))
       return false;
   }
   return true;
 }
@@ -218,14 +217,13 @@ bool GEPOperator::collectOffset(
       continue;
     }
     CollectConstantOffset(ConstOffset->getValue(),
-                          DL.getTypeAllocSize(GTI.getIndexedType()));
+                          GTI.getSequentialElementStride(DL));
     continue;
   }

   if (STy || ScalableType)
     return false;
-  APInt IndexedSize =
-      APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+  APInt IndexedSize = APInt(BitWidth, GTI.getSequentialElementStride(DL));
   // Insert an initial offset of 0 for V iff none exists already, then
   // increment the offset by IndexedSize.
   if (!IndexedSize.isZero()) {
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index b6e25c46b514d..94b0ae7435c94 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -1015,7 +1015,7 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
     // Otherwise, we have a sequential type like an array or fixed-length
     // vector. Multiply the index by the ElementSize.
-    TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType());
+    TypeSize Size = GTI.getSequentialElementStride(DL);
     if (Size.isScalable())
       return std::nullopt;
     Offset += Size.getFixedValue() * OpC->getSExtValue();
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 1ea63a5d6ec08..e98f6c4984a75 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -645,7 +645,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
         TmpOffset += SL->getElementOffset(Idx);
       } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
         while (true) {
           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
             // Constant-offset addressing.
@@ -4978,15 +4978,13 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
       if (Field)
         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
     } else {
-      Type *Ty = GTI.getIndexedType();
-
       // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
-        TotalOffs +=
-            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+        TotalOffs += GTI.getSequentialElementStride(DL) *
+                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
@@ -4997,7 +4995,7 @@
      }

      // N = N + Idx * ElementSize;
-      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 1d6aaeb7433b0..cb3a709f7003b 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -747,7 +747,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 7fcf375aa10b6..192ed1cec79a8 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -492,7 +492,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 42f5a4e624c49..56af80f9cedee 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -350,7 +350,7 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 5ad1e082344e7..1129206800ad3 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -362,7 +362,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
    VecOperand = i;

-    TypeSize TS = DL->getTypeAllocSize(GTI.getIndexedType());
+    TypeSize TS = GTI.getSequentialElementStride(*DL);
    if (TS.isScalable())
      return std::make_pair(nullptr, nullptr);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 37abbb072cdd3..15dc44a043957 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -278,7 +278,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        for (;;) {
          if (const auto *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0ba31e173a1a7..c789ac82ac943 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -916,7 +916,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
        // A array/variable index is always of the form i*S where S is the
        // constant scale size. See if we can push the scale into immediates.
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 656abdb0abbff..75cddfa16d6db 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1097,10 +1097,8 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
      // For array or vector indices, scale the index by the size of the
      // type.
      APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
-      GEPOffset +=
-          Index *
-          APInt(Offset.getBitWidth(),
-                DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue());
+      GEPOffset += Index * APInt(Offset.getBitWidth(),
+                                 GTI.getSequentialElementStride(DL));
    }

    // If this index has computed an intermediate pointer which is not
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index b8c9d9d100f11..225dd454068c8 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -843,7 +843,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
-            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+            ConstantOffset * GTI.getSequentialElementStride(*DL);
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
@@ -884,7 +884,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
      continue;
    APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
-                              DL->getTypeAllocSize(GTI.getIndexedType()));
+                              GTI.getSequentialElementStride(*DL));
    // Scale the index by element size.
    if (ElementSize != 1) {
      if (ElementSize.isPowerOf2()) {
@@ -946,7 +946,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
      continue;
    APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
-                              DL->getTypeAllocSize(GTI.getIndexedType()));
+                              GTI.getSequentialElementStride(*DL));
    // Scale the index by element size.
    if (ElementSize != 1) {
      if (ElementSize.isPowerOf2()) {
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 543469d62fe73..ca1f3a0c0ae34 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -547,7 +547,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
    // indices except this current one.
    const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);
    Value *ArrayIdx = GEP->getOperand(I);
-    uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+    uint64_t ElementSize = GTI.getSequentialElementStride(*DL);
    if (ArrayIdx->getType()->getIntegerBitWidth() <=
        DL->getIndexSizeInBits(GEP->getAddressSpace())) {
      // Skip factoring if ArrayIdx is wider than the index size, because
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index fa2459d1ca028..1f11d4894f775 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1193,7 +1193,7 @@ std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
      OpA->getType() != OpB->getType())
    return std::nullopt;

-  uint64_t Stride = DL.getTypeAllocSize(GTIA.getIndexedType());
+  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 373b7f5f2fc0a..642c3eb2a0e41 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s

-target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64-f16:32"

 %intstruct = type { i32 }
 %pair = type { i32, i32 }
@@ -111,6 +111,16 @@ define void @test_evaluate_gep_as_ptrs_array(ptr addrspace(2) %B) {
   ret void
 }

+; This should be turned into a constexpr instead of being an instruction
+define void @test_overaligned_vec(i8 %B) {
+; CHECK-LABEL: @test_overaligned_vec(
+; CHECK-NEXT:    store i8 [[B:%.*]], ptr getelementptr inbounds ([10 x i8], ptr @Global, i64 0, i64 2), align 1
+; CHECK-NEXT:    ret void
+  %A = getelementptr <2 x half>, ptr @Global, i64 0, i64 1
+  store i8 %B, ptr %A
+  ret void
+}
+
 define ptr @test7(ptr %I, i64 %C, i64 %D) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[I:%.*]], i64 [[C:%.*]]
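
Note (editor's illustration, not part of the patch): the sketch below shows the caller-side pattern this change converts the callers above to — walking a GEP's indices with gep_type_iterator and using the new getSequentialElementStride() for array/vector steps instead of DL.getTypeAllocSize(GTI.getIndexedType()). The helper name accumulateConstantGEPOffset and its assumptions (all indices constant, nothing scalable) are hypothetical; only the iterator API calls come from the patch.

// Illustrative sketch only; accumulateConstantGEPOffset is a hypothetical
// helper, not an LLVM API. Assumes every index is a ConstantInt.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include <optional>

using namespace llvm;

static std::optional<int64_t>
accumulateConstantGEPOffset(const GetElementPtrInst *GEP, const DataLayout &DL) {
  int64_t Offset = 0;
  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    auto *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      return std::nullopt; // This sketch only handles all-constant GEPs.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      // Struct field: the byte offset comes from the struct layout.
      Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
      continue;
    }
    // Array or vector step: scale the index by the element stride. For vector
    // element types this is the bit-packed store size; for arrays it is the
    // ABI alloc size, matching the old getTypeAllocSize(getIndexedType()).
    TypeSize Stride = GTI.getSequentialElementStride(DL);
    if (Stride.isScalable())
      return std::nullopt;
    Offset += Stride.getFixedValue() * OpC->getSExtValue();
  }
  return Offset;
}

This stride distinction is what the new InstCombine test exercises: with the added "-f16:32" datalayout, a half element has a 2-byte store size but a 4-byte alloc size, so `getelementptr <2 x half>, ptr @Global, i64 0, i64 1` folds to a byte offset of 2 (bit-packed vector stride) rather than 4, matching the CHECK line in the test above.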