diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 6adf99531e30e..d2c4c7ee50bc8 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -5292,8 +5292,8 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
     } else {
       // Otherwise this is array-like indexing. The local offset is the index
       // multiplied by the element size.
-      auto *ElementSize = llvm::ConstantInt::get(
-          IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType()));
+      auto *ElementSize =
+          llvm::ConstantInt::get(IntPtrTy, GTI.getSequentialElementStride(DL));
       auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true);
       LocalOffset = eval(BO_Mul, ElementSize, IndexS);
     }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7ad3ce512a355..2be7256423e42 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1048,7 +1048,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
         if (TargetType->isScalableTy())
           return TTI::TCC_Basic;
         int64_t ElementSize =
-            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
+            GTI.getSequentialElementStride(DL).getFixedValue();
         if (ConstIdx) {
           BaseOffset +=
               ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
diff --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
index f3272327c3f8b..1092b636e023a 100644
--- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/User.h"
@@ -30,7 +31,39 @@ template <typename ItTy = User::const_op_iterator>
 class generic_gep_type_iterator {
   ItTy OpIt;
-  PointerUnion<StructType *, Type *> CurTy;
+  // We use two different mechanisms to store the type a GEP index applies to.
+  // In some cases, we need to know the outer aggregate type the index is
+  // applied within, e.g. a struct. In such cases, we store the aggregate type
+  // in the iterator, and derive the element type on the fly.
+  //
+  // However, this is not always possible, because for the outermost index
+  // there is no containing type. In such cases, or if the containing type is
+  // not relevant, e.g. for arrays, the element type is stored as Type* in
+  // CurTy.
+  //
+  // If CurTy contains a Type* value, this does not imply anything about the
+  // type itself, because it is the element type and not the outer type.
+  // In particular, Type* can be a struct type.
+  //
+  // Consider this example:
+  //
+  //    %my.struct = type { i32, [ 4 x float ] }
+  //    [...]
+  //    %gep = getelementptr %my.struct, ptr %ptr, i32 10, i32 1, i32 3
+  //
+  // Iterating over the indices of this GEP, CurTy will contain the following
+  // values:
+  //    * i32 10: The outer index always operates on the GEP value type.
+  //              CurTy contains a Type* pointing at `%my.struct`.
+  //    * i32 1:  This index is within a struct.
+  //              CurTy contains a StructType* pointing at `%my.struct`.
+  //    * i32 3:  This index is within an array. We reuse the "flat" indexing
+  //              for arrays which is also used in the top level GEP index.
+  //              CurTy contains a Type* pointing at `float`.
+  //
+  // Vectors are handled separately because the layout of vectors is different
+  // for overaligned elements: Vectors are always bit-packed, whereas arrays
+  // respect ABI alignment of the elements.
+  PointerUnion<StructType *, VectorType *, Type *> CurTy;

   generic_gep_type_iterator() = default;
@@ -69,6 +102,8 @@ class generic_gep_type_iterator {
   Type *getIndexedType() const {
     if (auto *T = dyn_cast_if_present<Type *>(CurTy))
       return T;
+    if (auto *VT = dyn_cast_if_present<VectorType *>(CurTy))
+      return VT->getElementType();
     return cast<StructType *>(CurTy)->getTypeAtIndex(getOperand());
   }
@@ -79,7 +114,7 @@ class generic_gep_type_iterator {
     if (auto *ATy = dyn_cast<ArrayType>(Ty))
       CurTy = ATy->getElementType();
     else if (auto *VTy = dyn_cast<VectorType>(Ty))
-      CurTy = VTy->getElementType();
+      CurTy = VTy;
     else
       CurTy = dyn_cast<StructType>(Ty);
     ++OpIt;
@@ -108,7 +143,23 @@ class generic_gep_type_iterator {
   // that.
   bool isStruct() const { return isa<StructType *>(CurTy); }
-  bool isSequential() const { return isa<Type *>(CurTy); }
+  bool isVector() const { return isa<VectorType *>(CurTy); }
+  bool isSequential() const { return !isStruct(); }
+
+  // For sequential GEP indices (all except those into structs), the index
+  // value can be translated into a byte offset by multiplying with an element
+  // stride. This function returns this stride, which depends both on the
+  // element type and on the containing aggregate type, as vectors always
+  // tightly bit-pack their elements.
+  TypeSize getSequentialElementStride(const DataLayout &DL) const {
+    assert(isSequential());
+    Type *ElemTy = getIndexedType();
+    if (isVector()) {
+      assert(DL.typeSizeEqualsStoreSize(ElemTy) && "Not byte-addressable");
+      return DL.getTypeStoreSize(ElemTy);
+    }
+    return DL.getTypeAllocSize(ElemTy);
+  }

   StructType *getStructType() const { return cast<StructType *>(CurTy); }
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 97f60d28e4991..9eb7e914687ce 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -639,7 +639,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;

       // Don't attempt to analyze GEPs if the scalable index is not zero.
-      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
       if (AllocTypeSize.isScalable()) {
         Decomposed.Base = V;
         return Decomposed;
@@ -650,7 +650,7 @@ BASicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       continue;
     }

-    TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+    TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
     if (AllocTypeSize.isScalable()) {
       Decomposed.Base = V;
       return Decomposed;
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 7096e06d925ad..1fa7badaa4fa0 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1429,7 +1429,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
       continue;
     }

-    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+    APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));
     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
   }
   return true;
diff --git a/llvm/lib/Analysis/Local.cpp b/llvm/lib/Analysis/Local.cpp
index 30757abeb0980..f5e080d2c78e6 100644
--- a/llvm/lib/Analysis/Local.cpp
+++ b/llvm/lib/Analysis/Local.cpp
@@ -64,7 +64,7 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
       // Convert to correct type.
       if (Op->getType() != IntIdxTy)
         Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
-      TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
+      TypeSize TSize = GTI.getSequentialElementStride(DL);
       if (TSize != TypeSize::getFixed(1)) {
         Value *Scale = Builder->CreateTypeSize(IntIdxTy->getScalarType(), TSize);
         if (IntIdxTy->isVectorTy())
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 89666018d9251..aed60cc5a3f5e 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2703,7 +2703,10 @@ static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
     // If it's a type with the same allocation size as the result of the GEP we
     // can peel off the zero index.
-    if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
+    TypeSize ElemSize = GEPTI.isStruct()
+                            ? DL.getTypeAllocSize(GEPTI.getIndexedType())
+                            : GEPTI.getSequentialElementStride(DL);
+    if (ElemSize != GEPAllocSize)
       break;
     --LastOperand;
   }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 16d78c1ded6d7..439127e5c9540 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1196,7 +1196,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
         KnownBits IndexBits(IndexBitWidth);
         computeKnownBits(Index, IndexBits, Depth + 1, Q);
-        TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
+        TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
         uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
         KnownBits ScalingFactor(IndexBitWidth);
         // Multiply by current sizeof type.
@@ -2128,7 +2128,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
     }

     // If we have a zero-sized type, the index doesn't matter. Keep looping.
-    if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).isZero())
+    if (GTI.getSequentialElementStride(Q.DL).isZero())
       continue;

     // Fast path the constant operand case both for efficiency and so we don't
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6e99fb133e26a..5bd4c6b067d79 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4776,7 +4776,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
             cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
         ConstantOffset += SL->getElementOffset(Idx);
       } else {
-        TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
+        TypeSize TS = GTI.getSequentialElementStride(DL);
         if (TS.isNonZero()) {
           // The optimisations below currently only work for fixed offsets.
           if (TS.isScalable())
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index bea29642cd003..9c11113902a24 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1545,7 +1545,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
       Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
       continue;
     } else {
-      uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+      uint64_t ElementSize = GTI.getSequentialElementStride(*DL);

       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index f3d8edb8926b6..09e260394262b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -560,15 +560,13 @@ bool FastISel::selectGetElementPtr(const User *I) {
         }
       }
     } else {
-      Type *Ty = GTI.getIndexedType();
-
       // If this is a constant subscript, handle it quickly.
       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->isZero())
           continue;
         // N = N + Offset
         uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
-        TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
+        TotalOffs += GTI.getSequentialElementStride(DL) * IdxN;
         if (TotalOffs >= MaxOffs) {
           N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
           if (!N) // Unhandled operand. Halt "fast" selection and bail.
@@ -585,7 +583,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
       }

       // N = N + Idx * ElementSize;
-      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
       Register IdxN = getRegForGEPIndex(Idx);
       if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
         return false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 192f7bc8d2aa1..78ebd2d33459a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4114,7 +4114,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
       MVT IdxTy = MVT::getIntegerVT(IdxSize);
       TypeSize ElementSize =
-          DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
+          GTI.getSequentialElementStride(DAG.getDataLayout());
       // We intentionally mask away the high bits here; ElementSize may not
       // fit in IdxTy.
       APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 770fc93490835..ae978070ac9f9 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1074,7 +1074,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
         assert(BitWidth == 64 && "Invalid index type for getelementptr");
         Idx = (int64_t)IdxGV.IntVal.getZExtValue();
       }
-      Total += getDataLayout().getTypeAllocSize(I.getIndexedType()) * Idx;
+      Total += I.getSequentialElementStride(getDataLayout()) * Idx;
     }
   }
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index e28f043cf9e0d..a2f5714c70687 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -936,9 +936,8 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
       // Add in the offset, as calculated by the structure layout info...
       Result += Layout->getElementOffset(FieldNo);
     } else {
-      // Get the array index and the size of each array element.
-      if (int64_t arrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
-        Result += arrayIdx * getTypeAllocSize(GTI.getIndexedType());
+      if (int64_t ArrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
+        Result += ArrayIdx * GTI.getSequentialElementStride(*this);
     }
   }
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index cd982c7da102a..16a89534b4b3e 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -87,7 +87,7 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
       /// If the index isn't known, we take 1 because it is the index that will
       /// give the worse alignment of the offset.
       const uint64_t ElemCount = OpC ? OpC->getZExtValue() : 1;
-      Offset = DL.getTypeAllocSize(GTI.getIndexedType()) * ElemCount;
+      Offset = GTI.getSequentialElementStride(DL) * ElemCount;
     }
     Result = Align(MinAlign(Offset, Result.value()));
   }
@@ -157,7 +157,7 @@ bool GEPOperator::accumulateConstantOffset(
       continue;
     }
     if (!AccumulateOffset(ConstOffset->getValue(),
-                          DL.getTypeAllocSize(GTI.getIndexedType())))
+                          GTI.getSequentialElementStride(DL)))
       return false;
     continue;
   }
@@ -170,8 +170,7 @@ bool GEPOperator::accumulateConstantOffset(
     if (!ExternalAnalysis(*V, AnalysisIndex))
       return false;
     UsedExternalAnalysis = true;
-    if (!AccumulateOffset(AnalysisIndex,
-                          DL.getTypeAllocSize(GTI.getIndexedType())))
+    if (!AccumulateOffset(AnalysisIndex, GTI.getSequentialElementStride(DL)))
       return false;
   }
   return true;
 }
@@ -218,14 +217,13 @@ bool GEPOperator::collectOffset(
       continue;
     }
     CollectConstantOffset(ConstOffset->getValue(),
-                          DL.getTypeAllocSize(GTI.getIndexedType()));
+                          GTI.getSequentialElementStride(DL));
     continue;
   }

   if (STy || ScalableType)
     return false;
-  APInt IndexedSize =
-      APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+  APInt IndexedSize = APInt(BitWidth, GTI.getSequentialElementStride(DL));
   // Insert an initial offset of 0 for V iff none exists already, then
   // increment the offset by IndexedSize.
   if (!IndexedSize.isZero()) {
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index b6e25c46b514d..94b0ae7435c94 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -1015,7 +1015,7 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
     // Otherwise, we have a sequential type like an array or fixed-length
     // vector. Multiply the index by the ElementSize.
-    TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType());
+    TypeSize Size = GTI.getSequentialElementStride(DL);
     if (Size.isScalable())
       return std::nullopt;
     Offset += Size.getFixedValue() * OpC->getSExtValue();
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 1ea63a5d6ec08..e98f6c4984a75 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -645,7 +645,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
         TmpOffset += SL->getElementOffset(Idx);
       } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
         while (true) {
           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
             // Constant-offset addressing.
@@ -4978,15 +4978,13 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
       if (Field)
         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
     } else {
-      Type *Ty = GTI.getIndexedType();
-
       // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
-        TotalOffs +=
-            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+        TotalOffs += GTI.getSequentialElementStride(DL) *
+                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
@@ -4997,7 +4995,7 @@
      }

      // N = N + Idx * ElementSize;
-      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 1d6aaeb7433b0..cb3a709f7003b 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -747,7 +747,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 7fcf375aa10b6..192ed1cec79a8 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -492,7 +492,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 42f5a4e624c49..56af80f9cedee 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -350,7 +350,7 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 5ad1e082344e7..1129206800ad3 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -362,7 +362,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
    VecOperand = i;

-    TypeSize TS = DL->getTypeAllocSize(GTI.getIndexedType());
+    TypeSize TS = GTI.getSequentialElementStride(*DL);
    if (TS.isScalable())
      return std::make_pair(nullptr, nullptr);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 37abbb072cdd3..15dc44a043957 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -278,7 +278,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        for (;;) {
          if (const auto *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0ba31e173a1a7..c789ac82ac943 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -916,7 +916,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
        // A array/variable index is always of the form i*S where S is the
        // constant scale size. See if we can push the scale into immediates.
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 656abdb0abbff..75cddfa16d6db 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1097,10 +1097,8 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
      // For array or vector indices, scale the index by the size of the
      // type.
      APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
-      GEPOffset +=
-          Index *
-          APInt(Offset.getBitWidth(),
-                DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue());
+      GEPOffset += Index * APInt(Offset.getBitWidth(),
+                                 GTI.getSequentialElementStride(DL));
    }

    // If this index has computed an intermediate pointer which is not
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index b8c9d9d100f11..225dd454068c8 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -843,7 +843,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
-            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+            ConstantOffset * GTI.getSequentialElementStride(*DL);
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
@@ -884,7 +884,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
      continue;
    APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
-                              DL->getTypeAllocSize(GTI.getIndexedType()));
+                              GTI.getSequentialElementStride(*DL));
    // Scale the index by element size.
    if (ElementSize != 1) {
      if (ElementSize.isPowerOf2()) {
@@ -946,7 +946,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
      continue;
    APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
-                              DL->getTypeAllocSize(GTI.getIndexedType()));
+                              GTI.getSequentialElementStride(*DL));
    // Scale the index by element size.
    if (ElementSize != 1) {
      if (ElementSize.isPowerOf2()) {
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 543469d62fe73..ca1f3a0c0ae34 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -547,7 +547,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
    // indices except this current one.
    const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);
    Value *ArrayIdx = GEP->getOperand(I);
-    uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+    uint64_t ElementSize = GTI.getSequentialElementStride(*DL);
    if (ArrayIdx->getType()->getIntegerBitWidth() <=
        DL->getIndexSizeInBits(GEP->getAddressSpace())) {
      // Skip factoring if ArrayIdx is wider than the index size, because
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index fa2459d1ca028..1f11d4894f775 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1193,7 +1193,7 @@ std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
      OpA->getType() != OpB->getType())
    return std::nullopt;

-  uint64_t Stride = DL.getTypeAllocSize(GTIA.getIndexedType());
+  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 373b7f5f2fc0a..642c3eb2a0e41 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s

-target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64-f16:32"

 %intstruct = type { i32 }
 %pair = type { i32, i32 }
@@ -111,6 +111,16 @@ define void @test_evaluate_gep_as_ptrs_array(ptr addrspace(2) %B) {
   ret void
 }

+; This should be turned into a constexpr instead of being an instruction
+define void @test_overaligned_vec(i8 %B) {
+; CHECK-LABEL: @test_overaligned_vec(
+; CHECK-NEXT:    store i8 [[B:%.*]], ptr getelementptr inbounds ([10 x i8], ptr @Global, i64 0, i64 2), align 1
+; CHECK-NEXT:    ret void
+  %A = getelementptr <2 x half>, ptr @Global, i64 0, i64 1
+  store i8 %B, ptr %A
+  ret void
+}
+
 define ptr @test7(ptr %I, i64 %C, i64 %D) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[I:%.*]], i64 [[C:%.*]]
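
Note (editor's illustration, not part of the patch): the sketch below shows the caller-side pattern this change converts the callers above to — walking a GEP's indices with gep_type_iterator and using the new getSequentialElementStride() for array/vector steps instead of DL.getTypeAllocSize(GTI.getIndexedType()). The helper name accumulateConstantGEPOffset and its assumptions (all indices constant, nothing scalable) are hypothetical; only the iterator API calls come from the patch.

// Illustrative sketch only; accumulateConstantGEPOffset is a hypothetical
// helper, not an LLVM API. Assumes every index is a ConstantInt.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include <optional>

using namespace llvm;

static std::optional<int64_t>
accumulateConstantGEPOffset(const GetElementPtrInst *GEP, const DataLayout &DL) {
  int64_t Offset = 0;
  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    auto *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      return std::nullopt; // This sketch only handles all-constant GEPs.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      // Struct field: the byte offset comes from the struct layout.
      Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
      continue;
    }
    // Array or vector step: scale the index by the element stride. For vector
    // element types this is the bit-packed store size; for arrays it is the
    // ABI alloc size, matching the old getTypeAllocSize(getIndexedType()).
    TypeSize Stride = GTI.getSequentialElementStride(DL);
    if (Stride.isScalable())
      return std::nullopt;
    Offset += Stride.getFixedValue() * OpC->getSExtValue();
  }
  return Offset;
}

This stride distinction is what the new InstCombine test exercises: with the added "-f16:32" datalayout, a half element has a 2-byte store size but a 4-byte alloc size, so `getelementptr <2 x half>, ptr @Global, i64 0, i64 1` folds to a byte offset of 2 (bit-packed vector stride) rather than 4, matching the CHECK line in the test above.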