diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h index 7d896c44f4679..830eed5d60ee4 100644 --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -297,6 +297,13 @@ class MemoryLocation { return MemoryLocation(Ptr, LocationSize::beforeOrAfterPointer(), AATags); } + // Return the exact size if the exact size is known at compiletime, + // otherwise return LocationSize::beforeOrAfterPointer(). + static LocationSize getSizeOrUnknown(const TypeSize &T) { + return T.isScalable() ? LocationSize::beforeOrAfterPointer() + : LocationSize::precise(T.getFixedValue()); + } + MemoryLocation() : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()) {} explicit MemoryLocation(const Value *Ptr, LocationSize Size, diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 470997b31fe85..a0bc3aa1ed314 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -1060,9 +1060,8 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { int64_t Offset, LocationSize Size) { return getMachineMemOperand( MMO, Offset, - !Size.hasValue() ? LLT() - : Size.isScalable() - ? LLT::scalable_vector(1, 8 * Size.getValue().getKnownMinValue()) + !Size.hasValue() || Size.isScalable() + ? LLT() : LLT::scalar(8 * Size.getValue().getKnownMinValue())); } MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index fb9656c09ca39..9fc8ecd60b03f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -128,14 +128,14 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1, // vector objects on the stack. // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the // following situations arise: - if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) { + if (PtrDiff >= 0 && Size1.hasValue()) { // [----BasePtr0----] // [---BasePtr1--] // ========PtrDiff========> IsAlias = !((int64_t)Size1.getValue() <= PtrDiff); return true; } - if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) { + if (PtrDiff < 0 && Size2.hasValue()) { // [----BasePtr0----] // [---BasePtr1--] // =====(-PtrDiff)====> @@ -248,20 +248,10 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI, return false; } - // If NumBytes is scalable and offset is not 0, conservatively return may - // alias - if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) || - (MUC1.NumBytes.isScalable() && MUC1.Offset != 0)) - return true; - - const bool BothNotScalable = - !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable(); - // Try to prove that there is aliasing, or that there is no aliasing. Either // way, we can return now. If nothing can be proved, proceed with more tests. bool IsAlias; - if (BothNotScalable && - GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) + if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) return IsAlias; // The following all rely on MMO0 and MMO1 being valid. @@ -277,18 +267,12 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI, Size1.hasValue()) { // Use alias analysis information. 
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = - Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset; - int64_t Overlap1 = - Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset; - LocationSize Loc0 = - Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0); - LocationSize Loc1 = - Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1); - - if (AA->isNoAlias( - MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()), - MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo()))) + int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset; + int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset; + if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0, + MUC0.MMO->getAAInfo()), + MemoryLocation(MUC1.MMO->getValue(), Overlap1, + MUC1.MMO->getAAInfo()))) return false; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 83604003a038b..e74f75ea85857 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1323,7 +1323,6 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, LocationSize WidthB = MMOb->getSize(); bool KnownWidthA = WidthA.hasValue(); bool KnownWidthB = WidthB.hasValue(); - bool BothMMONonScalable = !WidthA.isScalable() && !WidthB.isScalable(); const Value *ValA = MMOa->getValue(); const Value *ValB = MMOb->getValue(); @@ -1339,14 +1338,12 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, SameVal = true; } - if (SameVal && BothMMONonScalable) { + if (SameVal) { if (!KnownWidthA || !KnownWidthB) return true; int64_t MaxOffset = std::max(OffsetA, OffsetB); - int64_t LowWidth = (MinOffset == OffsetA) - ? WidthA.getValue().getKnownMinValue() - : WidthB.getValue().getKnownMinValue(); - return (MinOffset + LowWidth > MaxOffset); + LocationSize LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; + return (MinOffset + (int)LowWidth.getValue() > MaxOffset); } if (!AA) @@ -1358,29 +1355,15 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); - // If Scalable Location Size has non-zero offset, Width + Offset does not work - // at the moment - if ((WidthA.isScalable() && OffsetA > 0) || - (WidthB.isScalable() && OffsetB > 0)) - return true; - - int64_t OverlapA = - KnownWidthA ? WidthA.getValue().getKnownMinValue() + OffsetA - MinOffset - : MemoryLocation::UnknownSize; - int64_t OverlapB = - KnownWidthB ? WidthB.getValue().getKnownMinValue() + OffsetB - MinOffset - : MemoryLocation::UnknownSize; - - LocationSize LocA = (WidthA.isScalable() || !KnownWidthA) - ? WidthA - : LocationSize::precise(OverlapA); - LocationSize LocB = (WidthB.isScalable() || !KnownWidthB) - ? WidthB - : LocationSize::precise(OverlapB); + int64_t OverlapA = KnownWidthA ? WidthA.getValue() + OffsetA - MinOffset + : MemoryLocation::UnknownSize; + int64_t OverlapB = KnownWidthB ? WidthB.getValue() + OffsetB - MinOffset + : MemoryLocation::UnknownSize; return !AA->isNoAlias( - MemoryLocation(ValA, LocA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(ValB, LocB, UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); + MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValB, OverlapB, + UseTBAA ? 
MMOb->getAAInfo() : AAMDNodes())); } bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index ace05902d5df7..937ca539513af 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1107,13 +1107,12 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags F, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) - : MachineMemOperand( - ptrinfo, F, - !TS.hasValue() ? LLT() - : TS.isScalable() - ? LLT::scalable_vector(1, 8 * TS.getValue().getKnownMinValue()) - : LLT::scalar(8 * TS.getValue().getKnownMinValue()), - BaseAlignment, AAInfo, Ranges, SSID, Ordering, FailureOrdering) {} + : MachineMemOperand(ptrinfo, F, + !TS.hasValue() || TS.isScalable() + ? LLT() + : LLT::scalar(8 * TS.getValue().getKnownMinValue()), + BaseAlignment, AAInfo, Ranges, SSID, Ordering, + FailureOrdering) {} void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { // The Value and Offset may differ due to CSE. But the flags and size diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cbba3a294b3d6..14725c3e99b8a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24266,7 +24266,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // TODO: Use "BaseIndexOffset" to make this more effective. SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL); - LocationSize StoreSize = LocationSize::precise(VT.getStoreSize()); + LocationSize StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize()); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO; if (Offset.isScalable()) { @@ -27933,10 +27933,14 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; - TypeSize Size = LSN->getMemoryVT().getStoreSize(); - return {LSN->isVolatile(), LSN->isAtomic(), - LSN->getBasePtr(), Offset /*base offset*/, - LocationSize::precise(Size), LSN->getMemOperand()}; + LocationSize Size = + MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize()); + return {LSN->isVolatile(), + LSN->isAtomic(), + LSN->getBasePtr(), + Offset /*base offset*/, + Size, + LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) return {false /*isVolatile*/, @@ -27978,13 +27982,6 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { return false; } - // If NumBytes is scalable and offset is not 0, conservatively return may - // alias - if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() && - MUC0.Offset != 0) || - (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() && - MUC1.Offset != 0)) - return true; // Try to prove that there is aliasing, or that there is no aliasing. Either // way, we can return now. If nothing can be proved, proceed with more tests. 
bool IsAlias; @@ -28015,22 +28012,18 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { Align OrigAlignment1 = MUC1.MMO->getBaseAlign(); LocationSize Size0 = MUC0.NumBytes; LocationSize Size1 = MUC1.NumBytes; - if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() && - !Size1.isScalable() && Size0 == Size1 && - OrigAlignment0 > Size0.getValue().getKnownMinValue() && - SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 && - SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) { + Size0.hasValue() && Size1.hasValue() && Size0 == Size1 && + OrigAlignment0 > Size0.getValue() && + SrcValOffset0 % Size0.getValue() == 0 && + SrcValOffset1 % Size1.getValue() == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); // There is no overlap between these relatively aligned accesses of // similar size. Return no alias. - if ((OffAlign0 + static_cast<int64_t>( - Size0.getValue().getKnownMinValue())) <= OffAlign1 || - (OffAlign1 + static_cast<int64_t>( - Size1.getValue().getKnownMinValue())) <= OffAlign0) + if ((OffAlign0 + (int64_t)Size0.getValue()) <= OffAlign1 || + (OffAlign1 + (int64_t)Size1.getValue()) <= OffAlign0) return false; } @@ -28047,18 +28040,12 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { Size0.hasValue() && Size1.hasValue()) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = - Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset; - int64_t Overlap1 = - Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset; - LocationSize Loc0 = - Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0); - LocationSize Loc1 = - Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1); + int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset; + int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset; if (AA->isNoAlias( - MemoryLocation(MUC0.MMO->getValue(), Loc0, + MemoryLocation(MUC0.MMO->getValue(), Overlap0, UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), - MemoryLocation(MUC1.MMO->getValue(), Loc1, + MemoryLocation(MUC1.MMO->getValue(), Overlap1, UseTBAA ? 
MUC1.MMO->getAAInfo() : AAMDNodes()))) return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ca0a95750ba8d..ab59bc96a2553 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8465,7 +8465,9 @@ SDValue SelectionDAG::getMemIntrinsicNode( EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags, LocationSize Size, const AAMDNodes &AAInfo) { - if (Size.hasValue() && !Size.getValue()) + if (Size.hasValue() && MemVT.isScalableVector()) + Size = LocationSize::beforeOrAfterPointer(); + else if (Size.hasValue() && !Size.getValue()) Size = LocationSize::precise(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); @@ -8628,7 +8630,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); - LocationSize Size = LocationSize::precise(MemVT.getStoreSize()); + LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges); @@ -8749,7 +8751,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); - LocationSize Size = LocationSize::precise(Val.getValueType().getStoreSize()); + LocationSize Size = + MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); @@ -8802,8 +8805,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment, - AAInfo); + PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), + Alignment, AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -8897,7 +8900,7 @@ SDValue SelectionDAG::getLoadVP( if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); - LocationSize Size = LocationSize::precise(MemVT.getStoreSize()); + LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges); @@ -9050,8 +9053,8 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment, - AAInfo); + PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), + Alignment, AAInfo); return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO, IsCompressing); } @@ -11792,9 +11795,10 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. + // TODO: Make MachineMemOperands aware of scalable vectors. 
assert( (!MMO->getType().isValid() || - TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) && + memvt.getStoreSize().getKnownMinValue() <= MMO->getSize().getValue()) && "Size mismatch!"); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..9670c3ac8430e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -106,6 +106,8 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, int64_t PtrDiff; if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) { // If the size of memory access is unknown, do not use it to analysis. + // One example of unknown size memory access is to load/store scalable + // vector objects on the stack. // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the // following situations arise: if (PtrDiff >= 0 && NumBytes0.hasValue() && !NumBytes0.isScalable()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3f69f7ad54477..50b5ac01135dc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4974,8 +4974,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, - LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), - Ranges); + LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), Ranges); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 9518d573bccdd..d03da07f38cb8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2687,7 +2687,10 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( return false; // The maximum vscale is 16 under AArch64, return the maximal extent for the // vector. - Width = LocationSize::precise(WidthN); + Width = WidthN.isScalable() + ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector / + AArch64::SVEBitsPerBlock + : WidthN.getKnownMinValue(); BaseOps.push_back(BaseOp); return true; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 27387595164a4..1239a92956ca8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10402,15 +10402,9 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { - MachineMemOperand *MMO = Load->getMemOperand(); - MachineFunction &MF = DAG.getMachineFunction(); - MMO = MF.getMachineMemOperand( - MMO, MMO->getPointerInfo(), - MMO->getMemoryType().isValid() - ? 
LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits()) - : MMO->getMemoryType()); SDValue NewLoad = - DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), MMO); + DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), + Load->getMemOperand()); SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); } @@ -10468,17 +10462,9 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, const auto [MinVLMAX, MaxVLMAX] = RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && - getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { - MachineMemOperand *MMO = Store->getMemOperand(); - MachineFunction &MF = DAG.getMachineFunction(); - MMO = MF.getMachineMemOperand( - MMO, MMO->getPointerInfo(), - MMO->getMemoryType().isValid() - ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits()) - : MMO->getMemoryType()); + getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(), - MMO); - } + Store->getMemOperand()); SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget); diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll index d4d803a91cfa1..58299696e78fc 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll @@ -3,7 +3,7 @@ define void @UphPNR(target("aarch64.svcount") %predcnt) { entry: ; CHECK: %0:ppr = COPY $p0 -; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store () into %ir.predcnt.addr) +; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) ; CHECK: %1:pnr_p8to15 = COPY %0 ; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_p8to15 */, %1 ; CHECK: RET_ReallyLR @@ -17,7 +17,7 @@ entry: define void @UpaPNR(target("aarch64.svcount") %predcnt) { entry: ; CHECK: %0:ppr = COPY $p0 -; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store () into %ir.predcnt.addr) +; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) ; CHECK: %1:pnr = COPY %0 ; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR */, %1 ; CHECK: RET_ReallyLR @@ -31,7 +31,7 @@ entry: define void @UplPNR(target("aarch64.svcount") %predcnt) { entry: ; CHECK: %0:ppr = COPY $p0 -; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store () into %ir.predcnt.addr) +; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) ; CHECK: %1:pnr_3b = COPY %0 ; CHECK: INLINEASM &"fadd z0.h, $0/m, z0.h, #0.5", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_3b */, %1 ; CHECK: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll index 7244ac949ab88..9a4e01a29ecb6 100644 --- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll +++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll @@ -14,12 +14,12 @@ define void @array_1D(ptr %addr) #0 { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: 
ld1d { z1.d }, p0/z, [x0, #2, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0] +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [sp] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -81,18 +81,18 @@ define void @array_2D(ptr %addr) #0 { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #5, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #4, mul vl] -; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #2, mul vl] -; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #3, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #5, mul vl] -; CHECK-NEXT: st1d { z3.d }, p0, [sp, #4, mul vl] -; CHECK-NEXT: st1d { z5.d }, p0, [sp, #3, mul vl] -; CHECK-NEXT: st1d { z4.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #5, mul vl] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #4, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #5, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #4, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z4.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [sp] ; CHECK-NEXT: addvl sp, sp, #6 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll index f03a6f018d34d..7292d52aaf476 100644 --- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll +++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll @@ -13,12 +13,12 @@ define void @test(ptr %addr) #0 { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0] +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [sp] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/bug-88799-scalable-memory-type.ll 
b/llvm/test/CodeGen/RISCV/bug-88799-scalable-memory-type.ll new file mode 100644 index 0000000000000..e732db414bd47 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bug-88799-scalable-memory-type.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s | FileCheck %s -check-prefix=RV64I + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +; Function Attrs: vscale_range(2,2) +define i32 @main() #0 { +; RV64I-LABEL: main: +; RV64I: # %bb.0: # %vector.body +; RV64I-NEXT: lui a0, 1040368 +; RV64I-NEXT: addiw a0, a0, -144 +; RV64I-NEXT: vl2re16.v v8, (a0) +; RV64I-NEXT: vl2re16.v v10, (a0) +; RV64I-NEXT: vs2r.v v8, (zero) +; RV64I-NEXT: vs2r.v v10, (zero) +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: ret +vector.body: + %0 = load <16 x i16>, ptr getelementptr ([3 x [23 x [23 x i16]]], ptr null, i64 -10593, i64 1, i64 22, i64 0), align 16 + store <16 x i16> %0, ptr null, align 2 + %wide.load = load , ptr getelementptr ([3 x [23 x [23 x i16]]], ptr null, i64 -10593, i64 1, i64 22, i64 0), align 16 + store %wide.load, ptr null, align 2 + ret i32 0 +} + +attributes #0 = { vscale_range(2,2) "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-smmpm,-experimental-smnpm,-experimental-ssnpm,-experimental-sspm,-experimental-ssqosid,-experimental-supm,-experimental-zaamo,-experimental-zabha,-experimental-zalasr,-experimental-zalrsc,-experimental-zfbfmin,-experimental-zicfilp,-experimental-zicfiss,-experimental-ztso,-experimental-zvfbfmin,-experimental-zvfbfwma,-h,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smepmp,-ssaia,-ssccptr,-sscofpmf,-sscounterenw,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-za128rs,-za64rs,-zacas,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvbb,-zvbc,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } + diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll index 1fe91c721f4dd..1d025a2f776f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll @@ -18,15 +18,15 @@ define void @test(ptr %addr) { ; CHECK-NEXT: add a2, a0, a1 ; CHECK-NEXT: vl1re64.v v8, (a2) ; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: vl1re64.v v9, (a0) -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a3, a0, a2 +; CHECK-NEXT: vl1re64.v v9, (a3) ; CHECK-NEXT: vl1re64.v v10, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v9, (a0) ; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: 
vs1r.v v10, (a2) -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: vs1r.v v9, (a2) +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vs1r.v v8, (a1) +; CHECK-NEXT: vs1r.v v10, (a0) ; CHECK-NEXT: csrrs a0, vlenb, zero ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll index a9a680d54d589..64031f8a93598 100644 --- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll @@ -16,13 +16,13 @@ define @test(ptr %addr, i64 %vl) { ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; CHECK-NEXT: csrrs a2, vlenb, zero -; CHECK-NEXT: vl1re64.v v8, (a0) -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a3, a0, a2 +; CHECK-NEXT: vl1re64.v v8, (a3) ; CHECK-NEXT: vl1re64.v v9, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs1r.v v9, (a2) +; CHECK-NEXT: vs1r.v v8, (a2) +; CHECK-NEXT: vs1r.v v9, (a0) ; CHECK-NEXT: vl1re64.v v8, (a2) ; CHECK-NEXT: vl1re64.v v9, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll index 31fd5bdbd31fd..9c30f28445e36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll @@ -16,8 +16,8 @@ define void @vpmerge_vpload_store( %passthru, ptr %p, ) into %ir.p) + ; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store unknown-size into %ir.p, align 8) ; CHECK-NEXT: PseudoRET %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) @@ -35,8 +35,8 @@ define void @vpselect_vpload_store( %passthru, ptr %p, ) into %ir.p) + ; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 1 /* ta, mu */ + ; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store unknown-size into %ir.p, align 8) ; CHECK-NEXT: PseudoRET %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl)