[SLP]Add support for strided loads. #80310

alexey-bataev · 2024-02-01T17:22:37Z

Added basic support for strided loads support in SLP vectorizer.
Supports constant strides only. If the strided load must be
reversed, applies -stride to avoid extra reverse shuffle.

Created using spr 1.3.5

llvmbot · 2024-02-01T17:23:08Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-backend-risc-v

Author: Alexey Bataev (alexey-bataev)

Changes

Added basic support for strided memory operations in TTI and strided
loads particularly in SLP vectorizer. Supports both runtime and constant
strides. If the strided load must be reversed, applies -stride to avoid
extra reverse shuffle.

Patch is 93.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80310.diff

12 Files Affected:

(modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+34)
(modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+13)
(modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+14)
(modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+23)
(modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+23)
(modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+278-119)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll (+65-67)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll (+19-190)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll (+2-2)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll (+5-8)
(modified) llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll (+1-1)
(modified) llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll (+1-1)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 3b615bc700bbb..b0b6dab03fa38 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -781,6 +781,9 @@ class TargetTransformInfo {
   /// Return true if the target supports masked expand load.
   bool isLegalMaskedExpandLoad(Type *DataType) const;
 
+  /// Return true if the target supports strided load.
+  bool isLegalStridedLoad(Type *DataType, Align Alignment) const;
+
   /// Return true if this is an alternating opcode pattern that can be lowered
   /// to a single instruction on the target. In X86 this is for the addsub
   /// instruction which corrsponds to a Shuffle + Fadd + FSub pattern in IR.
@@ -1412,6 +1415,20 @@ class TargetTransformInfo {
       Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       const Instruction *I = nullptr) const;
 
+  /// \return The cost of strided memory operations.
+  /// \p Opcode - is a type of memory access Load or Store
+  /// \p DataTy - a vector type of the data to be loaded or stored
+  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
+  /// \p VariableMask - true when the memory access is predicated with a mask
+  ///                   that is not a compile-time constant
+  /// \p Alignment - alignment of single element
+  /// \p I - the optional original context instruction, if one exists, e.g. the
+  ///        load/store to transform or the call to the gather/scatter intrinsic
+  InstructionCost getStridedMemoryOpCost(
+      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
+
   /// \return The cost of the interleaved memory operation.
   /// \p Opcode is the memory operation code
   /// \p VecTy is the vector type of the interleaved access.
@@ -1848,6 +1865,7 @@ class TargetTransformInfo::Concept {
                                            Align Alignment) = 0;
   virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
   virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
+  virtual bool isLegalStridedLoad(Type *DataType, Align Alignment) = 0;
   virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
                                unsigned Opcode1,
                                const SmallBitVector &OpcodeMask) const = 0;
@@ -2023,6 +2041,11 @@ class TargetTransformInfo::Concept {
                          bool VariableMask, Align Alignment,
                          TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr) = 0;
+  virtual InstructionCost
+  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+                         bool VariableMask, Align Alignment,
+                         TTI::TargetCostKind CostKind,
+                         const Instruction *I = nullptr) = 0;
 
   virtual InstructionCost getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
@@ -2341,6 +2364,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool isLegalMaskedExpandLoad(Type *DataType) override {
     return Impl.isLegalMaskedExpandLoad(DataType);
   }
+  bool isLegalStridedLoad(Type *DataType, Align Alignment) override {
+    return Impl.isLegalStridedLoad(DataType, Alignment);
+  }
   bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                        const SmallBitVector &OpcodeMask) const override {
     return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
@@ -2671,6 +2697,14 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                        Alignment, CostKind, I);
   }
+  InstructionCost
+  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+                         bool VariableMask, Align Alignment,
+                         TTI::TargetCostKind CostKind,
+                         const Instruction *I = nullptr) override {
+    return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                       Alignment, CostKind, I);
+  }
   InstructionCost getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9958b4daa6ed8..2a7e7b364ac40 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -304,6 +304,10 @@ class TargetTransformInfoImplBase {
 
   bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
 
+  bool isLegalStridedLoad(Type *DataType, Align Alignment) const {
+    return false;
+  }
+
   bool enableOrderedReductions() const { return false; }
 
   bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
@@ -687,6 +691,15 @@ class TargetTransformInfoImplBase {
     return 1;
   }
 
+  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
+                                         const Value *Ptr, bool VariableMask,
+                                         Align Alignment,
+                                         TTI::TargetCostKind CostKind,
+                                         const Instruction *I = nullptr) const {
+    return CostKind == TTI::TCK_RecipThroughput ? TTI::TCC_Expensive
+                                                : TTI::TCC_Basic;
+  }
+
   unsigned getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8902dde37cbca..b86397ae7d267 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -500,6 +500,11 @@ bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
   return TTIImpl->isLegalMaskedExpandLoad(DataType);
 }
 
+bool TargetTransformInfo::isLegalStridedLoad(Type *DataType,
+                                             Align Alignment) const {
+  return TTIImpl->isLegalStridedLoad(DataType, Alignment);
+}
+
 bool TargetTransformInfo::enableOrderedReductions() const {
   return TTIImpl->enableOrderedReductions();
 }
@@ -1041,6 +1046,15 @@ InstructionCost TargetTransformInfo::getGatherScatterOpCost(
   return Cost;
 }
 
+InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
+    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
+  InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
+      Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
 InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
     Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index fe1cdb2dfa423..9cec8ee4cb7f2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -658,6 +658,29 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
   return NumLoads * MemOpCost;
 }
 
+InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
+    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                         Alignment, CostKind, I);
+
+  if ((Opcode == Instruction::Load && !isLegalStridedLoad(DataTy, Alignment)) ||
+      Opcode != Instruction::Load)
+    return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                         Alignment, CostKind, I);
+
+  // Cost is proportional to the number of memory operations implied.  For
+  // scalable vectors, we use an estimate on that number since we don't
+  // know exactly what VL will be.
+  auto &VTy = *cast<VectorType>(DataTy);
+  InstructionCost MemOpCost =
+      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
+                      {TTI::OK_AnyValue, TTI::OP_None}, I);
+  unsigned NumLoads = getEstimatedVLFor(&VTy);
+  return NumLoads * MemOpCost;
+}
+
 // Currently, these represent both throughput and codesize costs
 // for the respective intrinsics.  The costs in this table are simply
 // instruction counts with the following adjustments made:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 0747a778fe9a2..742b1aadf00bd 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -143,6 +143,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
                                          TTI::TargetCostKind CostKind,
                                          const Instruction *I);
 
+  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
+                                         const Value *Ptr, bool VariableMask,
+                                         Align Alignment,
+                                         TTI::TargetCostKind CostKind,
+                                         const Instruction *I);
+
   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                    TTI::CastContextHint CCH,
                                    TTI::TargetCostKind CostKind,
@@ -250,6 +256,23 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
     return ST->is64Bit() && !ST->hasVInstructionsI64();
   }
 
+  bool isLegalStridedLoad(Type *DataType, Align Alignment) {
+    if (!ST->hasVInstructions())
+      return false;
+
+    EVT DataTypeVT = TLI->getValueType(DL, DataType);
+
+    // Only support fixed vectors if we know the minimum vector size.
+    if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors())
+      return false;
+
+    EVT ElemType = DataTypeVT.getScalarType();
+    if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
+      return false;
+
+    return TLI->isLegalElementTypeForRVV(ElemType);
+  }
+
   bool isVScaleKnownToBeAPowerOfTwo() const {
     return TLI->isVScaleKnownToBeAPowerOfTwo();
   }
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a8aea112bc28e..90b9b51c470bf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -87,6 +87,7 @@
 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -175,6 +176,15 @@ static cl::opt<int> RootLookAheadMaxDepth(
     "slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden,
     cl::desc("The maximum look-ahead depth for searching best rooting option"));
 
+static cl::opt<unsigned> MinProfitableStridedLoads(
+    "slp-min-strided-loads", cl::init(2), cl::Hidden,
+    cl::desc("The minimum number of loads, which should be considered strided, "
+             "if the stride is > 1 or is runtime value"));
+
+static cl::opt<unsigned> MaxProfitableLoadStride(
+    "slp-max-stride", cl::init(8), cl::Hidden,
+    cl::desc("The maximum stride, considered to be profitable."));
+
 static cl::opt<bool>
     ViewSLPTree("view-slp-tree", cl::Hidden,
                 cl::desc("Display the SLP trees with Graphviz"));
@@ -2575,7 +2585,7 @@ class BoUpSLP {
     enum EntryState {
       Vectorize,
       ScatterVectorize,
-      PossibleStridedVectorize,
+      StridedVectorize,
       NeedToGather
     };
     EntryState State;
@@ -2753,8 +2763,8 @@ class BoUpSLP {
       case ScatterVectorize:
         dbgs() << "ScatterVectorize\n";
         break;
-      case PossibleStridedVectorize:
-        dbgs() << "PossibleStridedVectorize\n";
+      case StridedVectorize:
+        dbgs() << "StridedVectorize\n";
         break;
       case NeedToGather:
         dbgs() << "NeedToGather\n";
@@ -3680,7 +3690,7 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
     if (Entry->State == TreeEntry::NeedToGather)
       return "color=red";
     if (Entry->State == TreeEntry::ScatterVectorize ||
-        Entry->State == TreeEntry::PossibleStridedVectorize)
+        Entry->State == TreeEntry::StridedVectorize)
       return "color=blue";
     return "";
   }
@@ -3846,7 +3856,7 @@ enum class LoadsState {
   Gather,
   Vectorize,
   ScatterVectorize,
-  PossibleStridedVectorize
+  StridedVectorize
 };
 } // anonymous namespace
 
@@ -3878,6 +3888,130 @@ static Align computeCommonAlignment(ArrayRef<Value *> VL) {
   return CommonAlignment;
 }
 
+/// Check if \p Order represents reverse order.
+static bool isReverseOrder(ArrayRef<unsigned> Order) {
+  unsigned Sz = Order.size();
+  return !Order.empty() && all_of(enumerate(Order), [&](const auto &Pair) {
+    return Pair.value() == Sz || Sz - Pair.index() - 1 == Pair.value();
+  });
+}
+
+/// Checks if the provided list of pointers \p Pointers represents the strided
+/// pointers for type ElemTy. If they are not, std::nullopt is returned.
+/// Otherwise, if \p Inst is not specified, just initialized optional value is
+/// returned to show that the pointers represent strided pointers. If \p Inst
+/// specified, the runtime stride is materialized before the given \p Inst.
+/// \returns std::nullopt if the pointers are not pointers with the runtime
+/// stride, nullptr or actual stride value, otherwise.
+static std::optional<Value *>
+calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
+                  const DataLayout &DL, ScalarEvolution &SE,
+                  SmallVectorImpl<unsigned> &SortedIndices,
+                  Instruction *Inst = nullptr) {
+  SmallVector<const SCEV *> SCEVs;
+  const SCEV *PtrSCEVA = nullptr;
+  const SCEV *PtrSCEVB = nullptr;
+  for (Value *Ptr : PointerOps) {
+    const SCEV *PtrSCEV = SE.getSCEV(Ptr);
+    if (!PtrSCEV)
+      return std::nullopt;
+    SCEVs.push_back(PtrSCEV);
+    if (!PtrSCEVA && !PtrSCEVB) {
+      PtrSCEVA = PtrSCEVB = PtrSCEV;
+      continue;
+    }
+    const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVA);
+    if (!Diff || isa<SCEVCouldNotCompute>(Diff))
+      return std::nullopt;
+    if (Diff->isNonConstantNegative()) {
+      PtrSCEVA = PtrSCEV;
+      continue;
+    }
+    const SCEV *Diff1 = SE.getMinusSCEV(PtrSCEVB, PtrSCEV);
+    if (!Diff1 || isa<SCEVCouldNotCompute>(Diff1))
+      return std::nullopt;
+    if (Diff1->isNonConstantNegative()) {
+      PtrSCEVB = PtrSCEV;
+      continue;
+    }
+  }
+  const SCEV *Stride = SE.getMinusSCEV(PtrSCEVB, PtrSCEVA);
+  if (!Stride)
+    return std::nullopt;
+  int Size = DL.getTypeStoreSize(ElemTy);
+  auto TryGetStride = [&](const SCEV *Dist,
+                          const SCEV *Multiplier) -> const SCEV * {
+    if (const auto *M = dyn_cast<SCEVMulExpr>(Dist)) {
+      if (M->getOperand(0) == Multiplier)
+        return M->getOperand(1);
+      if (M->getOperand(1) == Multiplier)
+        return M->getOperand(0);
+      return nullptr;
+    }
+    if (Multiplier == Dist)
+      return SE.getConstant(Dist->getType(), 1);
+    return SE.getUDivExactExpr(Dist, Multiplier);
+  };
+  if (Size != 1 || SCEVs.size() > 2) {
+    const SCEV *Sz =
+        SE.getConstant(Stride->getType(), Size * (SCEVs.size() - 1));
+    Stride = TryGetStride(Stride, Sz);
+    if (!Stride)
+      return std::nullopt;
+  }
+  if (!Stride || isa<SCEVConstant>(Stride))
+    return std::nullopt;
+  // Iterate through all pointers and check if all distances are
+  // unique multiple of Dist.
+  using DistOrdPair = std::pair<int64_t, int>;
+  auto Compare = llvm::less_first();
+  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
+  int Cnt = 0;
+  bool IsConsecutive = true;
+  for (const SCEV *PtrSCEV : SCEVs) {
+    unsigned Dist = 0;
+    if (PtrSCEV != PtrSCEVA) {
+      const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVA);
+      const SCEV *Coeff = TryGetStride(Diff, Stride);
+      if (!Coeff)
+        return std::nullopt;
+      const auto *SC = dyn_cast<SCEVConstant>(Coeff);
+      if (!SC || isa<SCEVCouldNotCompute>(SC))
+        return std::nullopt;
+      if (!SE.getMinusSCEV(PtrSCEV,
+                           SE.getAddExpr(PtrSCEVA, SE.getMulExpr(Stride, SC)))
+               ->isZero())
+        return std::nullopt;
+      Dist = SC->getAPInt().getZExtValue();
+    }
+    // If the strides are not the same or repeated, we can't vectorize.
+    if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
+      return std::nullopt;
+    auto Res = Offsets.emplace(Dist, Cnt);
+    if (!Res.second)
+      return std::nullopt;
+    // Consecutive order if the inserted element is the last one.
+    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
+    ++Cnt;
+  }
+  if (Offsets.size() != SCEVs.size())
+    return std::nullopt;
+  SortedIndices.clear();
+  if (!IsConsecutive) {
+    // Fill SortedIndices array only if it is non-consecutive.
+    SortedIndices.resize(PointerOps.size());
+    Cnt = 0;
+    for (const std::pair<int64_t, int> &Pair : Offsets) {
+      SortedIndices[Cnt] = Pair.second;
+      ++Cnt;
+    }
+  }
+  if (!Inst)
+    return nullptr;
+  SCEVExpander Expander(SE, DL, "strided-load-vec");
+  return Expander.expandCodeFor(Stride, Stride->getType(), Inst);
+}
+
 /// Checks if the given array of loads can be represented as a vectorized,
 /// scatter or just simple gather.
 static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
@@ -3900,7 +4034,8 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   // Make sure all loads in the bundle are simple - we can't vectorize
   // atomic or volatile loads.
   PointerOps.clear();
-  PointerOps.resize(VL.size());
+  const unsigned Sz = VL.size();
+  PointerOps.resize(Sz);
   auto *POIter = PointerOps.begin();
   for (Value *V : VL) {
     auto *L = cast<LoadInst>(V);
@@ -3913,10 +4048,15 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   Order.clear();
   // Check the order of pointer operands or that all pointers are the same.
   bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
+  Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
+  auto *VecTy = FixedVectorType::get(ScalarTy, Sz);
+  if (!IsSorted && Sz > MinProfitableStridedLoads && TTI.isTypeLegal(VecTy) &&
+      TTI.isLegalStridedLoad(VecTy, CommonAlignment) &&
+      calculateRtStride(PointerOps, ScalarTy, DL, SE, Order))
+    return LoadsState::StridedVectorize;
   if (IsSorted || all_of(PointerOps, [&](Value *P) {
         return arePointersCompatible(P, PointerOps.front(), TLI);
       })) {
-    bool IsPossibleStrided = false;
     if (IsSorted) {
       Value *Ptr0;
       Value *PtrN;
@@ -3930,30 +4070,68 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
       std::optional<int> Diff =
           getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
       // Check that the sorted loads are consecutive.
-      if (static_cast<unsigned>(*Diff) == VL.size() - 1)
+      if (static_cast<unsigned>(*Diff) == Sz - 1)
         return LoadsState::Vectorize;
       // Simple check if not a strided access - clear order.
-      IsPossibleStrided = *Diff % (VL.size() - 1) == 0;
+      bool IsPossibleStrided = *Diff % (Sz - 1) == 0;
+      // Try to generate strided load node if:
+      // 1. Targ...
[truncated]

llvmbot · 2024-02-01T17:23:08Z

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Added basic support for strided memory operations in TTI and strided
loads particularly in SLP vectorizer. Supports both runtime and constant
strides. If the strided load must be reversed, applies -stride to avoid
extra reverse shuffle.

Patch is 93.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80310.diff

12 Files Affected:

(modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+34)
(modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+13)
(modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+14)
(modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+23)
(modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+23)
(modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+278-119)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll (+65-67)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll (+19-190)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll (+2-2)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll (+5-8)
(modified) llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll (+1-1)
(modified) llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll (+1-1)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 3b615bc700bbb..b0b6dab03fa38 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -781,6 +781,9 @@ class TargetTransformInfo {
   /// Return true if the target supports masked expand load.
   bool isLegalMaskedExpandLoad(Type *DataType) const;
 
+  /// Return true if the target supports strided load.
+  bool isLegalStridedLoad(Type *DataType, Align Alignment) const;
+
   /// Return true if this is an alternating opcode pattern that can be lowered
   /// to a single instruction on the target. In X86 this is for the addsub
   /// instruction which corrsponds to a Shuffle + Fadd + FSub pattern in IR.
@@ -1412,6 +1415,20 @@ class TargetTransformInfo {
       Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       const Instruction *I = nullptr) const;
 
+  /// \return The cost of strided memory operations.
+  /// \p Opcode - is a type of memory access Load or Store
+  /// \p DataTy - a vector type of the data to be loaded or stored
+  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
+  /// \p VariableMask - true when the memory access is predicated with a mask
+  ///                   that is not a compile-time constant
+  /// \p Alignment - alignment of single element
+  /// \p I - the optional original context instruction, if one exists, e.g. the
+  ///        load/store to transform or the call to the gather/scatter intrinsic
+  InstructionCost getStridedMemoryOpCost(
+      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
+
   /// \return The cost of the interleaved memory operation.
   /// \p Opcode is the memory operation code
   /// \p VecTy is the vector type of the interleaved access.
@@ -1848,6 +1865,7 @@ class TargetTransformInfo::Concept {
                                            Align Alignment) = 0;
   virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
   virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
+  virtual bool isLegalStridedLoad(Type *DataType, Align Alignment) = 0;
   virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
                                unsigned Opcode1,
                                const SmallBitVector &OpcodeMask) const = 0;
@@ -2023,6 +2041,11 @@ class TargetTransformInfo::Concept {
                          bool VariableMask, Align Alignment,
                          TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr) = 0;
+  virtual InstructionCost
+  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+                         bool VariableMask, Align Alignment,
+                         TTI::TargetCostKind CostKind,
+                         const Instruction *I = nullptr) = 0;
 
   virtual InstructionCost getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
@@ -2341,6 +2364,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool isLegalMaskedExpandLoad(Type *DataType) override {
     return Impl.isLegalMaskedExpandLoad(DataType);
   }
+  bool isLegalStridedLoad(Type *DataType, Align Alignment) override {
+    return Impl.isLegalStridedLoad(DataType, Alignment);
+  }
   bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                        const SmallBitVector &OpcodeMask) const override {
     return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
@@ -2671,6 +2697,14 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                        Alignment, CostKind, I);
   }
+  InstructionCost
+  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+                         bool VariableMask, Align Alignment,
+                         TTI::TargetCostKind CostKind,
+                         const Instruction *I = nullptr) override {
+    return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                       Alignment, CostKind, I);
+  }
   InstructionCost getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9958b4daa6ed8..2a7e7b364ac40 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -304,6 +304,10 @@ class TargetTransformInfoImplBase {
 
   bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
 
+  bool isLegalStridedLoad(Type *DataType, Align Alignment) const {
+    return false;
+  }
+
   bool enableOrderedReductions() const { return false; }
 
   bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
@@ -687,6 +691,15 @@ class TargetTransformInfoImplBase {
     return 1;
   }
 
+  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
+                                         const Value *Ptr, bool VariableMask,
+                                         Align Alignment,
+                                         TTI::TargetCostKind CostKind,
+                                         const Instruction *I = nullptr) const {
+    return CostKind == TTI::TCK_RecipThroughput ? TTI::TCC_Expensive
+                                                : TTI::TCC_Basic;
+  }
+
   unsigned getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8902dde37cbca..b86397ae7d267 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -500,6 +500,11 @@ bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
   return TTIImpl->isLegalMaskedExpandLoad(DataType);
 }
 
+bool TargetTransformInfo::isLegalStridedLoad(Type *DataType,
+                                             Align Alignment) const {
+  return TTIImpl->isLegalStridedLoad(DataType, Alignment);
+}
+
 bool TargetTransformInfo::enableOrderedReductions() const {
   return TTIImpl->enableOrderedReductions();
 }
@@ -1041,6 +1046,15 @@ InstructionCost TargetTransformInfo::getGatherScatterOpCost(
   return Cost;
 }
 
+InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
+    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
+  InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
+      Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
 InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
     Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index fe1cdb2dfa423..9cec8ee4cb7f2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -658,6 +658,29 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
   return NumLoads * MemOpCost;
 }
 
+InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
+    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                         Alignment, CostKind, I);
+
+  if ((Opcode == Instruction::Load && !isLegalStridedLoad(DataTy, Alignment)) ||
+      Opcode != Instruction::Load)
+    return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                         Alignment, CostKind, I);
+
+  // Cost is proportional to the number of memory operations implied.  For
+  // scalable vectors, we use an estimate on that number since we don't
+  // know exactly what VL will be.
+  auto &VTy = *cast<VectorType>(DataTy);
+  InstructionCost MemOpCost =
+      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
+                      {TTI::OK_AnyValue, TTI::OP_None}, I);
+  unsigned NumLoads = getEstimatedVLFor(&VTy);
+  return NumLoads * MemOpCost;
+}
+
 // Currently, these represent both throughput and codesize costs
 // for the respective intrinsics.  The costs in this table are simply
 // instruction counts with the following adjustments made:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 0747a778fe9a2..742b1aadf00bd 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -143,6 +143,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
                                          TTI::TargetCostKind CostKind,
                                          const Instruction *I);
 
+  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
+                                         const Value *Ptr, bool VariableMask,
+                                         Align Alignment,
+                                         TTI::TargetCostKind CostKind,
+                                         const Instruction *I);
+
   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                    TTI::CastContextHint CCH,
                                    TTI::TargetCostKind CostKind,
@@ -250,6 +256,23 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
     return ST->is64Bit() && !ST->hasVInstructionsI64();
   }
 
+  bool isLegalStridedLoad(Type *DataType, Align Alignment) {
+    if (!ST->hasVInstructions())
+      return false;
+
+    EVT DataTypeVT = TLI->getValueType(DL, DataType);
+
+    // Only support fixed vectors if we know the minimum vector size.
+    if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors())
+      return false;
+
+    EVT ElemType = DataTypeVT.getScalarType();
+    if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
+      return false;
+
+    return TLI->isLegalElementTypeForRVV(ElemType);
+  }
+
   bool isVScaleKnownToBeAPowerOfTwo() const {
     return TLI->isVScaleKnownToBeAPowerOfTwo();
   }
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a8aea112bc28e..90b9b51c470bf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -87,6 +87,7 @@
 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -175,6 +176,15 @@ static cl::opt<int> RootLookAheadMaxDepth(
     "slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden,
     cl::desc("The maximum look-ahead depth for searching best rooting option"));
 
+static cl::opt<unsigned> MinProfitableStridedLoads(
+    "slp-min-strided-loads", cl::init(2), cl::Hidden,
+    cl::desc("The minimum number of loads, which should be considered strided, "
+             "if the stride is > 1 or is runtime value"));
+
+static cl::opt<unsigned> MaxProfitableLoadStride(
+    "slp-max-stride", cl::init(8), cl::Hidden,
+    cl::desc("The maximum stride, considered to be profitable."));
+
 static cl::opt<bool>
     ViewSLPTree("view-slp-tree", cl::Hidden,
                 cl::desc("Display the SLP trees with Graphviz"));
@@ -2575,7 +2585,7 @@ class BoUpSLP {
     enum EntryState {
       Vectorize,
       ScatterVectorize,
-      PossibleStridedVectorize,
+      StridedVectorize,
       NeedToGather
     };
     EntryState State;
@@ -2753,8 +2763,8 @@ class BoUpSLP {
       case ScatterVectorize:
         dbgs() << "ScatterVectorize\n";
         break;
-      case PossibleStridedVectorize:
-        dbgs() << "PossibleStridedVectorize\n";
+      case StridedVectorize:
+        dbgs() << "StridedVectorize\n";
         break;
       case NeedToGather:
         dbgs() << "NeedToGather\n";
@@ -3680,7 +3690,7 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
     if (Entry->State == TreeEntry::NeedToGather)
       return "color=red";
     if (Entry->State == TreeEntry::ScatterVectorize ||
-        Entry->State == TreeEntry::PossibleStridedVectorize)
+        Entry->State == TreeEntry::StridedVectorize)
       return "color=blue";
     return "";
   }
@@ -3846,7 +3856,7 @@ enum class LoadsState {
   Gather,
   Vectorize,
   ScatterVectorize,
-  PossibleStridedVectorize
+  StridedVectorize
 };
 } // anonymous namespace
 
@@ -3878,6 +3888,130 @@ static Align computeCommonAlignment(ArrayRef<Value *> VL) {
   return CommonAlignment;
 }
 
+/// Check if \p Order represents reverse order.
+static bool isReverseOrder(ArrayRef<unsigned> Order) {
+  unsigned Sz = Order.size();
+  return !Order.empty() && all_of(enumerate(Order), [&](const auto &Pair) {
+    return Pair.value() == Sz || Sz - Pair.index() - 1 == Pair.value();
+  });
+}
+
+/// Checks if the provided list of pointers \p Pointers represents the strided
+/// pointers for type ElemTy. If they are not, std::nullopt is returned.
+/// Otherwise, if \p Inst is not specified, just initialized optional value is
+/// returned to show that the pointers represent strided pointers. If \p Inst
+/// specified, the runtime stride is materialized before the given \p Inst.
+/// \returns std::nullopt if the pointers are not pointers with the runtime
+/// stride, nullptr or actual stride value, otherwise.
+static std::optional<Value *>
+calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
+                  const DataLayout &DL, ScalarEvolution &SE,
+                  SmallVectorImpl<unsigned> &SortedIndices,
+                  Instruction *Inst = nullptr) {
+  SmallVector<const SCEV *> SCEVs;
+  const SCEV *PtrSCEVA = nullptr;
+  const SCEV *PtrSCEVB = nullptr;
+  for (Value *Ptr : PointerOps) {
+    const SCEV *PtrSCEV = SE.getSCEV(Ptr);
+    if (!PtrSCEV)
+      return std::nullopt;
+    SCEVs.push_back(PtrSCEV);
+    if (!PtrSCEVA && !PtrSCEVB) {
+      PtrSCEVA = PtrSCEVB = PtrSCEV;
+      continue;
+    }
+    const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVA);
+    if (!Diff || isa<SCEVCouldNotCompute>(Diff))
+      return std::nullopt;
+    if (Diff->isNonConstantNegative()) {
+      PtrSCEVA = PtrSCEV;
+      continue;
+    }
+    const SCEV *Diff1 = SE.getMinusSCEV(PtrSCEVB, PtrSCEV);
+    if (!Diff1 || isa<SCEVCouldNotCompute>(Diff1))
+      return std::nullopt;
+    if (Diff1->isNonConstantNegative()) {
+      PtrSCEVB = PtrSCEV;
+      continue;
+    }
+  }
+  const SCEV *Stride = SE.getMinusSCEV(PtrSCEVB, PtrSCEVA);
+  if (!Stride)
+    return std::nullopt;
+  int Size = DL.getTypeStoreSize(ElemTy);
+  auto TryGetStride = [&](const SCEV *Dist,
+                          const SCEV *Multiplier) -> const SCEV * {
+    if (const auto *M = dyn_cast<SCEVMulExpr>(Dist)) {
+      if (M->getOperand(0) == Multiplier)
+        return M->getOperand(1);
+      if (M->getOperand(1) == Multiplier)
+        return M->getOperand(0);
+      return nullptr;
+    }
+    if (Multiplier == Dist)
+      return SE.getConstant(Dist->getType(), 1);
+    return SE.getUDivExactExpr(Dist, Multiplier);
+  };
+  if (Size != 1 || SCEVs.size() > 2) {
+    const SCEV *Sz =
+        SE.getConstant(Stride->getType(), Size * (SCEVs.size() - 1));
+    Stride = TryGetStride(Stride, Sz);
+    if (!Stride)
+      return std::nullopt;
+  }
+  if (!Stride || isa<SCEVConstant>(Stride))
+    return std::nullopt;
+  // Iterate through all pointers and check if all distances are
+  // unique multiple of Dist.
+  using DistOrdPair = std::pair<int64_t, int>;
+  auto Compare = llvm::less_first();
+  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
+  int Cnt = 0;
+  bool IsConsecutive = true;
+  for (const SCEV *PtrSCEV : SCEVs) {
+    unsigned Dist = 0;
+    if (PtrSCEV != PtrSCEVA) {
+      const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVA);
+      const SCEV *Coeff = TryGetStride(Diff, Stride);
+      if (!Coeff)
+        return std::nullopt;
+      const auto *SC = dyn_cast<SCEVConstant>(Coeff);
+      if (!SC || isa<SCEVCouldNotCompute>(SC))
+        return std::nullopt;
+      if (!SE.getMinusSCEV(PtrSCEV,
+                           SE.getAddExpr(PtrSCEVA, SE.getMulExpr(Stride, SC)))
+               ->isZero())
+        return std::nullopt;
+      Dist = SC->getAPInt().getZExtValue();
+    }
+    // If the strides are not the same or repeated, we can't vectorize.
+    if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
+      return std::nullopt;
+    auto Res = Offsets.emplace(Dist, Cnt);
+    if (!Res.second)
+      return std::nullopt;
+    // Consecutive order if the inserted element is the last one.
+    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
+    ++Cnt;
+  }
+  if (Offsets.size() != SCEVs.size())
+    return std::nullopt;
+  SortedIndices.clear();
+  if (!IsConsecutive) {
+    // Fill SortedIndices array only if it is non-consecutive.
+    SortedIndices.resize(PointerOps.size());
+    Cnt = 0;
+    for (const std::pair<int64_t, int> &Pair : Offsets) {
+      SortedIndices[Cnt] = Pair.second;
+      ++Cnt;
+    }
+  }
+  if (!Inst)
+    return nullptr;
+  SCEVExpander Expander(SE, DL, "strided-load-vec");
+  return Expander.expandCodeFor(Stride, Stride->getType(), Inst);
+}
+
 /// Checks if the given array of loads can be represented as a vectorized,
 /// scatter or just simple gather.
 static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
@@ -3900,7 +4034,8 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   // Make sure all loads in the bundle are simple - we can't vectorize
   // atomic or volatile loads.
   PointerOps.clear();
-  PointerOps.resize(VL.size());
+  const unsigned Sz = VL.size();
+  PointerOps.resize(Sz);
   auto *POIter = PointerOps.begin();
   for (Value *V : VL) {
     auto *L = cast<LoadInst>(V);
@@ -3913,10 +4048,15 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   Order.clear();
   // Check the order of pointer operands or that all pointers are the same.
   bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
+  Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
+  auto *VecTy = FixedVectorType::get(ScalarTy, Sz);
+  if (!IsSorted && Sz > MinProfitableStridedLoads && TTI.isTypeLegal(VecTy) &&
+      TTI.isLegalStridedLoad(VecTy, CommonAlignment) &&
+      calculateRtStride(PointerOps, ScalarTy, DL, SE, Order))
+    return LoadsState::StridedVectorize;
   if (IsSorted || all_of(PointerOps, [&](Value *P) {
         return arePointersCompatible(P, PointerOps.front(), TLI);
       })) {
-    bool IsPossibleStrided = false;
     if (IsSorted) {
       Value *Ptr0;
       Value *PtrN;
@@ -3930,30 +4070,68 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
       std::optional<int> Diff =
           getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
       // Check that the sorted loads are consecutive.
-      if (static_cast<unsigned>(*Diff) == VL.size() - 1)
+      if (static_cast<unsigned>(*Diff) == Sz - 1)
         return LoadsState::Vectorize;
       // Simple check if not a strided access - clear order.
-      IsPossibleStrided = *Diff % (VL.size() - 1) == 0;
+      bool IsPossibleStrided = *Diff % (Sz - 1) == 0;
+      // Try to generate strided load node if:
+      // 1. Targ...
[truncated]

github-actions · 2024-02-01T17:25:06Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Created using spr 1.3.5

preames

Can I ask you to separate the TTI pieces to a separate review? I'd be comfortable LGTMing that with some pretty minimal changes, but can't reasonable review the SLP changes in any depth.

preames · 2024-02-01T17:49:11Z

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

+                                         Align Alignment,
+                                         TTI::TargetCostKind CostKind,
+                                         const Instruction *I = nullptr) const {
+    return CostKind == TTI::TCK_RecipThroughput ? TTI::TCC_Expensive


InstructionCost::getInvalid() is likely a better default here.

For codesize, you could reasonable use TCC_Basic for a legal strided load or store.

I thought about this. Unfortunately, TTI tends to fallback to the default TTI functions in many cases and just returns 1 in many cases for all kinds of costs. If it is ok to override this default behavior here, I will do it.

preames · 2024-02-01T17:51:32Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

@@ -250,6 +256,23 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
    return ST->is64Bit() && !ST->hasVInstructionsI64();
  }

+  bool isLegalStridedLoad(Type *DataType, Align Alignment) {
+    if (!ST->hasVInstructions())


isLegalStridedLoadStore already exists on TLI. You can reuse by legalizing the type to a MVT.

Ok, will do

alexey-bataev · 2024-02-01T18:07:03Z

Can I ask you to separate the TTI pieces to a separate review? I'd be comfortable LGTMing that with some pretty minimal changes, but can't reasonable review the SLP changes in any depth.

I can't, since don't know how to test TTI changes without SLP vectorizer support.

preames · 2024-02-01T18:28:05Z

Can I ask you to separate the TTI pieces to a separate review? I'd be comfortable LGTMing that with some pretty minimal changes, but can't reasonable review the SLP changes in any depth.

I can't, since don't know how to test TTI changes without SLP vectorizer support.

The change would be untested on it's own. I'm explicitly saying I'm willing to approve that as an exception to usual policy.

Partly because if we can get that in, I plan to write a patch which exercises at least the most basic cases through the instruction cost printer pass.

alexey-bataev · 2024-02-01T18:41:27Z

Can I ask you to separate the TTI pieces to a separate review? I'd be comfortable LGTMing that with some pretty minimal changes, but can't reasonable review the SLP changes in any depth.

I can't, since don't know how to test TTI changes without SLP vectorizer support.

The change would be untested on it's own. I'm explicitly saying I'm willing to approve that as an exception to usual policy.

Partly because if we can get that in, I plan to write a patch which exercises at least the most basic cases through the instruction cost printer pass.

Ok, will do

Added basic legality check and cost estimation functions for strided loads and stores. These interfaces will be built upon in #80310. Reviewers: preames Reviewed By: preames Pull Request: #80329

Created using spr 1.3.5

preames · 2024-02-01T22:51:53Z

FYI - #80360 adds testing infrastructure to exercise the TTI hooks.

preames

These comments are trying to be helpful in pointing out bits which might be simplified or split off, but my track record with SLP reviews is not great. Feel free to ignore any or all of these.

preames · 2024-02-01T23:10:28Z

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll

@@ -17,7 +17,7 @@ define i16 @test() {
 ; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP3]], i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> poison)
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
-; CHECK-NEXT:    [[CMP_I178:%.*]] = icmp ult i16 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[CMP_I178:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    br label [[WHILE_BODY_I]]
 ;
 entry:


Unless this is specifically testing something about offsets from null, can you update this test to pass in a pointer argument and index off that?

(Separate change, no review needed.)

For SLP vectorizer it is not important.

Please separate, land, and rebase.

Impossible to separate, the strided loads support affects reordering, which affects these tests

preames · 2024-02-01T23:11:37Z

llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll

@@ -30,7 +30,7 @@ define void @test() {
 ; CHECK-SLP-THRESHOLD:       bb:
 ; CHECK-SLP-THRESHOLD-NEXT:    [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0
 ; CHECK-SLP-THRESHOLD-NEXT:    [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-SLP-THRESHOLD-NEXT:    [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
+; CHECK-SLP-THRESHOLD-NEXT:    [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> <i64 12, i64 8, i64 4, i64 0>


Shouldn't this be a strided load with a stride of -4*8?

If what you're aiming for is test stability, can you use a index which doesn't look anything like a strided load?

For X86 target it is supposed as not supported currently, so it just produces masked gather and the order is not important

I'd missed this was an X86 test.

Why do we need this change for an X86 test at all? Shouldn't the default cost of illegal prevent any of the new code for kicking in for X86?

Same reason, as above. Before this patch we did not check, if it is important for the target to treat strided loads as strided or better to continue treating them as masked gathers. Since X86 currently does not support strided loads, the reordering does not kick in and causes these changes.

preames · 2024-02-01T23:13:03Z

llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll

@@ -7,7 +7,7 @@ define i32 @test(ptr noalias %p, ptr noalias %addr) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> <i32 15, i32 13, i32 11, i32 9, i32 7, i32 5, i32 3, i32 1>


Same as last.

Same, TTI for X86 does not support strided loads, so the order is not important

preames · 2024-02-01T23:14:41Z

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll

-; CHECK-NEXT:    store <8 x float> [[TMP11]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>


Can't this reverse become a negative strided load?

It can, planned for the next patch(es), cannot put all the stuff in a single patch

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Created using spr 1.3.5

Added basic legality check and cost estimation functions for strided loads and stores. These interfaces will be built upon in llvm#80310. Reviewers: preames Reviewed By: preames Pull Request: llvm#80329

Created using spr 1.3.5

preames · 2024-02-08T20:54:50Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+            // Iterate through all pointers and check if all distances are
+            // unique multiple of Dist.
+            SmallSet<int, 4> Dists;
+            for (Value *Ptr : PointerOps) {


Are we possibly missing an indirection through Order here?

Ptr0 is PointerOps[Order.front()], but this loop doesn't indirect.

For this loop it is not important. It just checks, that all pointers has proportional dists. The order is important only for the cost/codegen.

preames · 2024-02-08T20:56:43Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+              if (((Dist / Stride) * Stride) != Dist ||
+                  !Dists.insert(Dist).second)
+                break;
+            }


A thought for possible follow up.

If the addresses form a strided access in any order, then we could generate a strided load and a shuffle or a shuffle then a strided store.

I don't understand the shuffle placement parts of the reordering code in SLP, so I don't know if this is something easy to exploit, or not.

Currently it will emit strided load + shuffle. It may also emit the shuffle + store, if the graph rotation shows that it is profitable. Both scenarios are possible, SLP will try to minimize number of shuffles anyway

preames

LGTM

However, this a very weak LGTM. It's more of a "I don't spot any obvious problems" more than "I have any confidence in my ability to catch errors".

Alexey, take that as you will. I know we've got a shortage of reviewers in SLP, so feel free to use your judgement here.

hokein · 2024-02-12T16:01:03Z

After this change, clang crashes on compiling the https://github.com/oneapi-src/oneDNN project.

UPDATE: here is a not-reduced test case.

Stacktrace:

2.      Optimizer
 #0 0x00005627dd41d6a8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) llvm-project/llvm/lib/Support/Unix/Signals.inc:723:13
 #1 0x00005627dd41b7a0 llvm::sys::RunSignalHandlers() llvm-project/llvm/lib/Support/Signals.cpp:106:18
 #2 0x00005627dd41dd48 SignalHandler(int) llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #3 0x00007fd8fb05a510 (/lib/x86_64-linux-gnu/libc.so.6+0x3c510)
 #4 0x00007fd8fb0a80fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007fd8fb05a472 raise ./signal/../sysdeps/posix/raise.c:27:6
 #6 0x00007fd8fb0444b2 abort ./stdlib/abort.c:81:7
 #7 0x00007fd8fb0443d5 _nl_load_domain ./intl/loadmsgcat.c:1177:9
 #8 0x00007fd8fb0533a2 (/lib/x86_64-linux-gnu/libc.so.6+0x353a2)
 #9 0x00005627de99d150 llvm::isa_impl_cl<llvm::InsertElementInst, llvm::Instruction const*>::doit(llvm::Instruction const*) llvm-project/llvm/include/llvm/Support/Casting.h:109:5
#10 0x00005627de99d150 llvm::isa_impl_wrap<llvm::InsertElementInst, llvm::Instruction const*, llvm::Instruction const*>::doit(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:137:12
#11 0x00005627de99d150 llvm::isa_impl_wrap<llvm::InsertElementInst, llvm::Instruction const* const, llvm::Instruction const*>::doit(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:127:12
#12 0x00005627de99d150 llvm::CastIsPossible<llvm::InsertElementInst, llvm::Instruction const*, void>::isPossible(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:255:12
#13 0x00005627de99d150 llvm::CastInfo<llvm::InsertElementInst, llvm::Instruction* const, void>::isPossible(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:509:12
#14 0x00005627de99d150 bool llvm::isa<llvm::InsertElementInst, llvm::Instruction*>(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:549:10
#15 0x00005627de99d150 bool llvm::isa<llvm::InsertElementInst, llvm::StoreInst, llvm::Instruction*>(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:554:10
#16 0x00005627de99d150 llvm::slpvectorizer::BoUpSLP::reorderTopToBottom() llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:4609:13
#17 0x00005627de9f8148 (anonymous namespace)::HorizontalReduction::tryToReduce(llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo const&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:14873:11
#18 0x00005627de9dfe8e llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&)::$_1::operator()(llvm::Instruction*) const llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15699:19
#19 0x00005627de9dfe8e llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15724:30
#20 0x00005627de9e01d9 llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15759:14
#21 0x00005627de9d81fd llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:16317:24
#22 0x00005627de9d5fc0 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:13528:13
#23 0x00005627de9d58bf llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:13459:18
#24 0x00005627de6cc5ed llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#25 0x00005627dcef2459 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManager.h:247:10
#26 0x00005627db43affd llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#27 0x00005627dcef5e2e llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/lib/IR/PassManager.cpp:128:23
#28 0x00005627db43ad9d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#29 0x00005627dcef174b llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/include/llvm/IR/PassManager.h:247:10
#30 0x00005627ddb51a67 llvm::SmallPtrSetImplBase::isSmall() const llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:196:33
#31 0x00005627ddb51a67 llvm::SmallPtrSetImplBase::~SmallPtrSetImplBase() llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:84:10
#32 0x00005627ddb51a67 llvm::PreservedAnalyses::~PreservedAnalyses() llvm-project/llvm/include/llvm/IR/Analysis.h:109:7
#33 0x00005627ddb51a67 (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>&, std::unique_ptr<llvm::ToolOutputFile, std::default_delete<llvm::ToolOutputFile>>&, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1102:5
#34 0x00005627ddb48de8 (anonymous namespace)::EmitAssemblyHelper::EmitAssembly(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:0:3
#35 0x00005627ddb48de8 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1329:13
#36 0x00005627ddb6754a std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>::~unique_ptr() /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/unique_ptr.h:403:6
#37 0x00005627ddb6754a clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:379:3
#38 0x00005627df5faab6 __gnu_cxx::__normal_iterator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>*, std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>>::__normal_iterator(std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/stl_iterator.h:1077:20
#39 0x00005627df5faab6 std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>::begin() /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/stl_vector.h:871:16
#40 0x00005627df5faab6 void clang::finalize<std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>>(std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>&, clang::Sema const&) llvm-project/clang/include/clang/Sema/TemplateInstCallback.h:54:16
#41 0x00005627df5faab6 clang::ParseAST(clang::Sema&, bool, bool) llvm-project/clang/lib/Parse/ParseAST.cpp:183:3
#42 0x00005627ddf05230 clang::FrontendAction::Execute() llvm-project/clang/lib/Frontend/FrontendAction.cpp:1073:10
#43 0x00005627dde7b6ad llvm::Error::getPtr() const llvm-project/llvm/include/llvm/Support/Error.h:276:42
#44 0x00005627dde7b6ad llvm::Error::operator bool() llvm-project/llvm/include/llvm/Support/Error.h:239:16
#45 0x00005627dde7b6ad clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) llvm-project/clang/lib/Frontend/CompilerInstance.cpp:1057:23

alexey-bataev · 2024-02-12T16:12:52Z

After this change, clang crashes on compiling the https://github.com/oneapi-src/oneDNN project.

UPDATE: here is a not-reduced test case.

Stacktrace:

2.      Optimizer
 #0 0x00005627dd41d6a8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) llvm-project/llvm/lib/Support/Unix/Signals.inc:723:13
 #1 0x00005627dd41b7a0 llvm::sys::RunSignalHandlers() llvm-project/llvm/lib/Support/Signals.cpp:106:18
 #2 0x00005627dd41dd48 SignalHandler(int) llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #3 0x00007fd8fb05a510 (/lib/x86_64-linux-gnu/libc.so.6+0x3c510)
 #4 0x00007fd8fb0a80fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007fd8fb05a472 raise ./signal/../sysdeps/posix/raise.c:27:6
 #6 0x00007fd8fb0444b2 abort ./stdlib/abort.c:81:7
 #7 0x00007fd8fb0443d5 _nl_load_domain ./intl/loadmsgcat.c:1177:9
 #8 0x00007fd8fb0533a2 (/lib/x86_64-linux-gnu/libc.so.6+0x353a2)
 #9 0x00005627de99d150 llvm::isa_impl_cl<llvm::InsertElementInst, llvm::Instruction const*>::doit(llvm::Instruction const*) llvm-project/llvm/include/llvm/Support/Casting.h:109:5
#10 0x00005627de99d150 llvm::isa_impl_wrap<llvm::InsertElementInst, llvm::Instruction const*, llvm::Instruction const*>::doit(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:137:12
#11 0x00005627de99d150 llvm::isa_impl_wrap<llvm::InsertElementInst, llvm::Instruction const* const, llvm::Instruction const*>::doit(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:127:12
#12 0x00005627de99d150 llvm::CastIsPossible<llvm::InsertElementInst, llvm::Instruction const*, void>::isPossible(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:255:12
#13 0x00005627de99d150 llvm::CastInfo<llvm::InsertElementInst, llvm::Instruction* const, void>::isPossible(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:509:12
#14 0x00005627de99d150 bool llvm::isa<llvm::InsertElementInst, llvm::Instruction*>(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:549:10
#15 0x00005627de99d150 bool llvm::isa<llvm::InsertElementInst, llvm::StoreInst, llvm::Instruction*>(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:554:10
#16 0x00005627de99d150 llvm::slpvectorizer::BoUpSLP::reorderTopToBottom() llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:4609:13
#17 0x00005627de9f8148 (anonymous namespace)::HorizontalReduction::tryToReduce(llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo const&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:14873:11
#18 0x00005627de9dfe8e llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&)::$_1::operator()(llvm::Instruction*) const llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15699:19
#19 0x00005627de9dfe8e llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15724:30
#20 0x00005627de9e01d9 llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15759:14
#21 0x00005627de9d81fd llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:16317:24
#22 0x00005627de9d5fc0 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:13528:13
#23 0x00005627de9d58bf llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:13459:18
#24 0x00005627de6cc5ed llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#25 0x00005627dcef2459 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManager.h:247:10
#26 0x00005627db43affd llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#27 0x00005627dcef5e2e llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/lib/IR/PassManager.cpp:128:23
#28 0x00005627db43ad9d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#29 0x00005627dcef174b llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/include/llvm/IR/PassManager.h:247:10
#30 0x00005627ddb51a67 llvm::SmallPtrSetImplBase::isSmall() const llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:196:33
#31 0x00005627ddb51a67 llvm::SmallPtrSetImplBase::~SmallPtrSetImplBase() llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:84:10
#32 0x00005627ddb51a67 llvm::PreservedAnalyses::~PreservedAnalyses() llvm-project/llvm/include/llvm/IR/Analysis.h:109:7
#33 0x00005627ddb51a67 (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>&, std::unique_ptr<llvm::ToolOutputFile, std::default_delete<llvm::ToolOutputFile>>&, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1102:5
#34 0x00005627ddb48de8 (anonymous namespace)::EmitAssemblyHelper::EmitAssembly(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:0:3
#35 0x00005627ddb48de8 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1329:13
#36 0x00005627ddb6754a std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>::~unique_ptr() /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/unique_ptr.h:403:6
#37 0x00005627ddb6754a clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:379:3
#38 0x00005627df5faab6 __gnu_cxx::__normal_iterator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>*, std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>>::__normal_iterator(std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/stl_iterator.h:1077:20
#39 0x00005627df5faab6 std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>::begin() /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/stl_vector.h:871:16
#40 0x00005627df5faab6 void clang::finalize<std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>>(std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>&, clang::Sema const&) llvm-project/clang/include/clang/Sema/TemplateInstCallback.h:54:16
#41 0x00005627df5faab6 clang::ParseAST(clang::Sema&, bool, bool) llvm-project/clang/lib/Parse/ParseAST.cpp:183:3
#42 0x00005627ddf05230 clang::FrontendAction::Execute() llvm-project/clang/lib/Frontend/FrontendAction.cpp:1073:10
#43 0x00005627dde7b6ad llvm::Error::getPtr() const llvm-project/llvm/include/llvm/Support/Error.h:276:42
#44 0x00005627dde7b6ad llvm::Error::operator bool() llvm-project/llvm/include/llvm/Support/Error.h:239:16
#45 0x00005627dde7b6ad clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) llvm-project/clang/lib/Frontend/CompilerInstance.cpp:1057:23

Thanks, will check it ASAP.

This reverts commit 0940f90 to fix issues reported in #80310.

Added basic support for strided loads support in SLP vectorizer. Supports constant strides only. If the strided load must be reversed, applies -stride to avoid extra reverse shuffle. Reviewers: preames, lukel97 Reviewed By: preames Pull Request: #80310

alexey-bataev · 2024-02-12T19:09:20Z

After this change, clang crashes on compiling the https://github.com/oneapi-src/oneDNN project.

UPDATE: here is a not-reduced test case.

Stacktrace:

2.      Optimizer
 #0 0x00005627dd41d6a8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) llvm-project/llvm/lib/Support/Unix/Signals.inc:723:13
 #1 0x00005627dd41b7a0 llvm::sys::RunSignalHandlers() llvm-project/llvm/lib/Support/Signals.cpp:106:18
 #2 0x00005627dd41dd48 SignalHandler(int) llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #3 0x00007fd8fb05a510 (/lib/x86_64-linux-gnu/libc.so.6+0x3c510)
 #4 0x00007fd8fb0a80fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007fd8fb05a472 raise ./signal/../sysdeps/posix/raise.c:27:6
 #6 0x00007fd8fb0444b2 abort ./stdlib/abort.c:81:7
 #7 0x00007fd8fb0443d5 _nl_load_domain ./intl/loadmsgcat.c:1177:9
 #8 0x00007fd8fb0533a2 (/lib/x86_64-linux-gnu/libc.so.6+0x353a2)
 #9 0x00005627de99d150 llvm::isa_impl_cl<llvm::InsertElementInst, llvm::Instruction const*>::doit(llvm::Instruction const*) llvm-project/llvm/include/llvm/Support/Casting.h:109:5
#10 0x00005627de99d150 llvm::isa_impl_wrap<llvm::InsertElementInst, llvm::Instruction const*, llvm::Instruction const*>::doit(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:137:12
#11 0x00005627de99d150 llvm::isa_impl_wrap<llvm::InsertElementInst, llvm::Instruction const* const, llvm::Instruction const*>::doit(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:127:12
#12 0x00005627de99d150 llvm::CastIsPossible<llvm::InsertElementInst, llvm::Instruction const*, void>::isPossible(llvm::Instruction const* const&) llvm-project/llvm/include/llvm/Support/Casting.h:255:12
#13 0x00005627de99d150 llvm::CastInfo<llvm::InsertElementInst, llvm::Instruction* const, void>::isPossible(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:509:12
#14 0x00005627de99d150 bool llvm::isa<llvm::InsertElementInst, llvm::Instruction*>(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:549:10
#15 0x00005627de99d150 bool llvm::isa<llvm::InsertElementInst, llvm::StoreInst, llvm::Instruction*>(llvm::Instruction* const&) llvm-project/llvm/include/llvm/Support/Casting.h:554:10
#16 0x00005627de99d150 llvm::slpvectorizer::BoUpSLP::reorderTopToBottom() llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:4609:13
#17 0x00005627de9f8148 (anonymous namespace)::HorizontalReduction::tryToReduce(llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo const&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:14873:11
#18 0x00005627de9dfe8e llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&)::$_1::operator()(llvm::Instruction*) const llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15699:19
#19 0x00005627de9dfe8e llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15724:30
#20 0x00005627de9e01d9 llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::TargetTransformInfo*) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:15759:14
#21 0x00005627de9d81fd llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:16317:24
#22 0x00005627de9d5fc0 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:13528:13
#23 0x00005627de9d58bf llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:13459:18
#24 0x00005627de6cc5ed llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#25 0x00005627dcef2459 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManager.h:247:10
#26 0x00005627db43affd llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#27 0x00005627dcef5e2e llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/lib/IR/PassManager.cpp:128:23
#28 0x00005627db43ad9d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:90:5
#29 0x00005627dcef174b llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) llvm-project/llvm/include/llvm/IR/PassManager.h:247:10
#30 0x00005627ddb51a67 llvm::SmallPtrSetImplBase::isSmall() const llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:196:33
#31 0x00005627ddb51a67 llvm::SmallPtrSetImplBase::~SmallPtrSetImplBase() llvm-project/llvm/include/llvm/ADT/SmallPtrSet.h:84:10
#32 0x00005627ddb51a67 llvm::PreservedAnalyses::~PreservedAnalyses() llvm-project/llvm/include/llvm/IR/Analysis.h:109:7
#33 0x00005627ddb51a67 (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>&, std::unique_ptr<llvm::ToolOutputFile, std::default_delete<llvm::ToolOutputFile>>&, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1102:5
#34 0x00005627ddb48de8 (anonymous namespace)::EmitAssemblyHelper::EmitAssembly(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:0:3
#35 0x00005627ddb48de8 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) llvm-project/clang/lib/CodeGen/BackendUtil.cpp:1329:13
#36 0x00005627ddb6754a std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>::~unique_ptr() /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/unique_ptr.h:403:6
#37 0x00005627ddb6754a clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) llvm-project/clang/lib/CodeGen/CodeGenAction.cpp:379:3
#38 0x00005627df5faab6 __gnu_cxx::__normal_iterator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>*, std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>>::__normal_iterator(std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>* const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/stl_iterator.h:1077:20
#39 0x00005627df5faab6 std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>::begin() /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/stl_vector.h:871:16
#40 0x00005627df5faab6 void clang::finalize<std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>>(std::vector<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>, std::allocator<std::unique_ptr<clang::TemplateInstantiationCallback, std::default_delete<clang::TemplateInstantiationCallback>>>>&, clang::Sema const&) llvm-project/clang/include/clang/Sema/TemplateInstCallback.h:54:16
#41 0x00005627df5faab6 clang::ParseAST(clang::Sema&, bool, bool) llvm-project/clang/lib/Parse/ParseAST.cpp:183:3
#42 0x00005627ddf05230 clang::FrontendAction::Execute() llvm-project/clang/lib/Frontend/FrontendAction.cpp:1073:10
#43 0x00005627dde7b6ad llvm::Error::getPtr() const llvm-project/llvm/include/llvm/Support/Error.h:276:42
#44 0x00005627dde7b6ad llvm::Error::operator bool() llvm-project/llvm/include/llvm/Support/Error.h:239:16
#45 0x00005627dde7b6ad clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) llvm-project/clang/lib/Frontend/CompilerInstance.cpp:1057:23

Must be fixed in the updated version

hokein · 2024-02-12T20:50:25Z

Thank you for the prompt response, and quick fix!

[𝘀𝗽𝗿] initial version

92950af

Created using spr 1.3.5

alexey-bataev requested review from preames and lukel97 February 1, 2024 17:22

llvmbot added backend:RISC-V vectorization llvm:analysis llvm:transforms labels Feb 1, 2024

Fix formatting

8bdb5df

Created using spr 1.3.5

preames reviewed Feb 1, 2024

View reviewed changes

preames mentioned this pull request Feb 1, 2024

[TTI]Add support for strided loads/stores. #80329

Merged

Rebase

bceaada

Created using spr 1.3.5

alexey-bataev changed the title ~~[SLP][TTI]Add support for strided loads.~~ [SLP]Add support for strided loads. Feb 1, 2024

preames reviewed Feb 1, 2024

View reviewed changes

Rebase, remove type legality check

f1e632e

Created using spr 1.3.5

alexey-bataev added 2 commits February 6, 2024 14:39

Rebase, ping!

56f40a8

Created using spr 1.3.5

Rebase, Address comments

5b2e3db

Created using spr 1.3.5

preames reviewed Feb 8, 2024

View reviewed changes

preames approved these changes Feb 9, 2024

View reviewed changes

alexey-bataev merged commit 0940f90 into main Feb 12, 2024
3 of 4 checks passed

alexey-bataev deleted the users/alexey-bataev/spr/slpttiadd-support-for-strided-loads branch February 12, 2024 12:41

alexey-bataev added a commit that referenced this pull request Feb 12, 2024

Revert "[SLP]Add support for strided loads."

6a3a5ca

This reverts commit 0940f90 to fix issues reported in #80310.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[SLP]Add support for strided loads. #80310

[SLP]Add support for strided loads. #80310

alexey-bataev commented Feb 1, 2024 •

edited

llvmbot commented Feb 1, 2024 •

edited

llvmbot commented Feb 1, 2024

github-actions bot commented Feb 1, 2024 •

edited

preames left a comment

preames Feb 1, 2024

alexey-bataev Feb 1, 2024

preames Feb 1, 2024

alexey-bataev Feb 1, 2024

alexey-bataev commented Feb 1, 2024

preames commented Feb 1, 2024

alexey-bataev commented Feb 1, 2024

preames commented Feb 1, 2024

preames left a comment

preames Feb 1, 2024

alexey-bataev Feb 1, 2024

preames Feb 7, 2024 •

edited

alexey-bataev Feb 7, 2024

preames Feb 1, 2024

alexey-bataev Feb 1, 2024

preames Feb 7, 2024

alexey-bataev Feb 7, 2024 •

edited

preames Feb 1, 2024

alexey-bataev Feb 1, 2024

preames Feb 1, 2024

alexey-bataev Feb 1, 2024

preames Feb 8, 2024

alexey-bataev Feb 8, 2024

preames Feb 8, 2024

alexey-bataev Feb 8, 2024

preames left a comment

hokein commented Feb 12, 2024 •

edited

alexey-bataev commented Feb 12, 2024

alexey-bataev commented Feb 12, 2024

hokein commented Feb 12, 2024

[SLP]Add support for strided loads. #80310

[SLP]Add support for strided loads. #80310

Conversation

alexey-bataev commented Feb 1, 2024 • edited

llvmbot commented Feb 1, 2024 • edited

llvmbot commented Feb 1, 2024

github-actions bot commented Feb 1, 2024 • edited

preames left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

alexey-bataev commented Feb 1, 2024

preames commented Feb 1, 2024

alexey-bataev commented Feb 1, 2024

preames commented Feb 1, 2024

preames left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

preames Feb 7, 2024 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

alexey-bataev Feb 7, 2024 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

preames left a comment

Choose a reason for hiding this comment

hokein commented Feb 12, 2024 • edited

alexey-bataev commented Feb 12, 2024

alexey-bataev commented Feb 12, 2024

hokein commented Feb 12, 2024

alexey-bataev commented Feb 1, 2024 •

edited

llvmbot commented Feb 1, 2024 •

edited

github-actions bot commented Feb 1, 2024 •

edited

preames Feb 7, 2024 •

edited

alexey-bataev Feb 7, 2024 •

edited

hokein commented Feb 12, 2024 •

edited