[SLP]Add runtime stride support for strided loads. #81517
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Added support for runtime strides.

Full diff: https://github.com/llvm/llvm-project/pull/81517.diff

2 Files Affected:
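As context for the diff below, here is a hedged C-level reconstruction of the kind of input this patch targets, mirroring the shape of the updated test1 (the C form is an assumption inferred from the test IR, not taken from the PR): one load of each pair steps through the array by a runtime stride, the other by a constant stride, and after this patch both sides can become llvm.experimental.vp.strided.load calls with the runtime byte stride materialized as a multiply.

// Hypothetical C source matching the shape of test1 below: eight
// fully-unrolled iterations, with one operand indexed by a runtime
// stride and the other by a constant stride of -4 elements.
// (Assumes fast-math, to match the `fsub fast` in the test IR.)
void test1(float *p, float *s, unsigned stride) {
  for (int i = 0; i < 8; ++i)
    s[i] = p[30 - 4 * i] - p[(long)i * stride];
}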
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c54d065cac6382..8fa32035d172d5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -87,6 +87,7 @@
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -3890,6 +3891,122 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
});
}
+/// Checks if the provided list of pointers \p PointerOps represents strided
+/// pointers for the element type \p ElemTy. If it does not, std::nullopt is
+/// returned. Otherwise, if \p Inst is not specified, an engaged optional
+/// value is returned to show that the pointers represent strided pointers.
+/// If \p Inst is specified, the runtime stride is materialized before the
+/// given \p Inst.
+/// \returns std::nullopt if the pointers do not have a runtime stride;
+/// otherwise, nullptr or the actual stride value.
+static std::optional<Value *>
+calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
+ const DataLayout &DL, ScalarEvolution &SE,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ Instruction *Inst = nullptr) {
+ SmallVector<const SCEV *> SCEVs;
+ const SCEV *PtrSCEVA = nullptr;
+ const SCEV *PtrSCEVB = nullptr;
+ for (Value *Ptr : PointerOps) {
+ const SCEV *PtrSCEV = SE.getSCEV(Ptr);
+ if (!PtrSCEV)
+ return std::nullopt;
+ SCEVs.push_back(PtrSCEV);
+ if (!PtrSCEVA && !PtrSCEVB) {
+ PtrSCEVA = PtrSCEVB = PtrSCEV;
+ continue;
+ }
+ const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVA);
+ if (!Diff || isa<SCEVCouldNotCompute>(Diff))
+ return std::nullopt;
+ if (Diff->isNonConstantNegative()) {
+ PtrSCEVA = PtrSCEV;
+ continue;
+ }
+ const SCEV *Diff1 = SE.getMinusSCEV(PtrSCEVB, PtrSCEV);
+ if (!Diff1 || isa<SCEVCouldNotCompute>(Diff1))
+ return std::nullopt;
+ if (Diff1->isNonConstantNegative()) {
+ PtrSCEVB = PtrSCEV;
+ continue;
+ }
+ }
+ const SCEV *Stride = SE.getMinusSCEV(PtrSCEVB, PtrSCEVA);
+ if (!Stride)
+ return std::nullopt;
+ int Size = DL.getTypeStoreSize(ElemTy);
+ auto TryGetStride = [&](const SCEV *Dist,
+ const SCEV *Multiplier) -> const SCEV * {
+ if (const auto *M = dyn_cast<SCEVMulExpr>(Dist)) {
+ if (M->getOperand(0) == Multiplier)
+ return M->getOperand(1);
+ if (M->getOperand(1) == Multiplier)
+ return M->getOperand(0);
+ return nullptr;
+ }
+ if (Multiplier == Dist)
+ return SE.getConstant(Dist->getType(), 1);
+ return SE.getUDivExactExpr(Dist, Multiplier);
+ };
+ if (Size != 1 || SCEVs.size() > 2) {
+ const SCEV *Sz =
+ SE.getConstant(Stride->getType(), Size * (SCEVs.size() - 1));
+ Stride = TryGetStride(Stride, Sz);
+ if (!Stride)
+ return std::nullopt;
+ }
+ if (!Stride || isa<SCEVConstant>(Stride))
+ return std::nullopt;
+ // Iterate through all pointers and check if all distances are
+ // unique multiples of Stride.
+ using DistOrdPair = std::pair<int64_t, int>;
+ auto Compare = llvm::less_first();
+ std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
+ int Cnt = 0;
+ bool IsConsecutive = true;
+ for (const SCEV *PtrSCEV : SCEVs) {
+ unsigned Dist = 0;
+ if (PtrSCEV != PtrSCEVA) {
+ const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVA);
+ const SCEV *Coeff = TryGetStride(Diff, Stride);
+ if (!Coeff)
+ return std::nullopt;
+ const auto *SC = dyn_cast<SCEVConstant>(Coeff);
+ if (!SC || isa<SCEVCouldNotCompute>(SC))
+ return std::nullopt;
+ if (!SE.getMinusSCEV(PtrSCEV,
+ SE.getAddExpr(PtrSCEVA, SE.getMulExpr(Stride, SC)))
+ ->isZero())
+ return std::nullopt;
+ Dist = SC->getAPInt().getZExtValue();
+ }
+ // The offset must be a multiple of the element size and its coefficient
+ // must be in range; otherwise we can't vectorize.
+ if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
+ return std::nullopt;
+ auto Res = Offsets.emplace(Dist, Cnt);
+ if (!Res.second)
+ return std::nullopt;
+ // Consecutive order if the inserted element is the last one.
+ IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
+ ++Cnt;
+ }
+ if (Offsets.size() != SCEVs.size())
+ return std::nullopt;
+ SortedIndices.clear();
+ if (!IsConsecutive) {
+ // Fill SortedIndices array only if it is non-consecutive.
+ SortedIndices.resize(PointerOps.size());
+ Cnt = 0;
+ for (const std::pair<int64_t, int> &Pair : Offsets) {
+ SortedIndices[Cnt] = Pair.second;
+ ++Cnt;
+ }
+ }
+ if (!Inst)
+ return nullptr;
+ SCEVExpander Expander(SE, DL, "strided-load-vec");
+ return Expander.expandCodeFor(Stride, Stride->getType(), Inst);
+}
+
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
@@ -3927,6 +4044,11 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
auto *VecTy = FixedVectorType::get(ScalarTy, Sz);
// Check the order of pointer operands or that all pointers are the same.
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
+ Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
+ if (!IsSorted && Sz > MinProfitableStridedLoads && TTI.isTypeLegal(VecTy) &&
+ TTI.isLegalStridedLoadStore(VecTy, CommonAlignment) &&
+ calculateRtStride(PointerOps, ScalarTy, DL, SE, Order))
+ return LoadsState::StridedVectorize;
if (IsSorted || all_of(PointerOps, [&](Value *P) {
return arePointersCompatible(P, PointerOps.front(), TLI);
})) {
@@ -11645,10 +11767,30 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
std::optional<int> Diff = getPointersDiff(
VL0->getType(), Ptr0, VL0->getType(), PtrN, *DL, *SE);
Type *StrideTy = DL->getIndexType(PO->getType());
- int Stride = *Diff / (static_cast<int>(E->Scalars.size()) - 1);
- Value *StrideVal =
- ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride *
- DL->getTypeAllocSize(ScalarTy));
+ Value *StrideVal;
+ if (Diff) {
+ int Stride = *Diff / (static_cast<int>(E->Scalars.size()) - 1);
+ StrideVal =
+ ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride *
+ DL->getTypeAllocSize(ScalarTy));
+ } else {
+ SmallVector<Value *> PointerOps(E->Scalars.size(), nullptr);
+ transform(E->Scalars, PointerOps.begin(), [](Value *V) {
+ return cast<LoadInst>(V)->getPointerOperand();
+ });
+ OrdersType Order;
+ std::optional<Value *> Stride =
+ calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order,
+ &*Builder.GetInsertPoint());
+ Value *NewStride =
+ Builder.CreateIntCast(*Stride, StrideTy, /*isSigned=*/true);
+ StrideVal = Builder.CreateMul(
+ NewStride,
+ ConstantInt::get(
+ StrideTy,
+ (IsReverseOrder ? -1 : 1) *
+ static_cast<int>(DL->getTypeAllocSize(ScalarTy))));
+ }
Align CommonAlignment = computeCommonAlignment<LoadInst>(E->Scalars);
auto *Inst = Builder.CreateIntrinsic(
Intrinsic::experimental_vp_strided_load,
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll
index 4b0b41970bbb4d..03acc0009fb04c 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll
@@ -78,67 +78,13 @@ define void @test1(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
-; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
-; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
-; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
-; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
-; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
-; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
-; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
-; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
-; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
-; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
-; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
-; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
-; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
-; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
-; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
-; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
-; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
-; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
-; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
-; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
-; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
-; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
-; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
-; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
-; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
-; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
-; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
-; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
-; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
-; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
-; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
-; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
-; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
-; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
-; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
-; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
-; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
-; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
-; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
-; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
-; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
-; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
-; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
-; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
-; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
-; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
-; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
-; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
-; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
-; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
-; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
-; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
-; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
-; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
-; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STR]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -215,38 +161,12 @@ define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
-; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
-; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
-; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
-; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
-; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
-; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
-; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
-; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
-; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
-; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
-; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
-; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
-; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
-; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
-; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
-; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
-; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
-; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
-; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 0
-; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[I1]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I3]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I5]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I7]], i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I9]], i32 4
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I11]], i32 5
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I13]], i32 6
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP7]], float [[I15]], i32 7
-; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <8 x float> [[TMP8]], [[TMP0]]
-; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STR]], -4
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: ret void
;
entry:
Ping!
Inline comments - sorry, these had actually been made a bit ago, but I apparently forgot to submit the review so they were still pending.
    const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVA);
    if (!Diff || isa<SCEVCouldNotCompute>(Diff))
      return std::nullopt;
    if (Diff->isNonConstantNegative()) {
Why the isNonConstant part? Why not isKnownNegative?
LGTM
I'll comment that I find the guess-and-check style logic used in this patch very confusing without clear comments as to exactly what's going on. I think you could make this much clearer if you used appropriate variable names, or at least added some clear comments about the point where you transfer from best-effort guessing to checking your result and relying on its correctness.
Renamed the variables, added some more comments.
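To make the two phases discussed above concrete, here is a minimal standalone sketch of the guess-and-check structure, with plain integer addresses standing in for SCEV expressions (the function name and this simplification are illustrative assumptions, not the patch code): phase one guesses a stride candidate from the extreme addresses, and phase two only relies on the guess after verifying every address against it.

#include <algorithm>
#include <cstdint>
#include <optional>
#include <set>
#include <vector>

// Hypothetical simplified model of calculateRtStride's structure: addresses
// are plain integers rather than SCEVs, so the "guess" is exact arithmetic.
std::optional<int64_t> guessAndCheckStride(const std::vector<int64_t> &Addrs,
                                           int64_t ElemSize) {
  if (Addrs.size() < 2)
    return std::nullopt;
  // Phase 1: best-effort guess. Take the lowest/highest addresses (the
  // analogue of PtrSCEVA/PtrSCEVB) and derive a stride candidate from the
  // span divided by the number of gaps.
  auto [MinIt, MaxIt] = std::minmax_element(Addrs.begin(), Addrs.end());
  int64_t Gaps = static_cast<int64_t>(Addrs.size()) - 1;
  int64_t Span = *MaxIt - *MinIt;
  if (Span == 0 || Span % Gaps != 0)
    return std::nullopt;
  int64_t Stride = Span / Gaps;
  if (Stride % ElemSize != 0)
    return std::nullopt;
  // Phase 2: checking. Only now rely on the guess: every address must be
  // Min + K * Stride for a unique coefficient K in [0, Addrs.size()).
  std::set<int64_t> Coeffs;
  for (int64_t Addr : Addrs) {
    int64_t Off = Addr - *MinIt;
    if (Off % Stride != 0)
      return std::nullopt;
    int64_t K = Off / Stride;
    if (K >= static_cast<int64_t>(Addrs.size()) || !Coeffs.insert(K).second)
      return std::nullopt; // repeated or out-of-range offset
  }
  return Stride;
}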
Created using spr 1.3.5
Added support for runtime strides.