diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp index d03930d9e2d99..c374d328c984a 100644 --- a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp @@ -141,10 +141,26 @@ void SCEVDivision::visitAddRecExpr(const SCEVAddRecExpr *Numerator) { if (Ty != StartQ->getType() || Ty != StartR->getType() || Ty != StepQ->getType() || Ty != StepR->getType()) return cannotDivide(Numerator); + + // Infer no-wrap flags for Remainder. + // TODO: Catch more cases. + SCEV::NoWrapFlags NumFlags = Numerator->getNoWrapFlags(); + SCEV::NoWrapFlags RemFlags = SCEV::NoWrapFlags::FlagAnyWrap; + const SCEV *StepNumAbs = + SE.getAbsExpr(Numerator->getStepRecurrence(SE), /*IsNSW=*/false); + const SCEV *StepRAbs = SE.getAbsExpr(StepR, /*IsNSW=*/false); + const Loop *L = Numerator->getLoop(); + + // If abs(StepR) <=u abs(StepNum) and both are loop invariant, propagate + // the NW flag from Numerator to Remainder. + if (ScalarEvolution::hasFlags(NumFlags, SCEV::NoWrapFlags::FlagNW) && + SE.isLoopInvariant(StepNumAbs, L) && SE.isLoopInvariant(StepRAbs, L) && + SE.isKnownPredicate(ICmpInst::ICMP_ULE, StepRAbs, StepNumAbs)) + RemFlags = ScalarEvolution::setFlags(RemFlags, SCEV::NoWrapFlags::FlagNW); + Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), - Numerator->getNoWrapFlags()); - Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), - Numerator->getNoWrapFlags()); + SCEV::NoWrapFlags::FlagAnyWrap); + Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), RemFlags); } void SCEVDivision::visitAddExpr(const SCEVAddExpr *Numerator) { diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll index 0512044990163..e77fd79a49b35 100644 --- a/llvm/test/Analysis/Delinearization/fixed_size_array.ll +++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll @@ -163,7 +163,7 @@ exit: ; CHECK: 
Delinearization on function a_i_2j1_k: ; CHECK: Base offset: %a ; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes. -; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j.header>][{32,+,1}<%for.k>] +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j.header>][{32,+,1}<%for.k>] define void @a_i_2j1_k(ptr %a) { entry: br label %for.i.header diff --git a/llvm/test/Analysis/Delinearization/wraps.ll b/llvm/test/Analysis/Delinearization/wraps.ll new file mode 100644 index 0000000000000..fc4935bad9939 --- /dev/null +++ b/llvm/test/Analysis/Delinearization/wraps.ll @@ -0,0 +1,130 @@ +; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s + +; In the following case, we don't know the concrete value of `m`, so we cannot +; deduce no-wrap behavior for the quotient/remainder divided by `m`. However, +; we can infer `{0,+,1}<%loop>` is nuw and nsw from the induction variable. +; +; for (int i = 0; i < btc; i++) +; a[i * (m + 42)] = 0; + +; CHECK: AccessFunction: {0,+,(42 + %m)}<%loop> +; CHECK-NEXT: Base offset: %a +; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 1 bytes. +; CHECK-NEXT: ArrayRef[{0,+,1}<%loop>][{0,+,42}<%loop>] +define void @divide_by_m0(ptr %a, i64 %m, i64 %btc) { +entry: + %stride = add nsw nuw i64 %m, 42 + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %offset = phi i64 [ 0, %entry ], [ %offset.next, %loop ] + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %i.next = add nsw nuw i64 %i, 1 + %offset.next = add nsw nuw i64 %offset, %stride + %cond = icmp eq i64 %i.next, %btc + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +; In the following case, we don't know the concrete value of `m`, so we cannot +; deduce no-wrap behavior for the quotient/remainder divided by `m`. Also, we +; don't infer nsw/nuw from the induction variable in this case. 
+; +; for (int i = 0; i < btc; i++) +; a[i * (2 * m + 42)] = 0; + +; CHECK: AccessFunction: {0,+,(42 + (2 * %m))}<%loop> +; CHECK-NEXT: Base offset: %a +; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 1 bytes. +; CHECK-NEXT: ArrayRef[{0,+,2}<%loop>][{0,+,42}<%loop>] +define void @divide_by_m2(ptr %a, i64 %m, i64 %btc) { +entry: + %m2 = add nsw nuw i64 %m, %m + %stride = add nsw nuw i64 %m2, 42 + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %offset = phi i64 [ 0, %entry ], [ %offset.next, %loop ] + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 0, ptr %idx + %i.next = add nsw nuw i64 %i, 1 + %offset.next = add nsw nuw i64 %offset, %stride + %cond = icmp eq i64 %i.next, %btc + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +; In the following case, the `i * 2 * d` is always zero, so it's nsw and nuw. +; However, the quotient divided by `d` is neither nsw nor nuw. +; +; if (d == 0) +; for (unsigned long long i = 0; i != UINT64_MAX; i++) +; a[i * 2 * d] = 42; + +; CHECK: AccessFunction: {0,+,(2 * %d)}<%loop> +; CHECK-NEXT: Base offset: %a +; CHECK-NEXT: ArrayDecl[UnknownSize][%d] with elements of 1 bytes. +; CHECK-NEXT: ArrayRef[{0,+,2}<%loop>][0] +define void @divide_by_zero(ptr %a, i64 %d) { +entry: + %guard = icmp eq i64 %d, 0 + br i1 %guard, label %loop.preheader, label %exit + +loop.preheader: + %stride = mul nsw nuw i64 %d, 2 ; since %d is 0, %stride is also 0 + br label %loop + +loop: + %i = phi i64 [ 0, %loop.preheader ], [ %i.next, %loop ] + %offset = phi i64 [ 0, %loop.preheader ], [ %offset.next, %loop ] + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 42, ptr %idx + %i.next = add nuw i64 %i, 1 + %offset.next = add nsw nuw i64 %offset, %stride + %cond = icmp eq i64 %i.next, -1 + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +; In the following case, the `i * (d + 1)` is always zero, so it's nsw and nuw. 
+; However, the quotient/remainder divided by `d` is not nsw. +; +; if (d == UINT64_MAX) +; for (unsigned long long i = 0; i != d; i++) +; a[i * (d + 1)] = 42; + +; CHECK: AccessFunction: {0,+,(1 + %d)}<%loop> +; CHECK-NEXT: Base offset: %a +; CHECK-NEXT: ArrayDecl[UnknownSize][%d] with elements of 1 bytes. +; CHECK-NEXT: ArrayRef[{0,+,1}<%loop>][{0,+,1}<%loop>] +define void @divide_by_minus_one(ptr %a, i64 %d) { +entry: + %guard = icmp eq i64 %d, -1 + br i1 %guard, label %loop.preheader, label %exit + +loop.preheader: + %stride = add nsw i64 %d, 1 ; since %d is -1, %stride is 0 + br label %loop + +loop: + %i = phi i64 [ 0, %loop.preheader ], [ %i.next, %loop ] + %offset = phi i64 [ 0, %loop.preheader ], [ %offset.next, %loop ] + %idx = getelementptr inbounds i8, ptr %a, i64 %offset + store i8 42, ptr %idx + %i.next = add nuw i64 %i, 1 + %offset.next = add nsw nuw i64 %offset, %stride + %cond = icmp eq i64 %i.next, %d + br i1 %cond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll index 8f94a455d3724..f670136aed750 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll @@ -479,14 +479,16 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, ;; for (int k = 1; k < o; k++) ;; = A[i*m*o + j*o + k] ;; A[i*m*o + j*o + k - 1] = +;; +;; FIXME: Currently fails to infer nsw for the SCEV `{0,+,1}` define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't8' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - consistent anti [0 0 1]! +; CHECK-NEXT: da analyze - anti [* * *|<]! 
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0