Skip to content

Commit

Permalink
[SCEV] Fix nsw flags for GEP expressions
Browse files Browse the repository at this point in the history
The SCEV code for constructing GEP expressions currently assumes
that the addition of the base and all the offsets is nsw if the GEP
is inbounds. While the addition of the offsets is indeed nsw, the
addition to the base address is not, as the base address is
interpreted as an unsigned value.

Fix the GEP expression code to not assume nsw for the base+offset
calculation. However, do assume nuw if we know that the offset is
non-negative. With this, we use the same behavior as the
construction of GEP addrecs does. (Modulo the fact that we
disregard SCEV unification, as the pre-existing FIXME points out).

Differential Revision: https://reviews.llvm.org/D90648
  • Loading branch information
nikic committed Nov 13, 2020
1 parent df84941 commit f3124a4
Show file tree
Hide file tree
Showing 17 changed files with 89 additions and 74 deletions.
26 changes: 18 additions & 8 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Expand Up @@ -3437,20 +3437,20 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// flow and the no-overflow bits may not be valid for the expression in any
// context. This can be fixed similarly to how these flags are handled for
// adds.
SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
: SCEV::FlagAnyWrap;
SCEV::NoWrapFlags OffsetWrap =
GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;

Type *CurTy = GEP->getType();
bool FirstIter = true;
SmallVector<const SCEV *, 4> AddOps{BaseExpr};
SmallVector<const SCEV *, 4> Offsets;
for (const SCEV *IndexExpr : IndexExprs) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
// For a struct, add the member offset.
ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
unsigned FieldNo = Index->getZExtValue();
const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo);
AddOps.push_back(FieldOffset);
Offsets.push_back(FieldOffset);

// Update CurTy to the type of the field at Index.
CurTy = STy->getTypeAtIndex(Index);
Expand All @@ -3470,13 +3470,23 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy);

// Multiply the index by the element size to compute the element offset.
const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
AddOps.push_back(LocalOffset);
const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap);
Offsets.push_back(LocalOffset);
}
}

// Add the base and all the offsets together.
return getAddExpr(AddOps, Wrap);
// Handle degenerate case of GEP without offsets.
if (Offsets.empty())
return BaseExpr;

// Add the offsets together, assuming nsw if inbounds.
const SCEV *Offset = getAddExpr(Offsets, OffsetWrap);
// Add the base address and the offset. We cannot use the nsw flag, as the
// base address is unsigned. However, if we know that the offset is
// non-negative, we can use nuw.
SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset)
? SCEV::FlagNUW : SCEV::FlagAnyWrap;
return getAddExpr(BaseExpr, Offset, BaseWrap);
}

std::tuple<SCEV *, FoldingSetNodeID, void *>
Expand Down
Expand Up @@ -46,10 +46,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
; CHECK: Expressions re-written:
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom:
; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nsw>
; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nuw>
; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body>
; CHECK-NEXT: [PSE] %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11:
; CHECK-NEXT: ((4 * (zext i32 {0,+,1}<%for.body> to i64))<nuw><nsw> + %b)<nsw>
; CHECK-NEXT: ((4 * (zext i32 {0,+,1}<%for.body> to i64))<nuw><nsw> + %b)<nuw>
; CHECK-NEXT: --> {%b,+,4}<%for.body>
define void @test1(i64 %x, i32* %a, i32* %b) {
entry:
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
Expand Up @@ -97,11 +97,11 @@ for.end: ; preds = %for.body
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: %c High: (80 + %c))
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
; CHECK-NEXT: Member: {(2 + %c)<nuw>,+,4}
; CHECK-NEXT: Member: {%c,+,4}
; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (42 + %a))
; CHECK-NEXT: Member: {(2 + %a)<nsw>,+,2}
; CHECK-NEXT: Member: {(2 + %a)<nuw>,+,2}
; CHECK-NEXT: Member: {%a,+,2}
; CHECK-NEXT: Group {{.*}}[[TWO]]:
; CHECK-NEXT: (Low: %b High: (40 + %b))
Expand Down Expand Up @@ -169,7 +169,7 @@ for.end: ; preds = %for.body
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: %c High: (80 + %c))
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
; CHECK-NEXT: Member: {(2 + %c)<nuw>,+,4}
; CHECK-NEXT: Member: {%c,+,4}
; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (42 + %a))
Expand Down Expand Up @@ -247,8 +247,8 @@ for.end: ; preds = %for.body
; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (10000 + (2 * %offset) + %a))
; CHECK-NEXT: Member: {((2 * %offset) + %a)<nsw>,+,2}<nsw><%for.body>
; CHECK-NEXT: (Low: ((2 * %offset) + %a) High: (10000 + (2 * %offset) + %a))
; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<nw><%for.body>
; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (10000 + %a))
; CHECK-NEXT: Member: {%a,+,2}<nw><%for.body>
Expand Down
Expand Up @@ -58,8 +58,8 @@ for.end: ; preds = %for.body

; Here it is not obvious what the limits are, since 'step' could be negative.

; CHECK: Low: ((60000 + %a)<nsw> umin (60000 + (-40000 * %step) + %a))
; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a))
; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a)))

define void @g(i64 %step) {
entry:
Expand Down
Expand Up @@ -365,7 +365,7 @@ for.end: ; preds = %for.body
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>

; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)<nsw>
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>

; LV-LABEL: f5
Expand Down
Expand Up @@ -29,7 +29,7 @@ define i32 @d(i32 %base) {
; CHECK-NEXT: %idxprom = sext i32 %f.0 to i64
; CHECK-NEXT: --> {(sext i32 %base to i64),+,1}<nsw><%for.cond> U: [-2147483648,-9223372036854775808) S: [-2147483648,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds [1 x [1 x i8]], [1 x [1 x i8]]* %e, i64 0, i64 %idxprom
; CHECK-NEXT: --> {((sext i32 %base to i64) + %e)<nsw>,+,1}<nsw><%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable }
; CHECK-NEXT: --> {((sext i32 %base to i64) + %e),+,1}<nw><%for.cond> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Computable }
; CHECK-NEXT: %1 = load i32*, i32** @c, align 8
; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
; CHECK-NEXT: %sub.ptr.lhs.cast = ptrtoint i32* %1 to i64
Expand All @@ -39,7 +39,7 @@ define i32 @d(i32 %base) {
; CHECK-NEXT: %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 4
; CHECK-NEXT: --> %sub.ptr.div U: full-set S: [-2305843009213693952,2305843009213693952) Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
; CHECK-NEXT: %arrayidx1 = getelementptr inbounds [1 x i8], [1 x i8]* %arrayidx, i64 0, i64 %sub.ptr.div
; CHECK-NEXT: --> ({((sext i32 %base to i64) + %e)<nsw>,+,1}<nsw><%for.cond> + %sub.ptr.div)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
; CHECK-NEXT: --> ({((sext i32 %base to i64) + %e),+,1}<nw><%for.cond> + %sub.ptr.div) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
; CHECK-NEXT: %2 = load i8, i8* %arrayidx1, align 1
; CHECK-NEXT: --> %2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond: Variant }
; CHECK-NEXT: %conv = sext i8 %2 to i32
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Analysis/ScalarEvolution/load.ll
Expand Up @@ -17,11 +17,11 @@ define i32 @test1() nounwind readnone {
; CHECK-NEXT: %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,50) S: [0,50) Exits: 49 LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* @arr1, i32 0, i32 %i.03
; CHECK-NEXT: --> {@arr1,+,4}<nsw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr1) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: --> {@arr1,+,4}<nuw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr1) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4
; CHECK-NEXT: --> %0 U: full-set S: full-set Exits: 50 LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %arrayidx1 = getelementptr inbounds [50 x i32], [50 x i32]* @arr2, i32 0, i32 %i.03
; CHECK-NEXT: --> {@arr2,+,4}<nsw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr2) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: --> {@arr2,+,4}<nuw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr2) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %1 = load i32, i32* %arrayidx1, align 4
; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %add = add i32 %0, %sum.04
Expand Down Expand Up @@ -74,7 +74,7 @@ define i32 @test2() nounwind uwtable readonly {
; CHECK-NEXT: %n.01 = phi %struct.ListNode* [ bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node5 to %struct.ListNode*), %entry ], [ %1, %for.body ]
; CHECK-NEXT: --> %n.01 U: full-set S: full-set Exits: @node1 LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %i = getelementptr inbounds %struct.ListNode, %struct.ListNode* %n.01, i64 0, i32 1
; CHECK-NEXT: --> (4 + %n.01)<nsw> U: [-2147483644,-2147483648) S: [-2147483644,-2147483648) Exits: (4 + @node1)<nuw><nsw> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: --> (4 + %n.01)<nuw> U: [4,0) S: [4,0) Exits: (4 + @node1)<nuw><nsw> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %0 = load i32, i32* %i, align 4
; CHECK-NEXT: --> %0 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %add = add nsw i32 %0, %sum.02
Expand Down

0 comments on commit f3124a4

Please sign in to comment.