Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions llvm/lib/Analysis/Delinearization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,20 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
ArrayRef<const SCEV *> Sizes,
ArrayRef<const SCEV *> Subscripts,
const Value *Ptr) {
// Sizes and Subscripts are as follows:
//
// Sizes: [UNK][S_2]...[S_n]
// Subscripts: [I_1][I_2]...[I_n]
//
// where the size of the outermost dimension is unknown (UNK).

// Helper: build the SCEV for the product A * B. Returns nullptr when SE cannot
// show that the signed multiplication is free of overflow, so callers must
// check the result before using it.
auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/true, A, B))
return nullptr;
return SE.getMulExpr(A, B);
};

// Range check: 0 <= I_k < S_k for k = 2..n.
for (size_t I = 1; I < Sizes.size(); ++I) {
const SCEV *Size = Sizes[I - 1];
const SCEV *Subscript = Subscripts[I];
Expand All @@ -755,6 +769,43 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
if (!isKnownLessThan(&SE, Subscript, Size))
return false;
}

// The offset computation is as follows:
//
// Offset = I_n +
// S_n * I_{n-1} +
// ... +
// (S_2 * ... * S_n) * I_1
//
// Regarding this as a function from (I_1, I_2, ..., I_n) to integers, it
// must be injective. To guarantee it, the above calculation must not
// overflow. Since we have already checked that 0 <= I_k < S_k for k = 2..n,
// the minimum and maximum values occur in the following cases:
//
// Min = [I_1][0]...[0] = S_2 * ... * S_n * I_1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why isn't Min just 0 (I_1 == 0)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because DA appears to allow the outermost subscript to be negative, e.g.,

;; for (long int i = 0; i < 100; i++)
;; for (long int j = 0; j < 100; j++) {
;; A[3*i - 18][22 - i][2*i + j] = i;
;; *B++ = A[i][i][3*i + j];
. That said, I don't know if this is important or not.

// Max = [I_1][S_2-1]...[S_n-1]
// = (S_2 * ... * S_n) * I_1 +
// (S_3 * ... * S_n) * (S_2 - 1) +
// ... +
// (S_n - 1)
// = (S_2 * ... * S_n) * I_1 +
// (S_2 * ... * S_n) - 1 (can be proven by induction)
//
const SCEV *Prod = SE.getOne(Sizes[0]->getType());
for (const SCEV *Size : Sizes) {
Prod = MulOverflow(Prod, Size);
if (!Prod)
return false;
}
const SCEV *Min = MulOverflow(Prod, Subscripts[0]);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the first dimension, it takes the concrete subscript, for every other dimension it assumes the largest possible subscript. Would we get a better estimate combining both sources of knowledge by the max of those (SCEVMaxExpr(Subscripts[i],Size[i]-1))?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure until we try it, but once a SCEVUnknown gets in there, I feel like it probably won't change much.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the following implementation match your intention?

kasuga-fj@e8f6f57

As for the existing regression tests, their results have not changed.

if (!Min)
return false;

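// Over-approximate Max as Prod * I_1 + Prod (ignoring the -1).
// NOTE(review): the check below adds Subscripts[0] to Min rather than Prod,
// which does not match the formula in this comment -- confirm which operand
// is intended.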
if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/true, Min,
Subscripts[0]))
return false;

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ define void @mat_mul(ptr %C, ptr %A, ptr %B, i64 %N) !kernel_arg_addr_space !2 !
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[%call][{0,+,1}<nuw><nsw><%for.inc>]
; CHECK-NEXT: Delinearization validation: Succeeded
; CHECK-NEXT: Delinearization validation: Failed
; CHECK-EMPTY:
; CHECK-NEXT: Inst: %tmp5 = load float, ptr %arrayidx4, align 4
; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc>
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
; CHECK-NEXT: Delinearization validation: Succeeded
; CHECK-NEXT: Delinearization validation: Failed
; CHECK-EMPTY:
; CHECK-NEXT: Inst: store double %val, ptr %arrayidx, align 8
; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
; CHECK-NEXT: Delinearization validation: Succeeded
; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
; CHECK-NEXT: Delinearization validation: Succeeded
; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
; CHECK-NEXT: Delinearization validation: Succeeded
; CHECK-NEXT: Delinearization validation: Failed
; CHECK-EMPTY:
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx1, align 8
; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,8}<%for.i>,+,(8 * %n)}<%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%n] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.i>]
; CHECK-NEXT: Delinearization validation: Succeeded
; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i
Expand Down
54 changes: 46 additions & 8 deletions llvm/test/Analysis/Delinearization/validation_large_size.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes='print<delinearization>' --delinearize-use-fixed-size-array-heuristic -disable-output 2>&1 | FileCheck %s

; FIXME: When considering an array as a function from subcripts to addresses,
; it should be injective. That is, different subscript tuples should map to
; different addresses. Currently, delinearization doesn't guarantee this
; property, especially when the inferred array size is very large so that the
; product of dimensions may overflow. The delinearization validation should
; consider such cases as invalid.
; When considering an array as a function from subscripts to addresses, it
; should be injective. That is, different subscript tuples should map to
; different addresses.

; for (i = 0; i < (1ULL << 60); i++)
; for (j = 0; j < 256; j++)
Expand All @@ -23,7 +20,7 @@ define void @large_size_fixed(ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][256] with elements of 1 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j>]
; CHECK-NEXT: Delinearization validation: Succeeded
; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i.header
Expand Down Expand Up @@ -75,7 +72,7 @@ define void @large_size_parametric(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 1 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k.header>]
; CHECK-NEXT: Delinearization validation: Succeeded
; CHECK-NEXT: Delinearization validation: Failed
;
entry:
%guard.i = icmp sgt i64 %n, 0
Expand Down Expand Up @@ -134,3 +131,44 @@ for.i.latch:
exit:
ret void
}

; for (i = 0; i < (1 << 54); i++)
; for (j = 0; j < 256; j++)
; A[i*256 + j] = 0;
;
; We also need to consider the element size when validating.
;
define void @elementsize_cause_ovfl(ptr %A) {
; CHECK-LABEL: 'elementsize_cause_ovfl'
; CHECK-NEXT: Inst: store i64 0, ptr %gep, align 4
; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,2048}<%for.i.header>,+,8}<%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][256] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j>]
; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i.header

for.i.header:
%i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.latch ]
; Row part of the linearized index: i * 256.
%i.mul = mul i64 %i, 256
br label %for.j

for.j:
%j = phi i64 [ 0, %for.i.header ], [ %j.inc, %for.j ]
%offset = add i64 %i.mul, %j
; The index is in units of i64 elements, so the byte offset is 8 times larger
; (compare the strides 2048 and 8 in the AccessFunction above).
%gep = getelementptr i64, ptr %A, i64 %offset
store i64 0, ptr %gep
%j.inc = add i64 %j, 1
%ec.j = icmp eq i64 %j.inc, 256
br i1 %ec.j, label %for.i.latch, label %for.j

for.i.latch:
%i.inc = add i64 %i, 1
; 18014398509481984 == 1 << 54, the outer loop bound.
%ec.i = icmp eq i64 %i.inc, 18014398509481984
br i1 %ec.i, label %exit, label %for.i.header

exit:
ret void
}
32 changes: 16 additions & 16 deletions llvm/test/Analysis/DependenceAnalysis/DADelin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ target triple = "thumbv8m.main-arm-none-eabi"
define void @t1(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't1'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - consistent anti [0 0 0|<]!
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
Expand Down Expand Up @@ -78,7 +78,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t2(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't2'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
Expand Down Expand Up @@ -145,7 +145,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t3(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't3'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
Expand Down Expand Up @@ -212,7 +212,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t4(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't4'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
Expand Down Expand Up @@ -279,7 +279,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t5(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't5'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
Expand Down Expand Up @@ -346,11 +346,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t6(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't6'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - consistent anti [-1 0 0]!
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
Expand Down Expand Up @@ -414,11 +414,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t7(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't7'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - consistent anti [1 0 0]!
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
Expand Down Expand Up @@ -482,11 +482,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't8'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - consistent anti [0 0 1]!
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ define void @linearized_accesses(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4
; CHECK-NEXT: da analyze - output [* * *|<]!
; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
br label %for.i
Expand Down
10 changes: 7 additions & 3 deletions llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
Original file line number Diff line number Diff line change
Expand Up @@ -536,9 +536,13 @@ for.end: ; preds = %for.body
;; A[i] = 0;

define void @strong11(ptr %A) nounwind uwtable ssp {
; CHECK-LABEL: 'strong11'
; CHECK-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - consistent output [0 S]!
; CHECK-ALL-LABEL: 'strong11'
; CHECK-ALL-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
; CHECK-ALL-NEXT: da analyze - none!
;
; CHECK-STRONG-SIV-LABEL: 'strong11'
; CHECK-STRONG-SIV-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
; CHECK-STRONG-SIV-NEXT: da analyze - consistent output [0 S]!
;
entry:
br label %for.cond1.preheader
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/Transforms/LICM/lnicm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM
; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM

; XFAIL: *
; Loop interchange currently fails due to a failure in dependence analysis.

; This test represents the following function:
; void test(int n, int m, int x[m][n], int y[n], int *z) {
; for (int k = 0; k < n; k++) {
Expand Down