diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp index 0c3b02ae09f47..b1edc76f37d09 100644 --- a/llvm/lib/Analysis/Delinearization.cpp +++ b/llvm/lib/Analysis/Delinearization.cpp @@ -747,6 +747,20 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE, ArrayRef Sizes, ArrayRef Subscripts, const Value *Ptr) { + // Sizes and Subscripts are as follows: + // + // Sizes: [UNK][S_2]...[S_n] + // Subscripts: [I_1][I_2]...[I_n] + // + // where the size of the outermost dimension is unknown (UNK). + + auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * { + if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/true, A, B)) + return nullptr; + return SE.getMulExpr(A, B); + }; + + // Range check: 0 <= I_k < S_k for k = 2..n. for (size_t I = 1; I < Sizes.size(); ++I) { const SCEV *Size = Sizes[I - 1]; const SCEV *Subscript = Subscripts[I]; @@ -755,6 +769,43 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE, if (!isKnownLessThan(&SE, Subscript, Size)) return false; } + + // The offset computation is as follows: + // + // Offset = I_n + + // S_n * I_{n-1} + + // ... + + // (S_2 * ... * S_n) * I_1 + // + // Regarding this as a function from (I_1, I_2, ..., I_n) to integers, it + // must be injective. To guarantee it, the above calculation must not + // overflow. Since we have already checked that 0 <= I_k < S_k for k = 2..n, + // the minimum and maximum values occur in the following cases: + // + // Min = [I_1][0]...[0] = S_2 * ... * S_n * I_1 + // Max = [I_1][S_2-1]...[S_n-1] + // = (S_2 * ... * S_n) * I_1 + + // (S_2 * ... * S_{n-1}) * (S_2 - 1) + + // ... + + // (S_n - 1) + // = (S_2 * ... * S_n) * I_1 + + // (S_2 * ... * S_n) - 1 (can be proven by induction) + // + const SCEV *Prod = SE.getOne(Sizes[0]->getType()); + for (const SCEV *Size : Sizes) { + Prod = MulOverflow(Prod, Size); + if (!Prod) + return false; + } + const SCEV *Min = MulOverflow(Prod, Subscripts[0]); + if (!Min) + return false; + + // Over-approximate Max as Prod * I_1 + Prod (ignoring the -1). + if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/true, Min, + Subscripts[0])) + return false; + return true; } diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll index 9e6a4221f8eda..7e5c5142dccbc 100644 --- a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll +++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll @@ -11,7 +11,7 @@ define void @mat_mul(ptr %C, ptr %A, ptr %B, i64 %N) !kernel_arg_addr_space !2 ! ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. ; CHECK-NEXT: ArrayRef[%call][{0,+,1}<%for.inc>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; CHECK-EMPTY: ; CHECK-NEXT: Inst: %tmp5 = load float, ptr %arrayidx4, align 4 ; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc> diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll index e1ad1c55313a4..e5d2806101926 100644 --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll @@ -16,14 +16,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; CHECK-EMPTY: ; CHECK-NEXT: Inst: store double %val, ptr %arrayidx, align 8 ; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j> ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: br label %for.i diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll index d5213e5afb33c..f5f0628ede937 100644 --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll @@ -16,7 +16,7 @@ define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: br label %for.i diff --git a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll index 011dc40697cb5..f768002dd9e41 100644 --- a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll +++ b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll @@ -19,14 +19,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; CHECK-EMPTY: ; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx1, align 8 ; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,8}<%for.i>,+,(8 * %n)}<%for.j> ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%n] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<%for.j>][{0,+,1}<%for.i>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: br label %for.i diff --git a/llvm/test/Analysis/Delinearization/validation_large_size.ll b/llvm/test/Analysis/Delinearization/validation_large_size.ll index f78e68f60569a..c3d1f6db64d15 100644 --- a/llvm/test/Analysis/Delinearization/validation_large_size.ll +++ b/llvm/test/Analysis/Delinearization/validation_large_size.ll @@ -1,12 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes='print' --delinearize-use-fixed-size-array-heuristic -disable-output 2>&1 | FileCheck %s -; FIXME: When considering an array as a function from subcripts to addresses, -; it should be injective. That is, different subscript tuples should map to -; different addresses. Currently, delinearization doesn't guarantee this -; property, especially when the inferred array size is very large so that the -; product of dimensions may overflow. The delinearization validation should -; consider such cases as invalid. +; When considering an array as a function from subcripts to addresses, it +; should be injective. That is, different subscript tuples should map to +; different addresses. ; for (i = 0; i < (1ULL << 60); i++) ; for (j = 0; j < 256; j++) @@ -23,7 +20,7 @@ define void @large_size_fixed(ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][256] with elements of 1 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: br label %for.i.header @@ -75,7 +72,7 @@ define void @large_size_parametric(i64 %n, i64 %m, i64 %o, ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 1 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j.header>][{0,+,1}<%for.k.header>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: %guard.i = icmp sgt i64 %n, 0 @@ -134,3 +131,44 @@ for.i.latch: exit: ret void } + +; for (i = 0; i < (1 << 54); i++) +; for (j = 0; j < 256; j++) +; A[i*256 + j] = 0; +; +; We also need to consider the element size when validation. +; +define void @elementsize_cause_ovfl(ptr %A) { +; CHECK-LABEL: 'elementsize_cause_ovfl' +; CHECK-NEXT: Inst: store i64 0, ptr %gep, align 4 +; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,2048}<%for.i.header>,+,8}<%for.j> +; CHECK-NEXT: Base offset: %A +; CHECK-NEXT: ArrayDecl[UnknownSize][256] with elements of 8 bytes. +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j>] +; CHECK-NEXT: Delinearization validation: Failed +; +entry: + br label %for.i.header + +for.i.header: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.latch ] + %i.mul = mul i64 %i, 256 + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i.header ], [ %j.inc, %for.j ] + %offset = add i64 %i.mul, %j + %gep = getelementptr i64, ptr %A, i64 %offset + store i64 0, ptr %gep + %j.inc = add i64 %j, 1 + %ec.j = icmp eq i64 %j.inc, 256 + br i1 %ec.j, label %for.i.latch, label %for.j + +for.i.latch: + %i.inc = add i64 %i, 1 + %ec.i = icmp eq i64 %i.inc, 18014398509481984 + br i1 %ec.i, label %exit, label %for.i.header + +exit: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll index 8f94a455d3724..130b9930cfdf5 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll @@ -13,11 +13,11 @@ target triple = "thumbv8m.main-arm-none-eabi" define void @t1(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't1' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - consistent anti [0 0 0|<]! +; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0 @@ -78,7 +78,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t2(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't2' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 @@ -145,7 +145,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t3(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't3' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 @@ -212,7 +212,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t4(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't4' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 @@ -279,7 +279,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t5(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't5' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 @@ -346,11 +346,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t6(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't6' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - consistent anti [-1 0 0]! +; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0 @@ -414,11 +414,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t7(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't7' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - consistent anti [1 0 0]! +; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0 @@ -482,11 +482,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't8' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - consistent anti [0 0 1]! +; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll index 069a540ea0295..0ff9c05c884ff 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll @@ -100,7 +100,7 @@ define void @linearized_accesses(i64 %n, i64 %m, i64 %o, ptr %A) { ; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4 ; CHECK-NEXT: da analyze - output [* * *|<]! ; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: br label %for.i diff --git a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll index 19cef4537a769..16e0e7bccaaf5 100644 --- a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll @@ -536,9 +536,13 @@ for.end: ; preds = %for.body ;; A[i] = 0; define void @strong11(ptr %A) nounwind uwtable ssp { -; CHECK-LABEL: 'strong11' -; CHECK-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - consistent output [0 S]! +; CHECK-ALL-LABEL: 'strong11' +; CHECK-ALL-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4 +; CHECK-ALL-NEXT: da analyze - none! +; +; CHECK-STRONG-SIV-LABEL: 'strong11' +; CHECK-STRONG-SIV-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4 +; CHECK-STRONG-SIV-NEXT: da analyze - consistent output [0 S]! ; entry: br label %for.cond1.preheader diff --git a/llvm/test/Transforms/LICM/lnicm.ll b/llvm/test/Transforms/LICM/lnicm.ll index 814f964666305..e331ab7d39e83 100644 --- a/llvm/test/Transforms/LICM/lnicm.ll +++ b/llvm/test/Transforms/LICM/lnicm.ll @@ -3,6 +3,9 @@ ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM +; XFAIL: * +; Loop interchange currently fails due to a failure in dependence analysis. + ; This test represents the following function: ; void test(int n, int m, int x[m][n], int y[n], int *z) { ; for (int k = 0; k < n; k++) {