Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 52 additions & 12 deletions llvm/test/Transforms/LoopUnrollAndJam/dependencies_multidims.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
; RUN: opt -da-disable-delinearization-checks -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
; RUN: opt -da-disable-delinearization-checks -aa-pipeline=basic-aa -passes='loop-unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; CHECK-LABEL: sub_sub_less
; CHECK: %j = phi
; CHECK-NOT: %j.1 = phi
;
; sub_sub_less should NOT be unroll-and-jammed due to a loop-carried dependency.
; Memory accesses:
; - A[i][j] = 1 (write to current iteration)
; - A[i+1][j-1] = add (write to next i iteration, previous j iteration)
; The dependency: A[i+1][j-1] from iteration (i,j) may conflict with A[i'][j']
; from a later iteration when i'=i+1 and j'=j-1, creating a backward dependency
; in the j dimension that prevents safe unroll-and-jam.
define void @sub_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
Expand All @@ -16,7 +22,7 @@ for.outer:
br label %for.inner

for.inner:
%j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
%j = phi i32 [ %add6, %for.inner ], [ 1, %for.outer ]
%sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
%arrayidx5 = getelementptr inbounds i32, ptr %B, i32 %j
%0 = load i32, ptr %arrayidx5, align 4
Expand Down Expand Up @@ -47,6 +53,14 @@ cleanup:
; CHECK: %j.1 = phi
; CHECK: %j.2 = phi
; CHECK: %j.3 = phi
;
; sub_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
; Memory accesses:
; - A[i][j] = 1 (write to current iteration)
; - A[i+1][j] = add (write to next i iteration, same j iteration)
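; No conflict: unroll-and-jam unrolls the outer i loop and fuses the copies of
; the inner j loop. The store to A[i+1][j] in iteration (i,j) and the store to
; A[i'][j'] in iteration (i+1,j) hit the same element, but both land in the same
; jammed j iteration with the copy for i still ahead of the copy for i+1, so
; their original order is preserved.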
Comment on lines +61 to +63
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sum = 0;
for (int i = 0; i < N; ++i) // N > 0
  for (int j = 0; j < N; ++j) 
    sum += i * B[j];
    A[i][j] = 1;
    A[i+1][j] = sum;

unrolled j iterations

The i (outer loop) iterations are unrolled with unroll-and-jam, not the j iterations.

since j' values are different and i+1 from current doesn't overlap with i' from unrolled iteration

A[i+1][j] and A[i'][j] with i'==i+1 (the next iteration, which will be in the same body after unroll-and-jam) do point to the same memory, so there seems to be a hazard between the iterations. It is just that none of the iterations from [i,j+1] to [i,N-1] and from [i+1,0] to [i+1,j-1] (the iterations that execute between [i,j] and [i',j] in the original loop) access A[i+1][j] or A[i+2][j]. Maybe this is what the comment is trying to say, but I have difficulty understanding this explanation.

Factor-2 unroll:

sum0 = 0;
sum1 = 0;
for (int i = 0; i < N; i+=2)
  for (int j = 0; j < N; ++j) 
    sum0 += i * B[j];
    A[i][j] = 1;
    A[i + 1][j] = sum0;
    
    sum1 += (i+1) * B[j];
    A[i+1][j] = 1;
    A[i+2][j] = sum1;

define void @sub_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
Expand Down Expand Up @@ -88,6 +102,14 @@ cleanup:
; CHECK: %j.1 = phi
; CHECK: %j.2 = phi
; CHECK: %j.3 = phi
;
; sub_sub_more SHOULD be unroll-and-jammed (count=4) as it's safe.
; Memory accesses:
; - A[i][j] = 1 (write to current iteration)
; - A[i+1][j+1] = add (write to next i iteration, next j iteration)
; No conflict: unroll-and-jam unrolls the outer i loop and fuses the copies of
; the inner j loop. The store to A[i+1][j+1] in iteration (i,j) hits the same
; element as the A[i'][j'] store of iteration (i+1,j+1), which still runs one
; jammed j iteration later, so their original order is preserved.
define void @sub_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
Expand Down Expand Up @@ -126,12 +148,21 @@ cleanup:
; CHECK-LABEL: sub_sub_less_3d
; CHECK: %k = phi
; CHECK-NOT: %k.1 = phi

;
; sub_sub_less_3d should NOT be unroll-and-jammed due to a loop-carried dependency.
; Memory accesses:
; - A3d[i][j][k] = 0 (write to current iteration)
; - A3d[i+1][j][k-1] = 0 (write to next i iteration, previous k iteration)
; The dependency: A[i+1][j][k-1] from iteration (i,j,k) may conflict with
; A[i'][j'][k'] from a later iteration when i'=i+1 and k'=k-1, creating a
; backward dependency in the k dimension that prevents safe unroll-and-jam.
; This is a 3D version of the same pattern as sub_sub_less.
;
; for (long i = 0; i < 100; ++i)
; for (long j = 0; j < 100; ++j)
; for (long k = 0; k < 100; ++k) {
; A[i][j][k] = 0;
; A[i+1][j][k-1] = 0;
; for (long k = 1; k < 100; ++k) {
; A[i][j][k] = 5;
; A[i+1][j][k-1] = 10;
; }

define void @sub_sub_less_3d(ptr noalias %A) {
Expand All @@ -147,13 +178,13 @@ for.j:
br label %for.k

for.k:
%k = phi i32 [ 0, %for.j ], [ %inc.k, %for.k ]
%k = phi i32 [ 1, %for.j ], [ %inc.k, %for.k ]
%arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr %A, i32 %i, i32 %j, i32 %k
store i32 0, ptr %arrayidx, align 4
store i32 5, ptr %arrayidx, align 4
%add.i = add nsw i32 %i, 1
%sub.k = add nsw i32 %k, -1
%arrayidx2 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i32 %add.i, i32 %j, i32 %sub.k
store i32 0, ptr %arrayidx2, align 4
store i32 10, ptr %arrayidx2, align 4
%inc.k = add nsw i32 %k, 1
%cmp.k = icmp slt i32 %inc.k, 100
br i1 %cmp.k, label %for.k, label %for.j.latch
Expand All @@ -175,7 +206,16 @@ for.end:
; CHECK-LABEL: sub_sub_outer_scalar
; CHECK: %k = phi
; CHECK-NOT: %k.1 = phi

;
; sub_sub_outer_scalar should NOT be unroll-and-jammed due to a loop-carried dependency.
; Memory accesses:
; - load from A[j][k] (read from current j iteration)
; - store to A[j-1][k] (write to previous j iteration)
; The dependency: reading A[j][k] and writing A[j-1][k] creates a backward
; dependency in the j dimension. The test attempts to unroll-and-jam the j loop
; with the k loop being jammed. When this happens, iterations j, j+1, j+2, j+3
; would be unrolled and their k loops jammed together, but j+1's write to A[j][k]
; would conflict with j's read from A[j][k], violating sequential semantics.
define void @sub_sub_outer_scalar(ptr %A) {
entry:
br label %for.i
Expand All @@ -185,7 +225,7 @@ for.i:
br label %for.j

for.j:
%j = phi i64 [ 0, %for.i ], [ %inc.j, %for.j.latch ]
%j = phi i64 [ 1, %for.i ], [ %inc.j, %for.j.latch ]
br label %for.k

for.k:
Expand Down
Loading