| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| ; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s | ||
|
|
||
| define amdgpu_kernel void @divergent_cycle_1(i32 %a, i32 %b, i32 %c) { | ||
| ; CHECK-LABEL: UniformityInfo for function 'divergent_cycle_1': | ||
| ; CHECK: CYCLES ASSSUMED DIVERGENT: | ||
| ; CHECK: depth=1: entries(R P) S Q | ||
| ; CHECK: CYCLES WITH DIVERGENT EXIT: | ||
| ; CHECK: depth=2: entries(S P) Q | ||
| ; CHECK: depth=1: entries(R P) S Q | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| br i1 %cond.uni, label %P, label %R | ||
|
|
||
| P: | ||
| ; CHECK: DIVERGENT: %pp.phi = | ||
| %pp.phi = phi i32 [ %a, %entry], [ %b, %S ] | ||
| %pp = add i32 %b, 1 | ||
| br label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| br i1 %cond.div, label %S, label %R | ||
|
|
||
| R: | ||
| %rr = add i32 %b, 1 | ||
| br label %S | ||
|
|
||
| S: | ||
| ; CHECK: DIVERGENT: %s.phi = | ||
| %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ] | ||
| %ss = add i32 %b, 1 | ||
| br i1 %cond.uni, label %exit, label %P | ||
|
|
||
| exit: | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| define amdgpu_kernel void @uniform_cycle_1(i32 %a, i32 %b, i32 %c) { | ||
| ; CHECK-LABEL: UniformityInfo for function 'uniform_cycle_1': | ||
| ; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
| ; CHECK-NOT: CYCLES WITH DIVERGENT EXIT: | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| br i1 %cond.uni, label %P, label %T | ||
|
|
||
| P: | ||
| ; CHECK-NOT: DIVERGENT: %pp.phi = phi i32 | ||
| %pp.phi = phi i32 [ %a, %entry], [ %b, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| br i1 %cond.div, label %S, label %R | ||
|
|
||
| R: | ||
| %rr = add i32 %b, 1 | ||
| br label %S | ||
|
|
||
| S: | ||
| ; CHECK: DIVERGENT: %s.phi = | ||
| %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ] | ||
| %ss = add i32 %b, 1 | ||
| br i1 %cond.uni, label %exit, label %T | ||
|
|
||
| T: | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| declare i32 @llvm.amdgcn.workitem.id.x() #0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,240 @@ | ||
| ; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s | ||
|
|
||
| ; These tests have identical control flow graphs with slight changes | ||
| ; that affect cycle-info. There is a minor functional difference in | ||
| ; the branch conditions; but that is not relevant to the tests. | ||
|
|
||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| ;; | ||
| ;; The inner cycle has a header (P) that dominates the join, hence | ||
| ;; both cycles are reported as converged. | ||
| ;; | ||
| ;; CHECK-LABEL: UniformityInfo for function 'headers_b_p': | ||
| ;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
| ;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT: | ||
|
|
||
| define amdgpu_kernel void @headers_b_p(i32 %a, i32 %b, i32 %c) { | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| %a.div = add i32 %tid, %a | ||
| br i1 %cond.uni, label %B, label %A | ||
|
|
||
| A: | ||
| br label %B | ||
|
|
||
| B: | ||
| br i1 %cond.uni, label %C, label %D | ||
|
|
||
| C: | ||
| br i1 %cond.uni, label %T, label %P | ||
|
|
||
| P: | ||
| %pp.phi = phi i32 [ %a, %C], [ %b, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br i1 %cond.uni, label %R, label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| br i1 %cond.div, label %S, label %R | ||
|
|
||
| R: | ||
| %rr = add i32 %b, 1 | ||
| br label %S | ||
|
|
||
| S: | ||
| %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ] | ||
| %ss = add i32 %pp.phi, 1 | ||
| br i1 %cond.uni, label %D, label %T | ||
|
|
||
| D: | ||
| br i1 %cond.uni, label %exit, label %A | ||
|
|
||
| T: | ||
| %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| ;; | ||
| ;; Same as previous, but the outer cycle has a different header (A). | ||
| ;; The inner cycle has a header (P) that dominates the join, hence | ||
| ;; both cycles are reported as converged. | ||
| ;; | ||
| ;; CHECK-LABEL: UniformityInfo for function 'headers_a_p': | ||
| ;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
| ;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT: | ||
|
|
||
| define amdgpu_kernel void @headers_a_p(i32 %a, i32 %b, i32 %c) { | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| %a.div = add i32 %tid, %a | ||
| br i1 %cond.uni, label %B, label %A | ||
|
|
||
| A: | ||
| br label %B | ||
|
|
||
| B: | ||
| br i1 %cond.uni, label %C, label %D | ||
|
|
||
| C: | ||
| br i1 %cond.uni, label %T, label %P | ||
|
|
||
| P: | ||
| %pp.phi = phi i32 [ %a, %C], [ %b, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br i1 %cond.uni, label %R, label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| br i1 %cond.div, label %S, label %R | ||
|
|
||
| R: | ||
| %rr = add i32 %b, 1 | ||
| br label %S | ||
|
|
||
| S: | ||
| %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ] | ||
| %ss = add i32 %pp.phi, 1 | ||
| br i1 %cond.uni, label %D, label %T | ||
|
|
||
| D: | ||
| br i1 %cond.uni, label %exit, label %A | ||
|
|
||
| T: | ||
| %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| ;; | ||
| ;; The inner cycle has a header (T) that does not dominate the join. | ||
| ;; The outer cycle has a header (B) that dominates the join. Hence | ||
| ;; only the inner cycle is reported as diverged. | ||
| ;; | ||
| ;; CHECK-LABEL: UniformityInfo for function 'headers_b_t': | ||
| ;; CHECK: CYCLES ASSSUMED DIVERGENT: | ||
| ;; CHECK: depth=2: entries(T P) S Q R | ||
| ;; CHECK: CYCLES WITH DIVERGENT EXIT: | ||
| ;; CHECK: depth=1: entries(B A) D T S Q P R C | ||
|
|
||
| define amdgpu_kernel void @headers_b_t(i32 %a, i32 %b, i32 %c) { | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| %a.div = add i32 %tid, %a | ||
| br i1 %cond.uni, label %A, label %B | ||
|
|
||
| A: | ||
| br label %B | ||
|
|
||
| B: | ||
| br i1 %cond.uni, label %C, label %D | ||
|
|
||
| C: | ||
| br i1 %cond.uni, label %P, label %T | ||
|
|
||
| P: | ||
| %pp.phi = phi i32 [ %a, %C], [ %b, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br i1 %cond.uni, label %R, label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| br i1 %cond.div, label %S, label %R | ||
|
|
||
| R: | ||
| %rr = add i32 %b, 1 | ||
| br label %S | ||
|
|
||
| S: | ||
| %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ] | ||
| %ss = add i32 %pp.phi, 1 | ||
| br i1 %cond.uni, label %D, label %T | ||
|
|
||
| D: | ||
| br i1 %cond.uni, label %exit, label %A | ||
|
|
||
| T: | ||
| %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| ;; | ||
| ;; The cycles have headers (A and T) that do not dominate the join. | ||
| ;; Hence the outermost cycle is reported as diverged. | ||
| ;; | ||
| ;; CHECK-LABEL: UniformityInfo for function 'headers_a_t': | ||
| ;; CHECK: CYCLES ASSSUMED DIVERGENT: | ||
| ;; CHECK: depth=1: entries(A B) D T S Q P R C | ||
| ;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT: | ||
|
|
||
| define amdgpu_kernel void @headers_a_t(i32 %a, i32 %b, i32 %c) { | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| %a.div = add i32 %tid, %a | ||
| br i1 %cond.uni, label %B, label %A | ||
|
|
||
| A: | ||
| br label %B | ||
|
|
||
| B: | ||
| br i1 %cond.uni, label %C, label %D | ||
|
|
||
| C: | ||
| br i1 %cond.uni, label %P, label %T | ||
|
|
||
| P: | ||
| %pp.phi = phi i32 [ %a, %C], [ %b, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br i1 %cond.uni, label %R, label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| br i1 %cond.div, label %S, label %R | ||
|
|
||
| R: | ||
| %rr = add i32 %b, 1 | ||
| br label %S | ||
|
|
||
| S: | ||
| %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ] | ||
| %ss = add i32 %pp.phi, 1 | ||
| br i1 %cond.uni, label %D, label %T | ||
|
|
||
| D: | ||
| br i1 %cond.uni, label %exit, label %A | ||
|
|
||
| T: | ||
| %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| declare i32 @llvm.amdgcn.workitem.id.x() #0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| ; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s | ||
|
|
||
| ; These tests have identical control flow graphs with slight changes | ||
| ; that affect cycle-info. There is a minor functional difference in | ||
| ; the branch conditions; but that is not relevant to the tests. | ||
|
|
||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| ;; | ||
| ;; The cycle has a header (T) that does not dominate the join, hence | ||
| ;; the entire cycle is reported as diverged. | ||
| ;; | ||
| ;; CHECK-LABEL: UniformityInfo for function 't_header': | ||
| ;; CHECK: CYCLES ASSSUMED DIVERGENT: | ||
| ;; CHECK: depth=1: entries(T P) S Q R | ||
|
|
||
| define amdgpu_kernel void @t_header(i32 %a, i32 %b, i32 %c) { | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| %a.div = add i32 %tid, %a | ||
| br i1 %cond.uni, label %P, label %T | ||
|
|
||
| P: | ||
| ; CHECK: DIVERGENT: %pp.phi = | ||
| %pp.phi = phi i32 [ %a, %entry], [ %b, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br i1 %cond.uni, label %R, label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| br i1 %cond.div, label %S, label %R | ||
|
|
||
| R: | ||
| %rr = add i32 %b, 1 | ||
| br label %S | ||
|
|
||
| S: | ||
| ; CHECK: DIVERGENT: %s.phi = | ||
| ; CHECK: DIVERGENT: %ss = | ||
| %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ] | ||
| %ss = add i32 %pp.phi, 1 | ||
| br i1 %cond.uni, label %exit, label %T | ||
|
|
||
| T: | ||
| ; CHECK: DIVERGENT: %tt.phi = | ||
| %tt.phi = phi i32 [ %ss, %S ], [ %a, %entry ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
| ;; | ||
| ;; The cycle has a header (P) that dominates the join, hence | ||
| ;; the cycle is reported as converged. | ||
| ;; | ||
| ;; CHECK-LABEL: UniformityInfo for function 'p_header': | ||
| ;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
|
|
||
| define amdgpu_kernel void @p_header(i32 %a, i32 %b, i32 %c) { | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| br i1 %cond.uni, label %T, label %P | ||
|
|
||
| P: | ||
| ; CHECK-NOT: DIVERGENT: %pp.phi = phi i32 | ||
| %pp.phi = phi i32 [ %a, %entry], [ %b, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br i1 %cond.uni, label %R, label %Q | ||
|
|
||
| Q: | ||
| ; The only divergent branch in this function; both targets stay inside the cycle. | ||
| %qq = add i32 %b, 1 | ||
| br i1 %cond.div, label %S, label %R | ||
|
|
||
| R: | ||
| %rr = add i32 %b, 1 | ||
| br label %S | ||
|
|
||
| S: | ||
| ; CHECK: DIVERGENT: %s.phi = | ||
| ; CHECK-NOT: DIVERGENT: %ss = add i32 | ||
| %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ] | ||
| %ss = add i32 %pp.phi, 1 | ||
| br i1 %cond.uni, label %exit, label %T | ||
|
|
||
| T: | ||
| ; The phi at the second cycle entry is expected to stay uniform, like %pp.phi and %ss. | ||
| ; CHECK-NOT: DIVERGENT: %tt.phi = phi i32 | ||
| %tt.phi = phi i32 [ %ss, %S ], [ %a, %entry ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| declare i32 @llvm.amdgcn.workitem.id.x() #0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,139 @@ | ||
| ; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s | ||
|
|
||
| ; Cycle entered at both P and T from the entry block; the only divergent | ||
| ; branch is the one in Q, which either continues to T or leaves the cycle | ||
| ; through exit. | ||
| ; CHECK-LABEL: UniformityInfo for function 'basic': | ||
| ; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
| ; CHECK: CYCLES WITH DIVERGENT EXIT: | ||
| ; CHECK: depth=1: entries(P T) Q | ||
| define amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) { | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| br i1 %cond.uni, label %T, label %P | ||
|
|
||
| P: | ||
| %pp.phi.1 = phi i32 [ %a, %entry], [ %b, %T ] | ||
| %pp.phi.2 = phi i32 [ %a, %entry], [ %tt.phi, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| %qq.div.1 = add i32 %pp.phi.2, 1 | ||
| %qq.div.2 = add i32 %pp.phi.2, 1 | ||
| br i1 %cond.div, label %T, label %exit | ||
|
|
||
| T: | ||
| %tt.phi = phi i32 [ %qq, %Q ], [ %a, %entry ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| ; Uses of the in-cycle phis are expected to be divergent here; %ee.3 only | ||
| ; depends on the kernel argument %b and is expected to stay uniform. | ||
| ; CHECK: DIVERGENT: %ee.1 = | ||
| ; CHECK: DIVERGENT: %xx.2 = | ||
| ; CHECK-NOT: DIVERGENT: %ee.3 = | ||
| %ee.1 = add i32 %pp.phi.1, 1 | ||
| %xx.2 = add i32 %pp.phi.2, 1 | ||
| %ee.3 = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| ; CHECK-LABEL: UniformityInfo for function 'outer_reducible': | ||
| ; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
| ; CHECK: CYCLES WITH DIVERGENT EXIT: | ||
| ; CHECK: depth=1: entries(H) P T R Q | ||
| define amdgpu_kernel void @outer_reducible(i32 %a, i32 %b, i32 %c) { | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| br label %H | ||
|
|
||
| H: | ||
| br i1 %cond.uni, label %T, label %P | ||
|
|
||
| P: | ||
| %pp.phi.1 = phi i32 [ %a, %H], [ %b, %T ] | ||
| %pp.phi.2 = phi i32 [ %a, %H], [ %tt.phi, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br label %Q | ||
|
|
||
| Q: | ||
| %qq = add i32 %b, 1 | ||
| %qq.div.1 = add i32 %pp.phi.2, 1 | ||
| %qq.div.2 = add i32 %pp.phi.2, 1 | ||
| br i1 %cond.div, label %R, label %exit | ||
|
|
||
| R: | ||
| br i1 %cond.uni, label %T, label %H | ||
|
|
||
|
|
||
| T: | ||
| %tt.phi = phi i32 [ %qq, %R ], [ %a, %H ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| exit: | ||
| ; CHECK: DIVERGENT: %ee.1 = | ||
| ; CHECK: DIVERGENT: %xx.2 = | ||
| ; CHECK-NOT: DIVERGENT: %ee.3 = | ||
| %ee.1 = add i32 %pp.phi.1, 1 | ||
| %xx.2 = add i32 %pp.phi.2, 1 | ||
| %ee.3 = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| ; entry(div) | ||
| ; | \ | ||
| ; H -> B | ||
| ; ^ /| | ||
| ; \--C | | ||
| ; \| | ||
| ; X | ||
| ; | ||
| ; This has a divergent cycle due to the external divergent branch, but | ||
| ; there are no divergent exits. Hence a use at X is not divergent | ||
| ; unless the def itself is divergent. | ||
| ; | ||
| ; CHECK-LABEL: UniformityInfo for function 'no_divergent_exit': | ||
| ; CHECK: CYCLES ASSSUMED DIVERGENT: | ||
| ; CHECK: depth=1: entries(H B) C | ||
| ; CHECK-NOT: CYCLES WITH DIVERGENT EXIT: | ||
| define amdgpu_kernel void @no_divergent_exit(i32 %n, i32 %a, i32 %b) #0 { | ||
| entry: | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %div.cond = icmp slt i32 %tid, 0 | ||
| %uni.cond = icmp slt i32 %a, 0 | ||
| ; Divergent branch outside the cycle, entering it at either B or H. | ||
| br i1 %div.cond, label %B, label %H | ||
|
|
||
| H: ; preds = %C, %entry | ||
| ; CHECK: DIVERGENT: %div.merge.h = | ||
| %div.merge.h = phi i32 [ 0, %entry ], [ %b, %C ] | ||
| br label %B | ||
|
|
||
| B: ; preds = %H, %entry | ||
| ; CHECK: DIVERGENT: %div.merge.b = | ||
| %div.merge.b = phi i32 [ %a, %H ], [ 1, %entry ] | ||
| ; CHECK-NOT: DIVERGENT: %bb = | ||
| %bb = add i32 %a, 1 | ||
| ; CHECK-NOT: DIVERGENT: br i1 %uni.cond, label %X, label %C | ||
| br i1 %uni.cond, label %X, label %C | ||
|
|
||
| C: ; preds = %B | ||
| ; CHECK-NOT: DIVERGENT: %cc = | ||
| %cc = add i32 %a, 1 | ||
| ; CHECK-NOT: DIVERGENT: br i1 %uni.cond, label %X, label %H | ||
| br i1 %uni.cond, label %X, label %H | ||
|
|
||
| ; CHECK-LABEL: BLOCK X | ||
| X: ; preds = %C, %B | ||
| ; CHECK: DIVERGENT: %uni.merge.x = | ||
| %uni.merge.x = phi i32 [ %bb, %B ], [%cc, %C ] | ||
| ; CHECK: DIVERGENT: %div.merge.x = | ||
| %div.merge.x = phi i32 [ %div.merge.b, %B ], [%cc, %C ] | ||
| ret void | ||
| } | ||
|
|
||
| declare i32 @llvm.amdgcn.workitem.id.x() #0 | ||
|
|
||
| attributes #0 = { nounwind readnone } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,225 @@ | ||
| ; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s | ||
|
|
||
| ; | ||
| ; Entry | ||
| ; | | ||
| ; v | ||
| ; -------->H--------- | ||
| ; | | | | ||
| ; | v | | ||
| ; | --->T---- | | ||
| ; | | | | | ||
| ; | | V | | ||
| ; S<---R P <--- | ||
| ; ^ ^ | | ||
| ; | | Div | | ||
| ; | --- Q <-- | ||
| ; | | | ||
| ; | v | ||
| ; -------- U | ||
| ; | | ||
| ; v | ||
| ; Exit | ||
| ; | ||
| ; The divergent branch is at Q that exits an irreducible cycle with | ||
| ; entries T and P nested inside a reducible cycle with header H. R is | ||
| ; assigned label R, which reaches P. S is a join node with label S. If | ||
| ; this is propagated to P via H, then P is incorrectly recognized as a | ||
| ; join, making the inner cycle divergent. P is always executed | ||
| ; convergently -- either by threads that reconverged at header H, or | ||
| ; by threads that are still executing the inner cycle. Thus, any PHI | ||
| ; at P should not be marked divergent. | ||
|
|
||
| define amdgpu_kernel void @nested_irreducible(i32 %a, i32 %b, i32 %c) { | ||
| ; The function-name label anchors all following directives to this function's output. | ||
| ; CHECK-LABEL: UniformityInfo for function 'nested_irreducible': | ||
| ; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
| ; CHECK: CYCLES WITH DIVERGENT EXIT: | ||
| ; CHECK: depth=2: entries(P T) R Q | ||
| ; CHECK: depth=1: entries(H) S P T R Q U | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| br label %H | ||
|
|
||
| H: | ||
| br i1 %cond.uni, label %T, label %P | ||
|
|
||
| P: | ||
| ; CHECK-LABEL: BLOCK P | ||
| ; CHECK-NOT: DIVERGENT: %pp.phi = | ||
| ; CHECK-NOT: DIVERGENT: %pp = | ||
| %pp.phi = phi i32 [ %a, %H], [ %b, %T ] | ||
| %pp = add i32 %b, 1 | ||
| br label %Q | ||
|
|
||
| Q: | ||
| ; CHECK-LABEL: BLOCK Q | ||
| ; CHECK-NOT: DIVERGENT: %qq = | ||
| ; CHECK-NOT: DIVERGENT: %qq.uni = | ||
| %qq = add i32 %b, 1 | ||
| %qq.uni = add i32 %pp.phi, 1 | ||
| ; Divergent branch: R stays in the inner cycle, U leaves it. | ||
| br i1 %cond.div, label %R, label %U | ||
|
|
||
| R: | ||
| br i1 %cond.uni, label %S, label %T | ||
|
|
||
| T: | ||
| ; CHECK-LABEL: BLOCK T | ||
| ; CHECK-NOT: DIVERGENT: %tt.phi = | ||
| ; CHECK-NOT: DIVERGENT: %tt = | ||
| %tt.phi = phi i32 [ %qq, %R ], [ %a, %H ] | ||
| %tt = add i32 %b, 1 | ||
| br label %P | ||
|
|
||
| S: | ||
| ; CHECK-LABEL: BLOCK S | ||
| ; CHECK: DIVERGENT: %ss.phi = | ||
| ; CHECK-NOT: DIVERGENT: %ss = | ||
| %ss.phi = phi i32 [ %qq.uni, %U ], [ %a, %R ] | ||
| %ss = add i32 %b, 1 | ||
| br label %H | ||
|
|
||
| U: | ||
| br i1 %cond.uni, label %S, label %exit | ||
|
|
||
| exit: | ||
| ; CHECK: DIVERGENT: %ee.div = | ||
| ; CHECK-NOT: DIVERGENT: %ee = | ||
| %ee.div = add i32 %qq.uni, 1 | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| ; | ||
| ; Entry | ||
| ; | | ||
| ; v | ||
| ; -->-------->H--------- | ||
| ; | ^ | | | ||
| ; | | | | | ||
| ; | | | | | ||
| ; | | | | | ||
| ; | | v V | ||
| ; | R<-------T-->U--->P | ||
| ; | Div | | ||
| ; | | | ||
| ; ----------- Q <------- | ||
| ; | | ||
| ; v | ||
| ; Exit | ||
| ; | ||
| ; This is a reducible cycle with a divergent branch at T. Disjoint | ||
| ; paths eventually join at the header H, which is assigned label H. | ||
| ; Node P is assigned label U. If the header label were propagated to | ||
| ; P, it will be incorrectly recognized as a join. P is always executed | ||
| ; convergently -- either by threads that reconverged at header H, or | ||
| ; by threads that diverged at T (and eventually reconverged at H). | ||
| ; Thus, any PHI at P should not be marked divergent. | ||
|
|
||
| define amdgpu_kernel void @header_label_1(i32 %a, i32 %b, i32 %c) { | ||
| ; The function-name label keeps the directives below from matching output of the preceding function. | ||
| ; CHECK-LABEL: UniformityInfo for function 'header_label_1': | ||
| ; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
| ; CHECK: CYCLES WITH DIVERGENT EXIT: | ||
| ; CHECK: depth=1: entries(H) Q P U T R | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| br label %H | ||
|
|
||
| H: | ||
| br i1 %cond.uni, label %T, label %P | ||
|
|
||
| P: | ||
| ; CHECK-LABEL: BLOCK P | ||
| ; CHECK-NOT: DIVERGENT: %pp.phi = | ||
| ; CHECK-NOT: DIVERGENT: %pp = | ||
| %pp.phi = phi i32 [ %a, %H], [ %b, %U ] | ||
| %pp = add i32 %b, 1 | ||
| br label %Q | ||
|
|
||
| Q: | ||
| ; CHECK-LABEL: BLOCK Q | ||
| ; CHECK-NOT: DIVERGENT: %qq = | ||
| ; CHECK-NOT: DIVERGENT: %qq.uni = | ||
| %qq = add i32 %b, 1 | ||
| %qq.uni = add i32 %pp.phi, 1 | ||
| br i1 %cond.uni, label %exit, label %H | ||
|
|
||
| R: | ||
| br label %H | ||
|
|
||
| T: | ||
| ; The only divergent branch in this function. | ||
| br i1 %cond.div, label %R, label %U | ||
|
|
||
| U: | ||
| br label %P | ||
|
|
||
| exit: | ||
| ; CHECK-LABEL: BLOCK exit | ||
| ; CHECK: DIVERGENT: %ee.div = | ||
| ; CHECK-NOT: DIVERGENT: %ee = | ||
| %ee.div = add i32 %qq.uni, 1 | ||
| %ee = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| ; entry | ||
| ; | | ||
| ; --> H1 | ||
| ; | | \ | ||
| ; | | H2(div) | ||
| ; | \ / \ | ||
| ; | B C | ||
| ; ^ \ / | ||
| ; \------D | ||
| ; | | ||
| ; X | ||
| ; | ||
| ; This is a reducible cycle with a divergent branch at H2. Disjoint | ||
| ; paths eventually join at the node D, which is assigned label D. | ||
| ; Node B is assigned label B. If the label D were propagated to | ||
| ; B, it will be incorrectly recognized as a join. B is always executed | ||
| ; convergently -- either by threads that reconverged at header H1, or | ||
| ; by threads that diverge at H2 (and eventually reconverged at H1). | ||
| ; Thus, any PHI at B should not be marked divergent. | ||
|
|
||
| define amdgpu_kernel void @header_label_2(i32 %a, i32 %b, i32 %c) { | ||
| ; CHECK-LABEL: UniformityInfo for function 'header_label_2': | ||
| ; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: | ||
| ; CHECK-NOT: CYCLES WITH DIVERGENT EXIT: | ||
| entry: | ||
| %cond.uni = icmp slt i32 %a, 0 | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %cond.div = icmp slt i32 %tid, 0 | ||
| br label %H1 | ||
|
|
||
| H1: | ||
| br i1 %cond.uni, label %B, label %H2 | ||
|
|
||
| H2: | ||
| br i1 %cond.div, label %B, label %C | ||
|
|
||
| B: | ||
| ; CHECK-LABEL: BLOCK B | ||
| ; CHECK-NOT: DIVERGENT: %bb.phi = | ||
| %bb.phi = phi i32 [%a, %H1], [%b, %H2] | ||
| br label %D | ||
|
|
||
| C: | ||
| br label %D | ||
|
|
||
| D: | ||
| ; CHECK-LABEL: BLOCK D | ||
| ; CHECK: DIVERGENT: %dd.phi = | ||
| %dd.phi = phi i32 [%a, %B], [%b, %C] | ||
| br i1 %cond.uni, label %exit, label %H1 | ||
|
|
||
| exit: | ||
| %ee.1 = add i32 %dd.phi, 1 | ||
| %ee.2 = add i32 %b, 1 | ||
| ret void | ||
| } | ||
|
|
||
| declare i32 @llvm.amdgcn.workitem.id.x() #0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s | ||
| ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s | ||
|
|
||
| ; CHECK: DIVERGENT: %phi.h = phi i32 [ 0, %entry ], [ %inc, %C ], [ %inc, %D ], [ %inc, %E ] | ||
| ; CHECK: DIVERGENT: %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| ; CHECK: DIVERGENT: %div.cond = icmp slt i32 %tid, 0 | ||
| ; CHECK: DIVERGENT: %inc = add i32 %phi.h, 1 | ||
| ; CHECK: DIVERGENT: br i1 %div.cond, label %C, label %D | ||
|
|
||
| define void @nested_loop_extension() { | ||
| entry: | ||
| %anchor = call token @llvm.experimental.convergence.anchor() | ||
| br label %A | ||
|
|
||
| A: | ||
| %phi.h = phi i32 [ 0, %entry ], [ %inc, %C ], [ %inc, %D ], [ %inc, %E ] | ||
| br label %B | ||
|
|
||
| B: | ||
| %tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %div.cond = icmp slt i32 %tid, 0 | ||
| %inc = add i32 %phi.h, 1 | ||
| br i1 %div.cond, label %C, label %D | ||
|
|
||
| C: | ||
| br i1 undef, label %A, label %E | ||
|
|
||
| D: | ||
| br i1 undef, label %A, label %E | ||
|
|
||
| E: | ||
| %b = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ] | ||
| br i1 undef, label %A, label %F | ||
|
|
||
| F: | ||
| ret void | ||
| } | ||
|
|
||
| declare i32 @llvm.amdgcn.workitem.id.x() #0 | ||
|
|
||
| declare token @llvm.experimental.convergence.anchor() | ||
| declare token @llvm.experimental.convergence.loop() | ||
|
|
||
| attributes #0 = { nounwind readnone } |