Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

; Test kernel whose blocks P, Q, R and S form a cycle that the entry block can
; enter at either P or R. The only branch on %cond.div is in Q; its two paths
; (directly to S, or through R) meet again at S.
define amdgpu_kernel void @divergent_cycle_1(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: UniformityInfo for function 'divergent_cycle_1':
; CHECK: CYCLES ASSSUMED DIVERGENT:
; CHECK: depth=1: entries(R P) S Q
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=2: entries(S P) Q
; CHECK: depth=1: entries(R P) S Q
entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is computed
; from the work-item id.
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
br i1 %cond.uni, label %P, label %R

P:
; CHECK: DIVERGENT: %pp.phi =
%pp.phi = phi i32 [ %a, %entry], [ %b, %S ]
%pp = add i32 %b, 1
br label %Q

Q:
%qq = add i32 %b, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %S, label %R

R:
%rr = add i32 %b, 1
br label %S

S:
; CHECK: DIVERGENT: %s.phi =
%s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
%ss = add i32 %b, 1
; Leaves the cycle to %exit or loops back to P, selected by %cond.uni.
br i1 %cond.uni, label %exit, label %P

exit:
%ee = add i32 %b, 1
ret void
}

; Test kernel whose blocks P, Q, R, S and T form a cycle that the entry block
; can enter at either P or T. Q branches on %cond.div and both of its paths
; meet at S; every other conditional branch uses %cond.uni.
define amdgpu_kernel void @uniform_cycle_1(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: UniformityInfo for function 'uniform_cycle_1':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is computed
; from the work-item id.
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
br i1 %cond.uni, label %P, label %T

P:
; CHECK-NOT: DIVERGENT: %pp.phi = phi i32
%pp.phi = phi i32 [ %a, %entry], [ %b, %T ]
%pp = add i32 %b, 1
br label %Q

Q:
%qq = add i32 %b, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %S, label %R

R:
%rr = add i32 %b, 1
br label %S

S:
; CHECK: DIVERGENT: %s.phi =
%s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
%ss = add i32 %b, 1
; Leaves the cycle to %exit or continues to T, selected by %cond.uni.
br i1 %cond.uni, label %exit, label %T

T:
; T only forwards to P, closing the cycle.
%tt = add i32 %b, 1
br label %P

exit:
%ee = add i32 %b, 1
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

; These tests have identical control flow graphs with slight changes
; that affect cycle-info. There is a minor functional difference in
; the branch conditions; but that is not relevant to the tests.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The inner cycle has a header (P) that dominates the join, hence
;; both cycles are reported as converged.
;;
;; CHECK-LABEL: UniformityInfo for function 'headers_b_p':
;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:

; Two nested cycles, each with two entries: the outer one can be entered at A
; or B, the inner one (P, Q, R, S, T) at T or P. The only branch on %cond.div
; is in Q, and its two paths meet at S. %a.div and %c are not used.
define amdgpu_kernel void @headers_b_p(i32 %a, i32 %b, i32 %c) {
entry:
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
%a.div = add i32 %tid, %a
; Successor order (A, B) mirrors @headers_b_t, whose expected output lists B
; first among the outer cycle's entries. With the order (B, A) this function
; was byte-identical to @headers_a_p, so the B-header case was never covered.
br i1 %cond.uni, label %A, label %B

A:
br label %B

B:
br i1 %cond.uni, label %C, label %D

C:
; Enters the inner cycle at T or at P.
br i1 %cond.uni, label %T, label %P

P:
%pp.phi = phi i32 [ %a, %C], [ %b, %T ]
%pp = add i32 %b, 1
br i1 %cond.uni, label %R, label %Q

Q:
%qq = add i32 %b, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %S, label %R

R:
%rr = add i32 %b, 1
br label %S

S:
%s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
%ss = add i32 %pp.phi, 1
; Leaves the inner cycle to D or continues to T.
br i1 %cond.uni, label %D, label %T

D:
; Leaves the outer cycle to %exit or loops back to A.
br i1 %cond.uni, label %exit, label %A

T:
%tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
%tt = add i32 %b, 1
br label %P

exit:
%ee = add i32 %b, 1
ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Same as previous, but the outer cycle has a different header (A).
;; The inner cycle has a header (P) that dominates the join, hence
;; both cycles are reported as converged.
;;
;; CHECK-LABEL: UniformityInfo for function 'headers_a_p':
;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:

; Two nested cycles, each with two entries: the outer one can be entered at A
; or B, the inner one (P, Q, R, S, T) at T or P. The only branch on %cond.div
; is in Q, and its two paths meet at S. %a.div and %c are not used.
define amdgpu_kernel void @headers_a_p(i32 %a, i32 %b, i32 %c) {
entry:
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
%a.div = add i32 %tid, %a
; Enters the outer cycle at B or at A.
br i1 %cond.uni, label %B, label %A

A:
br label %B

B:
br i1 %cond.uni, label %C, label %D

C:
; Enters the inner cycle at T or at P.
br i1 %cond.uni, label %T, label %P

P:
%pp.phi = phi i32 [ %a, %C], [ %b, %T ]
%pp = add i32 %b, 1
br i1 %cond.uni, label %R, label %Q

Q:
%qq = add i32 %b, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %S, label %R

R:
%rr = add i32 %b, 1
br label %S

S:
%s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
%ss = add i32 %pp.phi, 1
; Leaves the inner cycle to D or continues to T.
br i1 %cond.uni, label %D, label %T

D:
; Leaves the outer cycle to %exit or loops back to A.
br i1 %cond.uni, label %exit, label %A

T:
%tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
%tt = add i32 %b, 1
br label %P

exit:
%ee = add i32 %b, 1
ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The inner cycle has a header (T) that does not dominate the join.
;; The outer cycle has a header (B) that dominates the join. Hence
;; only the inner cycle is reported as diverged.
;;
;; CHECK-LABEL: UniformityInfo for function 'headers_b_t':
;; CHECK: CYCLES ASSSUMED DIVERGENT:
;; CHECK: depth=2: entries(T P) S Q R
;; CHECK: CYCLES WITH DIVERGENT EXIT:
;; CHECK: depth=1: entries(B A) D T S Q P R C

; Two nested cycles, each with two entries: the outer one can be entered at A
; or B, the inner one (P, Q, R, S, T) at P or T. The only branch on %cond.div
; is in Q, and its two paths meet at S. %a.div and %c are not used.
define amdgpu_kernel void @headers_b_t(i32 %a, i32 %b, i32 %c) {
entry:
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
%a.div = add i32 %tid, %a
; Enters the outer cycle at A or at B; the expected output above lists B first.
br i1 %cond.uni, label %A, label %B

A:
br label %B

B:
br i1 %cond.uni, label %C, label %D

C:
; Enters the inner cycle at P or at T; the expected output above lists T first.
br i1 %cond.uni, label %P, label %T

P:
%pp.phi = phi i32 [ %a, %C], [ %b, %T ]
%pp = add i32 %b, 1
br i1 %cond.uni, label %R, label %Q

Q:
%qq = add i32 %b, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %S, label %R

R:
%rr = add i32 %b, 1
br label %S

S:
%s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
%ss = add i32 %pp.phi, 1
; Leaves the inner cycle to D or continues to T.
br i1 %cond.uni, label %D, label %T

D:
; Leaves the outer cycle to %exit or loops back to A.
br i1 %cond.uni, label %exit, label %A

T:
%tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
%tt = add i32 %b, 1
br label %P

exit:
%ee = add i32 %b, 1
ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The cycles have headers (A and T) that do not dominate the join.
;; Hence the outermost cycle is reported as diverged.
;;
;; CHECK-LABEL: UniformityInfo for function 'headers_a_t':
;; CHECK: CYCLES ASSSUMED DIVERGENT:
;; CHECK: depth=1: entries(A B) D T S Q P R C
;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:

; Two nested cycles, each with two entries: the outer one can be entered at A
; or B, the inner one (P, Q, R, S, T) at P or T. The only branch on %cond.div
; is in Q, and its two paths meet at S. %a.div and %c are not used.
define amdgpu_kernel void @headers_a_t(i32 %a, i32 %b, i32 %c) {
entry:
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
%a.div = add i32 %tid, %a
; Enters the outer cycle at B or at A; the expected output above lists A first.
br i1 %cond.uni, label %B, label %A

A:
br label %B

B:
br i1 %cond.uni, label %C, label %D

C:
; Enters the inner cycle at P or at T.
br i1 %cond.uni, label %P, label %T

P:
%pp.phi = phi i32 [ %a, %C], [ %b, %T ]
%pp = add i32 %b, 1
br i1 %cond.uni, label %R, label %Q

Q:
%qq = add i32 %b, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %S, label %R

R:
%rr = add i32 %b, 1
br label %S

S:
%s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
%ss = add i32 %pp.phi, 1
; Leaves the inner cycle to D or continues to T.
br i1 %cond.uni, label %D, label %T

D:
; Leaves the outer cycle to %exit or loops back to A.
br i1 %cond.uni, label %exit, label %A

T:
%tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
%tt = add i32 %b, 1
br label %P

exit:
%ee = add i32 %b, 1
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

; These tests have identical control flow graphs with slight changes
; that affect cycle-info. There is a minor functional difference in
; the branch conditions; but that is not relevant to the tests.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The cycle has a header (T) that does not dominate the join, hence
;; the entire cycle is reported as diverged.
;;
;; CHECK-LABEL: UniformityInfo for function 't_header':
;; CHECK: CYCLES ASSSUMED DIVERGENT:
;; CHECK: depth=1: entries(T P) S Q R

; A single cycle over P, Q, R, S and T that the entry block can enter at
; either P or T. The only branch on %cond.div is in Q, and its two paths meet
; at S. %a.div and %c are not used.
define amdgpu_kernel void @t_header(i32 %a, i32 %b, i32 %c) {
entry:
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
%a.div = add i32 %tid, %a
; Enters the cycle at P or at T; the expected output above lists T first.
br i1 %cond.uni, label %P, label %T

P:
; CHECK: DIVERGENT: %pp.phi =
%pp.phi = phi i32 [ %a, %entry], [ %b, %T ]
%pp = add i32 %b, 1
br i1 %cond.uni, label %R, label %Q

Q:
%qq = add i32 %b, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %S, label %R

R:
%rr = add i32 %b, 1
br label %S

S:
; CHECK: DIVERGENT: %s.phi =
; CHECK: DIVERGENT: %ss =
%s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
; %ss is computed from %pp.phi, the phi in P.
%ss = add i32 %pp.phi, 1
br i1 %cond.uni, label %exit, label %T

T:
; CHECK: DIVERGENT: %tt.phi =
%tt.phi = phi i32 [ %ss, %S ], [ %a, %entry ]
%tt = add i32 %b, 1
br label %P

exit:
%ee = add i32 %b, 1
ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The cycle has a header (P) that dominates the join, hence
;; the cycle is reported as converged.
;;
;; CHECK-LABEL: UniformityInfo for function 'p_header':
;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:

; A single cycle over P, Q, R, S and T that the entry block can enter at
; either T or P. The only branch on %cond.div is in Q, and its two paths meet
; at S. %c is not used.
define amdgpu_kernel void @p_header(i32 %a, i32 %b, i32 %c) {
entry:
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
; Enters the cycle at T or at P.
br i1 %cond.uni, label %T, label %P

P:
; CHECK-NOT: DIVERGENT: %pp.phi = phi i32
%pp.phi = phi i32 [ %a, %entry], [ %b, %T ]
%pp = add i32 %b, 1
br i1 %cond.uni, label %R, label %Q

Q:
%qq = add i32 %b, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %S, label %R

R:
%rr = add i32 %b, 1
br label %S

S:
; CHECK: DIVERGENT: %s.phi =
; CHECK-NOT: DIVERGENT: %ss = add i32
%s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
%ss = add i32 %pp.phi, 1
br i1 %cond.uni, label %exit, label %T

T:
; The directive below was misspelled and therefore never evaluated.
; CHECK-NOT: DIVERGENT: %tt.phi = phi i32
%tt.phi = phi i32 [ %ss, %S ], [ %a, %entry ]
%tt = add i32 %b, 1
br label %P

exit:
%ee = add i32 %b, 1
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

; A single cycle over P, Q and T that the entry block can enter at either T or
; P. Q leaves the cycle to %exit on a branch that uses %cond.div; values
; defined inside the cycle are then used in %exit.
;
; CHECK-LABEL: UniformityInfo for function 'basic':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=1: entries(P T) Q
define amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) {
entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is computed
; from the work-item id.
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
br i1 %cond.uni, label %T, label %P

P:
%pp.phi.1 = phi i32 [ %a, %entry], [ %b, %T ]
%pp.phi.2 = phi i32 [ %a, %entry], [ %tt.phi, %T ]
%pp = add i32 %b, 1
br label %Q

Q:
%qq = add i32 %b, 1
%qq.div.1 = add i32 %pp.phi.2, 1
%qq.div.2 = add i32 %pp.phi.2, 1
; The only branch in this function that uses %cond.div; it is also the only
; edge that leaves the cycle.
br i1 %cond.div, label %T, label %exit

T:
%tt.phi = phi i32 [ %qq, %Q ], [ %a, %entry ]
%tt = add i32 %b, 1
br label %P

exit:
; %ee.1 and %xx.2 use phis defined in P (inside the cycle); %ee.3 uses only the
; kernel argument %b.
; CHECK: DIVERGENT: %ee.1 =
; CHECK: DIVERGENT: %xx.2 =
; CHECK-NOT: DIVERGENT: %ee.3 =
%ee.1 = add i32 %pp.phi.1, 1
%xx.2 = add i32 %pp.phi.2, 1
%ee.3 = add i32 %b, 1
ret void
}

; CHECK-LABEL: UniformityInfo for function 'outer_reducible':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=1: entries(H) P T R Q
; Same shape as a P/Q/T cycle with two entries, but wrapped in an outer cycle
; through H: entry only branches to H, H selects T or P, and R can return to
; either T or H. Q leaves to %exit on a branch that uses %cond.div.
define amdgpu_kernel void @outer_reducible(i32 %a, i32 %b, i32 %c) {
entry:
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
br label %H

H:
; Reached from %entry and from R.
br i1 %cond.uni, label %T, label %P

P:
%pp.phi.1 = phi i32 [ %a, %H], [ %b, %T ]
%pp.phi.2 = phi i32 [ %a, %H], [ %tt.phi, %T ]
%pp = add i32 %b, 1
br label %Q

Q:
%qq = add i32 %b, 1
%qq.div.1 = add i32 %pp.phi.2, 1
%qq.div.2 = add i32 %pp.phi.2, 1
; The only branch in this function that uses %cond.div; the %exit edge is the
; only one that leaves the cycles.
br i1 %cond.div, label %R, label %exit

R:
br i1 %cond.uni, label %T, label %H


T:
%tt.phi = phi i32 [ %qq, %R ], [ %a, %H ]
%tt = add i32 %b, 1
br label %P

exit:
; %ee.1 and %xx.2 use phis defined in P (inside the cycle); %ee.3 uses only the
; kernel argument %b.
; CHECK: DIVERGENT: %ee.1 =
; CHECK: DIVERGENT: %xx.2 =
; CHECK-NOT: DIVERGENT: %ee.3 =
%ee.1 = add i32 %pp.phi.1, 1
%xx.2 = add i32 %pp.phi.2, 1
%ee.3 = add i32 %b, 1
ret void
}

; entry(div)
; | \
; H -> B
; ^ /|
; \--C |
; \|
; X
;
; This has a divergent cycle due to the external divergent branch, but
; there are no divergent exits. Hence a use at X is not divergent
; unless the def itself is divergent.
;
; CHECK-LABEL: UniformityInfo for function 'no_divergent_exit':
; CHECK: CYCLES ASSSUMED DIVERGENT:
; CHECK: depth=1: entries(H B) C
; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
; The entry block branches on %div.cond (computed from the work-item id) into
; the cycle H -> B -> C -> H at either B or H. Both edges that leave the cycle
; (B -> X and C -> X) branch on %uni.cond, which is computed from the kernel
; argument %a only.
define amdgpu_kernel void @no_divergent_exit(i32 %n, i32 %a, i32 %b) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%div.cond = icmp slt i32 %tid, 0
%uni.cond = icmp slt i32 %a, 0
br i1 %div.cond, label %B, label %H

H: ; preds = %C, %entry
; CHECK: DIVERGENT: %div.merge.h =
%div.merge.h = phi i32 [ 0, %entry ], [ %b, %C ]
br label %B

B: ; preds = %H, %entry
; CHECK: DIVERGENT: %div.merge.b =
%div.merge.b = phi i32 [ %a, %H ], [ 1, %entry ]
; The pattern below previously lacked the colon after DIVERGENT, so it could
; never match the printed output and the negative check was vacuous.
; CHECK-NOT: DIVERGENT: %bb =
%bb = add i32 %a, 1
; CHECK-NOT: DIVERGENT: br i1 %uni.cond, label %X, label %C
br i1 %uni.cond, label %X, label %C

C: ; preds = %B
; Same colon fix as for %bb above.
; CHECK-NOT: DIVERGENT: %cc =
%cc = add i32 %a, 1
; CHECK-NOT: DIVERGENT: br i1 %uni.cond, label %X, label %H
br i1 %uni.cond, label %X, label %H

; CHECK-LABEL: BLOCK X
X: ; preds = %C, %B
; CHECK: DIVERGENT: %uni.merge.x =
%uni.merge.x = phi i32 [ %bb, %B ], [%cc, %C ]
; CHECK: DIVERGENT: %div.merge.x =
%div.merge.x = phi i32 [ %div.merge.b, %B ], [%cc, %C ]
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s

; NOTE: The new pass manager does not fall back on legacy divergence
; analysis even when the function contains an irreducible loop. The
; (new) divergence analysis conservatively reports all values as
; divergent. This test does not check for this conservative
; behaviour. Instead, it only checks for the values that are known to
; be divergent according to the legacy analysis.

; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

; This test contains an unstructured loop.
; +-------------- entry ----------------+
Expand All @@ -21,31 +13,38 @@
; |
; V
; if (i3 == 5) // divergent
; because sync dependent on (tid / i3).
; because sync dependent on (tid / i3).

define i32 @unstructured_loop(i1 %entry_cond) {
; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop'
; CHECK-LABEL: for function 'unstructured_loop'
; CHECK: DIVERGENT: i1 %entry_cond

entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
loop_entry_1:
; CHECK: DIVERGENT: %i1 =
%i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
%j1 = add i32 %i1, 1
br label %loop_body
loop_entry_2:
; CHECK: DIVERGENT: %i2 =
%i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
%j2 = add i32 %i2, 2
br label %loop_body
loop_body:
; CHECK: DIVERGENT: %i3 =
%i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
br label %loop_latch
loop_latch:
%div = sdiv i32 %tid, %i3
switch i32 %div, label %branch [ i32 1, label %loop_entry_1
i32 2, label %loop_entry_2 ]
branch:
; CHECK: DIVERGENT: %cmp =
; CHECK: DIVERGENT: br i1 %cmp,
%cmp = icmp eq i32 %i3, 5
br i1 %cmp, label %then, label %else
; CHECK: DIVERGENT: br i1 %cmp,
then:
ret i32 0
else:
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

;
; Entry
; |
; v
; -------->H---------
; | | |
; | v |
; | --->T---- |
; | | | |
; | | V |
; S<---R P <---
; ^ ^ |
; | | Div |
; | --- Q <--
; | |
; | v
; -------- U
; |
; v
; Exit
;
; The divergent branch is at Q that exits an irreducible cycle with
; entries T and P nested inside a reducible cycle with header H. R is
; assigned label R, which reaches P. S is a join node with label S. If
; this is propagated to P via H, then P is incorrectly recognized as a
; join, making the inner cycle divergent. P is always executed
; convergently -- either by threads that reconverged at header H, or
; by threads that are still executing the inner cycle. Thus, any PHI
; at P should not be marked divergent.

; An inner cycle over P, Q, R and T (entered from H at either T or P) nested
; inside an outer cycle through H, S and U. The only branch on %cond.div is in
; Q, which continues to R or to U.
define amdgpu_kernel void @nested_irreducible(i32 %a, i32 %b, i32 %c) {
; The label directive below was misspelled with '=' and therefore never
; evaluated.
; CHECK-LABEL: UniformityInfo for function 'nested_irreducible':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=2: entries(P T) R Q
; CHECK: depth=1: entries(H) S P T R Q U
entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is computed
; from the work-item id.
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
br label %H

H:
; Reached from %entry and from S; enters the inner cycle at T or at P.
br i1 %cond.uni, label %T, label %P

P:
; CHECK-LABEL: BLOCK P
; CHECK-NOT: DIVERGENT: %pp.phi =
; CHECK-NOT: DIVERGENT: %pp =
%pp.phi = phi i32 [ %a, %H], [ %b, %T ]
%pp = add i32 %b, 1
br label %Q

Q:
; CHECK-LABEL: BLOCK Q
; CHECK-NOT: DIVERGENT: %qq =
; CHECK-NOT: DIVERGENT: %qq.uni =
%qq = add i32 %b, 1
%qq.uni = add i32 %pp.phi, 1
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %R, label %U

R:
br i1 %cond.uni, label %S, label %T

T:
; CHECK-LABEL: BLOCK T
; CHECK-NOT: DIVERGENT: %tt.phi =
; CHECK-NOT: DIVERGENT: %tt =
%tt.phi = phi i32 [ %qq, %R ], [ %a, %H ]
%tt = add i32 %b, 1
br label %P

S:
; CHECK-LABEL: BLOCK S
; CHECK: DIVERGENT: %ss.phi =
; CHECK-NOT: DIVERGENT: %ss =
%ss.phi = phi i32 [ %qq.uni, %U ], [ %a, %R ]
%ss = add i32 %b, 1
br label %H

U:
br i1 %cond.uni, label %S, label %exit

exit:
; %ee.div uses %qq.uni, which is defined in Q inside both cycles; %ee uses only
; the kernel argument %b.
; CHECK: DIVERGENT: %ee.div =
; CHECK-NOT: DIVERGENT: %ee =
%ee.div = add i32 %qq.uni, 1
%ee = add i32 %b, 1
ret void
}

;
; Entry
; |
; v
; -->-------->H---------
; | ^ | |
; | | | |
; | | | |
; | | | |
; | | v V
; | R<-------T-->U--->P
; | Div |
; | |
; ----------- Q <-------
; |
; v
; Exit
;
; This is a reducible cycle with a divergent branch at T. Disjoint
; paths eventually join at the header H, which is assigned label H.
; Node P is assigned label U. If the header label were propagated to
; P, it will be incorrectly recognized as a join. P is always executed
; convergently -- either by threads that reconverged at header H, or
; by threads that diverged at T (and eventually reconverged at H).
; Thus, any PHI at P should not be marked divergent.

; A cycle through H in which T holds the only branch on %cond.div: one side
; returns to H through R, the other reaches P through U. P is also reached
; directly from H.
define amdgpu_kernel void @header_label_1(i32 %a, i32 %b, i32 %c) {
; The label directive below was misspelled with '=' and therefore never
; evaluated.
; CHECK-LABEL: UniformityInfo for function 'header_label_1':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=1: entries(H) Q P U T R
entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is computed
; from the work-item id.
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
br label %H

H:
; Reached from %entry, Q and R.
br i1 %cond.uni, label %T, label %P

P:
; CHECK-LABEL: BLOCK P
; CHECK-NOT: DIVERGENT: %pp.phi =
; CHECK-NOT: DIVERGENT: %pp =
%pp.phi = phi i32 [ %a, %H], [ %b, %U ]
%pp = add i32 %b, 1
br label %Q

Q:
; CHECK-LABEL: BLOCK Q
; CHECK-NOT: DIVERGENT: %qq =
; CHECK-NOT: DIVERGENT: %qq.uni =
%qq = add i32 %b, 1
%qq.uni = add i32 %pp.phi, 1
; Leaves the cycle to %exit or loops back to H, selected by %cond.uni.
br i1 %cond.uni, label %exit, label %H

R:
br label %H

T:
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %R, label %U

U:
br label %P

exit:
; %ee.div uses %qq.uni, which is defined in Q inside the cycle; %ee uses only
; the kernel argument %b.
; CHECK-LABEL: BLOCK exit
; CHECK: DIVERGENT: %ee.div =
; CHECK-NOT: DIVERGENT: %ee =
%ee.div = add i32 %qq.uni, 1
%ee = add i32 %b, 1
ret void
}

; entry
; |
; --> H1
; | | \
; | | H2(div)
; | \ / \
; | B C
; ^ \ /
; \------D
; |
; X
;
; This is a reducible cycle with a divergent branch at H2. Disjoint
; paths eventually join at the header D, which is assigned label D.
; Node B is assigned label B. If the header label D were propagated to
; B, it will be incorrectly recognized as a join. B is always executed
; convergently -- either by threads that reconverged at header H1, or
; by threads that diverge at H2 (and eventually reconverged at H1).
; Thus, any PHI at B should not be marked divergent.

; A cycle through H1 in which H2 holds the only branch on %cond.div, going to
; B or C. B is also reached directly from H1. B and C both continue to D,
; which leaves to %exit or loops back to H1 on %cond.uni.
define amdgpu_kernel void @header_label_2(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: UniformityInfo for function 'header_label_2':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is computed
; from the work-item id.
%cond.uni = icmp slt i32 %a, 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.div = icmp slt i32 %tid, 0
br label %H1

H1:
; Reached from %entry and from D.
br i1 %cond.uni, label %B, label %H2

H2:
; The only branch in this function that uses %cond.div.
br i1 %cond.div, label %B, label %C

B:
; CHECK-LABEL: BLOCK B
; CHECK-NOT: DIVERGENT: %bb.phi =
%bb.phi = phi i32 [%a, %H1], [%b, %H2]
br label %D

C:
br label %D

D:
; CHECK-LABEL: BLOCK D
; CHECK: DIVERGENT: %dd.phi =
%dd.phi = phi i32 [%a, %B], [%b, %C]
br i1 %cond.uni, label %exit, label %H1

exit:
; No expectations are stated for the values in this block.
%ee.1 = add i32 %dd.phi, 1
%ee.2 = add i32 %b, 1
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: bb3:
; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ]
; CHECK: DIVERGENT: br i1 %Guard.bb4, label %bb4, label %bb5

Expand Down
44 changes: 44 additions & 0 deletions llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-heart.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: DIVERGENT: %phi.h = phi i32 [ 0, %entry ], [ %inc, %C ], [ %inc, %D ], [ %inc, %E ]
; CHECK: DIVERGENT: %tid = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK: DIVERGENT: %div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %inc = add i32 %phi.h, 1
; CHECK: DIVERGENT: br i1 %div.cond, label %C, label %D

; Loop headed by A with three back edges (from C, D and E). B branches on a
; condition computed from the work-item id; C and D each either return to A or
; fall through to E, which contains a convergence.loop call tied to the anchor
; token created in the entry block.
define void @nested_loop_extension() {
entry:
; Token consumed by the convergence.loop call in E.
%anchor = call token @llvm.experimental.convergence.anchor()
br label %A

A:
; All three back edges carry %inc, which is computed in B.
%phi.h = phi i32 [ 0, %entry ], [ %inc, %C ], [ %inc, %D ], [ %inc, %E ]
br label %B

B:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%div.cond = icmp slt i32 %tid, 0
%inc = add i32 %phi.h, 1
br i1 %div.cond, label %C, label %D

C:
; Branch condition is undef: either back to A or on to E.
br i1 undef, label %A, label %E

D:
; Branch condition is undef: either back to A or on to E.
br i1 undef, label %A, label %E

E:
; The result token %b is not used by any other instruction.
%b = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
br i1 undef, label %A, label %F

F:
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

declare token @llvm.experimental.convergence.anchor()
declare token @llvm.experimental.convergence.loop()

attributes #0 = { nounwind readnone }
17 changes: 9 additions & 8 deletions llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_ps':
; CHECK: DIVERGENT: ptr addrspace(4) %arg0
; CHECK-LABEL: for function 'test_amdgpu_ps':
; CHECK-DAG: DIVERGENT: ptr addrspace(4) %arg0
; CHECK-DAG: DIVERGENT: <2 x i32> %arg3
; CHECK-DAG: DIVERGENT: <3 x i32> %arg4
; CHECK-DAG: DIVERGENT: float %arg5
; CHECK-DAG: DIVERGENT: i32 %arg6
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: <2 x i32> %arg3
; CHECK: DIVERGENT: <3 x i32> %arg4
; CHECK: DIVERGENT: float %arg5
; CHECK: DIVERGENT: i32 %arg6

define amdgpu_ps void @test_amdgpu_ps(ptr addrspace(4) byref([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
ret void
}

; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_kernel':
; CHECK-LABEL: for function 'test_amdgpu_kernel':
; CHECK-NOT: %arg0
; CHECK-NOT: %arg1
; CHECK-NOT: %arg2
Expand All @@ -24,7 +25,7 @@ define amdgpu_kernel void @test_amdgpu_kernel(ptr addrspace(4) byref([4 x <16 x
ret void
}

; CHECK-LABEL: Divergence Analysis' for function 'test_c':
; CHECK-LABEL: for function 'test_c':
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp2
; CHECK: DIVERGENT: %tmp10 = load volatile float, ptr addrspace(1) %tmp5, align 4
Expand Down
18 changes: 11 additions & 7 deletions llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LOOPDA
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CYCLEDA

; CHECK-LABEL: 'test1':
; CHECK-NEXT: DIVERGENT: i32 %bound
; CHECK: {{^ *}}%counter =
; CHECK: DIVERGENT: i32 %bound
; CYCLEDA: DIVERGENT: %counter =
; LOOPDA: {{^ *}} %counter =
; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound
; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body
; CHECK: {{^ *}}%counter.next =
; CHECK: {{^ *}}%counter.footer =
; CHECK: DIVERGENT: br i1 %break, label %end, label %header
; CYCLEDA: DIVERGENT: %counter.next =
; CYCLEDA: DIVERGENT: %counter.footer =
; LOOPDA: {{^ *}}%counter.next =
; LOOPDA: {{^ *}}%counter.footer =

; Note: %counter is not divergent!

define amdgpu_ps void @test1(i32 %bound) {
entry:
br label %header
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: bb6:
; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ]
; CHECK: DIVERGENT: br i1 %.126.i355.i, label %bb7, label %bb8

; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() #0
Expand Down
11 changes: 6 additions & 5 deletions llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; temporal-divergent use of value carried by divergent loop
define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge':
; CHECK-LABEL: for function 'temporal_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

Expand All @@ -26,7 +27,7 @@ X:

; temporal-divergent use of value carried by divergent loop inside a top-level loop
define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_inloop':
; CHECK-LABEL: for function 'temporal_diverge_inloop':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

Expand Down Expand Up @@ -58,7 +59,7 @@ Y:

; temporal-uniform use of a value, definition and users are carried by a surrounding divergent loop
define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Divergence Analysis' for function 'temporal_uniform_indivloop':
; CHECK-LABEL: for function 'temporal_uniform_indivloop':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

Expand Down Expand Up @@ -90,7 +91,7 @@ Y:

; temporal-divergent use of value carried by divergent loop, user is inside sibling loop
define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser':
; CHECK-LABEL: for function 'temporal_diverge_loopuser':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

Expand Down Expand Up @@ -120,7 +121,7 @@ Y:

; temporal-divergent use of value carried by divergent loop, user is inside sibling loop, defs and use are carried by a uniform loop
define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser_nested':
; CHECK-LABEL: for function 'temporal_diverge_loopuser_nested':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: bb2:
; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ]

; Function Attrs: nounwind readnone speculatable
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: DIVERGENT: %tmp = cmpxchg volatile
define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

define i32 @daorder(i32 %n) {
; CHECK-LABEL: Divergence Analysis' for function 'daorder'
; CHECK-LABEL: for function 'daorder'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond = icmp slt i32 %tid, 0
br i1 %cond, label %A, label %B ; divergent
; CHECK: DIVERGENT: %cond =
; CHECK: DIVERGENT: br i1 %cond,
A:
%defAtA = add i32 %n, 1 ; uniform
Expand Down
26 changes: 17 additions & 9 deletions llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Divergence Analysis' for function 'no_diverge'
; CHECK-LABEL: for function 'no_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond = icmp slt i32 %n, 0
br i1 %cond, label %then, label %else ; uniform
; CHECK-NOT: DIVERGENT: %cond =
; CHECK-NOT: DIVERGENT: br i1 %cond,
then:
%a1 = add i32 %a, %tid
Expand All @@ -27,11 +29,12 @@ merge:
; c = b;
; return c; // c is divergent: sync dependent
define i32 @sync(i32 %a, i32 %b) {
; CHECK-LABEL: Divergence Analysis' for function 'sync'
; CHECK-LABEL: for function 'sync'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
%cond = icmp slt i32 %tid, 5
br i1 %cond, label %bb2, label %bb3
; CHECK: DIVERGENT: %cond =
; CHECK: DIVERGENT: br i1 %cond,
bb2:
br label %bb3
Expand All @@ -48,11 +51,12 @@ bb3:
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
; return c;
define i32 @mixed(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Divergence Analysis' for function 'mixed'
; CHECK-LABEL: for function 'mixed'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
%cond = icmp slt i32 %tid, 5
br i1 %cond, label %bb6, label %bb2
; CHECK: DIVERGENT: %cond =
; CHECK: DIVERGENT: br i1 %cond,
bb2:
%cond2 = icmp slt i32 %n, 0
Expand All @@ -73,13 +77,14 @@ bb6:

; We conservatively treat all parameters of a __device__ function as divergent.
define i32 @device(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Divergence Analysis' for function 'device'
; CHECK: DIVERGENT: i32 %n
; CHECK: DIVERGENT: i32 %a
; CHECK: DIVERGENT: i32 %b
; CHECK-LABEL: for function 'device'
; CHECK-DAG: DIVERGENT: i32 %n
; CHECK-DAG: DIVERGENT: i32 %a
; CHECK-DAG: DIVERGENT: i32 %b
entry:
%cond = icmp slt i32 %n, 0
br i1 %cond, label %then, label %else
; CHECK: DIVERGENT: %cond =
; CHECK: DIVERGENT: br i1 %cond,
then:
br label %merge
Expand All @@ -98,7 +103,7 @@ merge:
;
; The i defined in the loop is used outside.
define i32 @loop() {
; CHECK-LABEL: Divergence Analysis' for function 'loop'
; CHECK-LABEL: for function 'loop'
entry:
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
br label %loop
Expand All @@ -111,6 +116,7 @@ loop:
loop_exit:
%cond = icmp eq i32 %i, 10
br i1 %cond, label %then, label %else
; CHECK: DIVERGENT: %cond =
; CHECK: DIVERGENT: br i1 %cond,
then:
ret i32 0
Expand All @@ -120,7 +126,7 @@ else:

; Same as @loop, but the loop is in the LCSSA form.
define i32 @lcssa() {
; CHECK-LABEL: Divergence Analysis' for function 'lcssa'
; CHECK-LABEL: for function 'lcssa'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
br label %loop
Expand All @@ -135,6 +141,7 @@ loop_exit:
; CHECK: DIVERGENT: %i.lcssa =
%cond = icmp eq i32 %i.lcssa, 10
br i1 %cond, label %then, label %else
; CHECK: DIVERGENT: %cond =
; CHECK: DIVERGENT: br i1 %cond,
then:
ret i32 0
Expand All @@ -144,6 +151,7 @@ else:

; Verifies sync-dependence is computed correctly in the absence of loops.
define i32 @sync_no_loop(i32 %arg) {
; CHECK-LABEL: for function 'sync_no_loop'
entry:
%0 = add i32 %arg, 1
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Divergence Analysis' for function 'hidden_diverge'
; CHECK-LABEL: for function 'hidden_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond.var = icmp slt i32 %tid, 0
br i1 %cond.var, label %B, label %C ; divergent
; CHECK: DIVERGENT: %cond.var =
; CHECK: DIVERGENT: br i1 %cond.var,
B:
%cond.uni = icmp slt i32 %n, 0
br i1 %cond.uni, label %C, label %merge ; uniform
; CHECK-NOT: DIVERGENT: %cond.uni =
; CHECK-NOT: DIVERGENT: br i1 %cond.uni,
C:
%phi.var.hidden = phi i32 [ 1, %entry ], [ 2, %B ]
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

; NOTE: The new pass manager does not fall back on legacy divergence
; analysis even when the function contains an irreducible loop. The
Expand All @@ -24,7 +25,7 @@ target triple = "nvptx64-nvidia-cuda"
; if (i3 == 5) // divergent
; because sync dependent on (tid / i3).
define i32 @unstructured_loop(i1 %entry_cond) {
; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop'
; CHECK-LABEL: for function 'unstructured_loop'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
Expand All @@ -46,6 +47,7 @@ loop_latch:
branch:
%cmp = icmp eq i32 %i3, 5
br i1 %cmp, label %then, label %else
; CHECK: DIVERGENT: %cmp =
; CHECK: DIVERGENT: br i1 %cmp,
then:
ret i32 0
Expand Down