15 changes: 15 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@
; GCN-O0-NEXT: Memory SSA
; GCN-O0-NEXT: AMDGPU Annotate Uniform Values
; GCN-O0-NEXT: SI annotate control flow
; GCN-O0-NEXT: Post-Dominator Tree Construction
; GCN-O0-NEXT: Legacy Divergence Analysis
; GCN-O0-NEXT: AMDGPU Rewrite Undef for PHI
; GCN-O0-NEXT: LCSSA Verifier
; GCN-O0-NEXT: Loop-Closed SSA Form Pass
; GCN-O0-NEXT: DummyCGSCCPass
Expand Down Expand Up @@ -264,6 +267,9 @@
; GCN-O1-NEXT: Memory SSA
; GCN-O1-NEXT: AMDGPU Annotate Uniform Values
; GCN-O1-NEXT: SI annotate control flow
; GCN-O1-NEXT: Post-Dominator Tree Construction
; GCN-O1-NEXT: Legacy Divergence Analysis
; GCN-O1-NEXT: AMDGPU Rewrite Undef for PHI
; GCN-O1-NEXT: LCSSA Verifier
; GCN-O1-NEXT: Loop-Closed SSA Form Pass
; GCN-O1-NEXT: DummyCGSCCPass
Expand Down Expand Up @@ -548,6 +554,9 @@
; GCN-O1-OPTS-NEXT: Memory SSA
; GCN-O1-OPTS-NEXT: AMDGPU Annotate Uniform Values
; GCN-O1-OPTS-NEXT: SI annotate control flow
; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction
; GCN-O1-OPTS-NEXT: Legacy Divergence Analysis
; GCN-O1-OPTS-NEXT: AMDGPU Rewrite Undef for PHI
; GCN-O1-OPTS-NEXT: LCSSA Verifier
; GCN-O1-OPTS-NEXT: Loop-Closed SSA Form Pass
; GCN-O1-OPTS-NEXT: DummyCGSCCPass
Expand Down Expand Up @@ -840,6 +849,9 @@
; GCN-O2-NEXT: Memory SSA
; GCN-O2-NEXT: AMDGPU Annotate Uniform Values
; GCN-O2-NEXT: SI annotate control flow
; GCN-O2-NEXT: Post-Dominator Tree Construction
; GCN-O2-NEXT: Legacy Divergence Analysis
; GCN-O2-NEXT: AMDGPU Rewrite Undef for PHI
; GCN-O2-NEXT: LCSSA Verifier
; GCN-O2-NEXT: Loop-Closed SSA Form Pass
; GCN-O2-NEXT: Analysis if a function is memory bound
Expand Down Expand Up @@ -1147,6 +1159,9 @@
; GCN-O3-NEXT: Memory SSA
; GCN-O3-NEXT: AMDGPU Annotate Uniform Values
; GCN-O3-NEXT: SI annotate control flow
; GCN-O3-NEXT: Post-Dominator Tree Construction
; GCN-O3-NEXT: Legacy Divergence Analysis
; GCN-O3-NEXT: AMDGPU Rewrite Undef for PHI
; GCN-O3-NEXT: LCSSA Verifier
; GCN-O3-NEXT: Loop-Closed SSA Form Pass
; GCN-O3-NEXT: Analysis if a function is memory bound
Expand Down
64 changes: 37 additions & 27 deletions llvm/test/CodeGen/AMDGPU/loop_break.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

Expand All @@ -11,15 +12,16 @@ define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[TMP0:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[TMP0]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
Expand All @@ -43,22 +45,25 @@ define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: s_cbranch_scc0 .LBB0_3
; GCN-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: s_branch .LBB0_4
; GCN-NEXT: .LBB0_3: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB0_3: ; %Flow
; GCN-NEXT: .LBB0_4: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB0_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: ; %bb.5: ; %bb9
; GCN-NEXT: s_endpgm
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -88,22 +93,23 @@ define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
Expand Down Expand Up @@ -179,22 +185,23 @@ define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
Expand Down Expand Up @@ -267,22 +274,23 @@ define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
Expand Down Expand Up @@ -355,22 +363,23 @@ define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
Expand Down Expand Up @@ -445,24 +454,25 @@ define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT: [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT: br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT-NEXT: [[MY_TMP3_INV:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3_INV]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
Expand Down
11 changes: 6 additions & 5 deletions llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
Original file line number Diff line number Diff line change
Expand Up @@ -326,12 +326,13 @@ exit1: ; preds = %LeafBlock, %LeafBlock1

; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
; IR: Flow2:
; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf.i64(i64 %16)
; IR: %8 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
; IR: %9 = phi i1 [ false, %exit1 ], [ %13, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf.i64(i64 %17)

; IR: UnifiedReturnBlock:
; IR: %UnifiedRetVal = phi float [ 2.000000e+00, %Flow2 ], [ 1.000000e+00, %exit0 ]
; IR: call void @llvm.amdgcn.end.cf.i64(i64 %11)
; IR: %UnifiedRetVal = phi float [ %8, %Flow2 ], [ 1.000000e+00, %exit0 ]
; IR: call void @llvm.amdgcn.end.cf.i64(i64 %12)
; IR: ret float %UnifiedRetVal
define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
entry:
Expand Down Expand Up @@ -366,7 +367,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; GCN: {{^}}[[FLOW]]:

; GCN: s_or_b64 exec, exec
; GCN: v_mov_b32_e32 v0, 2.0
; GCN: v_mov_b32_e32 v0, s6
; GCN-NOT: s_and_b64 exec, exec
; GCN: v_mov_b32_e32 v0, 1.0

Expand Down
73 changes: 37 additions & 36 deletions llvm/test/CodeGen/AMDGPU/multilevel-break.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
Expand All @@ -10,34 +9,32 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
; OPT-NEXT: main_body:
; OPT-NEXT: br label [[LOOP_OUTER:%.*]]
; OPT: LOOP.outer:
; OPT-NEXT: [[PHI_BROKEN2:%.*]] = phi i64 [ [[TMP10:%.*]], [[FLOW1:%.*]] ], [ 0, [[MAIN_BODY:%.*]] ]
; OPT-NEXT: [[TMP43:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP4:%.*]], [[FLOW1]] ]
; OPT-NEXT: [[PHI_BROKEN2:%.*]] = phi i64 [ [[TMP8:%.*]], [[FLOW1:%.*]] ], [ 0, [[MAIN_BODY:%.*]] ]
; OPT-NEXT: [[TMP43:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP3:%.*]], [[FLOW1]] ]
; OPT-NEXT: br label [[LOOP:%.*]]
; OPT: LOOP:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP8:%.*]], [[FLOW:%.*]] ], [ 0, [[LOOP_OUTER]] ]
; OPT-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP4]], [[FLOW]] ]
; OPT-NEXT: [[TMP45:%.*]] = phi i32 [ [[TMP43]], [[LOOP_OUTER]] ], [ [[TMP5:%.*]], [[FLOW]] ]
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], [[FLOW:%.*]] ], [ 0, [[LOOP_OUTER]] ]
; OPT-NEXT: [[TMP45:%.*]] = phi i32 [ [[TMP43]], [[LOOP_OUTER]] ], [ [[TMP3]], [[FLOW]] ]
; OPT-NEXT: [[TMP48:%.*]] = icmp slt i32 [[TMP45]], [[UB:%.*]]
; OPT-NEXT: [[TMP1:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP48]])
; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP1]], 0
; OPT-NEXT: [[TMP3:%.*]] = extractvalue { i1, i64 } [[TMP1]], 1
; OPT-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP48]])
; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
; OPT-NEXT: br i1 [[TMP1]], label [[ENDIF:%.*]], label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[TMP4]] = phi i32 [ [[TMP47:%.*]], [[ENDIF]] ], [ [[TMP0]], [[LOOP]] ]
; OPT-NEXT: [[TMP5]] = phi i32 [ [[TMP47]], [[ENDIF]] ], [ undef, [[LOOP]] ]
; OPT-NEXT: [[TMP6:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
; OPT-NEXT: [[TMP7:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP3]])
; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP7]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
; OPT-NEXT: [[TMP10]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP6]], i64 [[PHI_BROKEN2]])
; OPT-NEXT: br i1 [[TMP9]], label [[FLOW1]], label [[LOOP]]
; OPT-NEXT: [[TMP3]] = phi i32 [ [[TMP47:%.*]], [[ENDIF]] ], [ undef, [[LOOP]] ]
; OPT-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
; OPT-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP7:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]])
; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN2]])
; OPT-NEXT: br i1 [[TMP7]], label [[FLOW1]], label [[LOOP]]
; OPT: Flow1:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
; OPT-NEXT: [[TMP11:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP10]])
; OPT-NEXT: br i1 [[TMP11]], label [[IF:%.*]], label [[LOOP_OUTER]]
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
; OPT-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
; OPT-NEXT: br i1 [[TMP9]], label [[IF:%.*]], label [[LOOP_OUTER]]
; OPT: IF:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]])
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
; OPT-NEXT: ret void
; OPT: ENDIF:
; OPT-NEXT: [[TMP47]] = add i32 [[TMP45]], 1
Expand Down Expand Up @@ -118,9 +115,9 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; OPT-NEXT: [[TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW4:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW4]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP4:%.*]], [[FLOW4:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[TMP2:%.*]], [[FLOW4]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: [[LOAD0:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: br label [[NODEBLOCK:%.*]]
Expand All @@ -131,33 +128,37 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; OPT-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[LOAD0]], 1
; OPT-NEXT: br i1 [[SWITCHLEAF2]], label [[CASE1:%.*]], label [[FLOW3:%.*]]
; OPT: Flow3:
; OPT-NEXT: [[TMP0:%.*]] = phi i1 [ [[CMP2:%.*]], [[CASE1]] ], [ true, [[LEAFBLOCK1]] ]
; OPT-NEXT: [[TMP0:%.*]] = phi i32 [ [[LSR_IV_NEXT]], [[CASE1]] ], [ undef, [[LEAFBLOCK1]] ]
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[CMP2:%.*]], [[CASE1]] ], [ true, [[LEAFBLOCK1]] ]
; OPT-NEXT: br label [[FLOW]]
; OPT: LeafBlock:
; OPT-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[LOAD0]], 0
; OPT-NEXT: br i1 [[SWITCHLEAF]], label [[CASE0:%.*]], label [[FLOW5:%.*]]
; OPT: Flow4:
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP6:%.*]], [[FLOW5]] ], [ [[TMP4:%.*]], [[FLOW]] ]
; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT-NEXT: [[TMP2]] = phi i32 [ [[TMP9:%.*]], [[FLOW5]] ], [ [[TMP6:%.*]], [[FLOW]] ]
; OPT-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP10:%.*]], [[FLOW5]] ], [ [[TMP7:%.*]], [[FLOW]] ]
; OPT-NEXT: [[TMP4]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP4]])
; OPT-NEXT: br i1 [[TMP5]], label [[BB9:%.*]], label [[BB1]]
; OPT: case0:
; OPT-NEXT: [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[TMP]], [[LOAD1]]
; OPT-NEXT: br label [[FLOW5]]
; OPT: Flow:
; OPT-NEXT: [[TMP4]] = phi i1 [ [[TMP0]], [[FLOW3]] ], [ true, [[NODEBLOCK]] ]
; OPT-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[FLOW3]] ], [ true, [[NODEBLOCK]] ]
; OPT-NEXT: br i1 [[TMP5]], label [[LEAFBLOCK:%.*]], label [[FLOW4]]
; OPT-NEXT: [[TMP6]] = phi i32 [ [[TMP0]], [[FLOW3]] ], [ undef, [[NODEBLOCK]] ]
; OPT-NEXT: [[TMP7]] = phi i1 [ [[TMP1]], [[FLOW3]] ], [ true, [[NODEBLOCK]] ]
; OPT-NEXT: [[TMP8:%.*]] = phi i1 [ false, [[FLOW3]] ], [ true, [[NODEBLOCK]] ]
; OPT-NEXT: br i1 [[TMP8]], label [[LEAFBLOCK:%.*]], label [[FLOW4]]
; OPT: case1:
; OPT-NEXT: [[LOAD2:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP2]] = icmp sge i32 [[TMP]], [[LOAD2]]
; OPT-NEXT: br label [[FLOW3]]
; OPT: Flow5:
; OPT-NEXT: [[TMP6]] = phi i1 [ [[CMP1]], [[CASE0]] ], [ [[TMP4]], [[LEAFBLOCK]] ]
; OPT-NEXT: [[TMP9]] = phi i32 [ [[LSR_IV_NEXT]], [[CASE0]] ], [ undef, [[LEAFBLOCK]] ]
; OPT-NEXT: [[TMP10]] = phi i1 [ [[CMP1]], [[CASE0]] ], [ [[TMP7]], [[LEAFBLOCK]] ]
; OPT-NEXT: br label [[FLOW4]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP4]])
; OPT-NEXT: ret void
;
; GCN-LABEL: multi_if_break_loop:
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
Expand All @@ -14,27 +13,28 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: s_and_b64 s[2:3], exec, -1
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; GCN-NEXT: ds_read_b64 v[0:1], v0
; GCN-NEXT: s_and_b64 s[0:1], exec, 0
; GCN-NEXT: s_and_b64 vcc, exec, 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_mov_b64 vcc, s[0:1]
; GCN-NEXT: ; implicit-def: $sgpr2
; GCN-NEXT: s_mov_b64 vcc, vcc
; GCN-NEXT: s_cbranch_vccz .LBB0_4
; GCN-NEXT: .LBB0_2: ; %bb5
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_mov_b64 s[4:5], -1
; GCN-NEXT: s_mov_b64 vcc, s[2:3]
; GCN-NEXT: s_cbranch_vccz .LBB0_1
; GCN-NEXT: s_cmp_lg_u32 s2, 1
; GCN-NEXT: s_mov_b64 s[0:1], -1
; GCN-NEXT: s_cbranch_scc0 .LBB0_1
; GCN-NEXT: ; %bb.3: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_branch .LBB0_1
; GCN-NEXT: .LBB0_4: ; %loop.exit.guard
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
; GCN-NEXT: s_and_b64 vcc, exec, s[0:1]
; GCN-NEXT: s_cbranch_vccz .LBB0_7
; GCN-NEXT: ; %bb.5: ; %bb8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -48,9 +48,9 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
; GCN-NEXT: s_endpgm
; IR-LABEL: @reduced_nested_loop_conditions(
; IR-NEXT: bb:
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]]
; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* [[ARG:%.*]], i32 [[MY_TMP]]
; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]]
; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]], align 4
; IR-NEXT: br label [[BB5:%.*]]
; IR: bb3:
; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]]
Expand Down Expand Up @@ -84,7 +84,7 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
; IR: bb16:
; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1
; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 [[MY_TMP17]]
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]]
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]], align 4
; IR-NEXT: br label [[BB20]]
; IR: bb20:
; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ]
Expand Down Expand Up @@ -190,19 +190,19 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
; GCN-NEXT: s_endpgm
; IR-LABEL: @nested_loop_conditions(
; IR-NEXT: bb:
; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef
; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9
; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]]
; IR: bb14.lr.ph:
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4]]
; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64
; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[ARG:%.*]], i64 [[MY_TMP1]]
; IR-NEXT: [[MY_TMP3:%.*]] = load i64, i64 addrspace(1)* [[MY_TMP2]], align 16
; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0
; IR-NEXT: br label [[BB14:%.*]]
; IR: Flow3:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP21:%.*]])
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP20:%.*]])
; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]])
; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
Expand Down Expand Up @@ -244,7 +244,7 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]])
; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]]
; IR: bb18:
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9
; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label [[BB18]]
; IR: bb21:
Expand All @@ -261,20 +261,20 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
; IR-NEXT: [[MY_TMP8:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 [[MY_TMP7]]
; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16
; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0
; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef
; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; IR-NEXT: [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9
; IR-NEXT: br label [[FLOW1]]
; IR: Flow2:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]])
; IR-NEXT: [[TMP19:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
; IR-NEXT: [[TMP20:%.*]] = extractvalue { i1, i64 } [[TMP19]], 0
; IR-NEXT: [[TMP21]] = extractvalue { i1, i64 } [[TMP19]], 1
; IR-NEXT: br i1 [[TMP20]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]]
; IR-NEXT: [[TMP18:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
; IR-NEXT: [[TMP19:%.*]] = extractvalue { i1, i64 } [[TMP18]], 0
; IR-NEXT: [[TMP20]] = extractvalue { i1, i64 } [[TMP18]], 1
; IR-NEXT: br i1 [[TMP19]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]]
; IR: bb31.loopexit:
; IR-NEXT: br label [[FLOW3]]
; IR: bb31:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef, align 4
; IR-NEXT: ret void
bb:
%my.tmp1134 = load volatile i32, i32 addrspace(1)* undef
Expand Down
103 changes: 103 additions & 0 deletions llvm/test/CodeGen/AMDGPU/rewrite-undef-for-phi.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-rewrite-undef-for-phi %s | FileCheck -check-prefix=OPT %s

; Simplest case: a one-armed "if" whose join block %end holds a phi (%c2)
; with two incoming values: undef (from %if) and %c (from %entry).
; The branch condition is computed from %x, which is not marked inreg, while
; %c is marked inreg (presumably passed in a scalar register and therefore
; uniform on AMDGPU -- confirm against the calling convention).
; Per the autogenerated assertions below, the phi is expected to disappear
; and %c to be returned directly.
define amdgpu_ps float @basic(float inreg %c, i32 %x) #0 {
; OPT-LABEL: @basic(
; OPT-NEXT: entry:
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
; OPT-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]]
; OPT: if:
; OPT-NEXT: br label [[END]]
; OPT: end:
; OPT-NEXT: ret float [[C:%.*]]
;
entry:
%cc = icmp slt i32 %x, 0
br i1 %cc, label %if, label %end

if:
br label %end

end:
; Phi under test: the only non-undef incoming value is %c.
%c2 = phi float [ undef, %if ], [ %c, %entry ]
ret float %c2
}

; Variant of @basic in which the region entered through %if contains further
; branches. The outer branch in %entry depends on %x (not inreg); the inner
; branches in %if and %bb3 depend only on %d, which is inreg (presumably
; uniform, as the function name suggests -- confirm).
; The phi in %end receives undef from %bb2 and %bb4, and %c from %bb3 and
; %entry. Per the autogenerated assertions below, the phi is expected to be
; removed and %c returned directly.
define amdgpu_ps float @with_uniform_region_inside(float inreg %c, i32 inreg %d, i32 %x) #0 {
; OPT-LABEL: @with_uniform_region_inside(
; OPT-NEXT: entry:
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
; OPT-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]]
; OPT: if:
; OPT-NEXT: [[CC2:%.*]] = icmp slt i32 [[D:%.*]], 0
; OPT-NEXT: br i1 [[CC2]], label [[BB2:%.*]], label [[BB3:%.*]]
; OPT: bb2:
; OPT-NEXT: br label [[END]]
; OPT: bb3:
; OPT-NEXT: [[CC3:%.*]] = icmp slt i32 [[D]], 2
; OPT-NEXT: br i1 [[CC3]], label [[BB4:%.*]], label [[END]]
; OPT: bb4:
; OPT-NEXT: br label [[END]]
; OPT: end:
; OPT-NEXT: ret float [[C:%.*]]
;
entry:
; Outer condition: derived from the non-inreg argument %x.
%cc = icmp slt i32 %x, 0
br i1 %cc, label %if, label %end

if:
; Inner conditions (%cc2, %cc3) are derived only from the inreg argument %d.
%cc2 = icmp slt i32 %d, 0
br i1 %cc2, label %bb2, label %bb3

bb2:
br label %end

bb3:
%cc3 = icmp slt i32 %d, 2
br i1 %cc3, label %bb4, label %end

bb4:
br label %end

end:
; Phi under test: two undef inputs (%bb2, %bb4) and two copies of %c.
%c2 = phi float [ undef, %bb2 ], [ %c, %bb3 ], [ undef, %bb4 ], [ %c, %entry ]
ret float %c2
}

; Negative test: the undef incoming value of %c2 arrives over the loop back
; edge (%loop -> %loop) rather than from a forward edge.
; Per the autogenerated assertions below, %c2 is expected to be left
; unchanged (its undef operand remains), and the phi %r in %end is kept too.
define amdgpu_ps float @exclude_backedge(float inreg %c, i32 %x) #0 {
; OPT-LABEL: @exclude_backedge(
; OPT-NEXT: entry:
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
; OPT-NEXT: br i1 [[CC]], label [[END:%.*]], label [[LOOP:%.*]]
; OPT: loop:
; OPT-NEXT: [[IND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
; OPT-NEXT: [[C2:%.*]] = phi float [ [[C:%.*]], [[ENTRY]] ], [ undef, [[LOOP]] ]
; OPT-NEXT: [[INC]] = add i32 [[IND]], 1
; OPT-NEXT: [[LOOP_CC:%.*]] = icmp slt i32 [[INC]], 5
; OPT-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[LOOP_END:%.*]]
; OPT: loop_end:
; OPT-NEXT: br label [[END]]
; OPT: end:
; OPT-NEXT: [[R:%.*]] = phi float [ [[C2]], [[LOOP_END]] ], [ [[C]], [[ENTRY]] ]
; OPT-NEXT: ret float [[R]]
;
entry:
%cc = icmp slt i32 %x, 0
br i1 %cc, label %end, label %loop

loop:
; %ind counts iterations; the loop continues while %inc < 5.
%ind = phi i32 [ 0, %entry ], [ %inc, %loop ]
; Phi under test: %c on entry, undef on every later iteration (back edge).
%c2 = phi float [ %c, %entry ], [ undef, %loop ]
%inc = add i32 %ind, 1
%loop_cc = icmp slt i32 %inc, 5
br i1 %loop_cc, label %loop, label %loop_end

loop_end:
br label %end

end:
; Merges the loop result with the value taken when the loop is skipped.
%r = phi float [ %c2, %loop_end ], [ %c, %entry ]
ret float %r
}

; Attribute group #0, referenced by every function defined in this test file.
attributes #0 = { nounwind noinline }
116 changes: 58 additions & 58 deletions llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -183,47 +183,47 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 3
; SI-NEXT: s_branch .LBB3_4
; SI-NEXT: .LBB3_1: ; %Flow6
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_branch .LBB3_3
; SI-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: .LBB3_2: ; %Flow5
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], 0
; SI-NEXT: .LBB3_3: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[10:11]
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: .LBB3_2: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[12:13]
; SI-NEXT: s_cbranch_vccnz .LBB3_8
; SI-NEXT: .LBB3_4: ; %while.cond
; SI-NEXT: .LBB3_3: ; %while.cond
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 s[8:9], -1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[0:1]
; SI-NEXT: s_cbranch_vccz .LBB3_3
; SI-NEXT: ; %bb.5: ; %convex.exit
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[8:9], -1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 vcc, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB3_2
; SI-NEXT: ; %bb.6: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 vcc, s[4:5]
; SI-NEXT: ; %bb.4: ; %convex.exit
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 vcc, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB3_1
; SI-NEXT: ; %bb.7: ; %if.else
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], 0
; SI-NEXT: ; %bb.5: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[4:5]
; SI-NEXT: s_cbranch_vccz .LBB3_7
; SI-NEXT: ; %bb.6: ; %if.else
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_branch .LBB3_1
; SI-NEXT: s_mov_b64 s[12:13], 0
; SI-NEXT: .LBB3_7: ; %Flow6
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], 0
; SI-NEXT: ; implicit-def: $sgpr8_sgpr9
; SI-NEXT: s_branch .LBB3_2
; SI-NEXT: .LBB3_8: ; %loop.exit.guard4
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[8:9]
; SI-NEXT: s_cbranch_vccz .LBB3_4
; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[10:11]
; SI-NEXT: s_cbranch_vccz .LBB3_3
; SI-NEXT: ; %bb.9: ; %loop.exit.guard
; SI-NEXT: s_and_b64 vcc, exec, s[12:13]
; SI-NEXT: s_and_b64 vcc, exec, s[8:9]
; SI-NEXT: s_cbranch_vccz .LBB3_13
; SI-NEXT: ; %bb.10: ; %for.cond.preheader
; SI-NEXT: s_cmpk_lt_i32 s14, 0x3e8
Expand Down Expand Up @@ -258,47 +258,47 @@ define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32
; FLAT-NEXT: s_mov_b32 s7, 0xf000
; FLAT-NEXT: s_mov_b32 s6, -1
; FLAT-NEXT: v_mov_b32_e32 v0, 3
; FLAT-NEXT: s_branch .LBB3_4
; FLAT-NEXT: .LBB3_1: ; %Flow6
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_branch .LBB3_3
; FLAT-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 s[8:9], 0
; FLAT-NEXT: .LBB3_2: ; %Flow5
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], 0
; FLAT-NEXT: .LBB3_3: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: .LBB3_2: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT: s_cbranch_vccnz .LBB3_8
; FLAT-NEXT: .LBB3_4: ; %while.cond
; FLAT-NEXT: .LBB3_3: ; %while.cond
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 s[8:9], -1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[0:1]
; FLAT-NEXT: s_cbranch_vccz .LBB3_3
; FLAT-NEXT: ; %bb.5: ; %convex.exit
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[8:9], -1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
; FLAT-NEXT: s_cbranch_vccz .LBB3_2
; FLAT-NEXT: ; %bb.6: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 vcc, s[4:5]
; FLAT-NEXT: ; %bb.4: ; %convex.exit
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
; FLAT-NEXT: s_cbranch_vccz .LBB3_1
; FLAT-NEXT: ; %bb.7: ; %if.else
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], 0
; FLAT-NEXT: ; %bb.5: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[4:5]
; FLAT-NEXT: s_cbranch_vccz .LBB3_7
; FLAT-NEXT: ; %bb.6: ; %if.else
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_waitcnt vmcnt(0)
; FLAT-NEXT: s_branch .LBB3_1
; FLAT-NEXT: s_mov_b64 s[12:13], 0
; FLAT-NEXT: .LBB3_7: ; %Flow6
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], 0
; FLAT-NEXT: ; implicit-def: $sgpr8_sgpr9
; FLAT-NEXT: s_branch .LBB3_2
; FLAT-NEXT: .LBB3_8: ; %loop.exit.guard4
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[8:9]
; FLAT-NEXT: s_cbranch_vccz .LBB3_4
; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT: s_cbranch_vccz .LBB3_3
; FLAT-NEXT: ; %bb.9: ; %loop.exit.guard
; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT: s_and_b64 vcc, exec, s[8:9]
; FLAT-NEXT: s_cbranch_vccz .LBB3_13
; FLAT-NEXT: ; %bb.10: ; %for.cond.preheader
; FLAT-NEXT: s_cmpk_lt_i32 s14, 0x3e8
Expand Down
284 changes: 134 additions & 150 deletions llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
;
; This test shows a typical case in which a PHI (%c2) in the join block is treated
; as uniform because it has one unique uniform incoming value plus one additional
; undef incoming value. Such a case can be miscompiled if %c2 is assigned a scalar
; register while being dead in %if. The problem is solved by replacing the undef
; with %c (thus replacing %c2 with %c in this example).


; End-to-end (llc) check of a join-block phi that mixes undef with an inreg
; value. %c is the only inreg argument; %v, %x and %y are not. The branch on
; %cc (computed from %x and %y) guards %if, and %c2 in %end merges undef
; (from %if) with %c (from %entry).
; In the expected assembly below, the final v_add_f32 takes s0 directly as
; its scalar operand and no separate copy for %c2 appears
; (presumably s0 is the register holding %c -- confirm against the
; amdgpu_ps calling convention).
define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x, i32 %y) #0 {
; GCN-LABEL: uniform_phi_with_undef:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_lt_i32_e64 s2, v2, v1
; GCN-NEXT: s_mov_b32 s1, exec_lo
; GCN-NEXT: s_and_b32 s2, s1, s2
; GCN-NEXT: s_mov_b32 exec_lo, s2
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
; GCN-NEXT: s_mov_b32 s2, 2.0
; GCN-NEXT: v_div_scale_f32 v1, s3, s2, s2, v0
; GCN-NEXT: v_rcp_f32_e64 v2, v1
; GCN-NEXT: s_mov_b32 s3, 1.0
; GCN-NEXT: v_fma_f32 v3, -v1, v2, s3
; GCN-NEXT: v_fmac_f32_e64 v2, v3, v2
; GCN-NEXT: v_div_scale_f32 v3, vcc_lo, v0, s2, v0
; GCN-NEXT: v_mul_f32_e64 v4, v3, v2
; GCN-NEXT: v_fma_f32 v5, -v1, v4, v3
; GCN-NEXT: v_fmac_f32_e64 v4, v5, v2
; GCN-NEXT: v_fma_f32 v1, -v1, v4, v3
; GCN-NEXT: v_div_fmas_f32 v1, v1, v2, v4
; GCN-NEXT: v_div_fixup_f32 v0, v1, s2, v0
; GCN-NEXT: .LBB0_2: ; %end
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GCN-NEXT: v_add_f32_e64 v0, v0, s0
; GCN-NEXT: ; return to shader part epilog
entry:
; Condition built from two non-inreg arguments.
%cc = icmp slt i32 %y, %x
br i1 %cc, label %if, label %end

if:
; Gives %if some real work; this is the fdiv expansion seen in %bb.1 above.
%v.if = fdiv float %v, 2.0
br label %end

end:
%v2 = phi float [ %v.if, %if ], [ %v, %entry ]
; Phi under test: undef from %if, the inreg value %c from %entry.
%c2 = phi float [ undef, %if ], [ %c, %entry ]
%r = fadd float %v2, %c2
ret float %r
}

; Attribute group #0 for @uniform_phi_with_undef. Unlike the attribute groups
; of the other tests added alongside it, this one also carries optnone
; (LLVM requires noinline whenever optnone is present).
attributes #0 = { nounwind optnone noinline }
15 changes: 8 additions & 7 deletions llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,28 +73,29 @@ define amdgpu_ps float @else2(i32 %z, float %v) #0 {
; SI-NEXT: bb.1.Flow:
; SI-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; SI-NEXT: {{ $}}
; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %15:vgpr_32, %bb.0, %4, %bb.3
; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, %5, %bb.3
; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, [[COPY]], %bb.3
; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; SI-NEXT: S_BRANCH %bb.2
; SI-NEXT: {{ $}}
; SI-NEXT: bb.2.if:
; SI-NEXT: successors: %bb.4(0x80000000)
; SI-NEXT: {{ $}}
; SI-NEXT: %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; SI-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; SI-NEXT: S_BRANCH %bb.4
; SI-NEXT: {{ $}}
; SI-NEXT: bb.3.else:
; SI-NEXT: successors: %bb.1(0x80000000)
; SI-NEXT: {{ $}}
; SI-NEXT: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; SI-NEXT: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; SI-NEXT: S_BRANCH %bb.1
; SI-NEXT: {{ $}}
; SI-NEXT: bb.4.end:
; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.1, %3, %bb.2
; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, %3, %bb.2
; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, %4, %bb.2
; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, %4, %bb.2
; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; SI-NEXT: %14:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI1]], 0, killed [[PHI2]], 0, 0, implicit $mode, implicit $exec
; SI-NEXT: $vgpr0 = COPY killed %14
; SI-NEXT: %15:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI2]], 0, killed [[PHI3]], 0, 0, implicit $mode, implicit $exec
; SI-NEXT: $vgpr0 = COPY killed %15
; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0
main_body:
%cc = icmp sgt i32 %z, 5
Expand Down
155 changes: 155 additions & 0 deletions llvm/test/CodeGen/AMDGPU/while-break.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN

; Loop with two exits into %end:
;   * from %else, when %cc2 is false (%ind >= %y), carrying %v.1;
;   * from %latch, when %cc3 is true (%ind < %z), carrying %v.2.
; %v.1 is the loop-carried float; it is incremented by 1.0 only on the %if
; path and passed through unchanged on the %else path. None of the arguments
; is marked inreg.
define amdgpu_ps float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 {
; GCN-LABEL: while_break:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s1, -1
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %Flow2
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GCN-NEXT: s_and_b32 s2, exec_lo, s3
; GCN-NEXT: s_or_b32 s0, s2, s0
; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GCN-NEXT: s_cbranch_execz .LBB0_8
; GCN-NEXT: .LBB0_2: ; %header
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s1, s1, 1
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: v_cmp_ge_i32_e32 vcc_lo, s1, v2
; GCN-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GCN-NEXT: s_xor_b32 s3, exec_lo, s3
; GCN-NEXT: ; %bb.3: ; %else
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v3
; GCN-NEXT: s_and_b32 s2, vcc_lo, exec_lo
; GCN-NEXT: ; %bb.4: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_andn2_saveexec_b32 s3, s3
; GCN-NEXT: ; %bb.5: ; %if
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
; GCN-NEXT: s_or_b32 s2, s2, exec_lo
; GCN-NEXT: ; %bb.6: ; %Flow1
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GCN-NEXT: s_mov_b32 s3, -1
; GCN-NEXT: s_and_saveexec_b32 s4, s2
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.7: ; %latch
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v0
; GCN-NEXT: s_orn2_b32 s3, vcc_lo, exec_lo
; GCN-NEXT: s_branch .LBB0_1
; GCN-NEXT: .LBB0_8: ; %end
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: ; return to shader part epilog
entry:
br label %header

header:
; Loop-carried value and induction variable (starts at 0, +1 per iteration).
%v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
%ind = phi i32 [ 0, %entry], [ %ind.inc, %latch ]
; True successor is %if, false successor is %else.
%cc = icmp slt i32 %ind, %x
br i1 %cc, label %if, label %else

if:
%v.if = fadd float %v.1, 1.0
br label %latch

else:
; First loop exit: leave for %end when %ind >= %y.
%cc2 = icmp slt i32 %ind, %y
br i1 %cc2, label %latch, label %end

latch:
%v.2 = phi float [ %v.if, %if ], [ %v.1, %else ]
%ind.inc = add i32 %ind, 1
; Second loop exit. Note: compares the pre-increment %ind, not %ind.inc.
%cc3 = icmp slt i32 %ind, %z
br i1 %cc3, label %end, label %header

end:
; Result depends on which of the two exits was taken.
%r = phi float [ %v.2, %latch ], [ %v.1, %else ]
ret float %r
}

; Same as @while_break except that the two successors of the branch in
; %header are swapped (true -> %else, false -> %if), which gives a different
; DFS order over the blocks. In the expected assembly, %if is therefore
; emitted before %else (as %bb.3 and %bb.5 respectively).
define amdgpu_ps float @while_break2(i32 %z, float %v, i32 %x, i32 %y) #0 {
; GCN-LABEL: while_break2:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s1, -1
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_branch .LBB1_2
; GCN-NEXT: .LBB1_1: ; %Flow2
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GCN-NEXT: s_and_b32 s2, exec_lo, s3
; GCN-NEXT: s_or_b32 s0, s2, s0
; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GCN-NEXT: s_cbranch_execz .LBB1_8
; GCN-NEXT: .LBB1_2: ; %header
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s1, s1, 1
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: v_cmp_ge_i32_e32 vcc_lo, s1, v2
; GCN-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GCN-NEXT: s_xor_b32 s3, exec_lo, s3
; GCN-NEXT: ; %bb.3: ; %if
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
; GCN-NEXT: s_mov_b32 s2, exec_lo
; GCN-NEXT: ; %bb.4: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_andn2_saveexec_b32 s3, s3
; GCN-NEXT: ; %bb.5: ; %else
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v3
; GCN-NEXT: s_andn2_b32 s2, s2, exec_lo
; GCN-NEXT: s_and_b32 s4, vcc_lo, exec_lo
; GCN-NEXT: s_or_b32 s2, s2, s4
; GCN-NEXT: ; %bb.6: ; %Flow1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GCN-NEXT: s_mov_b32 s3, -1
; GCN-NEXT: s_and_saveexec_b32 s4, s2
; GCN-NEXT: s_cbranch_execz .LBB1_1
; GCN-NEXT: ; %bb.7: ; %latch
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v0
; GCN-NEXT: s_orn2_b32 s3, vcc_lo, exec_lo
; GCN-NEXT: s_branch .LBB1_1
; GCN-NEXT: .LBB1_8: ; %end
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: ; return to shader part epilog
entry:
br label %header

header:
; Loop-carried value and induction variable (starts at 0, +1 per iteration).
%v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
%ind = phi i32 [ 0, %entry], [ %ind.inc, %latch ]
; The only difference from @while_break: true successor is %else here.
%cc = icmp slt i32 %ind, %x
br i1 %cc, label %else, label %if

if:
%v.if = fadd float %v.1, 1.0
br label %latch

else:
; First loop exit: leave for %end when %ind >= %y.
%cc2 = icmp slt i32 %ind, %y
br i1 %cc2, label %latch, label %end

latch:
%v.2 = phi float [ %v.if, %if ], [ %v.1, %else ]
%ind.inc = add i32 %ind, 1
; Second loop exit. Note: compares the pre-increment %ind, not %ind.inc.
%cc3 = icmp slt i32 %ind, %z
br i1 %cc3, label %end, label %header

end:
; Result depends on which of the two exits was taken.
%r = phi float [ %v.2, %latch ], [ %v.1, %else ]
ret float %r
}

; Attribute group #0, shared by @while_break and @while_break2.
attributes #0 = { nounwind }
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: LOOP.HEADER:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[FLOW3:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP5:%.*]], [[FLOW3:%.*]] ]
; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]]
Expand All @@ -49,8 +49,8 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52
; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
; CHECK: Flow2:
; CHECK-NEXT: [[TMP3]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP6:%.*]], [[FLOW]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP7:%.*]], [[FLOW]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP9:%.*]], [[FLOW]] ]
; CHECK-NEXT: br i1 [[TMP4]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]]
; CHECK: INNER_LOOP:
; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ]
Expand All @@ -66,19 +66,20 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
; CHECK-NEXT: [[LOAD13:%.*]] = icmp uge i32 [[TMP16]], 271
; CHECK-NEXT: br i1 [[LOAD13]], label [[INCREMENT_I]], label [[FLOW1:%.*]]
; CHECK: Flow3:
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ]
; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW4:%.*]], label [[LOOP_HEADER]]
; CHECK-NEXT: [[TMP5]] = phi i32 [ [[TMP3]], [[END_ELSE_BLOCK]] ], [ undef, [[FLOW2]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[FLOW4:%.*]], label [[LOOP_HEADER]]
; CHECK: Flow4:
; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]]
; CHECK-NEXT: br i1 [[TMP8:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]]
; CHECK: bb64:
; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #[[ATTR0]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP6]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW2]]
; CHECK-NEXT: [[TMP7]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP10]], label [[BB18]], label [[FLOW2]]
; CHECK: INCREMENT_I:
; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1
; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336
Expand Down
57 changes: 29 additions & 28 deletions llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,87 +14,88 @@ define i1 @test_nested(i32 %x, i1 %b1, i1 %b2, i1 %b3) {
; CHECK-NEXT: [[B3_INV:%.*]] = xor i1 [[B3:%.*]], true
; CHECK-NEXT: br label [[OUTER_LOOP_HEADER:%.*]]
; CHECK: Flow12:
; CHECK-NEXT: br i1 [[TMP2:%.*]], label [[EXIT_TRUE:%.*]], label [[FLOW13:%.*]]
; CHECK-NEXT: br i1 [[TMP3:%.*]], label [[EXIT_TRUE:%.*]], label [[FLOW13:%.*]]
; CHECK: exit.true:
; CHECK-NEXT: br label [[FLOW13]]
; CHECK: Flow13:
; CHECK-NEXT: br i1 [[TMP1:%.*]], label [[EXIT_FALSE:%.*]], label [[EXIT:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, [[EXIT_TRUE]] ], [ undef, [[FLOW12:%.*]] ]
; CHECK-NEXT: br i1 [[TMP2:%.*]], label [[EXIT_FALSE:%.*]], label [[EXIT:%.*]]
; CHECK: exit.false:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: outer.loop.header:
; CHECK-NEXT: br i1 [[B1:%.*]], label [[OUTER_LOOP_BODY:%.*]], label [[FLOW3:%.*]]
; CHECK: outer.loop.body:
; CHECK-NEXT: br label [[INNER_LOOP_HEADER:%.*]]
; CHECK: Flow3:
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW11:%.*]] ], [ true, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP1]] = phi i1 [ [[TMP11:%.*]], [[FLOW11]] ], [ false, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP2]] = phi i1 [ false, [[FLOW11]] ], [ true, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP0]], label [[FLOW12:%.*]], label [[OUTER_LOOP_HEADER]]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW11:%.*]] ], [ true, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP2]] = phi i1 [ [[TMP12:%.*]], [[FLOW11]] ], [ false, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[FLOW11]] ], [ true, [[OUTER_LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP1]], label [[FLOW12]], label [[OUTER_LOOP_HEADER]]
; CHECK: inner.loop.header:
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW4:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW4:%.*]] ], [ false, [[OUTER_LOOP_BODY]] ]
; CHECK-NEXT: br i1 [[B2:%.*]], label [[INNER_LOOP_BODY:%.*]], label [[FLOW4]]
; CHECK: Flow6:
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_LATCH:%.*]] ], [ true, [[LEAFBLOCK:%.*]] ]
; CHECK-NEXT: br label [[FLOW5:%.*]]
; CHECK: Flow7:
; CHECK-NEXT: br i1 [[TMP9:%.*]], label [[INNER_LOOP_END:%.*]], label [[FLOW8:%.*]]
; CHECK-NEXT: br i1 [[TMP10:%.*]], label [[INNER_LOOP_END:%.*]], label [[FLOW8:%.*]]
; CHECK: inner.loop.end:
; CHECK-NEXT: br label [[FLOW8]]
; CHECK: inner.loop.body:
; CHECK-NEXT: br i1 [[B3_INV]], label [[INNER_LOOP_BODY_ELSE:%.*]], label [[FLOW:%.*]]
; CHECK: inner.loop.body.else:
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ]
; CHECK-NEXT: br i1 [[TMP5]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]]
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[INNER_LOOP_BODY_ELSE]] ], [ true, [[INNER_LOOP_BODY]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[INNER_LOOP_BODY_THEN:%.*]], label [[INNER_LOOP_COND:%.*]]
; CHECK: inner.loop.body.then:
; CHECK-NEXT: br label [[INNER_LOOP_COND]]
; CHECK: Flow4:
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[TMP16:%.*]], [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP17:%.*]], [[FLOW5]] ], [ [[TMP3]], [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ [[TMP18:%.*]], [[FLOW5]] ], [ false, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP9]] = phi i1 [ false, [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[FLOW7:%.*]], label [[INNER_LOOP_HEADER]]
; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[TMP17:%.*]], [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP18:%.*]], [[FLOW5]] ], [ [[TMP4]], [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[TMP19:%.*]], [[FLOW5]] ], [ false, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP10]] = phi i1 [ false, [[FLOW5]] ], [ true, [[INNER_LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP7]], label [[FLOW7:%.*]], label [[INNER_LOOP_HEADER]]
; CHECK: inner.loop.cond:
; CHECK-NEXT: br label [[NODEBLOCK:%.*]]
; CHECK: NodeBlock:
; CHECK-NEXT: [[PIVOT:%.*]] = icmp slt i32 [[X:%.*]], 1
; CHECK-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK]], label [[FLOW5]]
; CHECK: Flow8:
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW7]] ]
; CHECK-NEXT: br i1 [[TMP8]], label [[LEAFBLOCK1:%.*]], label [[FLOW9:%.*]]
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ true, [[INNER_LOOP_END]] ], [ false, [[FLOW7]] ]
; CHECK-NEXT: br i1 [[TMP9]], label [[LEAFBLOCK1:%.*]], label [[FLOW9:%.*]]
; CHECK: LeafBlock1:
; CHECK-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[X]], 1
; CHECK-NEXT: br i1 [[SWITCHLEAF2]], label [[INNER_LOOP_BREAK:%.*]], label [[FLOW10:%.*]]
; CHECK: LeafBlock:
; CHECK-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[X]], 0
; CHECK-NEXT: br i1 [[SWITCHLEAF]], label [[INNER_LOOP_LATCH]], label [[FLOW6:%.*]]
; CHECK: Flow9:
; CHECK-NEXT: [[TMP11]] = phi i1 [ [[TMP13:%.*]], [[FLOW10]] ], [ [[TMP7]], [[FLOW8]] ]
; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ [[TMP14:%.*]], [[FLOW10]] ], [ [[TMP10]], [[FLOW8]] ]
; CHECK-NEXT: br i1 [[TMP12]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW11]]
; CHECK-NEXT: [[TMP12]] = phi i1 [ [[TMP14:%.*]], [[FLOW10]] ], [ [[TMP8]], [[FLOW8]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ [[TMP11]], [[FLOW8]] ]
; CHECK-NEXT: br i1 [[TMP13]], label [[OUTER_LOOP_CLEANUP:%.*]], label [[FLOW11]]
; CHECK: inner.loop.break:
; CHECK-NEXT: br label [[FLOW10]]
; CHECK: Flow10:
; CHECK-NEXT: [[TMP13]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ]
; CHECK-NEXT: [[TMP14]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP10]], [[LEAFBLOCK1]] ]
; CHECK-NEXT: [[TMP14]] = phi i1 [ false, [[INNER_LOOP_BREAK]] ], [ true, [[LEAFBLOCK1]] ]
; CHECK-NEXT: [[TMP15]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP11]], [[LEAFBLOCK1]] ]
; CHECK-NEXT: br label [[FLOW9]]
; CHECK: outer.loop.cleanup:
; CHECK-NEXT: br label [[OUTER_LOOP_LATCH:%.*]]
; CHECK: Flow11:
; CHECK-NEXT: [[TMP15]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW9]] ]
; CHECK-NEXT: [[TMP16]] = phi i1 [ false, [[OUTER_LOOP_LATCH]] ], [ true, [[FLOW9]] ]
; CHECK-NEXT: br label [[FLOW3]]
; CHECK: outer.loop.latch:
; CHECK-NEXT: br label [[FLOW11]]
; CHECK: Flow5:
; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP4]], [[FLOW6]] ], [ true, [[NODEBLOCK]] ]
; CHECK-NEXT: [[TMP17]] = phi i1 [ [[TMP4]], [[FLOW6]] ], [ [[TMP3]], [[NODEBLOCK]] ]
; CHECK-NEXT: [[TMP18]] = phi i1 [ false, [[FLOW6]] ], [ true, [[NODEBLOCK]] ]
; CHECK-NEXT: [[TMP17]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ true, [[NODEBLOCK]] ]
; CHECK-NEXT: [[TMP18]] = phi i1 [ [[TMP5]], [[FLOW6]] ], [ [[TMP4]], [[NODEBLOCK]] ]
; CHECK-NEXT: [[TMP19]] = phi i1 [ false, [[FLOW6]] ], [ true, [[NODEBLOCK]] ]
; CHECK-NEXT: br label [[FLOW4]]
; CHECK: inner.loop.latch:
; CHECK-NEXT: br label [[FLOW6]]
; CHECK: exit:
; CHECK-NEXT: [[R:%.*]] = phi i1 [ true, [[FLOW13]] ], [ false, [[EXIT_FALSE]] ]
; CHECK-NEXT: [[R:%.*]] = phi i1 [ [[TMP0]], [[FLOW13]] ], [ false, [[EXIT_FALSE]] ]
; CHECK-NEXT: ret i1 [[R]]
;
entry:
Expand Down
9 changes: 5 additions & 4 deletions llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@ define void @test1() {
; CHECK: Flow:
; CHECK-NEXT: br label [[FLOW1:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[CTR_NEXT:%.*]], [[FLOW1]] ]
; CHECK-NEXT: [[CTR_NEXT]] = add i32 [[CTR]], 1
; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FLOW1]] ]
; CHECK-NEXT: [[CTR_NEXT:%.*]] = add i32 [[CTR]], 1
; CHECK-NEXT: br i1 undef, label [[LOOP_A:%.*]], label [[FLOW1]]
; CHECK: loop.a:
; CHECK-NEXT: br i1 undef, label [[LOOP_B:%.*]], label [[FLOW:%.*]]
; CHECK: loop.b:
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow1:
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[FLOW]] ], [ true, [[LOOP]] ]
; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK-NEXT: [[TMP0]] = phi i32 [ [[CTR_NEXT]], [[FLOW]] ], [ undef, [[LOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[FLOW]] ], [ true, [[LOOP]] ]
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,28 @@ define void @blam(i32 addrspace(1)* nocapture %arg, float %arg1, float %arg2) {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[TMP1:%.*]], [[FLOW1:%.*]] ]
; CHECK-NEXT: [[TMP:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[TMP2:%.*]], [[FLOW1:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp ult float [[ARG1:%.*]], 3.500000e+00
; CHECK-NEXT: [[TMP4_INV:%.*]] = xor i1 [[TMP4]], true
; CHECK-NEXT: br i1 [[TMP4_INV]], label [[BB5:%.*]], label [[FLOW:%.*]]
; CHECK: bb5:
; CHECK-NEXT: [[TMP6:%.*]] = fcmp uge float 0.000000e+00, [[ARG2:%.*]]
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[TMP6]], [[BB5]] ], [ [[TMP4]], [[BB3]] ]
; CHECK-NEXT: br i1 [[TMP0]], label [[BB7:%.*]], label [[FLOW1]]
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 15, [[BB5]] ], [ undef, [[BB3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP6]], [[BB5]] ], [ [[TMP4]], [[BB3]] ]
; CHECK-NEXT: br i1 [[TMP1]], label [[BB7:%.*]], label [[FLOW1]]
; CHECK: bb7:
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP]], 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge i64 [[TMP8]], 5
; CHECK-NEXT: br label [[FLOW1]]
; CHECK: Flow1:
; CHECK-NEXT: [[TMP1]] = phi i64 [ [[TMP8]], [[BB7]] ], [ undef, [[FLOW]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ 255, [[BB7]] ], [ 15, [[FLOW]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP9]], [[BB7]] ], [ true, [[FLOW]] ]
; CHECK-NEXT: br i1 [[TMP3]], label [[BB10:%.*]], label [[BB3]]
; CHECK-NEXT: [[TMP2]] = phi i64 [ [[TMP8]], [[BB7]] ], [ undef, [[FLOW]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 255, [[BB7]] ], [ [[TMP0]], [[FLOW]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP9]], [[BB7]] ], [ true, [[FLOW]] ]
; CHECK-NEXT: br i1 [[TMP4]], label [[BB10:%.*]], label [[BB3]]
; CHECK: bb10:
; CHECK-NEXT: store i32 [[TMP2]], i32 addrspace(1)* [[ARG:%.*]], align 4
; CHECK-NEXT: store i32 [[TMP3]], i32 addrspace(1)* [[ARG:%.*]], align 4
; CHECK-NEXT: ret void
;
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,23 @@ define void @irreducible(i1 %PredEntry, i1 %PredB1, i1 %PredB2, i1 %PredB3, i1 %
; CHECK: B2:
; CHECK-NEXT: br i1 [[PREDB2_INV]], label [[B3:%.*]], label [[FLOW3:%.*]]
; CHECK: Flow2:
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP4:%.*]], [[FLOW3]] ], [ true, [[FLOW1]] ]
; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[IRR_GUARD]]
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP5:%.*]], [[FLOW3]] ], [ undef, [[FLOW1]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP6:%.*]], [[FLOW3]] ], [ true, [[FLOW1]] ]
; CHECK-NEXT: br i1 [[TMP4]], label [[EXIT:%.*]], label [[IRR_GUARD]]
; CHECK: B3:
; CHECK-NEXT: br label [[FLOW3]]
; CHECK: B4:
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow3:
; CHECK-NEXT: [[TMP4]] = phi i1 [ false, [[B3]] ], [ true, [[B2]] ]
; CHECK-NEXT: [[TMP5]] = phi i1 [ [[PREDB3:%.*]], [[B3]] ], [ undef, [[B2]] ]
; CHECK-NEXT: [[TMP6]] = phi i1 [ false, [[B3]] ], [ true, [[B2]] ]
; CHECK-NEXT: br label [[FLOW2]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: irr.guard:
; CHECK-NEXT: [[GUARD_B1:%.*]] = phi i1 [ [[PREDENTRY:%.*]], [[ENTRY:%.*]] ], [ [[PREDB3:%.*]], [[FLOW2]] ]
; CHECK-NEXT: [[TMP5:%.*]] = xor i1 [[GUARD_B1]], true
; CHECK-NEXT: br i1 [[TMP5]], label [[B4]], label [[FLOW]]
; CHECK-NEXT: [[GUARD_B1:%.*]] = phi i1 [ [[PREDENTRY:%.*]], [[ENTRY:%.*]] ], [ [[TMP3]], [[FLOW2]] ]
; CHECK-NEXT: [[GUARD_B1_INV:%.*]] = xor i1 [[GUARD_B1]], true
; CHECK-NEXT: br i1 [[GUARD_B1_INV]], label [[B4]], label [[FLOW]]
;
{
entry:
Expand Down
56 changes: 32 additions & 24 deletions llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,31 +40,36 @@ define void @irreducible_mountain_bug(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3
; CHECK: while.cond:
; CHECK-NEXT: br i1 [[PRED3_INV]], label [[LOR_RHS:%.*]], label [[FLOW12:%.*]]
; CHECK: Flow7:
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[PRED7:%.*]], [[COND_END61:%.*]] ], [ false, [[IRR_GUARD:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[COND_END61]] ], [ true, [[IRR_GUARD]] ]
; CHECK-NEXT: br i1 [[TMP4]], label [[COND_TRUE49:%.*]], label [[FLOW8:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[COND_END61:%.*]] ], [ undef, [[IRR_GUARD:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[COND_END61]] ], [ undef, [[IRR_GUARD]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[PRED7:%.*]], [[COND_END61]] ], [ false, [[IRR_GUARD]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[COND_END61]] ], [ true, [[IRR_GUARD]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[COND_TRUE49:%.*]], label [[FLOW8:%.*]]
; CHECK: cond.true49:
; CHECK-NEXT: br label [[FLOW8]]
; CHECK: Flow8:
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[COND_TRUE49]] ], [ true, [[FLOW7:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[PRED4_INV]], [[COND_TRUE49]] ], [ [[TMP3]], [[FLOW7]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[WHILE_BODY63:%.*]], label [[FLOW9:%.*]]
; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[COND_TRUE49]] ], [ [[TMP3]], [[FLOW7:%.*]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ false, [[COND_TRUE49]] ], [ [[TMP4]], [[FLOW7]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[PRED4_INV]], [[COND_TRUE49]] ], [ [[TMP5]], [[FLOW7]] ]
; CHECK-NEXT: br i1 [[TMP9]], label [[WHILE_BODY63:%.*]], label [[FLOW9:%.*]]
; CHECK: while.body63:
; CHECK-NEXT: br i1 [[PRED5_INV]], label [[WHILE_COND47:%.*]], label [[FLOW10:%.*]]
; CHECK: Flow9:
; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ true, [[FLOW10]] ], [ false, [[FLOW8]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ false, [[FLOW10]] ], [ [[TMP5]], [[FLOW8]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[TMP15:%.*]], [[FLOW10]] ], [ true, [[FLOW8]] ]
; CHECK-NEXT: [[DOTINV11:%.*]] = xor i1 [[TMP7]], true
; CHECK-NEXT: [[DOTINV:%.*]] = xor i1 [[TMP8]], true
; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_EXIT_GUARD1:%.*]], label [[IRR_GUARD]]
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP22:%.*]], [[FLOW10]] ], [ undef, [[FLOW8]] ]
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[FLOW10]] ], [ undef, [[FLOW8]] ]
; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ true, [[FLOW10]] ], [ [[TMP7]], [[FLOW8]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[FLOW10]] ], [ [[TMP8]], [[FLOW8]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi i1 [ [[TMP23:%.*]], [[FLOW10]] ], [ true, [[FLOW8]] ]
; CHECK-NEXT: [[DOTINV11:%.*]] = xor i1 [[TMP12]], true
; CHECK-NEXT: [[DOTINV:%.*]] = xor i1 [[TMP13]], true
; CHECK-NEXT: br i1 [[TMP14]], label [[LOOP_EXIT_GUARD1:%.*]], label [[IRR_GUARD]]
; CHECK: while.cond47:
; CHECK-NEXT: br label [[FLOW10]]
; CHECK: cond.end61:
; CHECK-NEXT: br label [[FLOW7]]
; CHECK: Flow14:
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ false, [[FLOW15:%.*]] ], [ true, [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ [[TMP14:%.*]], [[FLOW15]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: [[TMP15:%.*]] = phi i1 [ [[TMP20:%.*]], [[FLOW15:%.*]] ], [ undef, [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: [[TMP16:%.*]] = phi i1 [ [[TMP21:%.*]], [[FLOW15]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: br label [[FLOW13:%.*]]
; CHECK: if.then69:
; CHECK-NEXT: br label [[FLOW15]]
Expand Down Expand Up @@ -97,25 +102,28 @@ define void @irreducible_mountain_bug(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: Flow12:
; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ false, [[LOR_RHS]] ], [ true, [[WHILE_COND]] ]
; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ [[PRED9:%.*]], [[LOR_RHS]] ], [ [[PRED3]], [[WHILE_COND]] ]
; CHECK-NEXT: br i1 [[TMP13]], label [[IRR_GUARD]], label [[FLOW13]]
; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ true, [[LOR_RHS]] ], [ undef, [[WHILE_COND]] ]
; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[LOR_RHS]] ], [ true, [[WHILE_COND]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi i1 [ [[PRED9:%.*]], [[LOR_RHS]] ], [ [[PRED3]], [[WHILE_COND]] ]
; CHECK-NEXT: br i1 [[TMP19]], label [[IRR_GUARD]], label [[FLOW13]]
; CHECK: irr.guard:
; CHECK-NEXT: [[GUARD_COND_TRUE49:%.*]] = phi i1 [ [[PRED6:%.*]], [[FLOW9]] ], [ [[TMP12]], [[FLOW12]] ]
; CHECK-NEXT: [[GUARD_COND_TRUE49:%.*]] = phi i1 [ [[TMP10]], [[FLOW9]] ], [ [[TMP18]], [[FLOW12]] ]
; CHECK-NEXT: [[GUARD_COND_TRUE49_INV:%.*]] = xor i1 [[GUARD_COND_TRUE49]], true
; CHECK-NEXT: br i1 [[GUARD_COND_TRUE49_INV]], label [[COND_END61]], label [[FLOW7]]
; CHECK: Flow15:
; CHECK-NEXT: [[TMP14]] = phi i1 [ [[PRED8:%.*]], [[IF_THEN69:%.*]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD2:%.*]] ]
; CHECK-NEXT: [[TMP20]] = phi i1 [ false, [[IF_THEN69:%.*]] ], [ [[TMP11]], [[LOOP_EXIT_GUARD2:%.*]] ]
; CHECK-NEXT: [[TMP21]] = phi i1 [ [[PRED8:%.*]], [[IF_THEN69]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD2]] ]
; CHECK-NEXT: br label [[FLOW14:%.*]]
; CHECK: loop.exit.guard:
; CHECK-NEXT: br i1 [[TMP16:%.*]], label [[WHILE_END76:%.*]], label [[FLOW6]]
; CHECK-NEXT: br i1 [[TMP24:%.*]], label [[WHILE_END76:%.*]], label [[FLOW6]]
; CHECK: Flow10:
; CHECK-NEXT: [[TMP15]] = phi i1 [ false, [[WHILE_COND47]] ], [ true, [[WHILE_BODY63]] ]
; CHECK-NEXT: [[TMP22]] = phi i1 [ [[PRED6:%.*]], [[WHILE_COND47]] ], [ undef, [[WHILE_BODY63]] ]
; CHECK-NEXT: [[TMP23]] = phi i1 [ false, [[WHILE_COND47]] ], [ true, [[WHILE_BODY63]] ]
; CHECK-NEXT: br label [[FLOW9]]
; CHECK: Flow13:
; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW14]] ], [ true, [[FLOW12]] ]
; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[TMP11]], [[FLOW14]] ], [ true, [[FLOW12]] ]
; CHECK-NEXT: br i1 [[TMP17]], label [[LOOP_EXIT_GUARD:%.*]], label [[WHILE_COND]]
; CHECK-NEXT: [[TMP24]] = phi i1 [ [[TMP15]], [[FLOW14]] ], [ [[TMP17]], [[FLOW12]] ]
; CHECK-NEXT: [[TMP25:%.*]] = phi i1 [ [[TMP16]], [[FLOW14]] ], [ true, [[FLOW12]] ]
; CHECK-NEXT: br i1 [[TMP25]], label [[LOOP_EXIT_GUARD:%.*]], label [[WHILE_COND]]
; CHECK: loop.exit.guard1:
; CHECK-NEXT: br i1 [[DOTINV]], label [[LOOP_EXIT_GUARD2]], label [[FLOW14]]
; CHECK: loop.exit.guard2:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,18 @@ define void @exiting-block(i1 %PredH1, i1 %PredB2, i1 %PredB1, i1 %PredH2) {
; CHECK: H1:
; CHECK-NEXT: br i1 [[PREDH1_INV]], label [[B1:%.*]], label [[FLOW3:%.*]]
; CHECK: Flow3:
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[PREDB1:%.*]], [[B1]] ], [ [[PREDH1]], [[H1]] ]
; CHECK-NEXT: br i1 [[TMP0]], label [[H2:%.*]], label [[FLOW4:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, [[B1]] ], [ undef, [[H1]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[PREDB1:%.*]], [[B1]] ], [ [[PREDH1]], [[H1]] ]
; CHECK-NEXT: br i1 [[TMP1]], label [[H2:%.*]], label [[FLOW4:%.*]]
; CHECK: H2:
; CHECK-NEXT: br i1 [[PREDH2:%.*]], label [[B2:%.*]], label [[FLOW:%.*]]
; CHECK: B2:
; CHECK-NEXT: br i1 [[PREDB2_INV]], label [[L2:%.*]], label [[FLOW2:%.*]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[H2]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP4:%.*]], [[FLOW2]] ], [ true, [[H2]] ]
; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_EXIT_GUARD1:%.*]], label [[H2]]
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[FLOW2]] ], [ undef, [[H2]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[H2]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW2]] ], [ true, [[H2]] ]
; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_EXIT_GUARD1:%.*]], label [[H2]]
; CHECK: L2:
; CHECK-NEXT: br label [[FLOW2]]
; CHECK: L1:
Expand All @@ -49,19 +51,20 @@ define void @exiting-block(i1 %PredH1, i1 %PredB2, i1 %PredB1, i1 %PredH2) {
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: Flow5:
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[L1:%.*]] ], [ true, [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ undef, [[L1:%.*]] ], [ [[TMP2]], [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[L1]] ], [ true, [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: br label [[FLOW4]]
; CHECK: loop.exit.guard:
; CHECK-NEXT: br i1 [[TMP5:%.*]], label [[C:%.*]], label [[EXIT]]
; CHECK-NEXT: br i1 [[TMP8:%.*]], label [[C:%.*]], label [[EXIT]]
; CHECK: Flow2:
; CHECK-NEXT: [[TMP4]] = phi i1 [ false, [[L2]] ], [ true, [[B2]] ]
; CHECK-NEXT: [[TMP7]] = phi i1 [ false, [[L2]] ], [ true, [[B2]] ]
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow4:
; CHECK-NEXT: [[TMP5]] = phi i1 [ false, [[FLOW5]] ], [ true, [[FLOW3]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[TMP3]], [[FLOW5]] ], [ true, [[FLOW3]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_EXIT_GUARD:%.*]], label [[H1]]
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP5]], [[FLOW5]] ], [ [[TMP0]], [[FLOW3]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[TMP6]], [[FLOW5]] ], [ true, [[FLOW3]] ]
; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_EXIT_GUARD:%.*]], label [[H1]]
; CHECK: loop.exit.guard1:
; CHECK-NEXT: br i1 [[TMP1]], label [[L1]], label [[FLOW5]]
; CHECK-NEXT: br i1 [[TMP3]], label [[L1]], label [[FLOW5]]
;
entry:
br label %H1
Expand Down Expand Up @@ -112,37 +115,39 @@ define void @incorrect-backedge(i1 %PredH2, i1 %PredH3, i1 %PredL2, i1 %PredL13,
; CHECK: L2:
; CHECK-NEXT: br i1 [[PREDL2_INV]], label [[L13:%.*]], label [[FLOW3:%.*]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[FLOW3]] ], [ true, [[H3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP6:%.*]], [[FLOW3]] ], [ true, [[H3]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW3]] ], [ true, [[H3]] ]
; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_EXIT_GUARD2:%.*]], label [[H3]]
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW3]] ], [ true, [[H3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP7]], [[FLOW3]] ], [ false, [[H3]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW3]] ], [ true, [[H3]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW3]] ], [ true, [[H3]] ]
; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT_GUARD2:%.*]], label [[H3]]
; CHECK: L13:
; CHECK-NEXT: br label [[FLOW3]]
; CHECK: Flow5:
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP8:%.*]], [[LOOP_EXIT_GUARD1:%.*]] ], [ true, [[LOOP_EXIT_GUARD:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[LOOP_EXIT_GUARD1]] ], [ true, [[LOOP_EXIT_GUARD]] ]
; CHECK-NEXT: br i1 [[TMP4]], label [[L1:%.*]], label [[FLOW6:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP10:%.*]], [[LOOP_EXIT_GUARD1:%.*]] ], [ true, [[LOOP_EXIT_GUARD:%.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[LOOP_EXIT_GUARD1]] ], [ true, [[LOOP_EXIT_GUARD]] ]
; CHECK-NEXT: br i1 [[TMP5]], label [[L1:%.*]], label [[FLOW6:%.*]]
; CHECK: L1:
; CHECK-NEXT: br label [[FLOW6]]
; CHECK: Flow6:
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[PREDL1:%.*]], [[L1]] ], [ [[TMP3]], [[FLOW5:%.*]] ]
; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[H1]]
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ [[PREDL1:%.*]], [[L1]] ], [ [[TMP4]], [[FLOW5:%.*]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[EXIT:%.*]], label [[H1]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: loop.exit.guard:
; CHECK-NEXT: br i1 [[TMP11:%.*]], label [[LOOP_EXIT_GUARD1]], label [[FLOW5]]
; CHECK-NEXT: br i1 [[DOTINV:%.*]], label [[LOOP_EXIT_GUARD1]], label [[FLOW5]]
; CHECK: loop.exit.guard1:
; CHECK-NEXT: br label [[FLOW5]]
; CHECK: Flow3:
; CHECK-NEXT: [[TMP6]] = phi i1 [ true, [[L13]] ], [ false, [[L2]] ]
; CHECK-NEXT: [[TMP7]] = phi i1 [ [[PREDL13_INV]], [[L13]] ], [ true, [[L2]] ]
; CHECK-NEXT: [[TMP7]] = phi i1 [ false, [[L13]] ], [ undef, [[L2]] ]
; CHECK-NEXT: [[TMP8]] = phi i1 [ true, [[L13]] ], [ false, [[L2]] ]
; CHECK-NEXT: [[TMP9]] = phi i1 [ [[PREDL13_INV]], [[L13]] ], [ true, [[L2]] ]
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow4:
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP0]], [[LOOP_EXIT_GUARD2]] ], [ false, [[H2]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[LOOP_EXIT_GUARD2]] ], [ true, [[H2]] ]
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP1]], [[LOOP_EXIT_GUARD2]] ], [ true, [[H2]] ]
; CHECK-NEXT: [[TMP11]] = xor i1 [[TMP9]], true
; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP_EXIT_GUARD]], label [[H2]]
; CHECK-NEXT: [[TMP10]] = phi i1 [ [[TMP0]], [[LOOP_EXIT_GUARD2]] ], [ false, [[H2]] ]
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ [[TMP1]], [[LOOP_EXIT_GUARD2]] ], [ true, [[H2]] ]
; CHECK-NEXT: [[TMP12:%.*]] = phi i1 [ [[TMP2]], [[LOOP_EXIT_GUARD2]] ], [ true, [[H2]] ]
; CHECK-NEXT: [[DOTINV]] = xor i1 [[TMP11]], true
; CHECK-NEXT: br i1 [[TMP12]], label [[LOOP_EXIT_GUARD]], label [[H2]]
; CHECK: loop.exit.guard2:
; CHECK-NEXT: br label [[FLOW4]]
;
Expand Down