Skip to content

Commit

Permalink
[MBP] Move a latch block with conditional exit and multi predecessors…
Browse files Browse the repository at this point in the history
… to top of loop

The current findBestLoopTop can find and move only one kind of block to the top of a loop: a latch block that has a single successor. Another common case is:

    * a latch block
    * it has two successors: one is the loop header, the other is an exit
    * it has more than one predecessor

If the latch block is laid out below one of its predecessors P, only P can fall through to it; every other predecessor needs a jump to reach it, followed by another conditional jump back to the loop header. If the latch block is instead moved before the loop header, all of its predecessors jump to it, and it then falls through to the loop header. As a result, every predecessor except P saves one taken branch.

Differential Revision: https://reviews.llvm.org/D43256

llvm-svn: 363471
  • Loading branch information
weiguozhi committed Jun 14, 2019
1 parent db88fc5 commit d2210af
Show file tree
Hide file tree
Showing 70 changed files with 1,357 additions and 816 deletions.
283 changes: 233 additions & 50 deletions llvm/lib/CodeGen/MachineBlockPlacement.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) {
; CHECK: mov w22, #2
; CHECK-NOT: mov w22, #4
; CHECK-NOT: cmn w22, #4
; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]]
; CHECK: [[LOOP2:LBB[0-9]+_[0-9]+]]: ; %for.cond
; CHECK-NOT: b.ne [[LOOP2]]
; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}}
; CHECK: bl _foo
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/neg-imm.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -disable-block-placement -o - %s | FileCheck %s
; LSR used to pick a sub-optimal solution due to the target responding
; conservatively to isLegalAddImmediate for negative values.

Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s

; CHECK-LABEL: test:
; CHECK: LBB0_7:
; CHECK: b.hi
; CHECK-NEXT: b
; CHECK-LABEL: %cond.false12.i
; CHECK: b.gt
; CHECK-NEXT: LBB0_8:
; CHECK-NEXT: mov x8, x9
; CHECK-NEXT: LBB0_9:
Expand Down
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,11 @@ bb.end: ; preds = %bb.then, %bb
; Make sure scc liveness is updated if sor_b64 is removed
; ALL-LABEL: {{^}}scc_liveness:

; GCN: %bb10
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execz

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
Expand All @@ -239,10 +244,6 @@ bb.end: ; preds = %bb.then, %bb

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execnz

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
Expand Down
41 changes: 21 additions & 20 deletions llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,39 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
; CHECK-NEXT: BB0_1: ; %loop
; CHECK-NEXT: s_branch BB0_3
; CHECK-NEXT: BB0_1: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: ; implicit-def: $vgpr1
; CHECK-NEXT: BB0_2: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5]
; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
; CHECK-NEXT: s_cbranch_execz BB0_7
; CHECK-NEXT: BB0_3: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec
; CHECK-NEXT: s_cbranch_vccz BB0_5
; CHECK-NEXT: ; %bb.2: ; %endif1
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: s_cbranch_vccz BB0_1
; CHECK-NEXT: ; %bb.4: ; %endif1
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_mov_b64 s[6:7], -1
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1]
; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
; CHECK-NEXT: ; mask branch BB0_4
; CHECK-NEXT: BB0_3: ; %endif2
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: ; mask branch BB0_6
; CHECK-NEXT: BB0_5: ; %endif2
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
; CHECK-NEXT: BB0_4: ; %Flow1
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: BB0_6: ; %Flow1
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; CHECK-NEXT: s_branch BB0_6
; CHECK-NEXT: BB0_5: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: ; implicit-def: $vgpr1
; CHECK-NEXT: BB0_6: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5]
; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
; CHECK-NEXT: s_cbranch_execnz BB0_1
; CHECK-NEXT: ; %bb.7: ; %Flow2
; CHECK-NEXT: s_branch BB0_2
; CHECK-NEXT: BB0_7: ; %Flow2
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; this is the divergent branch with the condition not marked as divergent
Expand Down
21 changes: 11 additions & 10 deletions llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s

; CHECK-LABEL: %bb11
; CHECK-LABEL: %bb22

; Load from %arg in a Loop body has alias store
; Load from %arg has alias store in Loop

; CHECK: flat_load_dword

; CHECK-LABEL: %bb20
; CHECK: flat_store_dword
; #####################################################################

; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]

; CHECK: s_load_dword

; #####################################################################

; CHECK-LABEL: %bb22
; CHECK-LABEL: %bb11

; Load from %arg has alias store in Loop
; Load from %arg in a Loop body has alias store

; CHECK: flat_load_dword

; #####################################################################

; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
; CHECK-LABEL: %bb20

; CHECK: s_load_dword
; CHECK: flat_store_dword

define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/hoist-cond.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck %s

; Check that invariant compare is hoisted out of the loop.
; At the same time condition shall not be serialized into a VGPR and deserialized later
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@

; SI-LABEL: {{^}}i1_copy_from_loop:
;
; SI: ; %Flow
; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec
; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]

; SI: ; %for.body
; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
; SI-DAG: s_andn2_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec
; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]

; SI: ; %Flow1
; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec

; SI: ; %Flow
; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]

; SI: ; %for.end
; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]]

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
Original file line number Diff line number Diff line change
Expand Up @@ -630,12 +630,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace(
; GCN-LABEL: {{^}}broken_phi_bb:
; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8

; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]]

; GCN: {{^BB[0-9]+_[0-9]+}}:
; GCN: s_mov_b64 exec,

; GCN: [[BB2]]:
; GCN: [[BB2:BB[0-9]+_[0-9]+]]:
; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
; GCN: buffer_load_dword

Expand All @@ -647,6 +642,11 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace(
; IDXMODE: s_set_gpr_idx_off

; GCN: s_cbranch_execnz [[REGLOOP]]

; GCN: {{^; %bb.[0-9]}}:
; GCN: s_mov_b64 exec,
; GCN: s_branch [[BB2]]

define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
bb:
br label %bb2
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/loop_break.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ loopexit:

; GCN-LABEL: {{^}}break_cond_is_arg:
; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]],
; GCN: s_andn2_b64 exec, exec, [[REG3]]
; GCN: s_or_b64 [[REG3]], [[REG2]],

define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
entry:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/madmk.ll
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,9 @@ define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias
}

; SI-LABEL: {{^}}kill_madmk_verifier_error:
; SI: s_or_b64
; SI: s_xor_b64
; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
; SI: s_or_b64
define amdgpu_kernel void @kill_madmk_verifier_error() nounwind {
bb:
br label %bb2
Expand Down
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/multilevel-break.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,29 @@
; GCN: ; %main_body
; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2
; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]]
; GCN: s_andn2_b64 exec, exec, [[TMP1]]
; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]]

; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}}
; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: ; %Flow
; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]]
; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]]
; GCN: s_andn2_b64 exec, exec, [[TMP0]]
; GCN: s_cbranch_execz [[FLOW2]]

; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}}
; GCN: s_or_b64 [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec
; GCN: s_or_b64 [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], exec
; GCN: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], exec
; GCN: s_and_saveexec_b64 [[SAVE_EXEC]], vcc

; FIXME: duplicate comparison
; GCN: ; %ENDIF
Expand All @@ -43,23 +59,7 @@
; GCN-DAG: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]]
; GCN-DAG: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]]

; GCN: ; %Flow
; GCN: s_or_b64 exec, exec, [[SAVE_EXEC]]
; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]]
; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]]
; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]]
; GCN: s_andn2_b64 exec, exec, [[TMP0]]
; GCN: s_cbranch_execnz [[INNER_LOOP]]

; GCN: ; %Flow2
; GCN: s_or_b64 exec, exec, [[TMP0]]
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]]
; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]]
; GCN: s_andn2_b64 exec, exec, [[TMP1]]
; GCN: s_cbranch_execnz [[OUTER_LOOP]]

; GCN: ; %IF
; GCN: [[IF_BLOCK]]: ; %IF
; GCN-NEXT: s_endpgm
define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
main_body:
Expand Down Expand Up @@ -92,12 +92,18 @@ ENDIF: ; preds = %LOOP
; GCN-LABEL: {{^}}multi_if_break_loop:
; GCN: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: ; %Flow4
; GCN: s_and_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK]]
; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]]
; GCN: s_andn2_b64 exec, exec, [[LEFT]]
; GCN-NEXT: s_cbranch_execz

; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}}
; GCN: s_mov_b64 [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
; GCN: s_mov_b64 [[OLD_LEFT]], [[LEFT]]

; GCN: ; %LeafBlock1
; GCN: s_mov_b64
; GCN: s_mov_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN: s_mov_b64 [[BREAK]], -1{{$}}

; GCN: ; %case1
; GCN: buffer_load_dword [[LOAD2:v[0-9]+]],
Expand All @@ -118,12 +124,6 @@ ENDIF: ; preds = %LOOP
; GCN-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec
; GCN: s_or_b64 [[BREAK]], [[BREAK]], [[TMP]]

; GCN: ; %Flow4
; GCN: s_and_b64 [[BREAK]], exec, [[BREAK]]
; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT]]
; GCN: s_andn2_b64 exec, exec, [[LEFT]]
; GCN-NEXT: s_cbranch_execnz

define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
; GCN-LABEL: {{^}}negated_cond:
; GCN: BB0_1:
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
; GCN: BB0_2:
; GCN: BB0_3:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_cmp
; GCN: s_andn2_b64 vcc, exec, [[CC]]
; GCN: s_cbranch_vccnz BB0_4
; GCN: s_cbranch_vccnz BB0_2
define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) {
bb:
br label %bb1
Expand Down Expand Up @@ -36,11 +36,11 @@ bb4:

; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
; GCN: BB1_1:
; GCN: %bb4
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_cmp
; GCN: s_andn2_b64 vcc, exec, [[CC]]
; GCN: s_cbranch_vccz BB1_3
; GCN: s_cbranch_vccnz BB1_1
define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
bb:
br label %bb2
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}loop_land_info_assert:
; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]]

; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond
; SI: s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]]

; SI: [[CONVEX_EXIT:BB[0-9_]+]]
; SI: s_mov_b64 vcc,
; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
; SI: s_cbranch_vccnz [[INFLOOP]]

; SI: s_cbranch_vccnz [[WHILELOOP]]

; SI: ; %if.else
; SI: buffer_store_dword

; SI: [[INFLOOP]]:
; SI: s_cbranch_vccnz [[CONVEX_EXIT]]

; SI: ; %for.cond.preheader
; SI: [[FOR_COND_PH]]: ; %for.cond.preheader
; SI: s_cbranch_vccz [[ENDPGM]]

; SI: [[ENDPGM]]:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/valu-i1.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement < %s | FileCheck -check-prefix=SI %s

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

Expand Down
Loading

0 comments on commit d2210af

Please sign in to comment.