Skip to content

Commit

Permalink
[AMDGPU] Disable 'Skip Uniform Regions' optimization by default for AMDGPU.
Browse files Browse the repository at this point in the history

Reviewers: sameerds, dstuttard

Differential Revision: https://reviews.llvm.org/D77228
  • Loading branch information
Konstantin Pyzhov authored and Konstantin Pyzhov committed Apr 6, 2020
1 parent ec69bac commit e1730cf
Show file tree
Hide file tree
Showing 32 changed files with 927 additions and 869 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Expand Up @@ -866,7 +866,7 @@ bool GCNPassConfig::addPreISel() {
if (EnableStructurizerWorkarounds) {
addPass(createUnifyLoopExitsPass());
}
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
addPass(createStructurizeCFGPass(false)); // false -> uniform regions are structurized too (SkipUniformRegions disabled)
}
addPass(createSinkingPass());
addPass(createAMDGPUAnnotateUniformValues());
Expand Down
Expand Up @@ -136,9 +136,10 @@ define void @constrained_if_register_class() {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cselect_b32 s4, 1, 0
; CHECK-NEXT: s_xor_b32 s4, s4, 1
; CHECK-NEXT: s_and_b32 s4, s4, 1
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc1 BB4_6
; CHECK-NEXT: s_cbranch_scc0 BB4_6
; CHECK-NEXT: ; %bb.1: ; %bb2
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4
Expand Down
87 changes: 55 additions & 32 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
Expand Up @@ -7,38 +7,45 @@
define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-LABEL: localize_constants:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT: s_mov_b32 s0, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_xor_b32 s1, s1, 1
; GFX9-NEXT: s_and_b32 s1, s1, 1
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
; GFX9-NEXT: s_cbranch_scc0 BB0_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT: ; %bb.1: ; %bb1
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: BB0_2: ; %bb1
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT: BB0_2: ; %Flow
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_cbranch_scc0 BB0_4
; GFX9-NEXT: ; %bb.3: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: BB0_4: ; %bb2
; GFX9-NEXT: s_endpgm
entry:
br i1 %cond, label %bb0, label %bb1
Expand Down Expand Up @@ -75,31 +82,46 @@ bb2:
define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-LABEL: localize_globals:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT: s_mov_b32 s0, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_xor_b32 s1, s1, 1
; GFX9-NEXT: s_and_b32 s1, s1, 1
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
; GFX9-NEXT: s_cbranch_scc0 BB1_2
; GFX9-NEXT: ; %bb.1: ; %bb1
; GFX9-NEXT: s_getpc_b64 s[2:3]
; GFX9-NEXT: s_add_u32 s2, s2, gv2@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s3, s3, gv2@gotpcrel32@hi+4
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, gv3@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, gv3@gotpcrel32@hi+4
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v2, 1
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: BB1_2: ; %Flow
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_cbranch_scc0 BB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: s_cbranch_scc0 BB1_4
; GFX9-NEXT: ; %bb.3: ; %bb0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, gv0@gotpcrel32@hi+4
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT: s_getpc_b64 s[2:3]
; GFX9-NEXT: s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s3, s3, gv1@gotpcrel32@hi+4
; GFX9-NEXT: s_branch BB1_3
; GFX9-NEXT: BB1_2: ; %bb1
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, gv2@gotpcrel32@hi+4
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_getpc_b64 s[2:3]
; GFX9-NEXT: s_add_u32 s2, s2, gv3@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s3, s3, gv3@gotpcrel32@hi+4
; GFX9-NEXT: BB1_3: ; %bb2
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v3, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -108,6 +130,7 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: global_store_dword v[0:1], v3, off
; GFX9-NEXT: BB1_4: ; %bb2
; GFX9-NEXT: s_endpgm
entry:
br i1 %cond, label %bb0, label %bb1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.ll
Expand Up @@ -7,11 +7,11 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) #0

define amdgpu_kernel void @long_branch_dbg_value(float addrspace(1)* nocapture %arg, float %arg1) #1 !dbg !5 {
; GCN-LABEL: long_branch_dbg_value:
; GCN: BB0_4: ; %bb
; GCN: BB0_5: ; %bb
; GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value]
; GCN-NEXT: .loc 1 0 42 is_stmt 0 ; /tmp/test_debug_value.cl:0:42
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], BB0_3-(BB0_4+4)
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], BB0_4-(BB0_5+4)
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0
; GCN-NEXT: s_setpc_b64
bb:
Expand Down
53 changes: 19 additions & 34 deletions llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
Expand Up @@ -224,32 +224,25 @@ bb3:

; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch:
; GCN: s_cmp_eq_u32
; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_scc{{[0-1]}} [[BB2:BB[0-9]+_[0-9]+]]

; GCN-NEXT: [[LONG_JUMP0:BB[0-9]+_[0-9]+]]: ; %bb0
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB3:BB[0-9]_[0-9]+]]-([[LONG_JUMP0]]+4)
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}}
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}

; GCN-NEXT: [[BB2]]: ; %bb2
; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
; GCN: buffer_store_dword [[BB2_K]]

; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB4:BB[0-9]_[0-9]+]]-([[LONG_JUMP1]]+4)
; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}}
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}}

; GCN: [[BB3]]: ; %bb3
; GCN: [[BB2]]: ; %bb3
; GCN: v_nop_e64
; GCN: v_nop_e64
; GCN: v_nop_e64
; GCN: v_nop_e64
; GCN: ;;#ASMEND

; GCN-NEXT: [[BB4]]: ; %bb4
; GCN: [[BB3]]:
; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
; GCN: buffer_store_dword [[BB2_K]]

; GCN: v_mov_b32_e32 [[BB4_K:v[0-9]+]], 63
; GCN: buffer_store_dword [[BB4_K]]
; GCN-NEXT: s_endpgm
Expand Down Expand Up @@ -317,23 +310,15 @@ loop:
; GCN-LABEL: {{^}}expand_requires_expand:
; GCN-NEXT: ; %bb.0: ; %bb0
; GCN: s_load_dword
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 0{{$}}
; GCN-NEXT: s_cbranch_scc0 [[BB1:BB[0-9]+_[0-9]+]]

; GCN-NEXT: [[LONGBB0:BB[0-9]+_[0-9]+]]: ; %bb0
; GCN: {{s|v}}_cmp_lt_i32
; GCN: s_cbranch

; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB0]]+4)
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}}
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}

; GCN-NEXT: [[BB1]]: ; %bb1
; GCN-NEXT: s_load_dword
; GCN: s_load_dword
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_eq_u32 s{{[0-9]+}}, 3{{$}}
; GCN-NEXT: s_cbranch_scc0 [[BB2:BB[0-9]_[0-9]+]]
; GCN-NEXT: v_cmp_{{eq|ne}}_u32_e64
; GCN: s_cbranch_vccz [[BB2:BB[0-9]_[0-9]+]]

; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB3:BB[0-9]+_[0-9]+]]-([[LONGBB1]]+4)
; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}}
Expand Down Expand Up @@ -451,7 +436,7 @@ endif:
; GCN: v_nop_e64
; GCN: v_nop_e64
; GCN: ;;#ASMEND
; GCN: s_cbranch_vccz [[RET:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_{{vccz|vccnz}} [[RET:BB[0-9]+_[0-9]+]]

; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
Expand Down Expand Up @@ -491,22 +476,22 @@ ret:

; GCN-LABEL: {{^}}long_branch_hang:
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
; GCN: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_scc{{[0-1]}} [[LONG_BR_0:BB[0-9]+_[0-9]+]]
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64

; GCN-NEXT: [[LONG_BR_0]]:
; GCN-DAG: v_cmp_lt_i32
; GCN-DAG: v_cmp_gt_i32
; GCN: s_cbranch_vccnz

; GCN: s_setpc_b64
; GCN: s_setpc_b64

; GCN: [[LONG_BR_DEST0]]

; GCN: s_cbranch_vccnz
; GCN-DAG: v_cmp_lt_i32
; GCN-DAG: v_cmp_ge_i32

; GCN: s_cbranch_vccz
; GCN: s_setpc_b64

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
Expand Up @@ -8,8 +8,8 @@
;
; CHECK-LABEL: {{^}}main:
; CHECK: ; %LOOP49
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc1
; CHECK: s_cmp_{{lg|eq}}_u32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc{{[0-1]}}
; CHECK: ; %ENDIF53
define amdgpu_vs float @main(i32 %in) {
main_body:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
Expand Up @@ -102,7 +102,7 @@ for.body:
; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80
; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 4

; GCN: s_cbranch_vccnz [[LOOPBB]]
; GCN: s_cbranch_{{vccz|vccnz}} [[LOOPBB]]
; GCN-NEXT: ; %bb.2
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @loop_arg_0(float addrspace(3)* %ptr, i32 %n) nounwind {
Expand Down
37 changes: 16 additions & 21 deletions llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
Expand Up @@ -27,13 +27,12 @@

; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc1
; GCN: s_cbranch_scc{{[0-1]}}

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
; GCN: BB0_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008

; GCN: BB0_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008

; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
Expand Down Expand Up @@ -122,16 +121,15 @@ ret:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc1
; GCN: s_cbranch_scc{{[0-1]}}

; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff

; GCN: BB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f

; GCN: BB2_3:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff

; GCN: buffer_store_short
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
Expand Down Expand Up @@ -177,14 +175,13 @@ ret:

; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:

; GCN: s_cbranch_scc{{[0-1]}} BB3_2
; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: s_cbranch_scc1 BB3_2
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: BB3_2:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]

; GCN: BB3_3:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
Expand Down Expand Up @@ -226,14 +223,13 @@ ret:

; GCN-LABEL: {{^}}sink_ubfe_i64_low32:

; GCN: s_cbranch_scc1 BB4_2
; GCN: s_cbranch_scc{{[0-1]}} BB4_2

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: BB4_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f

; GCN: BB4_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
Expand Down Expand Up @@ -274,13 +270,12 @@ ret:
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc1 BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: BB5_2:
; GCN: s_cbranch_scc{{[0-1]}} BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003

; GCN: BB5_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
Expand Down

0 comments on commit e1730cf

Please sign in to comment.