36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ bb3:
; GCN-NEXT: s_cbranch_scc0 [[LONGBB:.LBB[0-9]+_[0-9]+]]

; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[ENDBB]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]

; GCN-NEXT: [[LONGBB]]:
; GCN-NEXT: ;;#ASMSTART
Expand Down Expand Up @@ -106,11 +106,11 @@ bb3:
; GCN: s_cbranch_vccz [[LONGBB:.LBB[0-9]+_[0-9]+]]

; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[ENDBB]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]

; GCN-NEXT: [[LONGBB]]:
; GCN: v_nop_e64
Expand Down Expand Up @@ -195,11 +195,11 @@ bb3:
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb2
; GCN-NEXT: ; in Loop: Header=[[LOOPBB]] Depth=1

; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOPBB]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOPBB]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]

; GCN-NEXT: [[ENDBB]]:
; GCN-NEXT: s_endpgm
Expand Down Expand Up @@ -230,11 +230,11 @@ bb3:
; GCN: s_cbranch_scc{{[0-1]}} [[BB1:.LBB[0-9]+_[0-9]+]]

; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB4:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB4]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC0_LO]]:[[PC0_HI]]]

; GCN: [[BB1]]:
; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
Expand Down Expand Up @@ -290,11 +290,11 @@ bb4:
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %loop
; GCN-NEXT: ; in Loop: Header=[[LOOP]] Depth=1

; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOP]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOP]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
entry:
Expand Down Expand Up @@ -326,11 +326,11 @@ loop:
; GCN: s_cbranch_vccz [[BB2:.LBB[0-9]_[0-9]+]]

; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}:
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_getpc_b64 s[[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC1_LO]]:[[PC1_HI]]]

; GCN-NEXT: [[BB2]]: ; %bb2
; GCN-NEXT: ;;#ASMSTART
Expand Down Expand Up @@ -385,11 +385,11 @@ bb3:
; GCN-NEXT: s_cbranch_execnz [[IF:.LBB[0-9]+_[0-9]+]]

; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %entry
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB2:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB2:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]

; GCN-NEXT: [[IF]]: ; %if
; GCN: s_cmp_lg_u32
Expand Down Expand Up @@ -448,11 +448,11 @@ endif:

; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %loop
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOP_BODY]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOP_BODY]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]

; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
; GCN-NEXT: s_endpgm
Expand Down Expand Up @@ -488,11 +488,11 @@ ret:
; GCN: s_cbranch_scc{{[0-1]}} [[LONG_BR_0:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:

; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LONG_BR_DEST0:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LONG_BR_DEST0]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[LONG_BR_0]]:

; GCN: [[LONG_BR_DEST0]]:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/build_vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
; R600-NOT: MOV
; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
; GFX678: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
; GFX678: buffer_store_dwordx2 v[[[X]]:[[Y]]]
; GFX10: global_store_dwordx2 v2, v[0:1], s[0:1]
define amdgpu_kernel void @build_vector2 (<2 x i32> addrspace(1)* %out) {
entry:
Expand All @@ -27,7 +27,7 @@ entry:
; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
; SI-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
; SI-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
; GFX678: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
; GFX678: buffer_store_dwordx4 v[[[X]]:[[W]]]
; GFX10: global_store_dwordx4 v4, v[0:3], s[0:1]
define amdgpu_kernel void @build_vector4 (<4 x i32> addrspace(1)* %out) {
entry:
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/AMDGPU/call-argument-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ declare hidden void @external_void_func_v16i8(<16 x i8>) #0

; MESA-DAG: s_mov_b64 s[0:1], s[36:37]

; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1@rel32@hi+12
; GCN-DAG: v_mov_b32_e32 v0, 1{{$}}
; MESA-DAG: s_mov_b64 s[2:3], s[38:39]

; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
call void @external_void_func_i1(i1 true)
Expand All @@ -84,11 +84,11 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]]
; MESA-DAG: s_mov_b32 s32, 0{{$}}

; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+12
; GCN-NEXT: v_bfe_i32 v0, [[VAR]], 0, 1
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
%var = load volatile i1, i1 addrspace(1)* undef
Expand All @@ -105,11 +105,11 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; MESA: buffer_load_ubyte [[VAL:v[0-9]+]]
; MESA-DAG: s_mov_b32 s32, 0{{$}}

; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+12
; GCN-NEXT: v_and_b32_e32 v0, 1, [[VAL]]
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
%var = load volatile i1, i1 addrspace(1)* undef
Expand All @@ -119,14 +119,14 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {

; GCN-LABEL: {{^}}test_call_external_void_func_i8_imm:

; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+12
; GCN-DAG: v_mov_b32_e32 v0, 0x7b

; GCN-DAG: s_mov_b32 s32, 0{{$}}

; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
call void @external_void_func_i8(i8 123)
Expand All @@ -137,14 +137,14 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; GCN-LABEL: {{^}}test_call_external_void_func_i8_signext:

; GCN-DAG: buffer_load_sbyte [[VAL:v[0-9]+]]
; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+12

; GCN-DAG: s_mov_b32 s32, 0

; GCN-NOT: s_waitcnt
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
%var = load volatile i8, i8 addrspace(1)* undef
Expand All @@ -155,14 +155,14 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; GCN-LABEL: {{^}}test_call_external_void_func_i8_zeroext:

; GCN-DAG: buffer_load_ubyte [[VAL:v[0-9]+]]
; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+12

; GCN-DAG: s_mov_b32 s32, 0

; GCN-NOT: s_waitcnt
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
%var = load volatile i8, i8 addrspace(1)* undef
Expand All @@ -184,14 +184,14 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; GCN-LABEL: {{^}}test_call_external_void_func_i16_signext:

; GCN-DAG: buffer_load_sshort [[VAL:v[0-9]+]]
; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+12

; GCN-DAG: s_mov_b32 s32, 0

; GCN-NOT: s_waitcnt
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
%var = load volatile i16, i16 addrspace(1)* undef
Expand All @@ -201,14 +201,14 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {

; GCN-LABEL: {{^}}test_call_external_void_func_i16_zeroext:

; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+12

; GCN-DAG: s_mov_b32 s32, 0

; GCN-NOT: s_waitcnt
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
%var = load volatile i16, i16 addrspace(1)* undef
Expand All @@ -218,13 +218,13 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {

; GCN-LABEL: {{^}}test_call_external_void_func_i32_imm:

; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+12
; GCN-DAG: v_mov_b32_e32 v0, 42
; GCN-DAG: s_mov_b32 s32, 0

; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
call void @external_void_func_i32(i32 42)
Expand All @@ -234,10 +234,10 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; GCN-LABEL: {{^}}test_call_external_void_func_i64_imm:
; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}}
; GCN-DAG: v_mov_b32_e32 v1, 0{{$}}
; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i64@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i64@rel32@hi+12
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
call void @external_void_func_i64(i64 123)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]]
; CIVI: {{flat|global}}_store_dword v[[[LO]]:[[HI]]]
define hidden void @use_queue_ptr_addrspacecast() #1 {
%asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
store volatile i32 0, i32* %asc
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32 addrspace(5
; on the leftover AssertZext's ValueType operand.

; GCN-LABEL: {{^}}cannot_select_assertzext_valuetype:
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC_LO]], g1@gotpcrel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC_HI]], g1@gotpcrel32@hi+12
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(half addrspace(1)* %
}

; GCN-LABEL: {{^}}v_clamp_add_src_v2f32:
; GCN: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}
; GCN: {{buffer|flat|global}}_load_dwordx2 v[[[A:[0-9]+]]:[[B:[0-9]+]]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, v[[A]], 1.0 clamp{{$}}
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, v[[B]], 1.0 clamp{{$}}
define amdgpu_kernel void @v_clamp_add_src_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %aptr) #0 {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; GCN: global_load_dwordx4 v[{{[0-9]*[02468]:[0-9]*[13579]}}], v{{[0-9]+}}, s[{{[0-9:]+}}]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v{{[0-9]*}}[[LO:[02468]]], v{{[0-9]+}}
; GCN-NEXT: global_store_dwordx2 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+\]}}, s[{{[0-9:]+}}]
; GCN-NEXT: global_store_dwordx2 v{{[0-9]+}}, v[[[LO]]:{{[0-9]+\]}}, s[{{[0-9:]+}}]

define amdgpu_kernel void @test_odd_int4(<4 x i32> addrspace(1)* %arg, <2 x i32> addrspace(1)* %arg1) {
bb:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/combine-ftrunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ define amdgpu_kernel void @combine_ftrunc_frint_f32(float addrspace(1)* %p) {

; GCN-LABEL: {{^}}combine_ftrunc_frint_v2f32:
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2 s{{\[}}[[SRC1:[0-9]+]]:[[SRC2:[0-9]+]]{{\]}}
; GCN: s_load_dwordx2 s[[[SRC1:[0-9]+]]:[[SRC2:[0-9]+]]]
; GCN-DAG: v_rndne_f32_e32 v[[RND1:[0-9]+]], s[[SRC1]]
; GCN-DAG: v_rndne_f32_e32 v[[RND2:[0-9]+]], s[[SRC2]]
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RND1]]:[[RND2]]{{\]}}
; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v[[[RND1]]:[[RND2]]]
define amdgpu_kernel void @combine_ftrunc_frint_v2f32(<2 x float> addrspace(1)* %p) {
%v = load <2 x float>, <2 x float> addrspace(1)* %p, align 8
%round = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %v)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/commute-compares.ll
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ define amdgpu_kernel void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrsp

; GCN-LABEL: {{^}}commute_ule_64_i64:
; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
; GCN: v_cmp_gt_u64_e32 vcc, s[[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ define amdgpu_vs float @load_addr_no_fold(i32 addrspace(6)* inreg noalias %p0) #
; GCN-LABEL: {{^}}vgpr_arg_src:
; GCN: v_readfirstlane_b32 s[[READLANE:[0-9]+]], v0
; GCN: s_mov_b32 s[[ZERO:[0-9]+]]
; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[READLANE]]:[[ZERO]]{{\]}}
; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[[[READLANE]]:[[ZERO]]]
define amdgpu_vs float @vgpr_arg_src(<4 x i32> addrspace(6)* %arg) {
main_body:
%tmp9 = load <4 x i32>, <4 x i32> addrspace(6)* %arg
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ define amdgpu_kernel void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GCN-NEXT: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
%vreg = load volatile i64, i64 addrspace(1)* undef
%ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
Expand All @@ -102,14 +102,14 @@ define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
; The neg1 appears after folding the not 0
; GCN-LABEL: {{^}}fold_mi_or_neg1:
; GCN: buffer_load_dwordx2
; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}
; GCN: buffer_load_dwordx2 v[[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]]

; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
%vreg0 = load volatile i64, i64 addrspace(1)* undef
%vreg1 = load volatile i64, i64 addrspace(1)* undef
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@
; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, s{{[0-9]+}}

; Spill saved exec
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; GCN: s_mov_b64 s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]

; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0
; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1
; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill

; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
; GCN: s_and_b64 s[[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]], s[[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]], [[CMP0]]
; GCN: s_mov_b64 exec, s[[[ANDEXEC_LO]]:[[ANDEXEC_HI]]]

; GCN: s_cbranch_execz [[ENDIF:.LBB[0-9]+_[0-9]+]]

Expand All @@ -57,7 +57,7 @@
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1

; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}}
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]

; Restore val
; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
Expand Down Expand Up @@ -92,7 +92,7 @@ endif:

; Spill load
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; GCN: s_mov_b64 s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], exec

; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
Expand All @@ -103,8 +103,8 @@ endif:
; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill


; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
; GCN: s_and_b64 s[[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]], s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], [[CMP0]]
; GCN: s_mov_b64 exec, s[[[ANDEXEC_LO]]:[[ANDEXEC_HI]]]
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]


Expand All @@ -129,7 +129,7 @@ endif:
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1

; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}}
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]]
Expand Down Expand Up @@ -168,9 +168,9 @@ end:
; GCN: s_mov_b32 [[ZERO:s[0-9]+]], 0
; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, [[ZERO]]

; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
; GCN: s_mov_b64 s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], exec
; GCN: s_and_b64 s[[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]], s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], [[CMP0]]
; GCN: s_xor_b64 s[[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]], s[[[ANDEXEC_LO]]:[[ANDEXEC_HI]]], s[[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]]

; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
Expand All @@ -195,14 +195,14 @@ end:
; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 1

; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}
; GCN: s_or_saveexec_b64 s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]], s[[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]]

; Regular spill value restored after exec modification
; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload
; Followed by spill
; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], 0 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill

; GCN: s_and_b64 s{{\[}}[[FLOW_AND_EXEC_LO:[0-9]+]]:[[FLOW_AND_EXEC_HI:[0-9]+]]{{\]}}, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]]{{\]}}
; GCN: s_and_b64 s[[[FLOW_AND_EXEC_LO:[0-9]+]]:[[FLOW_AND_EXEC_HI:[0-9]+]]], exec, s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]]]

; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_AND_EXEC_LO]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]]
Expand All @@ -212,7 +212,7 @@ end:
; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC]], s[[FLOW_AND_EXEC_HI]], 1
; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC]], off, s[0:3], 0 offset:[[FLOW_SAVEEXEC_OFFSET:[0-9]+]] ; 4-byte Folded Spill

; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_AND_EXEC_LO]]:[[FLOW_AND_EXEC_HI]]{{\]}}
; GCN: s_xor_b64 exec, exec, s[[[FLOW_AND_EXEC_LO]]:[[FLOW_AND_EXEC_HI]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9]+_[0-9]+]]


Expand All @@ -239,7 +239,7 @@ end:
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1

; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}}
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]

; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]]
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/ctpop64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ define amdgpu_kernel void @s_ctpop_i64(i32 addrspace(1)* noalias %out, [8 x i32]
}

; FUNC-LABEL: {{^}}v_ctpop_i64:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; GCN: {{buffer|flat}}_load_dwordx2 v[[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
Expand All @@ -44,13 +44,13 @@ define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrs
}

; FUNC-LABEL: {{^}}v_ctpop_i64_user:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; GCN: {{buffer|flat}}_load_dwordx2 v[[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -123,13 +123,13 @@ define amdgpu_kernel void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <
}

; FUNC-LABEL: {{^}}ctpop_i64_in_br:
; SI-DAG: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
; VI-DAG: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
; GCN-DAG: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
; SI-DAG: s_load_dwordx2 s[[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0xd
; VI-DAG: s_load_dwordx2 s[[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x34
; GCN-DAG: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]]
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[ZERO]]
; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]]
; GCN: s_endpgm
define amdgpu_kernel void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
Expand Down Expand Up @@ -179,8 +179,8 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val)
; FIXME: Should not have extra add

; FUNC-LABEL: {{^}}v_ctpop_i128:
; SI: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; VI: flat_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}
; SI: buffer_load_dwordx4 v[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; VI: flat_load_dwordx4 v[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], v{{\[[0-9]+:[0-9]+\]}}

; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0
; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]]
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ define amdgpu_kernel void @simple_read2_v2f32_superreg(<2 x float> addrspace(1)*
}

; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align4:
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Z:[0-9]+]]:[[REG_W:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; CI-DAG: ds_read2_b32 v[[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read2_b32 v[[[REG_Z:[0-9]+]]:[[REG_W:[0-9]+]]], v{{[0-9]+}} offset0:2 offset1:3{{$}}
; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_X]], v[[REG_Z]]
; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_Y]], v[[REG_W]]
; CI: v_add_f32_e32 v[[ADD2:[0-9]+]], v[[ADD0]], v[[ADD1]]
Expand All @@ -62,7 +62,7 @@ define amdgpu_kernel void @simple_read2_v4f32_superreg_align4(float addrspace(1)
}

; CI-LABEL: {{^}}simple_read2_v3f32_superreg_align4:
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read2_b32 v[[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read_b32 v[[REG_Z:[0-9]+]], v{{[0-9]+}} offset:8{{$}}
; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_X]], v[[REG_Z]]
; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[ADD0]], v[[REG_Y]]
Expand Down Expand Up @@ -149,9 +149,9 @@ define amdgpu_kernel void @simple_read2_v16f32_superreg(<16 x float> addrspace(1

; Do scalar loads into the super register we need.
; CI-LABEL: {{^}}simple_read2_v2f32_superreg_scalar_loads_align4:
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read2_b32 v[[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
; CI-NOT: v_mov {{v[0-9]+}}, {{[sv][0-9]+}}
; CI: buffer_store_dwordx2 v{{\[}}[[REG_ELT0]]:[[REG_ELT1]]{{\]}}
; CI: buffer_store_dwordx2 v[[[REG_ELT0]]:[[REG_ELT1]]]
; CI: s_endpgm
define amdgpu_kernel void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x float> addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
Expand All @@ -171,10 +171,10 @@ define amdgpu_kernel void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x

; Do scalar loads into the super register we need.
; CI-LABEL: {{^}}simple_read2_v4f32_superreg_scalar_loads_align4:
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT2:[0-9]+]]:[[REG_ELT3:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; CI-DAG: ds_read2_b32 v[[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read2_b32 v[[[REG_ELT2:[0-9]+]]:[[REG_ELT3:[0-9]+]]], v{{[0-9]+}} offset0:2 offset1:3{{$}}
; CI-NOT: v_mov {{v[0-9]+}}, {{[sv][0-9]+}}
; CI: buffer_store_dwordx4 v{{\[}}[[REG_ELT0]]:[[REG_ELT3]]{{\]}}
; CI: buffer_store_dwordx4 v[[[REG_ELT0]]:[[REG_ELT3]]]
; CI: s_endpgm
define amdgpu_kernel void @simple_read2_v4f32_superreg_scalar_loads_align4(<4 x float> addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; GCN: ds_read2st64_b32 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset1:1
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
Expand All @@ -31,7 +31,7 @@ define amdgpu_kernel void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; GCN: ds_read2st64_b32 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset0:1 offset1:2
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
Expand All @@ -54,7 +54,7 @@ define amdgpu_kernel void @simple_read2st64_f32_1_2(float addrspace(1)* %out, fl
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
; GCN: ds_read2st64_b32 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset0:1 offset1:255
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
Expand Down Expand Up @@ -139,9 +139,9 @@ define amdgpu_kernel void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out)
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; GCN: ds_read2st64_b64 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset1:1
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]]
; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
Expand All @@ -161,9 +161,9 @@ define amdgpu_kernel void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; GCN: ds_read2st64_b64 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset0:1 offset1:2
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]]

; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
Expand Down Expand Up @@ -208,9 +208,9 @@ define amdgpu_kernel void @misaligned_read2st64_f64(double addrspace(1)* %out, d
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
; GCN: ds_read2st64_b64 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset0:4 offset1:127
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]]

; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
; heuristics. Should not need -stress-early-ifcvt

; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle64:
; GCN: buffer_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; GCN: v_cmp_neq_f64_e32 vcc, 1.0, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
; GCN: v_add_f64 v{{\[}}[[ADD_LO:[0-9]+]]:[[ADD_HI:[0-9]+]]{{\]}}, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
; GCN: buffer_load_dwordx2 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GCN: v_cmp_neq_f64_e32 vcc, 1.0, v[[[VAL_LO]]:[[VAL_HI]]]
; GCN: v_add_f64 v[[[ADD_LO:[0-9]+]]:[[ADD_HI:[0-9]+]]], v[[[VAL_LO]]:[[VAL_HI]]], v[[[VAL_LO]]:[[VAL_HI]]]
; GCN-DAG: v_cndmask_b32_e32 v[[RESULT_LO:[0-9]+]], v[[ADD_LO]], v[[VAL_LO]], vcc
; GCN-DAG: v_cndmask_b32_e32 v[[RESULT_HI:[0-9]+]], v[[ADD_HI]], v[[VAL_HI]], vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
entry:
%v = load double, double addrspace(1)* %in
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ entry:
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
; GCN-DAG: s_lshl_b32 [[SEL:s[0-p]+]], s{{[0-9]+}}, 4
; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_short v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @half4_extelt(half addrspace(1)* %out, i32 %sel) {
Expand Down Expand Up @@ -220,7 +220,7 @@ entry:
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double8_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
%ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, i32 %sel
Expand All @@ -236,7 +236,7 @@ entry:
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double7_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
%ext = extractelement <7 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, i32 %sel
Expand Down Expand Up @@ -280,7 +280,7 @@ entry:
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double15_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
%ext = extractelement <15 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>, i32 %sel
Expand All @@ -296,7 +296,7 @@ entry:
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
%ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
Expand Down Expand Up @@ -353,7 +353,7 @@ entry:
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605
; GCN-DAG: s_lshl_b32 [[SEL:s[0-p]+]], s{{[0-9]+}}, 3
; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_byte v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @byte8_extelt(i8 addrspace(1)* %out, i32 %sel) {
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,13 @@ define amdgpu_kernel void @v_extractelement_v4f16_2(half addrspace(1)* %out, <4

; GCN-LABEL: {{^}}v_insertelement_v4f16_dynamic_vgpr:
; GCN-DAG: {{flat|global|buffer}}_load_dword [[IDX:v[0-9]+]],
; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, [[IDX]]

; GFX89: v_lshrrev_b64 v{{\[}}[[SHIFT_LO:[0-9]+]]:[[SHIFT_HI:[0-9]+]]{{\]}}, [[SCALED_IDX]], v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: v_lshrrev_b64 v[[[SHIFT_LO:[0-9]+]]:[[SHIFT_HI:[0-9]+]]], [[SCALED_IDX]], v[[[LO]]:[[HI]]]
; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[SHIFT_LO]]

; SI: v_lshr_b64 v{{\[}}[[SHIFT_LO:[0-9]+]]:[[SHIFT_HI:[0-9]+]]{{\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}, [[SCALED_IDX]]
; SI: v_lshr_b64 v[[[SHIFT_LO:[0-9]+]]:[[SHIFT_HI:[0-9]+]]], v[[[LO]]:[[HI]]], [[SCALED_IDX]]
; SI: buffer_store_short v[[SHIFT_LO]]
define amdgpu_kernel void @v_insertelement_v4f16_dynamic_vgpr(half addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ define amdgpu_kernel void @extract_vector_elt_v3i16(i16 addrspace(1)* %out, <3 x
; SI: buffer_store_short
; SI: buffer_store_short

; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]{{\]}}, s[0:1], 0x2c
; GFX89-DAG: s_load_dwordx2 s[[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]], s[0:1], 0x2c
; GFX89-DAG: v_mov_b32_e32 [[VLOAD0:v[0-9]+]], s[[LOAD0]]
; GFX89-DAG: buffer_store_short [[VLOAD0]], off
; GFX89-DAG: v_mov_b32_e32 [[VLOAD1:v[0-9]+]], s[[LOAD1]]
Expand All @@ -101,8 +101,8 @@ define amdgpu_kernel void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x
; SI: s_load_dwordx2 s
; SI: s_load_dwordx2 s

; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]{{\]}}, s[0:1], 0x24
; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]{{\]}}, s[0:1], 0x4c
; GFX89-DAG: s_load_dwordx2 s[[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]], s[0:1], 0x24
; GFX89-DAG: s_load_dwordx2 s[[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]], s[0:1], 0x4c
; GFX89-DAG: s_load_dword s{{[0-9]+}}, s[0:1], 0x54

; GCN-NOT: {{buffer|flat|global}}
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v4i8(i8 addrspace(1)* %out
; VI: s_load_dwordx2 [[VEC8:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0x0

; VI: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 3
; VI: s_lshr_b64 s{{\[}}[[EXTRACT_LO:[0-9]+]]:{{[0-9]+\]}}, [[VEC8]], [[SCALED_IDX]]
; VI: s_lshr_b64 s[[[EXTRACT_LO:[0-9]+]]:{{[0-9]+\]}}, [[VEC8]], [[SCALED_IDX]]
; VI: v_mov_b32_e32 [[V_EXTRACT:v[0-9]+]], s[[EXTRACT_LO]]
; VI: buffer_store_byte [[V_EXTRACT]]
define amdgpu_kernel void @dynamic_extract_vector_elt_v8i8(i8 addrspace(1)* %out, <8 x i8> addrspace(4)* %vec.ptr, i32 %idx) #0 {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fabs.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ define amdgpu_kernel void @s_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half
}

; GCN-LABEL: {{^}}s_fabs_v4f16:
; CI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2
; GFX89: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
; CI: s_load_dwordx2 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2
; GFX89: s_load_dwordx2 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x8

; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x7fff7fff
; GCN-DAG: s_and_b32 s{{[0-9]+}}, s[[LO]], [[MASK]]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fabs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ define amdgpu_kernel void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float
}

; GCN-LABEL: {{^}}fabs_fn_fold:
; SI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x2c
; SI: s_load_dwordx2 s[[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dwordx2 s[[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[MUL_VAL]]
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[ABS_VALUE]]|, [[V_MUL_VI]]
Expand All @@ -83,8 +83,8 @@ define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, fl
}

; FUNC-LABEL: {{^}}fabs_fold:
; SI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x2c
; SI: s_load_dwordx2 s[[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dwordx2 s[[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[MUL_VAL]]
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[ABS_VALUE]]|, [[V_MUL_VI]]
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)*
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 0.0)
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -294,7 +294,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %o
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double -0.0)
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -304,7 +304,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %o
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 1.0)
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -314,7 +314,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %o
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double -1.0)
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -324,7 +324,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %o
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 16.0)
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -334,7 +334,7 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -344,7 +344,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(dou
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -354,7 +354,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -364,7 +364,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(dou
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -374,7 +374,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -384,7 +384,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)*
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -394,7 +394,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double add
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -404,7 +404,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double add
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -414,7 +414,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspa
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9223372036854775807 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -424,7 +424,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspa
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
store double %canonicalized, double addrspace(1)* %out
Expand All @@ -434,7 +434,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspa
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double))
store double %canonicalized, double addrspace(1)* %out
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ entry:

; VI: v_cmp_nlt_f16_e32 vcc, v[[B_F16_1]], v[[A_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[R_I32_0]]:[[R_I32_1]]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_v2f16_nlt(
<2 x i32> addrspace(1)* %r,
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ entry:

; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f16_sign_f64:
; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v[[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; GCN-DAG: v_cvt_f32_f16_e32 v[[MAG_EXT:[0-9]+]], v[[MAG]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[MAG_EXT_LO:[0-9]+]]:[[MAG_EXT_HI:[0-9]+]]{{\]}}, v[[MAG_EXT]]
; GCN-DAG: v_cvt_f64_f32_e32 v[[[MAG_EXT_LO:[0-9]+]]:[[MAG_EXT_HI:[0-9]+]]], v[[MAG_EXT]]
; GCN: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_EXT_HI]], v[[SIGN_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[MAG_EXT_LO]]:[[OUT_HI]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[MAG_EXT_LO]]:[[OUT_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copysign_out_f64_mag_f16_sign_f64(
double addrspace(1)* %arg_out,
Expand Down Expand Up @@ -113,14 +113,14 @@ entry:
}

; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f64_sign_f16:
; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v[[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]]
; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_HI]], v[[SIGN_F32]]
; GFX89-DAG: v_lshlrev_b32_e32 v[[SIGN_SHIFT:[0-9]+]], 16, v[[SIGN]]
; GFX89: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_HI]], v[[SIGN_SHIFT]]
; GCN: buffer_store_dwordx2 v{{\[}}[[MAG_LO]]:[[OUT_HI]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[MAG_LO]]:[[OUT_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copysign_out_f64_mag_f64_sign_f16(
double addrspace(1)* %arg_out,
Expand Down Expand Up @@ -168,7 +168,7 @@ entry:

; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f16_sign_f64:
; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[MAG:[0-9]+]]
; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v[[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]]
; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_HI]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read

; Try to identify arg based on higher address.
; FUNC-LABEL: {{^}}test_copysign_f32:
; SI: s_load_dwordx2 s{{\[}}[[SMAG:[0-9]+]]:[[SSIGN:[0-9]+]]{{\]}}, {{.*}} 0xb
; VI: s_load_dwordx2 s{{\[}}[[SMAG:[0-9]+]]:[[SSIGN:[0-9]+]]{{\]}}, {{.*}} 0x2c
; SI: s_load_dwordx2 s[[[SMAG:[0-9]+]]:[[SSIGN:[0-9]+]]], {{.*}} 0xb
; VI: s_load_dwordx2 s[[[SMAG:[0-9]+]]:[[SSIGN:[0-9]+]]], {{.*}} 0x2c

; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], s[[SSIGN]]
; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], s[[SMAG]]
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind r
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone

; FUNC-LABEL: {{^}}test_copysign_f64:
; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x1d
; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x4c
; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x74
; SI-DAG: s_load_dwordx2 s[[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dwordx2 s[[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x1d
; VI-DAG: s_load_dwordx2 s[[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x4c
; VI-DAG: s_load_dwordx2 s[[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x74
; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2
; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[VMAG_LO]]:[[VRESULT_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copysign_f64(double addrspace(1)* %out, [8 x i32], double %mag, [8 x i32], double %sign) nounwind {
%result = call double @llvm.copysign.f64(double %mag, double %sign)
Expand All @@ -24,15 +24,15 @@ define amdgpu_kernel void @test_copysign_f64(double addrspace(1)* %out, [8 x i32
}

; FUNC-LABEL: {{^}}test_copysign_f64_f32:
; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x4c
; SI-DAG: s_load_dwordx2 s[[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
; VI-DAG: s_load_dwordx2 s[[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN-DAG: s_load_dword s[[SSIGN:[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}
; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VSIGN:[0-9]+]], s[[SSIGN]]
; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[VMAG_LO]]:[[VRESULT_HI]]]
define amdgpu_kernel void @test_copysign_f64_f32(double addrspace(1)* %out, [8 x i32], double %mag, float %sign) nounwind {
%c = fpext float %sign to double
%result = call double @llvm.copysign.f64(double %mag, double %c)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ define amdgpu_kernel void @div_fast_2_x_pat_f64(double addrspace(1)* %out) #1 {
; GCN-LABEL: {{^}}div_fast_k_x_pat_f64:
; GCN-DAG: v_mov_b32_e32 v[[K_LO:[0-9]+]], 0x9999999a
; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0x3fb99999
; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
; GCN: buffer_store_dwordx2 [[MUL]]
define amdgpu_kernel void @div_fast_k_x_pat_f64(double addrspace(1)* %out) #1 {
%x = load double, double addrspace(1)* undef
Expand All @@ -149,7 +149,7 @@ define amdgpu_kernel void @div_fast_k_x_pat_f64(double addrspace(1)* %out) #1 {
; GCN-LABEL: {{^}}div_fast_neg_k_x_pat_f64:
; GCN-DAG: v_mov_b32_e32 v[[K_LO:[0-9]+]], 0x9999999a
; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0xbfb99999
; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
; GCN: buffer_store_dwordx2 [[MUL]]
define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(double addrspace(1)* %out) #1 {
%x = load double, double addrspace(1)* undef
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg
}

; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DAG: s_load_dwordx4 s[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
Expand Down Expand Up @@ -112,7 +112,7 @@ define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg
; GCN-FLUSH: v_rcp_f32_e32
; GCN-FLUSH: v_rcp_f32_e32
; GCN-FLUSH: v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
%div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %load, !fpmath !0
Expand All @@ -121,7 +121,7 @@ define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
}

; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DAG: s_load_dwordx4 s[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
Expand Down Expand Up @@ -157,7 +157,7 @@ define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %
}

; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DAG: s_load_dwordx4 s[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
Expand Down Expand Up @@ -185,7 +185,7 @@ define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %
; GCN-FLUSH: v_rcp_f32_e64
; GCN-FLUSH: v_rcp_f32_e64
; GCN-FLUSH: v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
%neg = fneg <4 x float> %load
Expand All @@ -195,7 +195,7 @@ define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %
}

; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DAG: s_load_dwordx4 s[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
Expand Down Expand Up @@ -223,7 +223,7 @@ define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %
; GCN-FLUSH: v_rcp_f32_e32
; GCN-FLUSH: v_rcp_f32_e32
; GCN-FLUSH: v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
%neg = fneg <4 x float> %load
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ work:
; GCN: s_not_b64 exec, exec
%tmp1189 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 4, i32 1)

; GCN: s_or_saveexec_b64 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, -1
; GCN: s_or_saveexec_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], -1
; GCN: v_lshlrev_b32_e32 v[[tmp1191:[0-9]+]], 2, v[[tmp1189]]
%tmp1191 = mul i32 %tmp1189, 4

; GCN: s_mov_b64 exec, s{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: s_mov_b64 exec, s[[[LO]]:[[HI]]]
%tmp1196 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp1191)

%tmp34 = icmp eq i32 %arg, 0
Expand Down Expand Up @@ -65,11 +65,11 @@ work:
; GCN: s_not_b64 exec, exec
%tmp1189 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 4, i32 1)

; GCN: s_or_saveexec_b64 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, -1
; GCN: s_or_saveexec_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], -1
; GCN: v_lshlrev_b32_e32 v[[tmp1191:[0-9]+]], 2, v[[tmp1189]]
%tmp1191 = mul i32 %tmp1189, 4

; GCN: s_mov_b64 exec, s{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: s_mov_b64 exec, s[[[LO]]:[[HI]]]
%tmp1196 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp1191)

%tmp34 = icmp eq i32 %arg, 0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/flat-address-space.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,GFX10 %s

; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
; CHECK-DAG: s_load_dwordx2 s[[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
; CHECK: s_waitcnt lgkmcnt(0)
; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
; CHECK: flat_store_dword v[[[LO_VREG]]:[[HI_VREG]]], v[[DATA]]
define amdgpu_kernel void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
store volatile i32 %x, i32* %fptr, align 4
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -909,8 +909,8 @@ entry:
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
%gep = getelementptr i64, i64* %out, i64 4
Expand All @@ -931,8 +931,8 @@ entry:
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64* %out, i64 %index
Expand All @@ -952,8 +952,8 @@ entry:
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
%val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
Expand All @@ -972,8 +972,8 @@ entry:
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64* %out, i64 %index
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(float addrspace(
}

; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dwordx2 s[[[A:[0-9]+]]:[[B:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}

; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]

Expand All @@ -56,7 +56,7 @@ define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(float addrspace(1)* %out,
; Nsz also needed
; FIXME: Should separate tests
; GCN-LABEL: {{^}}s_test_fmin_legacy_ule_f32_nnan_src:
; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN: s_load_dwordx2 s[[[A:[0-9]+]]:[[B:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}

; GCN-DAG: v_add_f32_e64 [[ADD_A:v[0-9]+]], s[[A]], 1.0
; GCN-DAG: v_add_f32_e64 [[ADD_B:v[0-9]+]], s[[B]], 2.0
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/fmul.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,15 @@ entry:
}

; GCN-LABEL: {{^}}fmul_v4f16:
; GFX9: buffer_load_dwordx2 v{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}
; GFX9: buffer_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GFX9: buffer_load_dwordx2 v[[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]]
; GFX9: buffer_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]

; GFX9-DAG: v_pk_mul_f16 v[[MUL_LO:[0-9]+]], v[[A_LO]], v[[B_LO]]
; GFX9-DAG: v_pk_mul_f16 v[[MUL_HI:[0-9]+]], v[[A_HI]], v[[B_HI]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[MUL_HI]]{{\]}}
; GFX9: buffer_store_dwordx2 v[[[MUL_LO]]:[[MUL_HI]]]

; VI: buffer_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; VI: buffer_load_dwordx2 v{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}
; VI: buffer_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
; VI: buffer_load_dwordx2 v[[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]]
; VI: v_mul_f16_sdwa
; VI: v_mul_f16_e32
; VI: v_mul_f16_sdwa
Expand All @@ -194,13 +194,13 @@ entry:
}

; GCN-LABEL: {{^}}fmul_v4f16_imm_a:
; GFX89-DAG: buffer_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GFX89-DAG: buffer_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
; GFX9-DAG: s_mov_b32 [[K1:s[0-9]+]], 0x44004200
; GFX9-DAG: s_mov_b32 [[K0:s[0-9]+]], 0x40004800

; GFX9-DAG: v_pk_mul_f16 v[[MUL_LO:[0-9]+]], v[[A_LO]], [[K0]]
; GFX9-DAG: v_pk_mul_f16 v[[MUL_HI:[0-9]+]], v[[A_HI]], [[K1]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[MUL_HI]]{{\]}}
; GFX9: buffer_store_dwordx2 v[[[MUL_LO]]:[[MUL_HI]]]

; VI-DAG: v_mov_b32_e32 [[K4:v[0-9]+]], 0x4400

Expand All @@ -212,7 +212,7 @@ entry:
; VI-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[MUL_LO_LO]], v[[MUL_LO_HI]]
; VI-DAG: v_or_b32_e32 v[[OR1:[0-9]+]], v[[MUL_HI_LO]], v[[MUL_HI_HI]]

; VI: buffer_store_dwordx2 v{{\[}}[[OR0]]:[[OR1]]{{\]}}
; VI: buffer_store_dwordx2 v[[[OR0]]:[[OR1]]]
define amdgpu_kernel void @fmul_v4f16_imm_a(
<4 x half> addrspace(1)* %r,
<4 x half> addrspace(1)* %b) {
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.ll
Original file line number Diff line number Diff line change
Expand Up @@ -692,12 +692,12 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out,
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
; SI: v_max_f64 v[[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]], [[NEG_QUIET]], s[[[K_LO]]:[[K_HI]]]

; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
; VI: v_min_f64 v[[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]], [[A]], 0.15915494
; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
Expand All @@ -716,7 +716,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, d
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s[[[K_LO]]:[[K_HI]]]

; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
Expand Down Expand Up @@ -1534,10 +1534,10 @@ define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double add

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_cvt_f64_f32_e32 v[[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]], [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[CVT_LO]]:[[CVT_HI]]]
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
Expand All @@ -1553,10 +1553,10 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double add

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_cvt_f64_f32_e32 v[[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]], [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v[[[CVT_LO]]:[[CVT_HI]]], 4.0
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -1640,11 +1640,11 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %
}

; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
; GCN: {{buffer|flat}}_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v[[[A_LO]]:[[A_HI]]]
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[A_LO]]:[[NEG_A_HI]]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
Expand All @@ -1662,7 +1662,7 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrs
; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s[

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64
}

; GCN-LABEL: {{^}}fneg_fabs_f64:
; SI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x13
; VI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x4c
; SI-DAG: s_load_dwordx2 s[[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x13
; VI-DAG: s_load_dwordx2 s[[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x4c
; GCN-DAG: s_bitset1_b32 s[[HI_X]], 31
; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; GCN-DAG: v_mov_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[LO_V]]:[[HI_V]]]
define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, [8 x i32], double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
%fsub = fsub double -0.000000e+00, %fabs
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out,
; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000

; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s[[[K0_LO]]:[[K0_HI]]]
; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]

; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000

; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]], [[TRUNC]]
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out,
; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000

; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s[[[K0_LO]]:[[K0_HI]]]
; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]

; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000

; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]], [[TRUNC]]
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/fpext.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ entry:
; GCN-LABEL: {{^}}fpext_f16_to_f64
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: v_cvt_f64_f32_e32 v{{\[}}[[R_F64_0:[0-9]+]]:[[R_F64_1:[0-9]+]]{{\]}}, v[[A_F32]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_F64_0]]:[[R_F64_1]]{{\]}}
; GCN: v_cvt_f64_f32_e32 v[[[R_F64_0:[0-9]+]]:[[R_F64_1:[0-9]+]]], v[[A_F32]]
; GCN: buffer_store_dwordx2 v[[[R_F64_0]]:[[R_F64_1]]]
; GCN: s_endpgm
define amdgpu_kernel void @fpext_f16_to_f64(
double addrspace(1)* %r,
Expand All @@ -39,7 +39,7 @@ entry:
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[R_F32_1:[0-9]+]], v[[A_F16_1]]
; GFX89: v_cvt_f32_f16_sdwa v[[R_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GCN: buffer_store_dwordx2 v{{\[}}[[R_F32_0]]:[[R_F32_1]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[R_F32_0]]:[[R_F32_1]]]
; GCN: s_endpgm

define amdgpu_kernel void @fpext_v2f16_to_v2f32(
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ entry:
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: v_cvt_i32_f32_e32 v[[R_I64_Low:[0-9]+]], v[[A_F32]]
; GCN: v_ashrrev_i32_e32 v[[R_I64_High:[0-9]+]], 31, v[[R_I64_Low]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I64_Low]]{{\:}}[[R_I64_High]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i64(
i64 addrspace(1)* %r,
Expand Down Expand Up @@ -121,7 +121,7 @@ entry:
; VI-NOT: DEADBEEF
; VI-DAG: v_ashrrev_i32_e32 v[[R_I64_1_High:[0-9]+]], 31, v[[R_I64_1_Low]]
; VI-DAG: v_ashrrev_i32_e32 v[[R_I64_0_High:[0-9]+]], 31, v[[R_I64_0_Low]]
; GCN: buffer_store_dwordx4 v{{\[}}[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]{{\]}}
; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_v2f16_to_v2i64(
<2 x i64> addrspace(1)* %r,
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ entry:
; GCN: v_mov_b32_e32 v[[R_I64_High:[0-9]+]], 0
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: v_cvt_u32_f32_e32 v[[R_I64_Low:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I64_Low]]{{\:}}[[R_I64_High]]{{\]}}
; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_f16_to_i64(
i64 addrspace(1)* %r,
Expand Down Expand Up @@ -117,7 +117,7 @@ entry:
; VI: v_cvt_u32_f32_e32 v[[R_I64_0_Low:[0-9]+]], v[[A_F32_0]]
; VI: v_cvt_u32_f32_e32 v[[R_I64_1_Low:[0-9]+]], v[[A_F32_1]]
; GCN: v_mov_b32_e32 v[[R_I64_0_High:[0-9]+]], 0
; GCN: buffer_store_dwordx4 v{{\[}}[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]{{\]}}
; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_v2f16_to_v2i64(
<2 x i64> addrspace(1)* %r,
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ entry:
}

; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}}
; GCN: buffer_load_dwordx2 v[[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]]
; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v[[[A_F64_0]]:[[A_F64_1]]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
Expand All @@ -34,7 +34,7 @@ entry:
}

; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}}
; GCN: buffer_load_dwordx2 v[[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]]
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
Expand All @@ -60,9 +60,9 @@ entry:
}

; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16:
; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
; GCN: buffer_load_dwordx4 v[[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]]
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v[[[A_F64_0]]:{{[0-9]+}}]
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v[{{[0-9]+}}:[[A_F64_3]]]
; VI: v_cvt_f16_f32_sdwa v[[R_F16_HI:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
;
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/fract.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ declare double @llvm.fabs.f64(double) #0
declare double @llvm.floor.f64(double) #0

; FUNC-LABEL: {{^}}fract_f64:
; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
; SI-DAG: v_min_f64 v[[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v[[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v[[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], v[[[LO]]:[[HI]]], -v[[[RESLO]]:[[RESHI]]]
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], v[[[LO]]:[[HI]]], -[[SUB0]]

; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
; CI: v_floor_f64_e32 [[FLOORX:v\[[0-9]+:[0-9]+\]]], [[X]]
Expand All @@ -36,15 +36,15 @@ define amdgpu_kernel void @fract_f64(double addrspace(1)* %out, double addrspace
}

; FUNC-LABEL: {{^}}fract_f64_neg:
; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
; SI-DAG: v_min_f64 v[[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v[[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v[[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -v[[[LO]]:[[HI]]], -v[[[RESLO]]:[[RESHI]]]
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -v[[[LO]]:[[HI]]], -[[SUB0]]

; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
; CI: v_floor_f64_e64 [[FLOORX:v\[[0-9]+:[0-9]+\]]], -[[X]]
Expand All @@ -64,15 +64,15 @@ define amdgpu_kernel void @fract_f64_neg(double addrspace(1)* %out, double addrs
}

; FUNC-LABEL: {{^}}fract_f64_neg_abs:
; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
; SI-DAG: v_min_f64 v[[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v[[[UPLO]]:[[UPHI]]]
; SI-DAG: v_cmp_class_f64_e64 vcc, v[[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -[[SUB0]]
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -|v[[[LO]]:[[HI]]]|, -v[[[RESLO]]:[[RESHI]]]
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|v[[[LO]]:[[HI]]]|, -[[SUB0]]

; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
; CI: v_floor_f64_e64 [[FLOORX:v\[[0-9]+:[0-9]+\]]], -|[[X]]|
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/function-args.ll
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({
; GCN-DAG: buffer_load_dword v[[ARG1_LOAD0:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ARG1_LOAD0]]:[[ARG1_LOAD1]]{{\]}}, off
; GCN-DAG: buffer_store_dwordx2 v[[[ARG1_LOAD0]]:[[ARG1_LOAD1]]], off
define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 {
%arg0.load = load i32, i32 addrspace(5)* %arg0
%arg1.load = load i64, i64 addrspace(5)* %arg1
Expand All @@ -583,7 +583,7 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:12

; GCN: buffer_store_dword v[[LOAD_ARG1]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off
; GCN: buffer_store_dwordx2 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]], off
define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile i32 %arg1, i32 addrspace(1)* undef
Expand Down Expand Up @@ -626,8 +626,8 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:16{{$}}

; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_1]]{{\]}}, off
; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off
; GCN: buffer_store_dwordx2 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_1]]], off
; GCN: buffer_store_dwordx2 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]], off
define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef
Expand Down Expand Up @@ -658,8 +658,8 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s32 offset:28{{$}}
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s32 offset:32{{$}}

; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]], off
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off
define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef
Expand All @@ -678,8 +678,8 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s32 offset:28{{$}}
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s32 offset:32{{$}}

; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]], off
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off
define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef
Expand All @@ -706,10 +706,10 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s32 offset:60{{$}}
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s32 offset:64{{$}}

; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_4]]:[[LOAD_ARG1_7]]{{\]}}, off
; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_4]]:[[LOAD_ARG2_7]]{{\]}}, off
; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_4]]:[[LOAD_ARG1_7]]], off
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]], off
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_4]]:[[LOAD_ARG2_7]]], off
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off
define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/function-call-relocs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,31 @@ declare protected void @protected_func(i32 addrspace(1)* %out)
declare hidden void @hidden_func(i32 addrspace(1)* %out)

; CHECK-LABEL: call_func:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], func@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], func@gotpcrel32@hi+12
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0
; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]], 0x0
; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]]
define amdgpu_kernel void @call_func(i32 addrspace(1)* %out) {
call void @func(i32 addrspace(1)* %out)
ret void
}

; CHECK-LABEL: call_protected_func:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], protected_func@rel32@lo+4
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], protected_func@rel32@hi+12
; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]]
define amdgpu_kernel void @call_protected_func(i32 addrspace(1)* %out) {
call void @protected_func(i32 addrspace(1)* %out)
ret void
}

; CHECK-LABEL: call_hidden_func:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], hidden_func@rel32@lo+4
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], hidden_func@rel32@hi+12
; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]]
define amdgpu_kernel void @call_hidden_func(i32 addrspace(1)* %out) {
call void @hidden_func(i32 addrspace(1)* %out)
ret void
Expand All @@ -40,11 +40,11 @@ define amdgpu_kernel void @call_hidden_func(i32 addrspace(1)* %out) {
declare i64 @funci()

; CHECK-LABEL: tail_call_func:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], funci@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], funci@gotpcrel32@hi+12
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0
; CHECK: s_setpc_b64 s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]], 0x0
; CHECK: s_setpc_b64 s[[[ADDR_LO]]:[[ADDR_HI]]]
define i64 @tail_call_func() {
%ret = tail call i64 @funci()
ret i64 %ret
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/global-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
@available_externally = available_externally addrspace(4) global [256 x i32] zeroinitializer

; GCN-LABEL: {{^}}private_test:
; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
; GCN: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]]

; Non-R600 OSes use relocations.
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+12
; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
; GCN: s_getpc_b64 s[[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]]
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+12

Expand All @@ -30,7 +30,7 @@ define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
}

; GCN-LABEL: {{^}}available_externally_test:
; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
; GCN: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]]
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+12
; R600-LABEL: available_externally_test
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/global-extload-i16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ define amdgpu_kernel void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace
; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
; SI-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16, i16 addrspace(1)* %in
%ext = zext i16 %a to i64
Expand Down
Loading