22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/offset-split-global.ll
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -778,7 +778,7 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -883,7 +883,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -912,7 +912,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -941,7 +941,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -971,7 +971,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspa
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -1001,7 +1001,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspa
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -1031,7 +1031,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspa
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -1061,7 +1061,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspa
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -1091,7 +1091,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspa
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1800, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down Expand Up @@ -1121,7 +1121,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspa
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0x2000, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ define amdgpu_kernel void @clmem_read_simplified(i8 addrspace(1)* %buffer) {
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0x1000
Expand Down Expand Up @@ -622,7 +622,7 @@ define hidden amdgpu_kernel void @clmem_read(i8 addrspace(1)* %buffer) {
; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s35, v1, vcc_lo
; GFX10-NEXT: v_add_co_u32 v0, s0, s34, v2
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x5000, v3
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s35, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s35, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo
; GFX10-NEXT: .LBB1_1: ; %for.cond.preheader
; GFX10-NEXT: ; =>This Loop Header: Depth=1
Expand Down Expand Up @@ -1087,7 +1087,7 @@ define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) {
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, 0x800, v0
Expand Down Expand Up @@ -1367,7 +1367,7 @@ define amdgpu_kernel void @Offset64(i8 addrspace(1)* %buffer) {
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0xfffff800
Expand Down Expand Up @@ -1594,7 +1594,7 @@ define amdgpu_kernel void @p32Offset64(i8 addrspace(1)* %buffer) {
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0x80000000
Expand Down Expand Up @@ -1838,9 +1838,9 @@ define amdgpu_kernel void @DiffBase(i8 addrspace(1)* %buffer1,
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v0
; GFX10-NEXT: v_add_co_u32 v0, s0, s36, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s37, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s37, 0, s0
; GFX10-NEXT: v_add_co_u32 v14, s0, s38, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s0, s39, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v15, null, s39, 0, s0
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, 0x1800
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v14, 0x3000
Expand Down Expand Up @@ -2090,7 +2090,7 @@ define amdgpu_kernel void @ReverseOrder(i8 addrspace(1)* %buffer) {
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, 0x3800, v0
Expand Down Expand Up @@ -2366,7 +2366,7 @@ define hidden amdgpu_kernel void @negativeoffset(i8 addrspace(1)* nocapture %buf
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v2, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v3, v1, vcc_lo
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v4
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/saddo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ define amdgpu_kernel void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b
; GFX10-NEXT: s_xor_b32 s2, s2, s3
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX10-NEXT: s_endpgm
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
; MUBUF-NEXT: buffer_load_dword v2, v0, s[36:39], 0 offen offset:4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_add_nc_u32_e32 v0, v2, v1
; MUBUF-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
; MUBUF-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, 0x3039
; MUBUF-NEXT: buffer_store_dword v0, v0, s[36:39], 0 offen
; MUBUF-NEXT: .LBB0_2: ; %shader_eval_surface.exit
; MUBUF-NEXT: s_endpgm
Expand Down Expand Up @@ -71,7 +71,7 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:4
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: v_add_nc_u32_e32 v0, v1, v0
; FLATSCR-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
; FLATSCR-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, 0x3039
; FLATSCR-NEXT: scratch_store_dword off, v0, s0
; FLATSCR-NEXT: .LBB0_2: ; %shader_eval_surface.exit
; FLATSCR-NEXT: s_endpgm
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/udiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2788,18 +2788,18 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; GFX1030-NEXT: v_add_co_u32 v5, vcc_lo, v2, v3
; GFX1030-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s4, v4, vcc_lo
; GFX1030-NEXT: v_mul_hi_u32 v8, v0, v5
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], s4, v1, v5, 0
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], s4, v0, v6, 0
; GFX1030-NEXT: v_mad_u64_u32 v[6:7], s4, v1, v6, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, v5, 0
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, v6, 0
; GFX1030-NEXT: v_mad_u64_u32 v[6:7], null, v1, v6, 0
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v8, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX1030-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v3, v5, vcc_lo
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v7, vcc_lo
; GFX1030-NEXT: v_add_co_u32 v5, vcc_lo, v2, v6
; GFX1030-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v3, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], s4, 0x186a0, v5, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], s4, 0x186a0, v6, v[3:4]
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x186a0, v5, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x186a0, v6, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: v_subrev_co_u32 v2, vcc_lo, 0x186a0, v0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ define amdgpu_kernel void @livevariables_update_missed_block(i8 addrspace(1)* %s
; SI-NEXT: {{ $}}
; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset.cast, align 4, addrspace 4)
; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %50, 0, implicit $exec
; SI-NEXT: %43:vgpr_32, dead %45:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; SI-NEXT: %43:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %43, %subreg.sub1
; SI-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8) from %ir.i10, addrspace 1)
; SI-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/wave32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,8 @@ bb:
}

; GCN-LABEL: {{^}}test_div_scale_f32:
; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1032: v_div_scale_f32 v{{[0-9]+}}, null, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX1064: v_div_scale_f32 v{{[0-9]+}}, null, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
Expand All @@ -377,8 +377,8 @@ define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float ad
}

; GCN-LABEL: {{^}}test_div_scale_f64:
; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], s{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], null, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], null, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
Expand All @@ -394,8 +394,8 @@ define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double
}

; GCN-LABEL: {{^}}test_mad_i64_i32:
; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
%sext0 = sext i32 %arg0 to i64
%sext1 = sext i32 %arg1 to i64
Expand All @@ -405,8 +405,8 @@ define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
}

; GCN-LABEL: {{^}}test_mad_u64_u32:
; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
define i64 @test_mad_u64_u32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
%sext0 = zext i32 %arg0 to i64
%sext1 = zext i32 %arg1 to i64
Expand Down Expand Up @@ -478,8 +478,8 @@ exit:
}

; GCN-LABEL: {{^}}fdiv_f32:
; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX1032: v_div_scale_f32 v{{[0-9]+}}, null, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX1064: v_div_scale_f32 v{{[0-9]+}}, null, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_rcp_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; GFX1032: v_div_scale_f32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX1064: v_div_scale_f32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
Expand Down