144 changes: 96 additions & 48 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll

Large diffs are not rendered by default.

493 changes: 262 additions & 231 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll

Large diffs are not rendered by default.

39 changes: 20 additions & 19 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ declare hidden void @external_void_func_void()
define void @tail_call_void_func_void() {
; CHECK-LABEL: name: tail_call_void_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
Expand All @@ -16,25 +16,26 @@ define void @tail_call_void_func_void() {
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
tail call void @external_void_func_void()
ret void
Expand Down
60 changes: 40 additions & 20 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -187,21 +187,25 @@ define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) {
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR: liveins: $vgpr0, $vgpr1
; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR: $m0 = S_MOV_B32 -1
; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
; GFX8-MIR: SI_RETURN implicit $vgpr0
; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR: liveins: $vgpr0, $vgpr1
; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
; GFX9-MIR: SI_RETURN implicit $vgpr0
; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
ret float %ret
}
Expand All @@ -223,21 +227,25 @@ define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) {
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR: liveins: $vgpr0, $vgpr1
; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR: $m0 = S_MOV_B32 -1
; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
; GFX8-MIR: SI_RETURN implicit $vgpr0
; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR: liveins: $vgpr0, $vgpr1
; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
; GFX9-MIR: SI_RETURN implicit $vgpr0
; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%gep = getelementptr float, float addrspace(3)* %ptr, i32 128
%ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
ret float %ret
Expand All @@ -260,19 +268,23 @@ define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) {
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR: liveins: $vgpr0, $vgpr1
; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR: $m0 = S_MOV_B32 -1
; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX8-MIR: SI_RETURN
; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR: S_SETPC_B64_return [[COPY3]]
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR: liveins: $vgpr0, $vgpr1
; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
; GFX9-MIR: SI_RETURN
; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR: S_SETPC_B64_return [[COPY3]]
%ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
ret void
}
Expand All @@ -294,19 +306,23 @@ define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) {
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR: liveins: $vgpr0, $vgpr1
; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR: $m0 = S_MOV_B32 -1
; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX8-MIR: SI_RETURN
; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR: S_SETPC_B64_return [[COPY3]]
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR: liveins: $vgpr0, $vgpr1
; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
; GFX9-MIR: SI_RETURN
; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR: S_SETPC_B64_return [[COPY3]]
%gep = getelementptr float, float addrspace(3)* %ptr, i32 128
%ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
ret void
Expand All @@ -329,21 +345,25 @@ define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) {
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile
; GFX8-MIR: bb.1 (%ir-block.0):
; GFX8-MIR: liveins: $vgpr0, $vgpr1
; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-MIR: $m0 = S_MOV_B32 -1
; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
; GFX8-MIR: SI_RETURN implicit $vgpr0
; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile
; GFX9-MIR: bb.1 (%ir-block.0):
; GFX9-MIR: liveins: $vgpr0, $vgpr1
; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
; GFX9-MIR: SI_RETURN implicit $vgpr0
; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
%ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
ret float %ret
}
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
Original file line number Diff line number Diff line change
Expand Up @@ -269,22 +269,22 @@ define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x)
; GFX10-NEXT: v_lshrrev_b16 v6, 8, v1
; GFX10-NEXT: ds_write_b8 v0, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX10-NEXT: v_lshrrev_b16 v7, 8, v2
; GFX10-NEXT: v_lshrrev_b16 v8, 8, v5
; GFX10-NEXT: v_lshrrev_b16 v8, 8, v2
; GFX10-NEXT: v_lshrrev_b16 v7, 8, v5
; GFX10-NEXT: ds_write_b8 v0, v2 offset:4
; GFX10-NEXT: ds_write_b8 v0, v6 offset:1
; GFX10-NEXT: ds_write_b8 v0, v5 offset:2
; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3
; GFX10-NEXT: ds_write_b8 v0, v7 offset:3
; GFX10-NEXT: v_lshrrev_b16 v2, 8, v1
; GFX10-NEXT: ds_write_b8 v0, v8 offset:3
; GFX10-NEXT: ds_write_b8 v0, v7 offset:5
; GFX10-NEXT: ds_write_b8 v0, v8 offset:5
; GFX10-NEXT: ds_write_b8 v0, v1 offset:6
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v3
; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3
; GFX10-NEXT: ds_write_b8 v0, v2 offset:7
; GFX10-NEXT: ds_write_b8 v0, v3 offset:8
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX10-NEXT: ds_write_b8 v0, v5 offset:9
; GFX10-NEXT: v_lshrrev_b16 v3, 8, v1
; GFX10-NEXT: ds_write_b8 v0, v5 offset:9
; GFX10-NEXT: v_lshrrev_b16 v5, 8, v4
; GFX10-NEXT: ds_write_b8 v0, v1 offset:10
; GFX10-NEXT: v_lshrrev_b16 v1, 8, v2
Expand Down Expand Up @@ -344,21 +344,21 @@ define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x)
; GFX10-NEXT: ds_write_b8 v0, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX10-NEXT: v_lshrrev_b16 v6, 8, v2
; GFX10-NEXT: v_lshrrev_b16 v7, 8, v4
; GFX10-NEXT: ds_write_b8 v0, v2 offset:4
; GFX10-NEXT: v_lshrrev_b16 v2, 8, v4
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX10-NEXT: ds_write_b8 v0, v5 offset:1
; GFX10-NEXT: ds_write_b8 v0, v4 offset:2
; GFX10-NEXT: ds_write_b8 v0, v7 offset:3
; GFX10-NEXT: v_lshrrev_b16 v4, 8, v1
; GFX10-NEXT: ds_write_b8 v0, v2 offset:3
; GFX10-NEXT: ds_write_b8 v0, v6 offset:5
; GFX10-NEXT: v_lshrrev_b16 v2, 8, v3
; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3
; GFX10-NEXT: ds_write_b8 v0, v1 offset:6
; GFX10-NEXT: v_lshrrev_b16 v1, 8, v7
; GFX10-NEXT: v_lshrrev_b16 v1, 8, v2
; GFX10-NEXT: ds_write_b8 v0, v6 offset:5
; GFX10-NEXT: ds_write_b8 v0, v4 offset:7
; GFX10-NEXT: ds_write_b8 v0, v3 offset:8
; GFX10-NEXT: ds_write_b8 v0, v2 offset:9
; GFX10-NEXT: ds_write_b8 v0, v7 offset:10
; GFX10-NEXT: ds_write_b8 v0, v5 offset:9
; GFX10-NEXT: ds_write_b8 v0, v2 offset:10
; GFX10-NEXT: ds_write_b8 v0, v1 offset:11
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
Expand Down
120 changes: 80 additions & 40 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_max_ValK0_K1_i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMIN %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -40,28 +44,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: min_max_ValK0_K1_i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMAX %7, %0
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMIN %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -71,28 +79,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_K1max_ValK0__i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMIN %8, %3
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -102,28 +114,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_K1max_K0Val__i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMAX %7, %0
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMIN %8, %3
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -133,28 +149,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_min_ValK1_K0_i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMIN %0, %7
%4:sgpr(s32) = G_CONSTANT i32 -12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMAX %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -164,28 +184,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_min_K1Val_K0_i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMIN %7, %0
%4:sgpr(s32) = G_CONSTANT i32 -12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMAX %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -195,28 +219,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_ValK1__i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMIN %0, %7
%4:sgpr(s32) = G_CONSTANT i32 -12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMAX %8, %3
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -226,28 +254,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_K1Val__i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMIN %7, %0
%4:sgpr(s32) = G_CONSTANT i32 -12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMAX %8, %3
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -257,12 +289,13 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
Expand All @@ -272,8 +305,10 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]]
; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%9:sgpr(s32) = G_CONSTANT i32 17
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
%10:sgpr(s32) = G_CONSTANT i32 -12
Expand All @@ -283,7 +318,8 @@ body: |
%12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
%7:vgpr(<2 x s16>) = G_SMAX %12, %4
$vgpr0 = COPY %7(<2 x s16>)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%8:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %8, implicit $vgpr0
...

---
Expand Down Expand Up @@ -325,26 +361,30 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_non_inline_constant_i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_SMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 65
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_SMIN %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
120 changes: 80 additions & 40 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_max_ValK0_K1_u32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMIN %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -40,28 +44,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: min_max_ValK0_K1_i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMAX %7, %0
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMIN %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -71,28 +79,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_K1max_ValK0__u32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMIN %8, %3
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -102,28 +114,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_min_K1max_K0Val__u32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMAX %7, %0
%4:sgpr(s32) = G_CONSTANT i32 17
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMIN %8, %3
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -133,28 +149,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_min_ValK1_K0_u32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMIN %0, %7
%4:sgpr(s32) = G_CONSTANT i32 12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMAX %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -164,28 +184,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_min_K1Val_K0_u32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMIN %7, %0
%4:sgpr(s32) = G_CONSTANT i32 12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMAX %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -195,28 +219,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_ValK1__u32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMIN %0, %7
%4:sgpr(s32) = G_CONSTANT i32 12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMAX %8, %3
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -226,28 +254,32 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_K1Val__u32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMIN %7, %0
%4:sgpr(s32) = G_CONSTANT i32 12
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMAX %8, %3
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...

---
Expand All @@ -257,12 +289,13 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
Expand All @@ -272,8 +305,10 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]]
; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%9:sgpr(s32) = G_CONSTANT i32 17
%2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
%10:sgpr(s32) = G_CONSTANT i32 12
Expand All @@ -283,7 +318,8 @@ body: |
%12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
%7:vgpr(<2 x s16>) = G_UMAX %12, %4
$vgpr0 = COPY %7(<2 x s16>)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%8:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %8, implicit $vgpr0
...

---
Expand Down Expand Up @@ -326,26 +362,30 @@ regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $vgpr0
liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-LABEL: name: test_non_inline_constant_i32
; CHECK: liveins: $vgpr0
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
%7:vgpr(s32) = COPY %2(s32)
%3:vgpr(s32) = G_UMAX %0, %7
%4:sgpr(s32) = G_CONSTANT i32 65
%8:vgpr(s32) = COPY %4(s32)
%5:vgpr(s32) = G_UMIN %3, %8
$vgpr0 = COPY %5(s32)
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
%6:ccr_sgpr_64 = COPY %1
S_SETPC_B64_return %6, implicit $vgpr0
...
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll
Original file line number Diff line number Diff line change
Expand Up @@ -401,13 +401,13 @@ define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX10-NEXT: v_rndne_f16_e32 v3, v0
; GFX10-NEXT: v_rndne_f16_e32 v2, v0
; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_rndne_f16_e32 v4, v1
; GFX10-NEXT: v_rndne_f16_e32 v3, v1
; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_and_or_b32 v0, v3, v2, v0
; GFX10-NEXT: v_and_or_b32 v1, v4, v2, v1
; GFX10-NEXT: v_and_or_b32 v0, v2, v4, v0
; GFX10-NEXT: v_and_or_b32 v1, v3, v4, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
ret <4 x half> %roundeven
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -642,25 +642,25 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1
; GFX10-NEXT: s_mov_b32 s4, 8
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3
; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7
; GFX10-NEXT: s_movk_i32 s4, 0xff
; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4
; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5
; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2
; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6
; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4
; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5
; GFX10-NEXT: v_mov_b32_e32 v4, 24
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_mov_b32_e32 v4, 24
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_add_i16 v0, v0, v1 clamp
; GFX10-NEXT: v_pk_add_i16 v1, v3, v2 clamp
; GFX10-NEXT: v_pk_add_i16 v1, v2, v3 clamp
; GFX10-NEXT: v_mov_b32_e32 v2, 8
; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1]
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -642,25 +642,25 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1
; GFX10-NEXT: s_mov_b32 s4, 8
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3
; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7
; GFX10-NEXT: s_movk_i32 s4, 0xff
; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4
; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5
; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2
; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6
; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4
; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5
; GFX10-NEXT: v_mov_b32_e32 v4, 24
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_mov_b32_e32 v4, 24
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
; GFX10-NEXT: v_pk_sub_i16 v1, v3, v2 clamp
; GFX10-NEXT: v_pk_sub_i16 v1, v2, v3 clamp
; GFX10-NEXT: v_mov_b32_e32 v2, 8
; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1]
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -471,25 +471,25 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1
; GFX10-NEXT: s_mov_b32 s4, 8
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3
; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7
; GFX10-NEXT: s_movk_i32 s4, 0xff
; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4
; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5
; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2
; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6
; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4
; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5
; GFX10-NEXT: v_mov_b32_e32 v4, 24
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_mov_b32_e32 v4, 24
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp
; GFX10-NEXT: v_pk_add_u16 v1, v3, v2 clamp
; GFX10-NEXT: v_pk_add_u16 v1, v2, v3 clamp
; GFX10-NEXT: v_mov_b32_e32 v2, 8
; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1]
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -459,25 +459,25 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1
; GFX10-NEXT: s_mov_b32 s4, 8
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3
; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7
; GFX10-NEXT: s_movk_i32 s4, 0xff
; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4
; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5
; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2
; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6
; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4
; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5
; GFX10-NEXT: v_mov_b32_e32 v4, 24
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_mov_b32_e32 v4, 24
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_sub_u16 v0, v0, v1 clamp
; GFX10-NEXT: v_pk_sub_u16 v1, v3, v2 clamp
; GFX10-NEXT: v_pk_sub_u16 v1, v2, v3 clamp
; GFX10-NEXT: v_mov_b32_e32 v2, 8
; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ define void @parent_func_missing_inputs() #0 {
; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0
; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 0
; FIXEDABI-NEXT: v_readlane_b32 s5, v40, 1
; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00
; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2
; FIXEDABI-NEXT: s_or_saveexec_b64 s[4:5], -1
; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1
; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; FIXEDABI-NEXT: s_mov_b64 exec, s[4:5]
; FIXEDABI-NEXT: s_mov_b64 exec, s[6:7]
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
; FIXEDABI-NEXT: s_setpc_b64 s[4:5]
call void @requires_all_inputs()
ret void
}
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,9 @@ attributes #0 = { nounwind }
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: no_stack_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x24{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: .sgpr_count: 0x26{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x2{{$}}
; GCN-NEXT: no_stack_extern_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
Expand Down Expand Up @@ -213,9 +213,9 @@ attributes #0 = { nounwind }
; GCN-NEXT: .vgpr_count: 0x2{{$}}
; GCN-NEXT: simple_stack_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x24{{$}}
; GCN-NEXT: .sgpr_count: 0x26{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: simple_stack_extern_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
Expand Down
294 changes: 147 additions & 147 deletions llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -903,80 +903,80 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 {
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_waitcnt expcnt(1)
; CHECK-NEXT: v_writelane_b32 v0, s30, 0
; CHECK-NEXT: v_writelane_b32 v0, s31, 1
; CHECK-NEXT: v_writelane_b32 v0, s33, 2
; CHECK-NEXT: v_writelane_b32 v0, s34, 3
; CHECK-NEXT: v_writelane_b32 v0, s35, 4
; CHECK-NEXT: v_writelane_b32 v0, s36, 5
; CHECK-NEXT: v_writelane_b32 v0, s37, 6
; CHECK-NEXT: v_writelane_b32 v0, s38, 7
; CHECK-NEXT: v_writelane_b32 v0, s39, 8
; CHECK-NEXT: v_writelane_b32 v0, s40, 9
; CHECK-NEXT: v_writelane_b32 v0, s41, 10
; CHECK-NEXT: v_writelane_b32 v0, s42, 11
; CHECK-NEXT: v_writelane_b32 v0, s43, 12
; CHECK-NEXT: v_writelane_b32 v0, s44, 13
; CHECK-NEXT: v_writelane_b32 v0, s45, 14
; CHECK-NEXT: v_writelane_b32 v0, s46, 15
; CHECK-NEXT: v_writelane_b32 v0, s47, 16
; CHECK-NEXT: v_writelane_b32 v0, s48, 17
; CHECK-NEXT: v_writelane_b32 v0, s49, 18
; CHECK-NEXT: v_writelane_b32 v0, s50, 19
; CHECK-NEXT: v_writelane_b32 v0, s51, 20
; CHECK-NEXT: v_writelane_b32 v0, s52, 21
; CHECK-NEXT: v_writelane_b32 v0, s53, 22
; CHECK-NEXT: v_writelane_b32 v0, s54, 23
; CHECK-NEXT: v_writelane_b32 v0, s55, 24
; CHECK-NEXT: v_writelane_b32 v0, s56, 25
; CHECK-NEXT: v_writelane_b32 v0, s57, 26
; CHECK-NEXT: v_writelane_b32 v0, s58, 27
; CHECK-NEXT: v_writelane_b32 v0, s59, 28
; CHECK-NEXT: v_writelane_b32 v0, s60, 29
; CHECK-NEXT: v_writelane_b32 v0, s61, 30
; CHECK-NEXT: v_writelane_b32 v0, s62, 31
; CHECK-NEXT: v_writelane_b32 v0, s63, 32
; CHECK-NEXT: v_writelane_b32 v0, s64, 33
; CHECK-NEXT: v_writelane_b32 v0, s65, 34
; CHECK-NEXT: v_writelane_b32 v0, s66, 35
; CHECK-NEXT: v_writelane_b32 v0, s67, 36
; CHECK-NEXT: v_writelane_b32 v0, s68, 37
; CHECK-NEXT: v_writelane_b32 v0, s69, 38
; CHECK-NEXT: v_writelane_b32 v0, s70, 39
; CHECK-NEXT: v_writelane_b32 v0, s71, 40
; CHECK-NEXT: v_writelane_b32 v0, s72, 41
; CHECK-NEXT: v_writelane_b32 v0, s73, 42
; CHECK-NEXT: v_writelane_b32 v0, s74, 43
; CHECK-NEXT: v_writelane_b32 v0, s75, 44
; CHECK-NEXT: v_writelane_b32 v0, s76, 45
; CHECK-NEXT: v_writelane_b32 v0, s77, 46
; CHECK-NEXT: v_writelane_b32 v0, s78, 47
; CHECK-NEXT: v_writelane_b32 v0, s79, 48
; CHECK-NEXT: v_writelane_b32 v0, s80, 49
; CHECK-NEXT: v_writelane_b32 v0, s81, 50
; CHECK-NEXT: v_writelane_b32 v0, s82, 51
; CHECK-NEXT: v_writelane_b32 v0, s83, 52
; CHECK-NEXT: v_writelane_b32 v0, s84, 53
; CHECK-NEXT: v_writelane_b32 v0, s85, 54
; CHECK-NEXT: v_writelane_b32 v0, s86, 55
; CHECK-NEXT: v_writelane_b32 v0, s87, 56
; CHECK-NEXT: v_writelane_b32 v0, s88, 57
; CHECK-NEXT: v_writelane_b32 v0, s33, 0
; CHECK-NEXT: v_writelane_b32 v0, s34, 1
; CHECK-NEXT: v_writelane_b32 v0, s35, 2
; CHECK-NEXT: v_writelane_b32 v0, s36, 3
; CHECK-NEXT: v_writelane_b32 v0, s37, 4
; CHECK-NEXT: v_writelane_b32 v0, s38, 5
; CHECK-NEXT: v_writelane_b32 v0, s39, 6
; CHECK-NEXT: v_writelane_b32 v0, s40, 7
; CHECK-NEXT: v_writelane_b32 v0, s41, 8
; CHECK-NEXT: v_writelane_b32 v0, s42, 9
; CHECK-NEXT: v_writelane_b32 v0, s43, 10
; CHECK-NEXT: v_writelane_b32 v0, s44, 11
; CHECK-NEXT: v_writelane_b32 v0, s45, 12
; CHECK-NEXT: v_writelane_b32 v0, s46, 13
; CHECK-NEXT: v_writelane_b32 v0, s47, 14
; CHECK-NEXT: v_writelane_b32 v0, s48, 15
; CHECK-NEXT: v_writelane_b32 v0, s49, 16
; CHECK-NEXT: v_writelane_b32 v0, s50, 17
; CHECK-NEXT: v_writelane_b32 v0, s51, 18
; CHECK-NEXT: v_writelane_b32 v0, s52, 19
; CHECK-NEXT: v_writelane_b32 v0, s53, 20
; CHECK-NEXT: v_writelane_b32 v0, s54, 21
; CHECK-NEXT: v_writelane_b32 v0, s55, 22
; CHECK-NEXT: v_writelane_b32 v0, s56, 23
; CHECK-NEXT: v_writelane_b32 v0, s57, 24
; CHECK-NEXT: v_writelane_b32 v0, s58, 25
; CHECK-NEXT: v_writelane_b32 v0, s59, 26
; CHECK-NEXT: v_writelane_b32 v0, s60, 27
; CHECK-NEXT: v_writelane_b32 v0, s61, 28
; CHECK-NEXT: v_writelane_b32 v0, s62, 29
; CHECK-NEXT: v_writelane_b32 v0, s63, 30
; CHECK-NEXT: v_writelane_b32 v0, s64, 31
; CHECK-NEXT: v_writelane_b32 v0, s65, 32
; CHECK-NEXT: v_writelane_b32 v0, s66, 33
; CHECK-NEXT: v_writelane_b32 v0, s67, 34
; CHECK-NEXT: v_writelane_b32 v0, s68, 35
; CHECK-NEXT: v_writelane_b32 v0, s69, 36
; CHECK-NEXT: v_writelane_b32 v0, s70, 37
; CHECK-NEXT: v_writelane_b32 v0, s71, 38
; CHECK-NEXT: v_writelane_b32 v0, s72, 39
; CHECK-NEXT: v_writelane_b32 v0, s73, 40
; CHECK-NEXT: v_writelane_b32 v0, s74, 41
; CHECK-NEXT: v_writelane_b32 v0, s75, 42
; CHECK-NEXT: v_writelane_b32 v0, s76, 43
; CHECK-NEXT: v_writelane_b32 v0, s77, 44
; CHECK-NEXT: v_writelane_b32 v0, s78, 45
; CHECK-NEXT: v_writelane_b32 v0, s79, 46
; CHECK-NEXT: v_writelane_b32 v0, s80, 47
; CHECK-NEXT: v_writelane_b32 v0, s81, 48
; CHECK-NEXT: v_writelane_b32 v0, s82, 49
; CHECK-NEXT: v_writelane_b32 v0, s83, 50
; CHECK-NEXT: v_writelane_b32 v0, s84, 51
; CHECK-NEXT: v_writelane_b32 v0, s85, 52
; CHECK-NEXT: v_writelane_b32 v0, s86, 53
; CHECK-NEXT: v_writelane_b32 v0, s87, 54
; CHECK-NEXT: v_writelane_b32 v0, s88, 55
; CHECK-NEXT: v_writelane_b32 v0, s89, 56
; CHECK-NEXT: v_writelane_b32 v0, s90, 57
; CHECK-NEXT: s_waitcnt expcnt(0)
; CHECK-NEXT: v_writelane_b32 v1, s95, 0
; CHECK-NEXT: v_writelane_b32 v0, s89, 58
; CHECK-NEXT: v_writelane_b32 v1, s96, 1
; CHECK-NEXT: v_writelane_b32 v0, s90, 59
; CHECK-NEXT: v_writelane_b32 v1, s97, 2
; CHECK-NEXT: v_writelane_b32 v0, s91, 60
; CHECK-NEXT: v_writelane_b32 v1, s98, 3
; CHECK-NEXT: v_writelane_b32 v0, s92, 61
; CHECK-NEXT: v_writelane_b32 v1, s99, 4
; CHECK-NEXT: v_writelane_b32 v0, s93, 62
; CHECK-NEXT: v_writelane_b32 v1, s100, 5
; CHECK-NEXT: s_mov_b32 s31, s12
; CHECK-NEXT: v_writelane_b32 v0, s94, 63
; CHECK-NEXT: v_writelane_b32 v1, s101, 6
; CHECK-NEXT: s_cmp_eq_u32 s31, 0
; CHECK-NEXT: v_writelane_b32 v1, s97, 0
; CHECK-NEXT: v_writelane_b32 v0, s91, 58
; CHECK-NEXT: v_writelane_b32 v1, s98, 1
; CHECK-NEXT: v_writelane_b32 v0, s92, 59
; CHECK-NEXT: v_writelane_b32 v1, s99, 2
; CHECK-NEXT: v_writelane_b32 v0, s93, 60
; CHECK-NEXT: v_writelane_b32 v1, s100, 3
; CHECK-NEXT: v_writelane_b32 v0, s94, 61
; CHECK-NEXT: v_writelane_b32 v1, s101, 4
; CHECK-NEXT: v_writelane_b32 v0, s95, 62
; CHECK-NEXT: v_writelane_b32 v1, s30, 5
; CHECK-NEXT: s_mov_b32 s29, s12
; CHECK-NEXT: v_writelane_b32 v0, s96, 63
; CHECK-NEXT: v_writelane_b32 v1, s31, 6
; CHECK-NEXT: s_cmp_eq_u32 s29, 0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: ;;#ASMEND
Expand Down Expand Up @@ -1336,6 +1336,7 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; reg use s5
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_readlane_b32 s4, v1, 5
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; reg use s6
; CHECK-NEXT: ;;#ASMEND
Expand Down Expand Up @@ -1630,83 +1631,82 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; reg use vcc_hi
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_readlane_b32 s101, v1, 6
; CHECK-NEXT: v_readlane_b32 s100, v1, 5
; CHECK-NEXT: v_readlane_b32 s99, v1, 4
; CHECK-NEXT: v_readlane_b32 s98, v1, 3
; CHECK-NEXT: v_readlane_b32 s97, v1, 2
; CHECK-NEXT: v_readlane_b32 s96, v1, 1
; CHECK-NEXT: v_readlane_b32 s95, v1, 0
; CHECK-NEXT: v_readlane_b32 s94, v0, 63
; CHECK-NEXT: v_readlane_b32 s93, v0, 62
; CHECK-NEXT: v_readlane_b32 s92, v0, 61
; CHECK-NEXT: v_readlane_b32 s91, v0, 60
; CHECK-NEXT: v_readlane_b32 s90, v0, 59
; CHECK-NEXT: v_readlane_b32 s89, v0, 58
; CHECK-NEXT: v_readlane_b32 s88, v0, 57
; CHECK-NEXT: v_readlane_b32 s87, v0, 56
; CHECK-NEXT: v_readlane_b32 s86, v0, 55
; CHECK-NEXT: v_readlane_b32 s85, v0, 54
; CHECK-NEXT: v_readlane_b32 s84, v0, 53
; CHECK-NEXT: v_readlane_b32 s83, v0, 52
; CHECK-NEXT: v_readlane_b32 s82, v0, 51
; CHECK-NEXT: v_readlane_b32 s81, v0, 50
; CHECK-NEXT: v_readlane_b32 s80, v0, 49
; CHECK-NEXT: v_readlane_b32 s79, v0, 48
; CHECK-NEXT: v_readlane_b32 s78, v0, 47
; CHECK-NEXT: v_readlane_b32 s77, v0, 46
; CHECK-NEXT: v_readlane_b32 s76, v0, 45
; CHECK-NEXT: v_readlane_b32 s75, v0, 44
; CHECK-NEXT: v_readlane_b32 s74, v0, 43
; CHECK-NEXT: v_readlane_b32 s73, v0, 42
; CHECK-NEXT: v_readlane_b32 s72, v0, 41
; CHECK-NEXT: v_readlane_b32 s71, v0, 40
; CHECK-NEXT: v_readlane_b32 s70, v0, 39
; CHECK-NEXT: v_readlane_b32 s69, v0, 38
; CHECK-NEXT: v_readlane_b32 s68, v0, 37
; CHECK-NEXT: v_readlane_b32 s67, v0, 36
; CHECK-NEXT: v_readlane_b32 s66, v0, 35
; CHECK-NEXT: v_readlane_b32 s65, v0, 34
; CHECK-NEXT: v_readlane_b32 s64, v0, 33
; CHECK-NEXT: v_readlane_b32 s63, v0, 32
; CHECK-NEXT: v_readlane_b32 s62, v0, 31
; CHECK-NEXT: v_readlane_b32 s61, v0, 30
; CHECK-NEXT: v_readlane_b32 s60, v0, 29
; CHECK-NEXT: v_readlane_b32 s59, v0, 28
; CHECK-NEXT: v_readlane_b32 s58, v0, 27
; CHECK-NEXT: v_readlane_b32 s57, v0, 26
; CHECK-NEXT: v_readlane_b32 s56, v0, 25
; CHECK-NEXT: v_readlane_b32 s55, v0, 24
; CHECK-NEXT: v_readlane_b32 s54, v0, 23
; CHECK-NEXT: v_readlane_b32 s53, v0, 22
; CHECK-NEXT: v_readlane_b32 s52, v0, 21
; CHECK-NEXT: v_readlane_b32 s51, v0, 20
; CHECK-NEXT: v_readlane_b32 s50, v0, 19
; CHECK-NEXT: v_readlane_b32 s49, v0, 18
; CHECK-NEXT: v_readlane_b32 s48, v0, 17
; CHECK-NEXT: v_readlane_b32 s47, v0, 16
; CHECK-NEXT: v_readlane_b32 s46, v0, 15
; CHECK-NEXT: v_readlane_b32 s45, v0, 14
; CHECK-NEXT: v_readlane_b32 s44, v0, 13
; CHECK-NEXT: v_readlane_b32 s43, v0, 12
; CHECK-NEXT: v_readlane_b32 s42, v0, 11
; CHECK-NEXT: v_readlane_b32 s41, v0, 10
; CHECK-NEXT: v_readlane_b32 s40, v0, 9
; CHECK-NEXT: v_readlane_b32 s39, v0, 8
; CHECK-NEXT: v_readlane_b32 s38, v0, 7
; CHECK-NEXT: v_readlane_b32 s37, v0, 6
; CHECK-NEXT: v_readlane_b32 s36, v0, 5
; CHECK-NEXT: v_readlane_b32 s35, v0, 4
; CHECK-NEXT: v_readlane_b32 s34, v0, 3
; CHECK-NEXT: v_readlane_b32 s33, v0, 2
; CHECK-NEXT: v_readlane_b32 s31, v0, 1
; CHECK-NEXT: v_readlane_b32 s30, v0, 0
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1
; CHECK-NEXT: v_readlane_b32 s5, v1, 6
; CHECK-NEXT: v_readlane_b32 s101, v1, 4
; CHECK-NEXT: v_readlane_b32 s100, v1, 3
; CHECK-NEXT: v_readlane_b32 s99, v1, 2
; CHECK-NEXT: v_readlane_b32 s98, v1, 1
; CHECK-NEXT: v_readlane_b32 s97, v1, 0
; CHECK-NEXT: v_readlane_b32 s96, v0, 63
; CHECK-NEXT: v_readlane_b32 s95, v0, 62
; CHECK-NEXT: v_readlane_b32 s94, v0, 61
; CHECK-NEXT: v_readlane_b32 s93, v0, 60
; CHECK-NEXT: v_readlane_b32 s92, v0, 59
; CHECK-NEXT: v_readlane_b32 s91, v0, 58
; CHECK-NEXT: v_readlane_b32 s90, v0, 57
; CHECK-NEXT: v_readlane_b32 s89, v0, 56
; CHECK-NEXT: v_readlane_b32 s88, v0, 55
; CHECK-NEXT: v_readlane_b32 s87, v0, 54
; CHECK-NEXT: v_readlane_b32 s86, v0, 53
; CHECK-NEXT: v_readlane_b32 s85, v0, 52
; CHECK-NEXT: v_readlane_b32 s84, v0, 51
; CHECK-NEXT: v_readlane_b32 s83, v0, 50
; CHECK-NEXT: v_readlane_b32 s82, v0, 49
; CHECK-NEXT: v_readlane_b32 s81, v0, 48
; CHECK-NEXT: v_readlane_b32 s80, v0, 47
; CHECK-NEXT: v_readlane_b32 s79, v0, 46
; CHECK-NEXT: v_readlane_b32 s78, v0, 45
; CHECK-NEXT: v_readlane_b32 s77, v0, 44
; CHECK-NEXT: v_readlane_b32 s76, v0, 43
; CHECK-NEXT: v_readlane_b32 s75, v0, 42
; CHECK-NEXT: v_readlane_b32 s74, v0, 41
; CHECK-NEXT: v_readlane_b32 s73, v0, 40
; CHECK-NEXT: v_readlane_b32 s72, v0, 39
; CHECK-NEXT: v_readlane_b32 s71, v0, 38
; CHECK-NEXT: v_readlane_b32 s70, v0, 37
; CHECK-NEXT: v_readlane_b32 s69, v0, 36
; CHECK-NEXT: v_readlane_b32 s68, v0, 35
; CHECK-NEXT: v_readlane_b32 s67, v0, 34
; CHECK-NEXT: v_readlane_b32 s66, v0, 33
; CHECK-NEXT: v_readlane_b32 s65, v0, 32
; CHECK-NEXT: v_readlane_b32 s64, v0, 31
; CHECK-NEXT: v_readlane_b32 s63, v0, 30
; CHECK-NEXT: v_readlane_b32 s62, v0, 29
; CHECK-NEXT: v_readlane_b32 s61, v0, 28
; CHECK-NEXT: v_readlane_b32 s60, v0, 27
; CHECK-NEXT: v_readlane_b32 s59, v0, 26
; CHECK-NEXT: v_readlane_b32 s58, v0, 25
; CHECK-NEXT: v_readlane_b32 s57, v0, 24
; CHECK-NEXT: v_readlane_b32 s56, v0, 23
; CHECK-NEXT: v_readlane_b32 s55, v0, 22
; CHECK-NEXT: v_readlane_b32 s54, v0, 21
; CHECK-NEXT: v_readlane_b32 s53, v0, 20
; CHECK-NEXT: v_readlane_b32 s52, v0, 19
; CHECK-NEXT: v_readlane_b32 s51, v0, 18
; CHECK-NEXT: v_readlane_b32 s50, v0, 17
; CHECK-NEXT: v_readlane_b32 s49, v0, 16
; CHECK-NEXT: v_readlane_b32 s48, v0, 15
; CHECK-NEXT: v_readlane_b32 s47, v0, 14
; CHECK-NEXT: v_readlane_b32 s46, v0, 13
; CHECK-NEXT: v_readlane_b32 s45, v0, 12
; CHECK-NEXT: v_readlane_b32 s44, v0, 11
; CHECK-NEXT: v_readlane_b32 s43, v0, 10
; CHECK-NEXT: v_readlane_b32 s42, v0, 9
; CHECK-NEXT: v_readlane_b32 s41, v0, 8
; CHECK-NEXT: v_readlane_b32 s40, v0, 7
; CHECK-NEXT: v_readlane_b32 s39, v0, 6
; CHECK-NEXT: v_readlane_b32 s38, v0, 5
; CHECK-NEXT: v_readlane_b32 s37, v0, 4
; CHECK-NEXT: v_readlane_b32 s36, v0, 3
; CHECK-NEXT: v_readlane_b32 s35, v0, 2
; CHECK-NEXT: v_readlane_b32 s34, v0, 1
; CHECK-NEXT: v_readlane_b32 s33, v0, 0
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: s_setpc_b64 s[4:5]
entry:
%cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() #0
%sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ define void @use_vcc() #1 {
; GCN: v_writelane_b32 v40, s30, 0
; GCN: v_writelane_b32 v40, s31, 1
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s31, v40, 1
; GCN: v_readlane_b32 s30, v40, 0
; GCN: v_readlane_b32 s4, v40, 0
; GCN: v_readlane_b32 s5, v40, 1
; GCN: v_readlane_b32 s33, v40, 2
; GCN: s_setpc_b64 s[30:31]
; GCN: ; NumSgprs: 36
; GCN: ; NumVgprs: 41
define void @indirect_use_vcc() #1 {
Expand Down
29 changes: 13 additions & 16 deletions llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,26 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_
; MUBUF: buffer_store_dword
; FLATSCR: scratch_store_dword
; GCN: v_writelane_b32 v40, s33, 4
; GCN: v_writelane_b32 v40, s30, 0
; GCN: v_writelane_b32 v40, s31, 1
; GCN: v_writelane_b32 v40, s34, 2
; GCN: v_writelane_b32 v40, s35, 3
; GCN: v_writelane_b32 v40, s34, 0
; GCN: v_writelane_b32 v40, s35, 1
; GCN: v_writelane_b32 v40, s30, 2
; GCN: v_writelane_b32 v40, s31, 3

; GCN: s_swappc_b64
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_swappc_b64
; GCN: v_readlane_b32 s35, v40, 3
; GCN: v_readlane_b32 s34, v40, 2
; MUBUF-DAG: v_readlane_b32 s31, v40, 1
; MUBUF-DAG: v_readlane_b32 s30, v40, 0
; FLATSCR-DAG: v_readlane_b32 s31, v40, 1
; FLATSCR-DAG: v_readlane_b32 s30, v40, 0
; MUBUF-DAG: v_readlane_b32 s4, v40, 2
; MUBUF-DAG: v_readlane_b32 s5, v40, 3
; FLATSCR-DAG: v_readlane_b32 s0, v40, 2
; FLATSCR-DAG: v_readlane_b32 s1, v40, 3
; GCN: v_readlane_b32 s35, v40, 1
; GCN: v_readlane_b32 s34, v40, 0

; GCN: v_readlane_b32 s33, v40, 4
; MUBUF: buffer_load_dword
; FLATSCR: scratch_load_dword
; GCN: s_setpc_b64 s[30:31]
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
call void @external_void_func_void()
call void asm sideeffect "", ""() #0
Expand Down Expand Up @@ -74,14 +74,11 @@ define void @test_func_call_external_void_funcx2() #0 {

; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
; GCN: s_waitcnt
; GCN: v_writelane_b32 v0, s30, 0
; GCN: v_writelane_b32 v0, s31, 1
; GCN-NEXT: s_mov_b64 [[SAVEPC:s\[[0-9]+:[0-9]+\]]], s[30:31]
; GCN-NEXT: #ASMSTART
; GCN: ; clobber
; GCN-NEXT: #ASMEND
; GCN: v_readlane_b32 s31, v0, 1
; GCN: v_readlane_b32 s30, v0, 0
; GCN: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64 [[SAVEPC]]
define void @void_func_void_clobber_s30_s31() #2 {
call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
ret void
Expand Down
73 changes: 46 additions & 27 deletions llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,10 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {

; GCN: s_swappc_b64

; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]]
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]]
; MUBUF-DAG: v_readlane_b32 s5, [[CSR_VGPR]]
; MUBUF-DAG: v_readlane_b32 s4, [[CSR_VGPR]]
; FLATSCR-DAG: v_readlane_b32 s0, [[CSR_VGPR]]
; FLATSCR-DAG: v_readlane_b32 s1, [[CSR_VGPR]]

; MUBUF: s_addk_i32 s32, 0xfc00{{$}}
; FLATSCR: s_add_i32 s32, s32, -16{{$}}
Expand All @@ -114,7 +116,7 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)

; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_and_call() #0 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
Expand Down Expand Up @@ -142,8 +144,10 @@ define void @callee_with_stack_and_call() #0 {
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
; GCN: s_swappc_b64

; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1
; MUBUF-DAG: v_readlane_b32 s4, v40, 0
; MUBUF-DAG: v_readlane_b32 s5, v40, 1
; FLATSCR-DAG: v_readlane_b32 s0, v40, 0
; FLATSCR-DAG: v_readlane_b32 s1, v40, 1

; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
Expand All @@ -153,7 +157,7 @@ define void @callee_with_stack_and_call() #0 {
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_with_call() #0 {
call void @external_void_func_void()
ret void
Expand Down Expand Up @@ -389,28 +393,31 @@ define void @realign_stack_no_fp_elim() #1 {
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-NEXT: v_writelane_b32 v0, s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; GCN: v_writelane_b32 [[CSR_VGPR]], s30, 0
; GCN-NEXT: v_writelane_b32 v0, s30, 0
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; MUBUF: s_addk_i32 s32, 0x300
; FLATSCR: s_add_i32 s32, s32, 12
; GCN: v_writelane_b32 [[CSR_VGPR]], s31, 1
; GCN: v_writelane_b32 v0, s31, 1
; MUBUF: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
; FLATSCR: scratch_store_dword off, [[ZERO]], s33{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN: ;;#ASMSTART
; GCN: v_readlane_b32 s31, [[CSR_VGPR]], 1
; GCN: v_readlane_b32 s30, [[CSR_VGPR]], 0
; MUBUF: s_addk_i32 s32, 0xfd00
; FLATSCR: s_add_i32 s32, s32, -12
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; MUBUF: s_addk_i32 s32, 0x300
; MUBUF-NEXT: v_readlane_b32 s4, v0, 0
; MUBUF-NEXT: v_readlane_b32 s5, v0, 1
; FLATSCR: s_add_i32 s32, s32, 12
; FLATSCR-NEXT: v_readlane_b32 s0, v0, 0
; FLATSCR-NEXT: v_readlane_b32 s1, v0, 1
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00
; FLATSCR-NEXT: s_add_i32 s32, s32, -12
; GCN-NEXT: v_readlane_b32 s33, v0, 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; MUBUF-NEXT: s_setpc_b64 s[4:5]
; FLATSCR-NEXT: s_setpc_b64 s[0:1]
define void @no_unused_non_csr_sgpr_for_fp() #1 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
Expand All @@ -434,22 +441,28 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; MUBUF: s_addk_i32 s32, 0x300{{$}}
; FLATSCR: s_add_i32 s32, s32, 12{{$}}
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0

; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
; MUBUF-DAG: buffer_store_dword
; FLATSCR-DAG: scratch_store_dword
; MUBUF: s_addk_i32 s32, 0x300{{$}}
; FLATSCR: s_add_i32 s32, s32, 12{{$}}

; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0
; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0
; GCN: ;;#ASMSTART
; MUBUF: s_addk_i32 s32, 0xfd00{{$}}
; FLATSCR: s_add_i32 s32, s32, -12{{$}}
; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1
; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1
; MUBUF-NEXT: s_addk_i32 s32, 0xfd00{{$}}
; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64
define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
Expand Down Expand Up @@ -481,15 +494,21 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}}
; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}}
; MUBUF-DAG: buffer_store_dword
; FLATSCR-DAG: scratch_store_dword

; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0
; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0
; GCN: ;;#ASMSTART
; MUBUF: s_add_i32 s32, s32, 0xfffbfd00{{$}}
; FLATSCR: s_addk_i32 s32, 0xeff4{{$}}
; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1
; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1
; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfd00{{$}}
; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100
Expand All @@ -498,7 +517,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64
define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #1 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
Expand Down Expand Up @@ -530,7 +549,7 @@ define internal void @local_empty_func() #0 {
; An FP is needed, despite not needing any spills
; TODO: Could see callee does not use stack and omit FP.
; GCN-LABEL: {{^}}ipra_call_with_stack:
; GCN: v_writelane_b32 v0, s33, 2
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN: s_mov_b32 s33, s32
; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
Expand All @@ -539,7 +558,7 @@ define internal void @local_empty_func() #0 {
; GCN: s_swappc_b64
; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN: v_readlane_b32 s33, v0, 2
; GCN: s_mov_b32 s33, [[FP_COPY]]
define void @ipra_call_with_stack() #0 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: v_readlane_b32 s30, v40, 0
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_x() #1 {
call void @use_workgroup_id_x()
ret void
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
; GCN-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64 s[4:5]
bb0:
%split.ret.type = call <2 x float> @func_v2f32()
br label %bb1
Expand All @@ -73,15 +73,15 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
; GCN-NEXT: s_add_u32 s16, s16, func_v3f32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_v3f32@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64 s[4:5]
bb0:
%split.ret.type = call <3 x float> @func_v3f32()
br label %bb1
Expand All @@ -107,15 +107,15 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
; GCN-NEXT: s_add_u32 s16, s16, func_v4f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_v4f16@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64 s[4:5]
bb0:
%split.ret.type = call <4 x half> @func_v4f16()
br label %bb1
Expand All @@ -141,16 +141,16 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
; GCN-NEXT: s_add_u32 s16, s16, func_struct@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_struct@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_mov_b32_e32 v1, v4
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_setpc_b64 s[4:5]
bb0:
%split.ret.type = call { <4 x i32>, <4 x half> } @func_struct()
br label %bb1
Expand Down
74 changes: 40 additions & 34 deletions llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,32 @@
define float @fdiv_f32(float %a, float %b) #0 {
; GCN-LABEL: name: fdiv_f32
; GCN: bb.0.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %8:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
; GCN-NEXT: %12:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %13:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %16:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %14:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %18:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %19:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode
; GCN-NEXT: $vcc = COPY %5
; GCN-NEXT: %18:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec
; GCN-NEXT: %19:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr0 = COPY %19
; GCN-NEXT: SI_RETURN implicit $vgpr0
; GCN-NEXT: $vcc = COPY %7
; GCN-NEXT: %20:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec
; GCN-NEXT: %21:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; GCN-NEXT: $vgpr0 = COPY %21
; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
entry:
%fdiv = fdiv float %a, %b
ret float %fdiv
Expand All @@ -38,29 +41,32 @@ entry:
define float @fdiv_nnan_f32(float %a, float %b) #0 {
; GCN-LABEL: name: fdiv_nnan_f32
; GCN: bb.0.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %8:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
; GCN-NEXT: %12:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %13:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode
; GCN-NEXT: $vcc = COPY %5
; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec
; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr0 = COPY %19
; GCN-NEXT: SI_RETURN implicit $vgpr0
; GCN-NEXT: $vcc = COPY %7
; GCN-NEXT: %20:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec
; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; GCN-NEXT: $vgpr0 = COPY %21
; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
entry:
%fdiv = fdiv nnan float %a, %b
ret float %fdiv
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@
define i32 @fp_save_restore_in_temp_sgpr(%struct.Data addrspace(5)* nocapture readonly byval(%struct.Data) align 4 %arg) #0 {
; GCN-LABEL: name: fp_save_restore_in_temp_sgpr
; GCN: bb.0.begin:
; GCN: liveins: $sgpr11
; GCN: liveins: $sgpr11, $sgpr30_sgpr31
; GCN: $sgpr11 = frame-setup COPY $sgpr33
; GCN: $sgpr33 = frame-setup COPY $sgpr32
; GCN: bb.1.lp_end:
; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31
; GCN: bb.2.lp_begin:
; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7
; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr30_sgpr31
; GCN: bb.3.Flow:
; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31
; GCN: bb.4.end:
; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5
; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5, $sgpr30_sgpr31
; GCN: $sgpr33 = frame-destroy COPY $sgpr11
begin:
br label %lp_begin
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/fpow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT: v_exp_f32_e32 v1, v2
; GFX8-NEXT: v_exp_f32_e32 v2, v2
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -220,9 +220,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT: v_exp_f32_e32 v1, v2
; GFX9-NEXT: v_exp_f32_e32 v2, v2
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -238,9 +238,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX90A-NEXT: v_log_f32_e32 v0, v0
; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT: v_exp_f32_e32 v1, v2
; GFX90A-NEXT: v_exp_f32_e32 v2, v2
; GFX90A-NEXT: v_exp_f32_e32 v0, v0
; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -302,9 +302,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT: v_exp_f32_e32 v1, v2
; GFX8-NEXT: v_exp_f32_e32 v2, v2
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -320,9 +320,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT: v_exp_f32_e32 v1, v2
; GFX9-NEXT: v_exp_f32_e32 v2, v2
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -338,9 +338,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX90A-NEXT: v_log_f32_e32 v0, v0
; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT: v_exp_f32_e32 v1, v2
; GFX90A-NEXT: v_exp_f32_e32 v2, v2
; GFX90A-NEXT: v_exp_f32_e32 v0, v0
; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -403,9 +403,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT: v_exp_f32_e32 v1, v2
; GFX8-NEXT: v_exp_f32_e32 v2, v2
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -421,9 +421,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT: v_exp_f32_e32 v1, v2
; GFX9-NEXT: v_exp_f32_e32 v2, v2
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -439,9 +439,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX90A-NEXT: v_log_f32_e32 v0, v0
; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT: v_exp_f32_e32 v1, v2
; GFX90A-NEXT: v_exp_f32_e32 v2, v2
; GFX90A-NEXT: v_exp_f32_e32 v0, v0
; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -509,9 +509,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT: v_exp_f32_e32 v1, v2
; GFX8-NEXT: v_exp_f32_e32 v2, v2
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -527,9 +527,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT: v_exp_f32_e32 v1, v2
; GFX9-NEXT: v_exp_f32_e32 v2, v2
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -545,9 +545,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX90A-NEXT: v_log_f32_e32 v0, v0
; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT: v_exp_f32_e32 v1, v2
; GFX90A-NEXT: v_exp_f32_e32 v2, v2
; GFX90A-NEXT: v_exp_f32_e32 v0, v0
; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT: s_setpc_b64 s[30:31]
Expand Down
Loading