56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

; FIXME: This should be merged with uint_to_fp.ll, but s_uint_to_fp_v2i64 crashes on r600

define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 {
define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 {
; GFX6-LABEL: s_uint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
Expand Down Expand Up @@ -43,11 +43,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_short v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = uitofp i64 %in to half
store half %result, half addrspace(1)* %out
store half %result, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
Expand Down Expand Up @@ -99,15 +99,15 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_short v[0:1], v3
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %in.gep
%result = uitofp i64 %val to half
store half %result, half addrspace(1)* %out.gep
store half %result, ptr addrspace(1) %out.gep
ret void
}

define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
; GFX6-LABEL: s_uint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
Expand Down Expand Up @@ -144,11 +144,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = uitofp i64 %in to float
store float %result, float addrspace(1)* %out
store float %result, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
Expand Down Expand Up @@ -198,15 +198,15 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %in.gep
%result = uitofp i64 %val to float
store float %result, float addrspace(1)* %out.gep
store float %result, ptr addrspace(1) %out.gep
ret void
}

define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{
define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{
; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
Expand Down Expand Up @@ -259,11 +259,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_endpgm
%result = uitofp <2 x i64> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
store <2 x float> %result, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
Expand Down Expand Up @@ -368,15 +368,15 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3]
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
%in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
%value = load <4 x i64>, ptr addrspace(1) %in.gep
%result = uitofp <4 x i64> %value to <4 x float>
store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
store <4 x float> %result, ptr addrspace(1) %out.gep
ret void
}

define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{
define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{
; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
Expand Down Expand Up @@ -436,11 +436,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = uitofp <2 x i64> %in to <2 x half>
store <2 x half> %result, <2 x half> addrspace(1)* %out
store <2 x half> %result, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
Expand Down Expand Up @@ -559,11 +559,11 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid
%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
%in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr <4 x half>, ptr addrspace(1) %out, i32 %tid
%value = load <4 x i64>, ptr addrspace(1) %in.gep
%result = uitofp <4 x i64> %value to <4 x half>
store <4 x half> %result, <4 x half> addrspace(1)* %out.gep
store <4 x half> %result, ptr addrspace(1) %out.gep
ret void
}

Expand Down
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,23 @@
; SI: v_cvt_f32_u32_e32

; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; Kernel: converts the scalar i32 argument %in to float with uitofp and stores
; the result through the addrspace(1) pointer %out.
define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 {
  %result = uitofp i32 %in to float
  store float %result, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}v_uint_to_fp_i32_to_f32:
; SI: v_cvt_f32_u32_e32 {{v[0-9]+}}, {{v[0-9]+$}}

; R600: INT_TO_FLT
; Kernel: loads the i32 at index %tid of %in, converts it to float with uitofp,
; and stores the result at index %tid of %out. %tid is the value returned by
; llvm.amdgcn.workitem.id.x.
define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Element addresses are computed with the element type carried by the GEP,
  ; since the pointer operands themselves are untyped.
  %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %in.gep
  %result = uitofp i32 %val to float
  store float %result, ptr addrspace(1) %out.gep
  ret void
}

Expand All @@ -32,9 +32,9 @@ define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32

; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
; Kernel: converts the <2 x i32> argument %in to <2 x float> with a vector
; uitofp and stores the result through %out.
define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(ptr addrspace(1) %out, <2 x i32> %in) #0 {
  %result = uitofp <2 x i32> %in to <2 x float>
  store <2 x float> %result, ptr addrspace(1) %out
  ret void
}

Expand All @@ -49,10 +49,10 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)*
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; Kernel: loads a <4 x i32> directly from %in (no per-work-item indexing),
; converts it to <4 x float> with uitofp, and stores the result through %out.
define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %value = load <4 x i32>, ptr addrspace(1) %in
  %result = uitofp <4 x i32> %value to <4 x float>
  store <4 x float> %result, ptr addrspace(1) %out
  ret void
}

Expand All @@ -66,13 +66,13 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)*
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; Kernel: loads the <4 x i32> at index %tid of %in, converts it to
; <4 x float> with uitofp, and stores the result at index %tid of %out.
; %tid is the value returned by llvm.amdgcn.workitem.id.x.
define amdgpu_kernel void @v_uint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; The GEP source element type (not the pointer) fixes the per-index stride.
  %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid
  %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
  %value = load <4 x i32>, ptr addrspace(1) %in.gep
  %result = uitofp <4 x i32> %value to <4 x float>
  store <4 x float> %result, ptr addrspace(1) %out.gep
  ret void
}

Expand All @@ -82,20 +82,20 @@ define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; Kernel: compares the i32 argument %in against zero, converts the resulting
; i1 to float with uitofp (1.0 when %in is zero, 0.0 otherwise), and stores it
; through %out.
define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(ptr addrspace(1) %out, i32 %in) #0 {
  %cmp = icmp eq i32 %in, 0
  %fp = uitofp i1 %cmp to float
  store float %fp, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}s_uint_to_fp_i1_to_f32_load:
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; Kernel: converts the i1 argument %in to float with uitofp and stores the
; result through %out.
define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(ptr addrspace(1) %out, i1 %in) #0 {
  %fp = uitofp i1 %in to float
  store float %fp, ptr addrspace(1) %out
  ret void
}

Expand All @@ -106,13 +106,13 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out,
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm
; Kernel: loads the i1 at index %tid of %in, converts it to float with uitofp,
; and stores the result at index %tid of %out. %tid is the value returned by
; llvm.amdgcn.workitem.id.x.
define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i1, ptr addrspace(1) %in, i32 %tid
  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %val = load i1, ptr addrspace(1) %in.gep
  %fp = uitofp i1 %val to float
  store float %fp, ptr addrspace(1) %out.gep
  ret void
}

Expand All @@ -122,10 +122,10 @@ define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1
; R600: CNDE_INT
; R600: UINT_TO_FLT

; Kernel: converts the scalar i64 argument %in to float with uitofp and stores
; the result through %out.
define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
entry:
  %cvt = uitofp i64 %in to float
  store float %cvt, ptr addrspace(1) %out
  ret void
}

Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
; Kernel: loads an i16 from %a, converts it to half with uitofp, and stores
; the result to %r.
define amdgpu_kernel void @uitofp_i16_to_f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load i16, ptr addrspace(1) %a
  %r.val = uitofp i16 %a.val to half
  store half %r.val, ptr addrspace(1) %r
  ret void
}

Expand All @@ -27,12 +27,12 @@ entry:
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
; Kernel: loads an i32 from %a, converts it to half with uitofp, and stores
; the result to %r.
define amdgpu_kernel void @uitofp_i32_to_f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load i32, ptr addrspace(1) %a
  %r.val = uitofp i32 %a.val to half
  store half %r.val, ptr addrspace(1) %r
  ret void
}

Expand All @@ -56,12 +56,12 @@ entry:
; GCN: buffer_store_dword
; GCN: s_endpgm
; Kernel: loads a <2 x i16> from %a, converts it to <2 x half> with a vector
; uitofp, and stores the result to %r.
define amdgpu_kernel void @uitofp_v2i16_to_v2f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load <2 x i16>, ptr addrspace(1) %a
  %r.val = uitofp <2 x i16> %a.val to <2 x half>
  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

Expand All @@ -84,12 +84,12 @@ entry:
; GCN: buffer_store_dword
; GCN: s_endpgm
; Kernel: loads a <2 x i32> from %a, converts it to <2 x half> with a vector
; uitofp, and stores the result to %r.
define amdgpu_kernel void @uitofp_v2i32_to_v2f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load <2 x i32>, ptr addrspace(1) %a
  %r.val = uitofp <2 x i32> %a.val to <2 x half>
  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

Expand All @@ -101,14 +101,14 @@ entry:
; GCN-NEXT: v_cvt_f16_f32_e32 [[R_F16:v[0-9]+]], [[RESULT]]
; GCN: buffer_store_short
; GCN: s_endpgm
; Kernel: loads one float from each of %in0 and %in1, tests them with ordered
; >= comparisons against 0.0 and 1.0 respectively, XORs the two i1 results,
; converts that i1 to half with uitofp, and stores it through %out.
define amdgpu_kernel void @s_uint_to_fp_i1_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
  %a = load float, ptr addrspace(1) %in0
  %b = load float, ptr addrspace(1) %in1
  %acmp = fcmp oge float %a, 0.000000e+00
  %bcmp = fcmp oge float %b, 1.000000e+00
  ; The i1 fed to uitofp is true when exactly one of the comparisons holds.
  %result = xor i1 %acmp, %bcmp
  %fp = uitofp i1 %result to half
  store half %fp, ptr addrspace(1) %out
  ret void
}

Expand Down