Expand Up
@@ -4,7 +4,7 @@
; FIXME: This should be merged with uint_to_fp.ll, but s_uint_to_fp_v2i64 crashes on r600
; Kernel: converts the i64 kernel argument %in to half with uitofp (unsigned
; conversion) and stores the result through %out, a pointer in address space 1.
define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 {
; GFX6-LABEL: s_uint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; NOTE(review): the assertion lines between this point and the two below were
; collapsed in the reviewed excerpt and are not reproduced here. Presumably
; they are tool-generated; regenerate them (e.g. with
; utils/update_llc_test_checks.py) before relying on this test.
; GFX8-NEXT: flat_store_short v[0:1], v2
; GFX8-NEXT: s_endpgm
  %result = uitofp i64 %in to half
  store half %result, ptr addrspace(1) %out
  ret void
}
; Kernel: each work-item uses its workitem.id.x value as an element index,
; loads one i64 from %in at that index, converts it to half with uitofp
; (unsigned conversion) and stores the result to the same index of %out.
; Both pointers are in address space 1.
define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; NOTE(review): the assertion lines between this point and the two below were
; collapsed in the reviewed excerpt and are not reproduced here. Presumably
; they are tool-generated; regenerate them (e.g. with
; utils/update_llc_test_checks.py) before relying on this test.
; GFX8-NEXT: flat_store_short v[0:1], v3
; GFX8-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Per-work-item addresses: element %tid of the i64 input and half output.
  %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %in.gep
  %result = uitofp i64 %val to half
  store half %result, ptr addrspace(1) %out.gep
  ret void
}
; Kernel: converts the i64 kernel argument %in to float with uitofp (unsigned
; conversion) and stores the result through %out, a pointer in address space 1.
define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
; GFX6-LABEL: s_uint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; NOTE(review): the assertion lines between this point and the two below were
; collapsed in the reviewed excerpt and are not reproduced here. Presumably
; they are tool-generated; regenerate them (e.g. with
; utils/update_llc_test_checks.py) before relying on this test.
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
  %result = uitofp i64 %in to float
  store float %result, ptr addrspace(1) %out
  ret void
}
; Kernel: each work-item uses its workitem.id.x value as an element index,
; loads one i64 from %in at that index, converts it to float with uitofp
; (unsigned conversion) and stores the result to the same index of %out.
; Both pointers are in address space 1.
define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; NOTE(review): the assertion lines between this point and the two below were
; collapsed in the reviewed excerpt and are not reproduced here. Presumably
; they are tool-generated; regenerate them (e.g. with
; utils/update_llc_test_checks.py) before relying on this test.
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Per-work-item addresses: element %tid of the i64 input and float output.
  %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %in.gep
  %result = uitofp i64 %val to float
  store float %result, ptr addrspace(1) %out.gep
  ret void
}
; Kernel: converts the <2 x i64> kernel argument %in to <2 x float> with
; uitofp (unsigned conversion) and stores the vector through %out, a pointer
; in address space 1.
define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0 {
; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
; NOTE(review): the assertion lines between this point and the two below were
; collapsed in the reviewed excerpt and are not reproduced here. Presumably
; they are tool-generated; regenerate them (e.g. with
; utils/update_llc_test_checks.py) before relying on this test.
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_endpgm
  %result = uitofp <2 x i64> %in to <2 x float>
  store <2 x float> %result, ptr addrspace(1) %out
  ret void
}
; Kernel: each work-item uses its workitem.id.x value as an element index,
; loads one <4 x i64> from %in at that index, converts it to <4 x float> with
; uitofp (unsigned conversion) and stores the vector to the same index of
; %out. Both pointers are in address space 1.
define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; NOTE(review): the assertion lines between this point and the two below were
; collapsed in the reviewed excerpt and are not reproduced here. Presumably
; they are tool-generated; regenerate them (e.g. with
; utils/update_llc_test_checks.py) before relying on this test.
; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3]
; GFX8-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Per-work-item addresses: element %tid, in units of the whole vector type.
  %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid
  %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
  %value = load <4 x i64>, ptr addrspace(1) %in.gep
  %result = uitofp <4 x i64> %value to <4 x float>
  store <4 x float> %result, ptr addrspace(1) %out.gep
  ret void
}
; Kernel: converts the <2 x i64> kernel argument %in to <2 x half> with
; uitofp (unsigned conversion) and stores the vector through %out, a pointer
; in address space 1.
define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0 {
; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
; NOTE(review): the assertion lines between this point and the two below were
; collapsed in the reviewed excerpt and are not reproduced here. Presumably
; they are tool-generated; regenerate them (e.g. with
; utils/update_llc_test_checks.py) before relying on this test.
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
  %result = uitofp <2 x i64> %in to <2 x half>
  store <2 x half> %result, ptr addrspace(1) %out
  ret void
}
; Kernel: each work-item uses its workitem.id.x value as an element index,
; loads one <4 x i64> from %in at that index, converts it to <4 x half> with
; uitofp (unsigned conversion) and stores the vector to the same index of
; %out. Both pointers are in address space 1.
define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; NOTE(review): the assertion lines between this point and the two below were
; collapsed in the reviewed excerpt and are not reproduced here. Presumably
; they are tool-generated; regenerate them (e.g. with
; utils/update_llc_test_checks.py) before relying on this test.
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Per-work-item addresses: element %tid, in units of the whole vector type.
  %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid
  %out.gep = getelementptr <4 x half>, ptr addrspace(1) %out, i32 %tid
  %value = load <4 x i64>, ptr addrspace(1) %in.gep
  %result = uitofp <4 x i64> %value to <4 x half>
  store <4 x half> %result, ptr addrspace(1) %out.gep
  ret void
}
Expand Down