14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
@@ -14,14 +14,14 @@ declare double @llvm.maxnum.f64(double, double) nounwind readnone
; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[MAX0]], [[QUIET_C]]
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
define amdgpu_kernel void @test_fmax3_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
%bptr = getelementptr double, double addrspace(1)* %aptr, i32 1
%cptr = getelementptr double, double addrspace(1)* %aptr, i32 2
%a = load volatile double, double addrspace(1)* %aptr, align 8
%b = load volatile double, double addrspace(1)* %bptr, align 8
%c = load volatile double, double addrspace(1)* %cptr, align 8
define amdgpu_kernel void @test_fmax3_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%bptr = getelementptr double, ptr addrspace(1) %aptr, i32 1
%cptr = getelementptr double, ptr addrspace(1) %aptr, i32 2
%a = load volatile double, ptr addrspace(1) %aptr, align 8
%b = load volatile double, ptr addrspace(1) %bptr, align 8
%c = load volatile double, ptr addrspace(1) %cptr, align 8
%f0 = call double @llvm.maxnum.f64(double %a, double %b) nounwind readnone
%f1 = call double @llvm.maxnum.f64(double %f0, double %c) nounwind readnone
store double %f1, double addrspace(1)* %out, align 8
store double %f1, ptr addrspace(1) %out, align 8
ret void
}
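
Every hunk in this patch applies the same mechanical rewrite: legacy typed pointers such as double addrspace(1)* become the opaque ptr addrspace(1), and the pointee type is carried only by the instruction that uses the pointer. A minimal sketch of the equivalence, using a hypothetical kernel rather than one of the tests in this patch:

; Opaque-pointer form: the pointer operand itself is untyped.
define amdgpu_kernel void @opaque_ptr_sketch(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  ; getelementptr still names the element type it indexes over,
  %gep = getelementptr double, ptr addrspace(1) %in, i32 1
  ; and load/store name the value type being moved.
  %v = load double, ptr addrspace(1) %gep, align 8
  store double %v, ptr addrspace(1) %out, align 8
  ret void
}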
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -9,13 +9,13 @@
; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @test_fmax3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
%a = load volatile float, float addrspace(1)* %aptr, align 4
%b = load volatile float, float addrspace(1)* %bptr, align 4
%c = load volatile float, float addrspace(1)* %cptr, align 4
define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%a = load volatile float, ptr addrspace(1) %aptr, align 4
%b = load volatile float, ptr addrspace(1) %bptr, align 4
%c = load volatile float, ptr addrspace(1) %cptr, align 4
%f0 = call float @llvm.maxnum.f32(float %a, float %b)
%f1 = call float @llvm.maxnum.f32(float %f0, float %c)
store float %f1, float addrspace(1)* %out, align 4
store float %f1, ptr addrspace(1) %out, align 4
ret void
}

@@ -27,13 +27,13 @@ define amdgpu_kernel void @test_fmax3_olt_0_f32(float addrspace(1)* %out, float
; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
%a = load volatile float, float addrspace(1)* %aptr, align 4
%b = load volatile float, float addrspace(1)* %bptr, align 4
%c = load volatile float, float addrspace(1)* %cptr, align 4
define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%a = load volatile float, ptr addrspace(1) %aptr, align 4
%b = load volatile float, ptr addrspace(1) %bptr, align 4
%c = load volatile float, ptr addrspace(1) %cptr, align 4
%f0 = call float @llvm.maxnum.f32(float %a, float %b)
%f1 = call float @llvm.maxnum.f32(float %c, float %f0)
store float %f1, float addrspace(1)* %out, align 4
store float %f1, ptr addrspace(1) %out, align 4
ret void
}

@@ -56,13 +56,13 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float

; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
; GCN: buffer_store_short [[RESULT]],
define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
%a = load volatile half, half addrspace(1)* %aptr, align 2
%b = load volatile half, half addrspace(1)* %bptr, align 2
%c = load volatile half, half addrspace(1)* %cptr, align 2
define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%a = load volatile half, ptr addrspace(1) %aptr, align 2
%b = load volatile half, ptr addrspace(1) %bptr, align 2
%c = load volatile half, ptr addrspace(1) %cptr, align 2
%f0 = call half @llvm.maxnum.f16(half %a, half %b)
%f1 = call half @llvm.maxnum.f16(half %f0, half %c)
store half %f1, half addrspace(1)* %out, align 2
store half %f1, ptr addrspace(1) %out, align 2
ret void
}

@@ -86,13 +86,13 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half ad

; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]]
; GCN: buffer_store_short [[RESULT]],
define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
%a = load volatile half, half addrspace(1)* %aptr, align 2
%b = load volatile half, half addrspace(1)* %bptr, align 2
%c = load volatile half, half addrspace(1)* %cptr, align 2
define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%a = load volatile half, ptr addrspace(1) %aptr, align 2
%b = load volatile half, ptr addrspace(1) %bptr, align 2
%c = load volatile half, ptr addrspace(1) %cptr, align 2
%f0 = call half @llvm.maxnum.f16(half %a, half %b)
%f1 = call half @llvm.maxnum.f16(half %c, half %f0)
store half %f1, half addrspace(1)* %out, align 2
store half %f1, ptr addrspace(1) %out, align 2
ret void
}

48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
@@ -4,7 +4,7 @@

; Make sure we don't try to form FMAX_LEGACY nodes with f64

define amdgpu_kernel void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_uge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: test_fmax_legacy_uge_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -44,19 +44,19 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, d
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1

%a = load double, double addrspace(1)* %gep.0, align 8
%b = load double, double addrspace(1)* %gep.1, align 8
%a = load double, ptr addrspace(1) %gep.0, align 8
%b = load double, ptr addrspace(1) %gep.1, align 8

%cmp = fcmp uge double %a, %b
%val = select i1 %cmp, double %a, double %b
store double %val, double addrspace(1)* %out, align 8
store double %val, ptr addrspace(1) %out, align 8
ret void
}

define amdgpu_kernel void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_oge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: test_fmax_legacy_oge_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -96,19 +96,19 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, d
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1

%a = load double, double addrspace(1)* %gep.0, align 8
%b = load double, double addrspace(1)* %gep.1, align 8
%a = load double, ptr addrspace(1) %gep.0, align 8
%b = load double, ptr addrspace(1) %gep.1, align 8

%cmp = fcmp oge double %a, %b
%val = select i1 %cmp, double %a, double %b
store double %val, double addrspace(1)* %out, align 8
store double %val, ptr addrspace(1) %out, align 8
ret void
}

define amdgpu_kernel void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_ugt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: test_fmax_legacy_ugt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -148,19 +148,19 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, d
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1

%a = load double, double addrspace(1)* %gep.0, align 8
%b = load double, double addrspace(1)* %gep.1, align 8
%a = load double, ptr addrspace(1) %gep.0, align 8
%b = load double, ptr addrspace(1) %gep.1, align 8

%cmp = fcmp ugt double %a, %b
%val = select i1 %cmp, double %a, double %b
store double %val, double addrspace(1)* %out, align 8
store double %val, ptr addrspace(1) %out, align 8
ret void
}

define amdgpu_kernel void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_ogt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: test_fmax_legacy_ogt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -200,15 +200,15 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, d
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1

%a = load double, double addrspace(1)* %gep.0, align 8
%b = load double, double addrspace(1)* %gep.1, align 8
%a = load double, ptr addrspace(1) %gep.0, align 8
%b = load double, ptr addrspace(1) %gep.1, align 8

%cmp = fcmp ogt double %a, %b
%val = select i1 %cmp, double %a, double %b
store double %val, double addrspace(1)* %out, align 8
store double %val, ptr addrspace(1) %out, align 8
ret void
}

98 changes: 49 additions & 49 deletions llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -20,17 +20,17 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

%a = load volatile float, float addrspace(1)* %gep.0, align 4
%b = load volatile float, float addrspace(1)* %gep.1, align 4
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
%b = load volatile float, ptr addrspace(1) %gep.1, align 4

%cmp = fcmp uge float %a, %b
%val = select i1 %cmp, float %a, float %b
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

@@ -48,19 +48,19 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

%a = load volatile float, float addrspace(1)* %gep.0, align 4
%b = load volatile float, float addrspace(1)* %gep.1, align 4
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
%b = load volatile float, ptr addrspace(1) %gep.1, align 4
%a.nnan = fadd nnan float %a, 1.0
%b.nnan = fadd nnan float %b, 2.0

%cmp = fcmp uge float %a.nnan, %b.nnan
%val = select i1 %cmp, float %a.nnan, float %b.nnan
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

@@ -75,17 +75,17 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)*

; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

%a = load volatile float, float addrspace(1)* %gep.0, align 4
%b = load volatile float, float addrspace(1)* %gep.1, align 4
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
%b = load volatile float, ptr addrspace(1) %gep.1, align 4

%cmp = fcmp oge float %a, %b
%val = select i1 %cmp, float %a, float %b
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

@@ -101,17 +101,17 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, fl

; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

%a = load volatile float, float addrspace(1)* %gep.0, align 4
%b = load volatile float, float addrspace(1)* %gep.1, align 4
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
%b = load volatile float, ptr addrspace(1) %gep.1, align 4

%cmp = fcmp ugt float %a, %b
%val = select i1 %cmp, float %a, float %b
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

@@ -126,17 +126,17 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, fl

; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

%a = load volatile float, float addrspace(1)* %gep.0, align 4
%b = load volatile float, float addrspace(1)* %gep.1, align 4
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
%b = load volatile float, ptr addrspace(1) %gep.1, align 4

%cmp = fcmp ogt float %a, %b
%val = select i1 %cmp, float %a, float %b
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

@@ -152,17 +152,17 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, fl

; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr <1 x float>, ptr addrspace(1) %gep.0, i32 1

%a = load volatile <1 x float>, <1 x float> addrspace(1)* %gep.0
%b = load volatile <1 x float>, <1 x float> addrspace(1)* %gep.1
%a = load volatile <1 x float>, ptr addrspace(1) %gep.0
%b = load volatile <1 x float>, ptr addrspace(1) %gep.1

%cmp = fcmp ogt <1 x float> %a, %b
%val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
store <1 x float> %val, <1 x float> addrspace(1)* %out
store <1 x float> %val, ptr addrspace(1) %out
ret void
}

@@ -185,17 +185,17 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)*
; GCN-NONAN: v_max_f32_e32

; GCN-NOT: v_max
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr <3 x float>, ptr addrspace(1) %gep.0, i32 1

%a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
%b = load <3 x float>, <3 x float> addrspace(1)* %gep.1
%a = load <3 x float>, ptr addrspace(1) %gep.0
%b = load <3 x float>, ptr addrspace(1) %gep.1

%cmp = fcmp ogt <3 x float> %a, %b
%val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
store <3 x float> %val, <3 x float> addrspace(1)* %out
store <3 x float> %val, ptr addrspace(1) %out
ret void
}

@@ -208,18 +208,18 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)*
; GCN-NOT: v_max_

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

%a = load volatile float, float addrspace(1)* %gep.0, align 4
%b = load volatile float, float addrspace(1)* %gep.1, align 4
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
%b = load volatile float, ptr addrspace(1) %gep.1, align 4

%cmp = fcmp ogt float %a, %b
%val = select i1 %cmp, float %a, float %b
store float %val, float addrspace(1)* %out0, align 4
  store i1 %cmp, i1 addrspace(1)* %out1
store float %val, ptr addrspace(1) %out0, align 4
store i1 %cmp, ptr addrspace(1) %out1
ret void
}

20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
@@ -9,18 +9,18 @@ declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0

; FUNC-LABEL: @test_fmax_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
define amdgpu_kernel void @test_fmax_f64(ptr addrspace(1) %out, double %a, double %b) nounwind {
%val = call double @llvm.maxnum.f64(double %a, double %b) #0
store double %val, double addrspace(1)* %out, align 8
store double %val, ptr addrspace(1) %out, align 8
ret void
}

; FUNC-LABEL: @test_fmax_v2f64
; SI: v_max_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
define amdgpu_kernel void @test_fmax_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b) nounwind {
%val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0
store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
store <2 x double> %val, ptr addrspace(1) %out, align 16
ret void
}

@@ -29,9 +29,9 @@ define amdgpu_kernel void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
define amdgpu_kernel void @test_fmax_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b) nounwind {
%val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0
store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
store <4 x double> %val, ptr addrspace(1) %out, align 32
ret void
}

@@ -44,9 +44,9 @@ define amdgpu_kernel void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
define amdgpu_kernel void @test_fmax_v8f64(ptr addrspace(1) %out, <8 x double> %a, <8 x double> %b) nounwind {
%val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0
store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
store <8 x double> %val, ptr addrspace(1) %out, align 64
ret void
}

@@ -67,9 +67,9 @@ define amdgpu_kernel void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
define amdgpu_kernel void @test_fmax_v16f64(ptr addrspace(1) %out, <16 x double> %a, <16 x double> %b) nounwind {
%val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0
store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
store <16 x double> %val, ptr addrspace(1) %out, align 128
ret void
}

56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -7,9 +7,9 @@
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[QUIET1]], [[QUIET0]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(float addrspace(1)* %out, float %a, float %b) #0 {
define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(ptr addrspace(1) %out, float %a, float %b) #0 {
%val = call float @llvm.maxnum.f32(float %a, float %b) #1
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

@@ -24,9 +24,9 @@ define amdgpu_ps float @test_fmax_f32_ieee_mode_off(float %a, float %b) #0 {
; GCN-LABEL: {{^}}test_fmax_v2f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
store <2 x float> %val, ptr addrspace(1) %out, align 8
ret void
}

@@ -35,9 +35,9 @@ define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN-NOT: v_max_f32
define amdgpu_kernel void @test_fmax_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, <3 x float> %b) nounwind {
define amdgpu_kernel void @test_fmax_v3f32(ptr addrspace(1) %out, <3 x float> %a, <3 x float> %b) nounwind {
%val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
store <3 x float> %val, <3 x float> addrspace(1)* %out, align 16
store <3 x float> %val, ptr addrspace(1) %out, align 16
ret void
}

@@ -46,9 +46,9 @@ define amdgpu_kernel void @test_fmax_v3f32(<3 x float> addrspace(1)* %out, <3 x
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
store <4 x float> %val, ptr addrspace(1) %out, align 16
ret void
}

@@ -61,9 +61,9 @@ define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
store <8 x float> %val, ptr addrspace(1) %out, align 32
ret void
}

@@ -84,89 +84,89 @@ define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
store <16 x float> %val, ptr addrspace(1) %out, align 64
ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_nan_nan:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_val_nan:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_nan_val:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_p0_p0:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_p0_n0:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
; GCN-NOT: v_max_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_n0:
; GCN-NOT: v_max_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
store float %val, float addrspace(1)* %out, align 4
store float %val, ptr addrspace(1) %out, align 4
ret void
}
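
The constant-fold checks above pin down llvm.maxnum's IEEE-754 maxNum folding: when exactly one operand is NaN the other operand is returned, a quiet NaN (the 0x7fc00000 in the check above) is produced only when both operands are NaN, and for the signed-zero ties the LangRef allows either zero, so these tests simply lock in the fold's current choice. A small illustration, as a hypothetical function that is not part of the test file:

declare float @llvm.maxnum.f32(float, float)

; Folds to 1.0 at compile time: the non-NaN operand wins.
define float @maxnum_fold_sketch() {
  %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
  ret float %x
}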

80 changes: 40 additions & 40 deletions llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -27,7 +27,7 @@ declare float @llvm.fabs.f32(float) #1
; GFX8_10: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
; GFX10: v_fma_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out, float %x, float %y, float %z) #0 {
define amdgpu_kernel void @multiple_fadd_use_test_f32(ptr addrspace(1) %out, float %x, float %y, float %z) #0 {
%a11 = fadd float %y, -1.0
%a12 = call float @llvm.fabs.f32(float %a11)
%a13 = fadd float %x, -1.0
@@ -38,7 +38,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out,
%a18 = fmul float %a17, %a17
%a19 = fmul float %a18, %a17
%a20 = fsub float 1.0, %a19
store float %a20, float addrspace(1)* %out
store float %a20, ptr addrspace(1) %out
ret void
}

@@ -49,12 +49,12 @@ define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out,
; GCN-DAG: buffer_store_{{dword|b32}} [[MUL2]]
; GCN-DAG: buffer_store_{{dword|b32}} [[MAD]]
; GCN: s_endpgm
define amdgpu_kernel void @multiple_use_fadd_fmac_f32(float addrspace(1)* %out, float %x, [8 x i32], float %y) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
define amdgpu_kernel void @multiple_use_fadd_fmac_f32(ptr addrspace(1) %out, float %x, [8 x i32], float %y) #0 {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%mul2 = fmul fast float %x, 2.0
%mad = fadd fast float %mul2, %y
store volatile float %mul2, float addrspace(1)* %out
store volatile float %mad, float addrspace(1)* %out.gep.1
store volatile float %mul2, ptr addrspace(1) %out
store volatile float %mad, ptr addrspace(1) %out.gep.1
ret void
}

@@ -65,13 +65,13 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f32(float addrspace(1)* %out,
; GCN-DAG: buffer_store_{{dword|b32}} [[MUL2]]
; GCN-DAG: buffer_store_{{dword|b32}} [[MAD]]
; GCN: s_endpgm
define amdgpu_kernel void @multiple_use_fadd_fmad_f32(float addrspace(1)* %out, float %x, float %y) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
define amdgpu_kernel void @multiple_use_fadd_fmad_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%x.abs = call float @llvm.fabs.f32(float %x)
%mul2 = fmul fast float %x.abs, 2.0
%mad = fadd fast float %mul2, %y
store volatile float %mul2, float addrspace(1)* %out
store volatile float %mad, float addrspace(1)* %out.gep.1
store volatile float %mul2, ptr addrspace(1) %out
store volatile float %mad, ptr addrspace(1) %out.gep.1
ret void
}

@@ -80,27 +80,27 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f32(float addrspace(1)* %out,
; SIVI: v_mad_f32 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
; GFX10: v_fma_f32 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, {{s[0-9]+}}
; GFX10: v_fma_f32 {{v[0-9]+}}, |[[X]]|, 2.0, {{s[0-9]+}}
define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(float addrspace(1)* %out, float %x, float %y, float %z) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(ptr addrspace(1) %out, float %x, float %y, float %z) #0 {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%x.abs = call float @llvm.fabs.f32(float %x)
%mul2 = fmul fast float %x.abs, 2.0
%mad0 = fadd fast float %mul2, %y
%mad1 = fadd fast float %mul2, %z
store volatile float %mad0, float addrspace(1)* %out
store volatile float %mad1, float addrspace(1)* %out.gep.1
store volatile float %mad0, ptr addrspace(1) %out
store volatile float %mad1, ptr addrspace(1) %out.gep.1
ret void
}

; GCN-LABEL: {{^}}fmul_x2_xn2_f32:
; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
; GCN: buffer_store_{{dword|b32}} [[RESULT]]
define amdgpu_kernel void @fmul_x2_xn2_f32(float addrspace(1)* %out, float %x, float %y) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
define amdgpu_kernel void @fmul_x2_xn2_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%mul2 = fmul fast float %x, 2.0
%muln2 = fmul fast float %x, -2.0
%mul = fmul fast float %mul2, %muln2
store volatile float %mul, float addrspace(1)* %out
store volatile float %mul, ptr addrspace(1) %out
ret void
}

@@ -110,12 +110,12 @@ define amdgpu_kernel void @fmul_x2_xn2_f32(float addrspace(1)* %out, float %x, f
; GFX10: v_mul_f32_e64 [[TMP0:v[0-9]+]], 0xc0c00000, [[X:s[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
; GCN: buffer_store_{{dword|b32}} [[RESULT]]
define amdgpu_kernel void @fmul_x2_xn3_f32(float addrspace(1)* %out, float %x, float %y) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
define amdgpu_kernel void @fmul_x2_xn3_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%mul2 = fmul fast float %x, 2.0
%muln2 = fmul fast float %x, -3.0
%mul = fmul fast float %mul2, %muln2
store volatile float %mul, float addrspace(1)* %out
store volatile float %mul, ptr addrspace(1) %out
ret void
}

@@ -130,7 +130,7 @@ define amdgpu_kernel void @fmul_x2_xn3_f32(float addrspace(1)* %out, float %x, f
; VI-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
; GFX10-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
; GFX10-FLUSH: v_sub_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
%x = bitcast i16 %x.arg to half
%y = bitcast i16 %y.arg to half
%z = bitcast i16 %z.arg to half
Expand All @@ -144,7 +144,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i
%a18 = fmul half %a17, %a17
%a19 = fmul half %a18, %a17
%a20 = fsub half 1.0, %a19
store half %a20, half addrspace(1)* %out
store half %a20, ptr addrspace(1) %out
ret void
}

@@ -159,14 +159,14 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i
; GCN-DAG: buffer_store_{{short|b16}} [[MUL2]]
; GCN-DAG: buffer_store_{{short|b16}} [[MAD]]
; GCN: s_endpgm
define amdgpu_kernel void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
define amdgpu_kernel void @multiple_use_fadd_fmac_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
%x = bitcast i16 %x.arg to half
%y = bitcast i16 %y.arg to half
%out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
%out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
%mul2 = fmul fast half %x, 2.0
%mad = fadd fast half %mul2, %y
store volatile half %mul2, half addrspace(1)* %out
store volatile half %mad, half addrspace(1)* %out.gep.1
store volatile half %mul2, ptr addrspace(1) %out
store volatile half %mad, ptr addrspace(1) %out.gep.1
ret void
}

@@ -181,15 +181,15 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i
; GCN-DAG: buffer_store_{{short|b16}} [[MUL2]]
; GCN-DAG: buffer_store_{{short|b16}} [[MAD]]
; GCN: s_endpgm
define amdgpu_kernel void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
define amdgpu_kernel void @multiple_use_fadd_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
%x = bitcast i16 %x.arg to half
%y = bitcast i16 %y.arg to half
%out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
%out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
%x.abs = call half @llvm.fabs.f16(half %x)
%mul2 = fmul fast half %x.abs, 2.0
%mad = fadd fast half %mul2, %y
store volatile half %mul2, half addrspace(1)* %out
store volatile half %mad, half addrspace(1)* %out.gep.1
store volatile half %mul2, ptr addrspace(1) %out
store volatile half %mad, ptr addrspace(1) %out.gep.1
ret void
}

@@ -206,32 +206,32 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i
; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, s{{[0-9]+}}
; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, s{{[0-9]+}}

define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
%x = bitcast i16 %x.arg to half
%y = bitcast i16 %y.arg to half
%z = bitcast i16 %z.arg to half
%out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
%out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
%x.abs = call half @llvm.fabs.f16(half %x)
%mul2 = fmul fast half %x.abs, 2.0
%mad0 = fadd fast half %mul2, %y
%mad1 = fadd fast half %mul2, %z
store volatile half %mad0, half addrspace(1)* %out
store volatile half %mad1, half addrspace(1)* %out.gep.1
store volatile half %mad0, ptr addrspace(1) %out
store volatile half %mad1, ptr addrspace(1) %out.gep.1
ret void
}

; GCN-LABEL: {{^}}fmul_x2_xn2_f16:
; GCN: v_mul_f16_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0
; GCN: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
; GCN: buffer_store_{{short|b16}} [[RESULT]]
define amdgpu_kernel void @fmul_x2_xn2_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
define amdgpu_kernel void @fmul_x2_xn2_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
%x = bitcast i16 %x.arg to half
%y = bitcast i16 %y.arg to half
%out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
%out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
%mul2 = fmul fast half %x, 2.0
%muln2 = fmul fast half %x, -2.0
%mul = fmul fast half %mul2, %muln2
store volatile half %mul, half addrspace(1)* %out
store volatile half %mul, ptr addrspace(1) %out
ret void
}

@@ -241,14 +241,14 @@ define amdgpu_kernel void @fmul_x2_xn2_f16(half addrspace(1)* %out, i16 zeroext
; GFX10: v_mul_f16_e64 [[TMP0:v[0-9]+]], 0xc600, [[X:s[0-9]+]]
; GCN: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
; GCN: buffer_store_{{short|b16}} [[RESULT]]
define amdgpu_kernel void @fmul_x2_xn3_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
define amdgpu_kernel void @fmul_x2_xn3_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
%x = bitcast i16 %x.arg to half
%y = bitcast i16 %y.arg to half
%out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
%out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
%mul2 = fmul fast half %x, 2.0
%muln2 = fmul fast half %x, -3.0
%mul = fmul fast half %mul2, %muln2
store volatile half %mul, half addrspace(1)* %out
store volatile half %mul, ptr addrspace(1) %out
ret void
}

76 changes: 38 additions & 38 deletions llvm/test/CodeGen/AMDGPU/fmul.f16.ll
@@ -13,14 +13,14 @@
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_f16(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) {
entry:
%a.val = load volatile half, half addrspace(1)* %a
%b.val = load volatile half, half addrspace(1)* %b
%a.val = load volatile half, ptr addrspace(1) %a
%b.val = load volatile half, ptr addrspace(1) %b
%r.val = fmul half %a.val, %b.val
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

@@ -33,12 +33,12 @@ entry:
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_f16_imm_a(
half addrspace(1)* %r,
half addrspace(1)* %b) {
ptr addrspace(1) %r,
ptr addrspace(1) %b) {
entry:
%b.val = load volatile half, half addrspace(1)* %b
%b.val = load volatile half, ptr addrspace(1) %b
%r.val = fmul half 3.0, %b.val
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

@@ -52,12 +52,12 @@ entry:
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_f16_imm_b(
half addrspace(1)* %r,
half addrspace(1)* %a) {
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
%a.val = load volatile half, half addrspace(1)* %a
%a.val = load volatile half, ptr addrspace(1) %a
%r.val = fmul half %a.val, 4.0
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

@@ -89,14 +89,14 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_v2f16(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = fmul <2 x half> %a.val, %b.val
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}

@@ -124,12 +124,12 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_v2f16_imm_a(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %b) {
ptr addrspace(1) %r,
ptr addrspace(1) %b) {
entry:
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = fmul <2 x half> <half 3.0, half 4.0>, %b.val
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}

Expand All @@ -156,12 +156,12 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fmul_v2f16_imm_b(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a) {
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fmul <2 x half> %a.val, <half 4.0, half 3.0>
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}

@@ -182,14 +182,14 @@ entry:
; VI: v_or_b32
; VI: v_or_b32
define amdgpu_kernel void @fmul_v4f16(
<4 x half> addrspace(1)* %r,
<4 x half> addrspace(1)* %a,
<4 x half> addrspace(1)* %b) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) {
entry:
%a.val = load <4 x half>, <4 x half> addrspace(1)* %a
%b.val = load <4 x half>, <4 x half> addrspace(1)* %b
%a.val = load <4 x half>, ptr addrspace(1) %a
%b.val = load <4 x half>, ptr addrspace(1) %b
%r.val = fmul <4 x half> %a.val, %b.val
store <4 x half> %r.val, <4 x half> addrspace(1)* %r
store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}

@@ -214,11 +214,11 @@ entry:

; VI: buffer_store_dwordx2 v[[[OR0]]:[[OR1]]]
define amdgpu_kernel void @fmul_v4f16_imm_a(
<4 x half> addrspace(1)* %r,
<4 x half> addrspace(1)* %b) {
ptr addrspace(1) %r,
ptr addrspace(1) %b) {
entry:
%b.val = load <4 x half>, <4 x half> addrspace(1)* %b
%b.val = load <4 x half>, ptr addrspace(1) %b
%r.val = fmul <4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, %b.val
store <4 x half> %r.val, <4 x half> addrspace(1)* %r
store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/fmul.ll
@@ -6,10 +6,10 @@
; GCN: v_mul_f32

; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
define amdgpu_kernel void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
define amdgpu_kernel void @fmul_f32(ptr addrspace(1) %out, float %a, float %b) {
entry:
%0 = fmul float %a, %b
store float %0, float addrspace(1)* %out
store float %0, ptr addrspace(1) %out
ret void
}

@@ -19,10 +19,10 @@ entry:

; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
define amdgpu_kernel void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
define amdgpu_kernel void @fmul_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fmul <2 x float> %a, %b
store <2 x float> %0, <2 x float> addrspace(1)* %out
store <2 x float> %0, ptr addrspace(1) %out
ret void
}

@@ -36,23 +36,23 @@ entry:
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float>, <4 x float> addrspace(1) * %in
%b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
define amdgpu_kernel void @fmul_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
%a = load <4 x float>, ptr addrspace(1) %in
%b = load <4 x float>, ptr addrspace(1) %b_ptr
%result = fmul <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out
store <4 x float> %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}test_mul_2_k:
; GCN: v_mul_f32
; GCN-NOT: v_mul_f32
; GCN: s_endpgm
define amdgpu_kernel void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @test_mul_2_k(ptr addrspace(1) %out, float %x) #0 {
%y = fmul float %x, 2.0
%z = fmul float %y, 3.0
store float %z, float addrspace(1)* %out
store float %z, ptr addrspace(1) %out
ret void
}

@@ -61,10 +61,10 @@ define amdgpu_kernel void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
; GCN-NOT: v_mul_f32
; GCN-NOT: v_mad_f32
; GCN: s_endpgm
define amdgpu_kernel void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
define amdgpu_kernel void @test_mul_2_k_inv(ptr addrspace(1) %out, float %x) #0 {
%y = fmul float %x, 3.0
%z = fmul float %y, 2.0
store float %z, float addrspace(1)* %out
store float %z, ptr addrspace(1) %out
ret void
}

@@ -75,12 +75,12 @@ define amdgpu_kernel void @test_mul_2_k_inv(float addrspace(1)* %out, float %x)
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN-NOT: v_mul_f32
define amdgpu_kernel void @test_mul_twouse(float addrspace(1)* %out, float %x, float %y) #0 {
define amdgpu_kernel void @test_mul_twouse(ptr addrspace(1) %out, float %x, float %y) #0 {
%a = fmul float %x, 5.0
%b = fsub float -0.0, %a
%c = fmul float %b, %y
%d = fmul float %c, %a
store float %d, float addrspace(1)* %out
store float %d, ptr addrspace(1) %out
ret void
}

30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/fmul64.ll
@@ -3,24 +3,24 @@

; FUNC-LABEL: {{^}}fmul_f64:
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double, double addrspace(1)* %in1
%r1 = load double, double addrspace(1)* %in2
define amdgpu_kernel void @fmul_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2) {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
%r2 = fmul double %r0, %r1
store double %r2, double addrspace(1)* %out
store double %r2, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}fmul_v2f64:
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2) {
%r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
%r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
define amdgpu_kernel void @fmul_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2) {
%r0 = load <2 x double>, ptr addrspace(1) %in1
%r1 = load <2 x double>, ptr addrspace(1) %in2
%r2 = fmul <2 x double> %r0, %r1
store <2 x double> %r2, <2 x double> addrspace(1)* %out
store <2 x double> %r2, ptr addrspace(1) %out
ret void
}

@@ -29,11 +29,11 @@ define amdgpu_kernel void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2) {
%r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
%r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
define amdgpu_kernel void @fmul_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2) {
%r0 = load <4 x double>, ptr addrspace(1) %in1
%r1 = load <4 x double>, ptr addrspace(1) %in2
%r2 = fmul <4 x double> %r0, %r1
store <4 x double> %r2, <4 x double> addrspace(1)* %out
store <4 x double> %r2, ptr addrspace(1) %out
ret void
}
292 changes: 146 additions & 146 deletions llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll

Large diffs are not rendered by default.

292 changes: 146 additions & 146 deletions llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll

Large diffs are not rendered by default.

144 changes: 72 additions & 72 deletions llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -7,13 +7,13 @@

; GCN-LABEL: {{^}}fmuladd_f64:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
%r0 = load double, double addrspace(1)* %in1
%r1 = load double, double addrspace(1)* %in2
%r2 = load double, double addrspace(1)* %in3
define amdgpu_kernel void @fmuladd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
%r2 = load double, ptr addrspace(1) %in3
%r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
store double %r3, double addrspace(1)* %out
store double %r3, ptr addrspace(1) %out
ret void
}

@@ -22,28 +22,28 @@ define amdgpu_kernel void @fmuladd_f64(double addrspace(1)* %out, double addrspa

; GCN-STRICT: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GCN-STRICT: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
%r0 = load double, double addrspace(1)* %in1
%r1 = load double, double addrspace(1)* %in2
%r2 = load double, double addrspace(1)* %in3
define amdgpu_kernel void @fmul_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
%r2 = load double, ptr addrspace(1) %in3
%tmp = fmul double %r0, %r1
%r3 = fadd double %tmp, %r2
store double %r3, double addrspace(1)* %out
store double %r3, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}fmul_fadd_contract_f64:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

define amdgpu_kernel void @fmul_fadd_contract_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
%r0 = load double, double addrspace(1)* %in1
%r1 = load double, double addrspace(1)* %in2
%r2 = load double, double addrspace(1)* %in3
define amdgpu_kernel void @fmul_fadd_contract_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
%r2 = load double, ptr addrspace(1) %in3
%tmp = fmul contract double %r0, %r1
%r3 = fadd contract double %tmp, %r2
store double %r3, double addrspace(1)* %out
store double %r3, ptr addrspace(1) %out
ret void
}

@@ -58,20 +58,20 @@ define amdgpu_kernel void @fmul_fadd_contract_f64(double addrspace(1)* %out, dou

; SI: buffer_store_dwordx2 [[RESULT]]
; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_a_a_b_f64(double addrspace(1)* %out,
double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
define amdgpu_kernel void @fadd_a_a_b_f64(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
ptr addrspace(1) %in2) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid

%r0 = load volatile double, double addrspace(1)* %gep.0
%r1 = load volatile double, double addrspace(1)* %gep.1
%r0 = load volatile double, ptr addrspace(1) %gep.0
%r1 = load volatile double, ptr addrspace(1) %gep.1

%add.0 = fadd double %r0, %r0
%add.1 = fadd double %add.0, %r1
store double %add.1, double addrspace(1)* %gep.out
store double %add.1, ptr addrspace(1) %gep.out
ret void
}

@@ -86,20 +86,20 @@ define amdgpu_kernel void @fadd_a_a_b_f64(double addrspace(1)* %out,

; SI: buffer_store_dwordx2 [[RESULT]]
; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_b_a_a_f64(double addrspace(1)* %out,
double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
define amdgpu_kernel void @fadd_b_a_a_f64(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
ptr addrspace(1) %in2) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid

%r0 = load volatile double, double addrspace(1)* %gep.0
%r1 = load volatile double, double addrspace(1)* %gep.1
%r0 = load volatile double, ptr addrspace(1) %gep.0
%r1 = load volatile double, ptr addrspace(1) %gep.1

%add.0 = fadd double %r0, %r0
%add.1 = fadd double %r1, %add.0
store double %add.1, double addrspace(1)* %gep.out
store double %add.1, ptr addrspace(1) %gep.out
ret void
}

@@ -108,21 +108,21 @@ define amdgpu_kernel void @fadd_b_a_a_f64(double addrspace(1)* %out,
; GCN-STRICT: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}

; GCN-CONTRACT: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
define amdgpu_kernel void @mad_sub_f64(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext
%gep0 = getelementptr double, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
%gep1 = getelementptr double, double addrspace(1)* %ptr, i64 %add1
%gep1 = getelementptr double, ptr addrspace(1) %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
%gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
%outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %gep0, align 8
%b = load volatile double, double addrspace(1)* %gep1, align 8
%c = load volatile double, double addrspace(1)* %gep2, align 8
%gep2 = getelementptr double, ptr addrspace(1) %ptr, i64 %add2
%outgep = getelementptr double, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile double, ptr addrspace(1) %gep0, align 8
%b = load volatile double, ptr addrspace(1) %gep1, align 8
%c = load volatile double, ptr addrspace(1) %gep2, align 8
%mul = fmul double %a, %b
%sub = fsub double %mul, %c
store double %sub, double addrspace(1)* %outgep, align 8
store double %sub, ptr addrspace(1) %outgep, align 8
ret void
}
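
Under GCN-CONTRACT the multiply-subtract above collapses to one FMA by negating the addend, since a*b - c == fma(a, b, -c); the -v[...] source modifier in the check line is that negation. The equivalent intrinsic form, as a sketch (function name hypothetical):

declare double @llvm.fma.f64(double, double, double)

define double @mad_sub_sketch(double %a, double %b, double %c) {
%neg.c = fneg double %c
%r = call double @llvm.fma.f64(double %a, double %b, double %neg.c)
ret double %r
}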

@@ -131,20 +131,20 @@ define amdgpu_kernel void @mad_sub_f64(double addrspace(1)* noalias nocapture %o
; GCN-STRICT: v_add_f64

; GCN-CONTRACT: v_fma_f64
define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(double addrspace(1)* %out,
double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
ptr addrspace(1) %in2) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid

%r0 = load volatile double, double addrspace(1)* %gep.0
%r1 = load volatile double, double addrspace(1)* %gep.1
%r0 = load volatile double, ptr addrspace(1) %gep.0
%r1 = load volatile double, ptr addrspace(1) %gep.1

%add.0 = fadd fast double %r0, %r0
%add.1 = fadd double %add.0, %r1
store double %add.1, double addrspace(1)* %gep.out
store double %add.1, ptr addrspace(1) %gep.out
ret void
}

@@ -153,39 +153,39 @@ define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(double addrspace(1)* %out,
; GCN-STRICT: v_add_f64

; GCN-CONTRACT: v_fma_f64
define amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(double addrspace(1)* %out,
double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
define amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
ptr addrspace(1) %in2) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid

%r0 = load volatile double, double addrspace(1)* %gep.0
%r1 = load volatile double, double addrspace(1)* %gep.1
%r0 = load volatile double, ptr addrspace(1) %gep.0
%r1 = load volatile double, ptr addrspace(1) %gep.1

%add.0 = fadd double %r0, %r0
%add.1 = fadd fast double %add.0, %r1
store double %add.1, double addrspace(1)* %gep.out
store double %add.1, ptr addrspace(1) %gep.out
ret void
}

; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast:
; GCN: v_fma_f64
define amdgpu_kernel void @fadd_a_a_b_f64_fast(double addrspace(1)* %out,
double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
define amdgpu_kernel void @fadd_a_a_b_f64_fast(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
ptr addrspace(1) %in2) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
%gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid

%r0 = load volatile double, double addrspace(1)* %gep.0
%r1 = load volatile double, double addrspace(1)* %gep.1
%r0 = load volatile double, ptr addrspace(1) %gep.0
%r1 = load volatile double, ptr addrspace(1) %gep.1

%add.0 = fadd fast double %r0, %r0
%add.1 = fadd fast double %add.0, %r1
store double %add.1, double addrspace(1)* %gep.out
store double %add.1, ptr addrspace(1) %gep.out
ret void
}
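
Taken together, the three variants above pin down when a + a + b may become fma(a, 2.0, b): with fast on only one of the two fadds, the strict run still emits separate adds and only the contract-enabled run fuses, while with fast on both every run selects v_fma_f64. A sketch of the fully relaxed form (function name hypothetical):

define double @a_a_b_sketch(double %a, double %b) {
%t = fadd fast double %a, %a  ; reassociable, equivalent to 2.0 * %a
%r = fadd fast double %t, %b  ; may fold to fma(%a, 2.0, %b)
ret double %r
}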

82 changes: 41 additions & 41 deletions llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -17,13 +17,13 @@ declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
; GFX9-FLUSH: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define amdgpu_kernel void @fmuladd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
<2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 {
%r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
%r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
%r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
define amdgpu_kernel void @fmuladd_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
%r0 = load <2 x half>, ptr addrspace(1) %in1
%r1 = load <2 x half>, ptr addrspace(1) %in2
%r2 = load <2 x half>, ptr addrspace(1) %in3
%r3 = tail call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %r0, <2 x half> %r1, <2 x half> %r2)
store <2 x half> %r3, <2 x half> addrspace(1)* %out
store <2 x half> %r3, ptr addrspace(1) %out
ret void
}

@@ -32,14 +32,14 @@ define amdgpu_kernel void @fmuladd_v2f16(<2 x half> addrspace(1)* %out, <2 x hal
; GFX9-DENORM-STRICT: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GFX9-DENORM-CONTRACT: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define amdgpu_kernel void @fmul_fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
<2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 {
%r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
%r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
%r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
define amdgpu_kernel void @fmul_fadd_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
%r0 = load <2 x half>, ptr addrspace(1) %in1
%r1 = load <2 x half>, ptr addrspace(1) %in2
%r2 = load <2 x half>, ptr addrspace(1) %in3
%r3 = fmul <2 x half> %r0, %r1
%r4 = fadd <2 x half> %r3, %r2
store <2 x half> %r4, <2 x half> addrspace(1)* %out
store <2 x half> %r4, ptr addrspace(1) %out
ret void
}

@@ -48,14 +48,14 @@ define amdgpu_kernel void @fmul_fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x h
; GFX9-FLUSH: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define amdgpu_kernel void @fmul_fadd_contract_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
<2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 {
%r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
%r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
%r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
define amdgpu_kernel void @fmul_fadd_contract_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
%r0 = load <2 x half>, ptr addrspace(1) %in1
%r1 = load <2 x half>, ptr addrspace(1) %in2
%r2 = load <2 x half>, ptr addrspace(1) %in3
%r3 = fmul contract <2 x half> %r0, %r1
%r4 = fadd contract <2 x half> %r3, %r2
store <2 x half> %r4, <2 x half> addrspace(1)* %out
store <2 x half> %r4, ptr addrspace(1) %out
ret void
}

@@ -70,17 +70,17 @@ define amdgpu_kernel void @fmul_fadd_contract_v2f16(<2 x half> addrspace(1)* %ou

; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; GFX9-DENORM: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr <2 x half>, <2 x half> addrspace(1)* %gep.0, i32 1
%gep.out = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
%gep.out = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid

%r1 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.0
%r2 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.1
%r1 = load volatile <2 x half>, ptr addrspace(1) %gep.0
%r2 = load volatile <2 x half>, ptr addrspace(1) %gep.1

%r3 = tail call <2 x half> @llvm.fmuladd.v2f16(<2 x half> <half 2.0, half 2.0>, <2 x half> %r1, <2 x half> %r2)
store <2 x half> %r3, <2 x half> addrspace(1)* %gep.out
store <2 x half> %r3, ptr addrspace(1) %gep.out
ret void
}

@@ -94,17 +94,17 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(<2 x half> addrspace(1)* %out,

; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; GFX9-DENORM: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr <2 x half>, <2 x half> addrspace(1)* %gep.0, i32 1
%gep.out = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
%gep.out = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid

%r1 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.0
%r2 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.1
%r1 = load volatile <2 x half>, ptr addrspace(1) %gep.0
%r2 = load volatile <2 x half>, ptr addrspace(1) %gep.1

%r3 = tail call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %r1, <2 x half> <half 2.0, half 2.0>, <2 x half> %r2)
store <2 x half> %r3, <2 x half> addrspace(1)* %gep.out
store <2 x half> %r3, ptr addrspace(1) %gep.out
ret void
}

@@ -120,20 +120,20 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(<2 x half> addrspace(1)* %out,
; GFX9-DENORM-CONTRACT: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]

; GCN: {{flat|global}}_store_dword v{{.+}}, [[RESULT]]
define amdgpu_kernel void @fadd_a_a_b_v2f16(<2 x half> addrspace(1)* %out,
<2 x half> addrspace(1)* %in1,
<2 x half> addrspace(1)* %in2) #0 {
define amdgpu_kernel void @fadd_a_a_b_v2f16(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
ptr addrspace(1) %in2) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%gep.1 = getelementptr <2 x half>, <2 x half> addrspace(1)* %gep.0, i32 1
%gep.out = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
%gep.out = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid

%r0 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.0
%r1 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.1
%r0 = load volatile <2 x half>, ptr addrspace(1) %gep.0
%r1 = load volatile <2 x half>, ptr addrspace(1) %gep.1

%add.0 = fadd <2 x half> %r0, %r0
%add.1 = fadd <2 x half> %add.0, %r1
store <2 x half> %add.1, <2 x half> addrspace(1)* %gep.out
store <2 x half> %add.1, ptr addrspace(1) %gep.out
ret void
}
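
The packed checks in this file rely on GFX9's v_pk_* instructions, which process both f16 halves of a 32-bit register in one operation, with the inline constant 2.0 applied to both lanes. A sketch of the packed intrinsic form, assuming the standard vector declaration (function name hypothetical):

declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>)

; With f16 denormals enabled this may select to a single v_pk_fma_f16.
define <2 x half> @pk_fma_sketch(<2 x half> %a, <2 x half> %c) {
%r = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> <half 2.0, half 2.0>, <2 x half> %a, <2 x half> %c)
ret <2 x half> %r
}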

28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/fnearbyint.ll
@@ -11,7 +11,7 @@ declare double @llvm.nearbyint.f64(double) #0
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0

define amdgpu_kernel void @fnearbyint_f16(half addrspace(1)* %out, half %in) #1 {
define amdgpu_kernel void @fnearbyint_f16(ptr addrspace(1) %out, half %in) #1 {
; SI-LABEL: fnearbyint_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -49,11 +49,11 @@ define amdgpu_kernel void @fnearbyint_f16(half addrspace(1)* %out, half %in) #1
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
%1 = call half @llvm.nearbyint.f16(half %in)
store half %1, half addrspace(1)* %out
store half %1, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @fnearbyint_f32(float addrspace(1)* %out, float %in) #1 {
define amdgpu_kernel void @fnearbyint_f32(ptr addrspace(1) %out, float %in) #1 {
; SICI-LABEL: fnearbyint_f32:
; SICI: ; %bb.0: ; %entry
; SICI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -77,11 +77,11 @@ define amdgpu_kernel void @fnearbyint_f32(float addrspace(1)* %out, float %in) #
; VI-NEXT: s_endpgm
entry:
%0 = call float @llvm.nearbyint.f32(float %in)
store float %0, float addrspace(1)* %out
store float %0, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 {
define amdgpu_kernel void @fnearbyint_v2f32(ptr addrspace(1) %out, <2 x float> %in) #1 {
; SICI-LABEL: fnearbyint_v2f32:
; SICI: ; %bb.0: ; %entry
; SICI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -107,11 +107,11 @@ define amdgpu_kernel void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x
; VI-NEXT: s_endpgm
entry:
%0 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %in)
store <2 x float> %0, <2 x float> addrspace(1)* %out
store <2 x float> %0, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 {
define amdgpu_kernel void @fnearbyint_v4f32(ptr addrspace(1) %out, <4 x float> %in) #1 {
; SICI-LABEL: fnearbyint_v4f32:
; SICI: ; %bb.0: ; %entry
; SICI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -141,11 +141,11 @@ define amdgpu_kernel void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x
; VI-NEXT: s_endpgm
entry:
%0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in)
store <4 x float> %0, <4 x float> addrspace(1)* %out
store <4 x float> %0, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @nearbyint_f64(double addrspace(1)* %out, double %in) {
define amdgpu_kernel void @nearbyint_f64(ptr addrspace(1) %out, double %in) {
; SI-LABEL: nearbyint_f64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -192,10 +192,10 @@ define amdgpu_kernel void @nearbyint_f64(double addrspace(1)* %out, double %in)
; VI-NEXT: s_endpgm
entry:
%0 = call double @llvm.nearbyint.f64(double %in)
store double %0, double addrspace(1)* %out
store double %0, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
define amdgpu_kernel void @nearbyint_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
; SI-LABEL: nearbyint_v2f64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -256,11 +256,11 @@ define amdgpu_kernel void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x
; VI-NEXT: s_endpgm
entry:
%0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in)
store <2 x double> %0, <2 x double> addrspace(1)* %out
store <2 x double> %0, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
define amdgpu_kernel void @nearbyint_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
; SI-LABEL: nearbyint_v4f64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
@@ -350,7 +350,7 @@ define amdgpu_kernel void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x
; VI-NEXT: s_endpgm
entry:
%0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %in)
store <4 x double> %0, <4 x double> addrspace(1)* %out
store <4 x double> %0, ptr addrspace(1) %out
ret void
}
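
All of these tests exercise the same contract: @llvm.nearbyint rounds its operand to an integral value in the current rounding mode without raising the inexact exception, and since codegen assumes the default round-to-nearest-even environment it can select round-to-even hardware directly. A scalar sketch under that assumption (function name hypothetical):

declare float @llvm.nearbyint.f32(float)

define float @nearbyint_sketch(float %x) {
; In the default FP environment this rounds half-way cases to even,
; e.g. 2.5 -> 2.0 and 3.5 -> 4.0.
%r = call float @llvm.nearbyint.f32(float %x)
ret float %r
}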

1,402 changes: 701 additions & 701 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.ll

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
@@ -9,15 +9,15 @@
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: {{buffer|flat}}_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
define amdgpu_kernel void @v_fneg_rcp_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%rcp = call float @llvm.amdgcn.rcp.legacy(float %a)
%fneg = fsub float -0.000000e+00, %rcp
store float %fneg, float addrspace(1)* %out.gep
store float %fneg, ptr addrspace(1) %out.gep
ret void
}

50 changes: 25 additions & 25 deletions llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -9,11 +9,11 @@

; GFX89-NOT: _and
; GFX89: v_sub_f16_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define amdgpu_kernel void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
define amdgpu_kernel void @fneg_fabs_fadd_f16(ptr addrspace(1) %out, half %x, half %y) {
%fabs = call half @llvm.fabs.f16(half %x)
%fsub = fsub half -0.0, %fabs
%fadd = fadd half %y, %fsub
store half %fadd, half addrspace(1)* %out, align 2
store half %fadd, ptr addrspace(1) %out, align 2
ret void
}

@@ -27,11 +27,11 @@ define amdgpu_kernel void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x,
; GFX89: v_mul_f16_e64 [[MUL:v[0-9]+]], {{s[0-9]+}}, -|{{v[0-9]+}}|
; GFX89-NOT: [[MUL]]
; GFX89: {{flat|global}}_store_short v{{.+}}, [[MUL]]
define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
define amdgpu_kernel void @fneg_fabs_fmul_f16(ptr addrspace(1) %out, half %x, half %y) {
%fabs = call half @llvm.fabs.f16(half %x)
%fsub = fsub half -0.0, %fabs
%fmul = fmul half %y, %fsub
store half %fmul, half addrspace(1)* %out, align 2
store half %fmul, ptr addrspace(1) %out, align 2
ret void
}

@@ -41,30 +41,30 @@ define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x,

; GCN-LABEL: {{^}}fneg_fabs_free_f16:
; GCN: {{s_or_b32 s[0-9]+, s[0-9]+, 0x8000|s_bitset1_b32 s[0-9]+, 15}}
define amdgpu_kernel void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
define amdgpu_kernel void @fneg_fabs_free_f16(ptr addrspace(1) %out, i16 %in) {
%bc = bitcast i16 %in to half
%fabs = call half @llvm.fabs.f16(half %bc)
%fsub = fsub half -0.0, %fabs
store half %fsub, half addrspace(1)* %out
store half %fsub, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_f16:
; GCN: {{s_or_b32 s[0-9]+, s[0-9]+, 0x8000|s_bitset1_b32 s[0-9]+, 15}}
define amdgpu_kernel void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
define amdgpu_kernel void @fneg_fabs_f16(ptr addrspace(1) %out, half %in) {
%fabs = call half @llvm.fabs.f16(half %in)
%fsub = fsub half -0.0, %fabs
store half %fsub, half addrspace(1)* %out, align 2
store half %fsub, ptr addrspace(1) %out, align 2
ret void
}

; GCN-LABEL: {{^}}v_fneg_fabs_f16:
; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
%val = load half, half addrspace(1)* %in, align 2
define amdgpu_kernel void @v_fneg_fabs_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%val = load half, ptr addrspace(1) %in, align 2
%fabs = call half @llvm.fabs.f16(half %val)
%fsub = fsub half -0.0, %fabs
store half %fsub, half addrspace(1)* %out, align 2
store half %fsub, ptr addrspace(1) %out, align 2
ret void
}

Expand All @@ -75,11 +75,11 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
; GFX9: v_or_b32_e32 [[RESULT:v[0-9]+]], 0x80008000, [[ADD]]

; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(ptr addrspace(1) %out, <2 x half> %in) {
%add = fadd <2 x half> %in, <half 1.0, half 2.0>
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %add)
%fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
store <2 x half> %fneg.fabs, ptr addrspace(1) %out
ret void
}

@@ -89,21 +89,21 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)*

; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src:
; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(ptr addrspace(1) %out, <2 x half> %in) {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
store <2 x half> %fneg.fabs, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_v4f16:
; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
; GCN: {{flat|global}}_store_dwordx2
define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
define amdgpu_kernel void @fneg_fabs_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
%fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
%fsub = fsub <4 x half> <half -0.0, half -0.0, half -0.0, half -0.0>, %fabs
store <4 x half> %fsub, <4 x half> addrspace(1)* %out
store <4 x half> %fsub, ptr addrspace(1) %out
ret void
}

@@ -120,11 +120,11 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h

; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0]
define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {
define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(ptr addrspace(1) %out, <2 x half> %in) #0 {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
%mul = fmul <2 x half> %fneg.fabs, <half 4.0, half 4.0>
store <2 x half> %mul, <2 x half> addrspace(1)* %out
store <2 x half> %mul, ptr addrspace(1) %out
ret void
}

@@ -135,23 +135,23 @@ define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %o
; GFX9-DAG: v_mov_b32_e32 [[V_NEG:v[0-9]+]], [[NEG]]
; GFX9-DAG: global_store_dword v{{[0-9]+}}, [[V_ABS]], s{{\[[0-9]+:[0-9]+\]}}
; GFX9: global_store_dword v{{[0-9]+}}, [[V_NEG]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_fneg_multi_use_fabs_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
define amdgpu_kernel void @s_fneg_multi_use_fabs_v2f16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x half> %in) {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs
store <2 x half> %fabs, <2 x half> addrspace(1)* %out0
store <2 x half> %fneg, <2 x half> addrspace(1)* %out1
store <2 x half> %fabs, ptr addrspace(1) %out0
store <2 x half> %fneg, ptr addrspace(1) %out1
ret void
}

; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_foldable_neg_v2f16:
; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0]
define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x half> %in) {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs
%mul = fmul <2 x half> %fneg, <half 4.0, half 4.0>
store <2 x half> %fabs, <2 x half> addrspace(1)* %out0
store <2 x half> %mul, <2 x half> addrspace(1)* %out1
store <2 x half> %fabs, ptr addrspace(1) %out0
store <2 x half> %mul, ptr addrspace(1) %out1
ret void
}

36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
@@ -6,51 +6,51 @@

; GCN-LABEL: {{^}}fneg_fabs_fadd_f64:
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define amdgpu_kernel void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
define amdgpu_kernel void @fneg_fabs_fadd_f64(ptr addrspace(1) %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
%fadd = fadd double %y, %fsub
store double %fadd, double addrspace(1)* %out, align 8
store double %fadd, ptr addrspace(1) %out, align 8
ret void
}

define amdgpu_kernel void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
%x = load double, double addrspace(1)* %xptr, align 8
%y = load double, double addrspace(1)* %xptr, align 8
define amdgpu_kernel void @v_fneg_fabs_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %xptr, ptr addrspace(1) %yptr) {
%x = load double, ptr addrspace(1) %xptr, align 8
%y = load double, ptr addrspace(1) %xptr, align 8
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
%fadd = fadd double %y, %fsub
store double %fadd, double addrspace(1)* %out, align 8
store double %fadd, ptr addrspace(1) %out, align 8
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_fmul_f64:
; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define amdgpu_kernel void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
define amdgpu_kernel void @fneg_fabs_fmul_f64(ptr addrspace(1) %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
%fmul = fmul double %y, %fsub
store double %fmul, double addrspace(1)* %out, align 8
store double %fmul, ptr addrspace(1) %out, align 8
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_free_f64:
define amdgpu_kernel void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
define amdgpu_kernel void @fneg_fabs_free_f64(ptr addrspace(1) %out, i64 %in) {
%bc = bitcast i64 %in to double
%fabs = call double @llvm.fabs.f64(double %bc)
%fsub = fsub double -0.000000e+00, %fabs
store double %fsub, double addrspace(1)* %out
store double %fsub, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_fn_free_f64:
; SI: s_bitset1_b32
; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
define amdgpu_kernel void @fneg_fabs_fn_free_f64(ptr addrspace(1) %out, i64 %in) {
%bc = bitcast i64 %in to double
%fabs = call double @fabs(double %bc)
%fsub = fsub double -0.000000e+00, %fabs
store double %fsub, double addrspace(1)* %out
store double %fsub, ptr addrspace(1) %out
ret void
}

@@ -61,21 +61,21 @@ define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64
; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; GCN-DAG: v_mov_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]]
; GCN: buffer_store_dwordx2 v[[[LO_V]]:[[HI_V]]]
define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, [8 x i32], double %in) {
define amdgpu_kernel void @fneg_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
%fsub = fsub double -0.000000e+00, %fabs
store double %fsub, double addrspace(1)* %out, align 8
store double %fsub, ptr addrspace(1) %out, align 8
ret void
}

; GCN-LABEL: {{^}}fneg_fabs_v2f64:
; GCN-NOT: 0x80000000
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
define amdgpu_kernel void @fneg_fabs_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
%fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
store <2 x double> %fsub, <2 x double> addrspace(1)* %out
store <2 x double> %fsub, ptr addrspace(1) %out
ret void
}

@@ -85,10 +85,10 @@ define amdgpu_kernel void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
define amdgpu_kernel void @fneg_fabs_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
%fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
store <4 x double> %fsub, <4 x double> addrspace(1)* %out
store <4 x double> %fsub, ptr addrspace(1) %out
ret void
}

34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -5,23 +5,23 @@
; FUNC-LABEL: {{^}}fneg_fabsf_fadd_f32:
; SI-NOT: and
; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define amdgpu_kernel void @fneg_fabsf_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
define amdgpu_kernel void @fneg_fabsf_fadd_f32(ptr addrspace(1) %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
%fadd = fadd float %y, %fsub
store float %fadd, float addrspace(1)* %out, align 4
store float %fadd, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}fneg_fabsf_fmul_f32:
; SI-NOT: and
; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
; SI-NOT: and
define amdgpu_kernel void @fneg_fabsf_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
define amdgpu_kernel void @fneg_fabsf_fmul_f32(ptr addrspace(1) %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
%fmul = fmul float %y, %fsub
store float %fmul, float addrspace(1)* %out, align 4
store float %fmul, ptr addrspace(1) %out, align 4
ret void
}

@@ -36,11 +36,11 @@ define amdgpu_kernel void @fneg_fabsf_fmul_f32(float addrspace(1)* %out, float %

; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; VI: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabsf_free_f32(float addrspace(1)* %out, i32 %in) {
define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @llvm.fabs.f32(float %bc)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, float addrspace(1)* %out
store float %fsub, ptr addrspace(1) %out
ret void
}

@@ -50,30 +50,30 @@ define amdgpu_kernel void @fneg_fabsf_free_f32(float addrspace(1)* %out, i32 %in
; R600: -PV

; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_fabsf_fn_free_f32(float addrspace(1)* %out, i32 %in) {
define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @fabsf(float %bc)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, float addrspace(1)* %out
store float %fsub, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}fneg_fabsf_f32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_fabsf_f32(float addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fneg_fabsf_f32(ptr addrspace(1) %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, float addrspace(1)* %out, align 4
store float %fsub, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}v_fneg_fabsf_f32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define amdgpu_kernel void @v_fneg_fabsf_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%val = load float, float addrspace(1)* %in, align 4
define amdgpu_kernel void @v_fneg_fabsf_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%val = load float, ptr addrspace(1) %in, align 4
%fabs = call float @llvm.fabs.f32(float %val)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, float addrspace(1)* %out, align 4
store float %fsub, ptr addrspace(1) %out, align 4
ret void
}

@@ -86,10 +86,10 @@ define amdgpu_kernel void @v_fneg_fabsf_f32(float addrspace(1)* %out, float addr
; FIXME: In this case two uses of the constant should be folded
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabsf_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
define amdgpu_kernel void @fneg_fabsf_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
store <2 x float> %fsub, <2 x float> addrspace(1)* %out
store <2 x float> %fsub, ptr addrspace(1) %out
ret void
}

@@ -98,10 +98,10 @@ define amdgpu_kernel void @fneg_fabsf_v2f32(<2 x float> addrspace(1)* %out, <2 x
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabsf_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
define amdgpu_kernel void @fneg_fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
store <4 x float> %fsub, <4 x float> addrspace(1)* %out
store <4 x float> %fsub, ptr addrspace(1) %out
ret void
}
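
Every check in these fneg-fabs tests reduces to the same bit trick: fneg(fabs(x)) unconditionally sets the sign bit, so the backend emits an integer OR with 0x80000000 (or s_bitset1_b32 on bit 31) rather than any floating-point arithmetic. The integer-level equivalent, as a sketch (function name hypothetical):

define float @fneg_fabs_sketch(float %x) {
%i = bitcast float %x to i32
%s = or i32 %i, -2147483648  ; set bit 31: force the sign negative
%r = bitcast i32 %s to float
ret float %r
}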
