Skip to content

Commit

Permalink
[NFC][AMDGPU] Improve fused fmul+fadd tests.
Browse files Browse the repository at this point in the history
Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D84903
  • Loading branch information
dfukalov committed Jul 31, 2020
1 parent ec1445c commit aa77232
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 1 deletion.
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
Expand Up @@ -32,6 +32,46 @@ define amdgpu_kernel void @fmuladd_f16(half addrspace(1)* %out, half addrspace(1
ret void
}

; GCN-LABEL: {{^}}fmul_fadd_f16:
; VI-FLUSH: v_mac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; VI-DENORM-CONTRACT: v_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GFX10-FLUSH: v_mul_f16_e32
; GFX10-FLUSH: v_add_f16_e32
; GFX10-DENORM-CONTRACT: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; Kernel that reads one half value through each of %in1, %in2 and %in3,
; multiplies the first two, adds the third, and writes the sum through %out.
; The multiply and the add are separate instructions and neither carries a
; fast-math flag. Attribute group #0 is defined outside this excerpt.
define amdgpu_kernel void @fmul_fadd_f16(half addrspace(1)* %out,
                                         half addrspace(1)* %in1,
                                         half addrspace(1)* %in2,
                                         half addrspace(1)* %in3) #0 {
  %lhs = load half, half addrspace(1)* %in1
  %rhs = load half, half addrspace(1)* %in2
  %addend = load half, half addrspace(1)* %in3
  %product = fmul half %lhs, %rhs
  %sum = fadd half %product, %addend
  store half %sum, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fmul_fadd_contract_f16:
; VI-FLUSH: v_mac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; VI-DENORM: v_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GFX10-FLUSH: v_mul_f16_e32
; GFX10-FLUSH: v_add_f16_e32
; GFX10-DENORM: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; Kernel that reads one half value through each of %in1, %in2 and %in3,
; multiplies the first two, adds the third, and writes the sum through %out.
; Only the fadd carries the contract fast-math flag; the fmul has no flags.
; Attribute group #0 is defined outside this excerpt.
define amdgpu_kernel void @fmul_fadd_contract_f16(half addrspace(1)* %out,
                                                  half addrspace(1)* %in1,
                                                  half addrspace(1)* %in2,
                                                  half addrspace(1)* %in3) #0 {
  %lhs = load half, half addrspace(1)* %in1
  %rhs = load half, half addrspace(1)* %in2
  %addend = load half, half addrspace(1)* %in3
  %product = fmul half %lhs, %rhs
  %sum = fadd contract half %product, %addend
  store half %sum, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f16
; GCN: {{buffer|flat|global}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat|global}}_load_ushort [[R2:v[0-9]+]],
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
Expand Up @@ -69,6 +69,24 @@ define amdgpu_kernel void @fmul_fadd_f32(float addrspace(1)* %out, float addrspa
ret void
}

; GCN-LABEL: {{^}}fmul_fadd_contract_f32:
; GCN-FLUSH-FMAC: v_fmac_f32_e32

; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32
; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32

; GCN-DENORM-FASTFMA: v_fma_f32
; Kernel that reads one float through each of %in1, %in2 and %in3 using
; volatile loads (kept in this exact order), multiplies the first two, adds
; the third, and writes the sum through %out. Only the fadd carries the
; contract fast-math flag; the fmul has no flags. Attribute group #0 is
; defined outside this excerpt.
define amdgpu_kernel void @fmul_fadd_contract_f32(float addrspace(1)* %out,
                                                  float addrspace(1)* %in1,
                                                  float addrspace(1)* %in2,
                                                  float addrspace(1)* %in3) #0 {
  %lhs = load volatile float, float addrspace(1)* %in1
  %rhs = load volatile float, float addrspace(1)* %in2
  %addend = load volatile float, float addrspace(1)* %in3
  %product = fmul float %lhs, %rhs
  %sum = fadd contract float %product, %addend
  store float %sum, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f32
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
Expand Down
16 changes: 15 additions & 1 deletion llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -1,4 +1,4 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICTSI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
Expand Down Expand Up @@ -33,6 +33,20 @@ define amdgpu_kernel void @fmul_fadd_f64(double addrspace(1)* %out, double addrs
ret void
}

; GCN-LABEL: {{^}}fmul_fadd_contract_f64:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

; Kernel that reads one double through each of %in1, %in2 and %in3,
; multiplies the first two, adds the third, and writes the sum through %out.
; Only the fadd carries the contract fast-math flag; the fmul has no flags.
; Attribute group #0 is defined outside this excerpt.
define amdgpu_kernel void @fmul_fadd_contract_f64(double addrspace(1)* %out,
                                                  double addrspace(1)* %in1,
                                                  double addrspace(1)* %in2,
                                                  double addrspace(1)* %in3) #0 {
  %lhs = load double, double addrspace(1)* %in1
  %rhs = load double, double addrspace(1)* %in2
  %addend = load double, double addrspace(1)* %in3
  %product = fmul double %lhs, %rhs
  %sum = fadd contract double %product, %addend
  store double %sum, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fadd_a_a_b_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[R1:v\[[0-9]+:[0-9]+\]]],
; GCN: {{buffer|flat}}_load_dwordx2 [[R2:v\[[0-9]+:[0-9]+\]]],
Expand Down
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
Expand Up @@ -27,6 +27,39 @@ define amdgpu_kernel void @fmuladd_v2f16(<2 x half> addrspace(1)* %out, <2 x hal
ret void
}

; GCN-LABEL: {{^}}fmul_fadd_v2f16:
; GFX9-DENORM-STRICT: v_pk_mul_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
; GFX9-DENORM-STRICT: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GFX9-DENORM-CONTRACT: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
; Kernel that reads one <2 x half> vector through each of %in1, %in2 and
; %in3, multiplies the first two element-wise, adds the third, and writes the
; sum through %out. The multiply and the add are separate instructions and
; neither carries a fast-math flag. Attribute group #0 is defined outside
; this excerpt.
define amdgpu_kernel void @fmul_fadd_v2f16(<2 x half> addrspace(1)* %out,
                                           <2 x half> addrspace(1)* %in1,
                                           <2 x half> addrspace(1)* %in2,
                                           <2 x half> addrspace(1)* %in3) #0 {
  %lhs = load <2 x half>, <2 x half> addrspace(1)* %in1
  %rhs = load <2 x half>, <2 x half> addrspace(1)* %in2
  %addend = load <2 x half>, <2 x half> addrspace(1)* %in3
  %product = fmul <2 x half> %lhs, %rhs
  %sum = fadd <2 x half> %product, %addend
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fmul_fadd_contract_v2f16:
; GFX9-FLUSH: v_pk_mul_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
; GFX9-FLUSH: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
; Kernel that reads one <2 x half> vector through each of %in1, %in2 and
; %in3, multiplies the first two element-wise, adds the third, and writes the
; sum through %out. Only the fadd carries the contract fast-math flag; the
; fmul has no flags. Attribute group #0 is defined outside this excerpt.
define amdgpu_kernel void @fmul_fadd_contract_v2f16(<2 x half> addrspace(1)* %out,
                                                    <2 x half> addrspace(1)* %in1,
                                                    <2 x half> addrspace(1)* %in2,
                                                    <2 x half> addrspace(1)* %in3) #0 {
  %lhs = load <2 x half>, <2 x half> addrspace(1)* %in1
  %rhs = load <2 x half>, <2 x half> addrspace(1)* %in2
  %addend = load <2 x half>, <2 x half> addrspace(1)* %in3
  %product = fmul <2 x half> %lhs, %rhs
  %sum = fadd contract <2 x half> %product, %addend
  store <2 x half> %sum, <2 x half> addrspace(1)* %out
  ret void
}


; GCN-LABEL: {{^}}fmuladd_2.0_a_b_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
Expand Down

0 comments on commit aa77232

Please sign in to comment.