4,914 changes: 4,914 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll

Large diffs are not rendered by default.

190 changes: 190 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s

; --------------------------------------------------------------------------------
; rcp tests
; --------------------------------------------------------------------------------

; fneg applied to the result of the rcp intrinsic. The expected output carries
; the negation as a source modifier on the reciprocal instruction
; (v_rcp_f16_e64 with -v0) instead of a separate sign-flip instruction.
define half @v_fneg_rcp_f16(half %a) #0 {
; GCN-LABEL: v_fneg_rcp_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rcp_f16_e64 v0, -v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%rcp = call half @llvm.amdgcn.rcp.f16(half %a)
%fneg = fneg half %rcp
ret half %fneg
}

; fneg on both the input and the result of the rcp intrinsic. The expected
; output is a plain v_rcp_f16_e32 on the unmodified input with no negation
; anywhere, i.e. the two fnegs cancel.
define half @v_fneg_rcp_fneg_f16(half %a) #0 {
; GCN-LABEL: v_fneg_rcp_fneg_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rcp_f16_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
%fneg = fneg half %rcp
ret half %fneg
}

; Same fneg(rcp(fneg a)) pattern as v_fneg_rcp_fneg_f16, but the negated input
; %fneg.a has a second use: it is returned as element 1 of the result struct.
; The expected output still runs the reciprocal on the unnegated input, and
; produces the returned negated value with a v_xor_b32 against 0x8000.
define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) #0 {
; GCN-LABEL: v_fneg_rcp_store_use_fneg_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rcp_f16_e32 v2, v0
; GCN-NEXT: v_xor_b32_e32 v1, 0x8000, v0
; GCN-NEXT: v_mov_b32_e32 v0, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
%fneg = fneg half %rcp
%insert.0 = insertvalue { half, half } poison, half %fneg, 0
%insert.1 = insertvalue { half, half } %insert.0, half %fneg.a, 1
ret { half, half } %insert.1
}

; Same fneg(rcp(fneg a)) pattern, but the negated input %fneg.a is also an
; operand of an fmul with %c. The expected output runs the reciprocal on the
; unnegated input and expresses the other use as a source modifier on the
; multiply (v_mul_f16_e64 with -v0), so no standalone negate is emitted.
define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) #0 {
; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rcp_f16_e32 v2, v0
; GCN-NEXT: v_mul_f16_e64 v1, -v0, v1
; GCN-NEXT: v_mov_b32_e32 v0, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
%fneg.a = fneg half %a
%rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
%fneg = fneg half %rcp
%use1 = fmul half %fneg.a, %c
%insert.0 = insertvalue { half, half } poison, half %fneg, 0
%insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
ret { half, half } %insert.1
}

; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------

; fneg applied to the result of the amdgcn sin intrinsic. The expected output
; carries the negation as a source modifier on the sine instruction
; (v_sin_f16_e64 with -v0) instead of a separate sign-flip instruction.
define half @v_fneg_amdgcn_sin_f16(half %a) #0 {
; GCN-LABEL: v_fneg_amdgcn_sin_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sin_f16_e64 v0, -v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%sin = call half @llvm.amdgcn.sin.f16(half %a)
%fneg = fneg half %sin
ret half %fneg
}

; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------

; fneg of an f32 fmul whose result feeds two interp.p1.f16 calls. The calls
; differ only in their second operand (0 vs 1), which appears as attr0.x and
; attr0.y in the expected output. The negation is expected to be folded into
; the multiply as a source modifier on its second operand, so the product is
; computed once and both interpolation instructions read it unmodified.
define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_interp_p1_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e64 v1, v0, -v1
; GCN-NEXT: s_mov_b32 m0, 0
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT: v_interp_p1ll_f16 v0, v1, attr0.x
; GCN-NEXT: v_interp_p1ll_f16 v1, v1, attr0.y
; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul float %a, %b
%fneg = fneg float %mul
%intrp0 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 0, i32 0, i1 false, i32 0)
%intrp1 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 1, i32 0, i1 false, i32 0)
%insert.0 = insertvalue { float, float } poison, float %intrp0, 0
%insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
ret { float, float } %insert.1
}

; fneg of an f32 fmul whose result feeds two interp.p2.f16 calls, each also
; given the constant 4.0 as its first operand. The calls differ only in their
; third operand (0 vs 1), which appears as attr0.x and attr0.y in the expected
; output. The negation is expected to be folded into the multiply as a source
; modifier on its second operand; the 4.0 constant is materialized once into a
; register that both interpolation instructions read.
define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_interp_p2_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e64 v1, v0, -v1
; GCN-NEXT: v_mov_b32_e32 v2, 4.0
; GCN-NEXT: s_mov_b32 m0, 0
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT: v_interp_p2_f16 v0, v1, attr0.x, v2
; GCN-NEXT: v_interp_p2_f16 v1, v1, attr0.y, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul float %a, %b
%fneg = fneg float %mul
%intrp0 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 0, i32 0, i1 false, i32 0)
%intrp1 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 1, i32 0, i1 false, i32 0)
%insert.0 = insertvalue { half, half } poison, half %intrp0, 0
%insert.1 = insertvalue { half, half } %insert.0, half %intrp1, 1
ret { half, half } %insert.1
}

; --------------------------------------------------------------------------------
; arithmetic.fence tests
; --------------------------------------------------------------------------------

; FIXME: Legalization/promote is broken
; fneg applied to the result of llvm.arithmetic.fence on the incoming argument.
; The expected output keeps the fence marker and performs the negation as a
; separate v_xor_b32 against 0x8000.
; NOTE(review): the FIXME above does not say which output it considers wrong;
; confirm against the sibling fneg-combines tests before acting on it.
define half @v_fneg_arithmetic_fence_f16(half %a) #0 {
; GCN-LABEL: v_fneg_arithmetic_fence_f16:
; GCN: ; %bb.0:
; GCN-NEXT: ;ARITH_FENCE
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%fence = call half @llvm.arithmetic.fence.f16(half %a)
%fneg = fneg half %fence
ret half %fneg
}

; fneg applied to llvm.arithmetic.fence of an fmul. The expected output keeps
; the multiply free of source modifiers, emits the fence marker after it, and
; only then negates with a v_xor_b32 against 0x8000, i.e. the negation is not
; moved across the fence into the multiply.
define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 {
; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f16_e32 v0, v0, v1
; GCN-NEXT: ;ARITH_FENCE
; GCN-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %a, %b
%fence = call half @llvm.arithmetic.fence.f16(half %mul)
%fneg = fneg half %fence
ret half %fneg
}

; Intrinsics called by the tests above.
declare half @llvm.amdgcn.rcp.f16(half) #1
declare half @llvm.amdgcn.sin.f16(half) #1
declare half @llvm.arithmetic.fence.f16(half) #1
declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0

; Attribute groups. #0 is attached to every function defined in this file and
; to the two interp declarations; #1 is attached to the rcp, sin and
; arithmetic.fence declarations.
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
; NOTE(review): groups #2, #3 and #4 are not referenced by any function or
; declaration visible in this file.
attributes #2 = { nounwind "unsafe-fp-math"="true" }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN-NSZ: {{.*}}
; GCN-SAFE: {{.*}}
; VI: {{.*}}
; VI-NSZ: {{.*}}
; VI-SAFE: {{.*}}
2,829 changes: 2,829 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

Large diffs are not rendered by default.

1,335 changes: 1,335 additions & 0 deletions llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI %s

; FIXME: This one should fold to rcp
; Selects between fneg(rcp(x)) and the constant 2.0 depending on whether %c is
; zero. In the currently expected output the negation is not applied at the
; reciprocal: the select is done between the plain rcp result and 0xc000 (the
; 2.0 constant with its sign bit flipped), and a v_xor_b32 against 0x8000 then
; negates the selected value.
; NOTE: %y is not used by the body.
define half @select_fneg_posk_src_rcp_f16(i32 %c, half %x, half %y) {
; VI-LABEL: select_fneg_posk_src_rcp_f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rcp_f16_e32 v1, v1
; VI-NEXT: v_mov_b32_e32 v2, 0xc000
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %c, 0
%rcp = call half @llvm.amdgcn.rcp.f16(half %x)
%fneg = fneg half %rcp
%select = select i1 %cmp, half %fneg, half 2.0
ret half %select
}

; Intrinsic called by the test above; #0 is attached only to this declaration.
declare half @llvm.amdgcn.rcp.f16(half) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
3,416 changes: 3,416 additions & 0 deletions llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll

Large diffs are not rendered by default.