diff --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll new file mode 100644 index 00000000000000..561a73b54b3bd1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll @@ -0,0 +1,1323 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,GISEL %s + +; Test that fneg is folded into source modifiers when it wasn't +; possible to fold fsub to fneg without context. + +define float @no_fold_f32_fsub_into_fneg_modifier_ieee_pos1(float %v0, float %v1) #0 { +; CHECK-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_pos1: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float 1.0, %v0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @no_fold_f32_fsub_into_fneg_modifier_daz_pos1(float %v0, float %v1) #1 { +; CHECK-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_pos1: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float 1.0, %v0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @no_fold_f32_fsub_into_fneg_modifier_ieee_commuted(float %v0, float %v1) #0 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_commuted: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_add_f32_e32 v0, 0, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_commuted: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_subrev_f32_e32 v0, 0x80000000, v0 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float %v0, -0.0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @fold_f32_fsub_into_fneg_modifier_ieee_pos0(float %v0, float %v1) #0 { +; CHECK-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_pos0: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_sub_f32_e32 v0, 0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float 0.0, %v0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @fold_f32_fsub_into_fneg_modifier_daz_pos0(float %v0, float %v1) #1 { +; CHECK-LABEL: fold_f32_fsub_into_fneg_modifier_daz_pos0: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_sub_f32_e32 v0, 0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float 0.0, %v0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @no_fold_f32_fsub_into_fneg_modifier_daz_commuted(float %v0, float %v1) #1 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_add_f32_e32 v0, 0, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_subrev_f32_e32 v0, 0x80000000, v0 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float %v0, -0.0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @fold_f32_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 { +; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f32_e64 v0, 
-v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @fold_f32_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 { +; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @fold_f32_fsub_into_fneg_modifier_ieee_nsz(float %v0, float %v1) #0 { +; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz float -0.0, %v0 + %mul = fmul nsz float %sub, %v1 + ret float %mul +} + +define float @fold_f32_fsub_into_fneg_modifier_daz_nsz(float %v0, float %v1) #1 { +; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; 
+; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz float -0.0, %v0 + %mul = fmul nsz float %sub, %v1 + ret float %mul +} + +define float @fold_f32_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 { +; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %mul = fmul float %sub, %v1 + ret float %mul +} + +define float @fold_f32_fsub_into_fneg_modifier_dynamic_nsz(float %v0, float %v1) #2 { +; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz float -0.0, %v0 + %mul = fmul nsz float %sub, %v1 + ret float %mul +} + +define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee(<2 x float> %v0, <2 x float> %v1) #0 { +; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2 +; SDAG-NEXT: v_mul_f32_e64 v1, 
-v1, v3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0 + %mul = fmul <2 x float> %sub, %v1 + ret <2 x float> %mul +} + +define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz(<2 x float> %v0, <2 x float> %v1) #1 { +; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0 + %mul = fmul <2 x float> %sub, %v1 + ret <2 x float> %mul +} + +define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee_nsz(<2 x float> %v0, <2 x float> %v1) #0 { +; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2 +; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; 
GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0 + %mul = fmul nsz <2 x float> %sub, %v1 + ret <2 x float> %mul +} + +define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz_nsz(<2 x float> %v0, <2 x float> %v1) #1 { +; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0 + %mul = fmul nsz <2 x float> %sub, %v1 + ret <2 x float> %mul +} + +define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic(<2 x float> %v0, <2 x float> %v1) #2 { +; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0 + %mul = fmul <2 x float> %sub, %v1 + ret <2 x float> %mul +} + +define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz(<2 x float> %v0, <2 x float> %v1) #2 { 
+; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0 + %mul = fmul nsz <2 x float> %sub, %v1 + ret <2 x float> %mul +} + + +define half @fold_f16_fsub_into_fneg_modifier_ieee(half %v0, half %v1) #0 { +; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub half -0.0, %v0 + %mul = fmul half %sub, %v1 + ret half %mul +} + +define half @fold_f16_fsub_into_fneg_modifier_daz(half %v0, half %v1) #1 { +; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0 +; SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub half -0.0, %v0 + %mul 
= fmul half %sub, %v1 + ret half %mul +} + +define half @fold_f16_fsub_into_fneg_modifier_ieee_nsz(half %v0, half %v1) #0 { +; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_ieee_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_ieee_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz half -0.0, %v0 + %mul = fmul nsz half %sub, %v1 + ret half %mul +} + +define half @fold_f16_fsub_into_fneg_modifier_daz_nsz(half %v0, half %v1) #1 { +; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0 +; SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz half -0.0, %v0 + %mul = fmul nsz half %sub, %v1 + ret half %mul +} + +define half @fold_f16_fsub_into_fneg_modifier_dynamic(half %v0, half %v1) #2 { +; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0 +; SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub half -0.0, %v0 + %mul = fmul half %sub, %v1 + ret half %mul +} + 
+define half @fold_f16_fsub_into_fneg_modifier_dynamic_nsz(half %v0, half %v1) #2 { +; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0 +; SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz half -0.0, %v0 + %mul = fmul nsz half %sub, %v1 + ret half %mul +} + +define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee(<2 x half> %v0, <2 x half> %v1) #0 { +; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1] +; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 + %mul = fmul <2 x half> %sub, %v1 + ret <2 x half> %mul +} + +define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz(<2 x half> %v0, <2 x half> %v1) #1 { +; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1] +; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 + 
%mul = fmul <2 x half> %sub, %v1 + ret <2 x half> %mul +} + +define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee_nsz(<2 x half> %v0, <2 x half> %v1) #0 { +; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1] +; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0 + %mul = fmul nsz <2 x half> %sub, %v1 + ret <2 x half> %mul +} + +define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz_nsz(<2 x half> %v0, <2 x half> %v1) #1 { +; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1] +; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0 + %mul = fmul nsz <2 x half> %sub, %v1 + ret <2 x half> %mul +} + +define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic(<2 x half> %v0, <2 x half> %v1) #2 { +; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] 
neg_hi:[1,1] +; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 + %mul = fmul <2 x half> %sub, %v1 + ret <2 x half> %mul +} + +define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz(<2 x half> %v0, <2 x half> %v1) #2 { +; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1] +; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0 + %mul = fmul nsz <2 x half> %sub, %v1 + ret <2 x half> %mul +} + +define double @fold_f64_fsub_into_fneg_modifier_ieee(double %v0, double %v1) #0 { +; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub double -0.0, %v0 + %mul = fmul double %sub, %v1 + ret double %mul +} + +define double @fold_f64_fsub_into_fneg_modifier_daz(double %v0, double %v1) #1 { +; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[0:1], 
v[0:1], v[0:1] +; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub double -0.0, %v0 + %mul = fmul double %sub, %v1 + ret double %mul +} + +define double @fold_f64_fsub_into_fneg_modifier_ieee_nsz(double %v0, double %v1) #0 { +; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_ieee_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_ieee_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz double -0.0, %v0 + %mul = fmul nsz double %sub, %v1 + ret double %mul +} + +define double @fold_f64_fsub_into_fneg_modifier_daz_nsz(double %v0, double %v1) #1 { +; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_daz_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_daz_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz double -0.0, %v0 + %mul = fmul nsz double %sub, %v1 + ret double %mul +} + +define double @fold_f64_fsub_into_fneg_modifier_dynamic(double %v0, double %v1) #2 { +; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; 
GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub double -0.0, %v0 + %mul = fmul double %sub, %v1 + ret double %mul +} + +define double @fold_f64_fsub_into_fneg_modifier_dynamic_nsz(double %v0, double %v1) #2 { +; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic_nsz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic_nsz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub nsz double -0.0, %v0 + %mul = fmul nsz double %sub, %v1 + ret double %mul +} + +define float @fold_f32_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, float %v0, float %v1) #0 { +; SDAG-LABEL: fold_f32_select_user_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e64 v0, v2, -v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f32_select_user_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %mul = select i1 %cond, float %sub, float %v1 + ret float %mul +} + +define float @no_fold_f32_select_user_fsub_into_fneg_modifier_daz(i1 %cond, float %v0, float %v1) #1 { +; SDAG-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: 
v_sub_f32_e32 v1, 0x80000000, v1 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %mul = select i1 %cond, float %sub, float %v1 + ret float %mul +} + +define float @no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, float %v0, float %v1) #2 { +; SDAG-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %mul = select i1 %cond, float %sub, float %v1 + ret float %mul +} + +define half @fold_f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, half %v0, half %v1) #0 { +; SDAG-LABEL: fold_f16_select_user_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_xor_b32_e32 v1, 0x8000, v1 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: 
fold_f16_select_user_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub half -0.0, %v0 + %mul = select i1 %cond, half %sub, half %v1 + ret half %mul +} + +define half @no_fold_f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, half %v0, half %v1) #1 { +; SDAG-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_sub_f16_e32 v1, 0x8000, v1 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub half -0.0, %v0 + %mul = select i1 %cond, half %sub, half %v1 + ret half %mul +} + +define half @no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, half %v0, half %v1) #2 { +; SDAG-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_sub_f16_e32 v1, 0x8000, v1 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: 
v_max_f16_e64 v1, -v1, -v1 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub half -0.0, %v0 + %mul = select i1 %cond, half %sub, half %v1 + ret half %mul +} + +define double @fold_f64_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, double %v0, double %v1) #0 { +; SDAG-LABEL: fold_f64_select_user_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f64_select_user_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[1:2], -v[1:2], -v[1:2] +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub double -0.0, %v0 + %mul = select i1 %cond, double %sub, double %v1 + ret double %mul +} + +define double @no_fold_f64_select_user_fsub_into_fneg_modifier_daz(i1 %cond, double %v0, double %v1) #1 { +; SDAG-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[1:2], -v[1:2], -v[1:2] +; GISEL-NEXT: v_and_b32_e32 v0, 
1, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub double -0.0, %v0 + %mul = select i1 %cond, double %sub, double %v1 + ret double %mul +} + +define double @no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, double %v0, double %v1) #2 { +; SDAG-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[1:2], -v[1:2], -v[1:2] +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub double -0.0, %v0 + %mul = select i1 %cond, double %sub, double %v1 + ret double %mul +} + +define <2 x half> @fold_v2f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, <2 x half> %v0, <2 x half> %v1) #0 { +; SDAG-LABEL: fold_v2f16_select_user_fsub_into_fneg_modifier_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_v2f16_select_user_fsub_into_fneg_modifier_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: 
v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1] +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 + %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1 + ret <2 x half> %mul +} + +define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, <2 x half> %v0, <2 x half> %v1) #1 { +; SDAG-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1] +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 + %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1 + ret <2 x half> %mul +} + +define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, <2 x half> %v0, <2 x half> %v1) #2 { +; SDAG-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1] 
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 + %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1 + ret <2 x half> %mul +} + +define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 { +; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", metadata !"fpexcept.strict") + %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %mul +} + +define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 { +; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", metadata !"fpexcept.strict") + %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %mul +} + +define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 { +; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", 
metadata !"fpexcept.strict") + %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %mul +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee(float %v0) #0 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1) + ret i1 %class +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz(float %v0) #1 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1) + ret i1 %class +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic(float %v0) #2 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic: +; SDAG: ; %bb.0: +; 
SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1) + ret i1 %class +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee(float %v0) #0 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v1, 0x90 +; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, v1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mov_b32_e32 v1, 0x90 +; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144) + ret i1 %class +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz(float %v0) #1 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mov_b32_e32 v1, 0x90 +; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mov_b32_e32 v1, 0x90 +; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144) + ret i1 %class +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic(float %v0) #2 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_mov_b32_e32 v1, 0x90 +; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mov_b32_e32 v1, 0x90 +; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144) + ret i1 %class +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_ieee(float %v0, i32 %testmask) #0 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_ieee: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, v1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_ieee: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; 
GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask) + ret i1 %class +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_daz(float %v0, i32 %testmask) #1 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask) + ret i1 %class +} + +define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic(float %v0, i32 %testmask) #2 { +; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask) + ret i1 %class +} + +define i1 @no_fold_f64_fsub_into_fneg_modifier_class_var_daz(double %v0, i32 %testmask) #1 { +; 
SDAG-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_var_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], -v[0:1], v2 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_var_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub double -0.0, %v0 + %class = call i1 @llvm.amdgcn.class.f64(double %sub, i32 %testmask) + ret i1 %class +} + +define i1 @no_fold_f16_fsub_into_fneg_modifier_class_var_daz(half %v0, i32 %testmask) #1 { +; SDAG-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_var_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0 +; SDAG-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_var_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GISEL-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub half -0.0, %v0 + %class = call i1 @llvm.amdgcn.class.f16(half %sub, i32 %testmask) + ret i1 %class +} + +define i1 @no_fold_f64_fsub_into_fneg_modifier_class_daz(double %v0) #1 { +; SDAG-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v2, 0x90 +; SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], -v[0:1], v2 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: 
no_fold_f64_fsub_into_fneg_modifier_class_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GISEL-NEXT: v_mov_b32_e32 v2, 0x90 +; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub double -0.0, %v0 + %class = call i1 @llvm.is.fpclass.f64(double %sub, i32 144) + ret i1 %class +} + +define i1 @no_fold_f16_fsub_into_fneg_modifier_class_daz(half %v0) #1 { +; SDAG-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0 +; SDAG-NEXT: v_mov_b32_e32 v1, 0x90 +; SDAG-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GISEL-NEXT: v_mov_b32_e32 v1, 0x90 +; GISEL-NEXT: v_cmp_class_f16_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub half -0.0, %v0 + %class = call i1 @llvm.is.fpclass.f16(half %sub, i32 144) + ret i1 %class +} + +define amdgpu_gfx float @fold_f32_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %v1) #1 { +; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_interp_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: s_mov_b32 m0, s4 +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_interp_p1_f32_e32 v0, v0, attr0.x +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_interp_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: s_mov_b32 m0, s4 +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: 
v_interp_p1_f32_e32 v0, v0, attr0.x +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %p0_0 = call float @llvm.amdgcn.interp.p1(float %sub, i32 0, i32 0, i32 %v1) + ret float %p0_0 +} + +define amdgpu_gfx float @fold_f16_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %m0) #1 { +; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0 +; SDAG-NEXT: s_mov_b32 m0, s4 +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 +; SDAG-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GISEL-NEXT: s_mov_b32 m0, s4 +; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 +; GISEL-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y +; GISEL-NEXT: s_setpc_b64 s[30:31] + %sub = fsub float -0.0, %v0 + %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %sub, i32 1, i32 2, i1 0, i32 %m0) + ret float %p1_0 +} + +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare i1 @llvm.is.fpclass.f32(float, i32 immarg) +declare i1 @llvm.amdgcn.class.f32(float, i32) +declare i1 @llvm.is.fpclass.f64(double, i32 immarg) +declare i1 @llvm.amdgcn.class.f64(double, i32) +declare i1 @llvm.is.fpclass.f16(half, i32 immarg) +declare i1 @llvm.amdgcn.class.f16(half, i32) +declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) +declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) + +attributes #0 = { "denormal-fp-math"="ieee,ieee" } +attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" } +attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }