diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 8443b1515cf81..ff13d6640bbc4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -569,7 +569,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); // FIXME: fpow has a selection pattern that should move to custom lowering. - auto &Exp2Ops = getActionDefinitionsBuilder({G_FEXP2, G_FLOG2, G_FPOW}); + auto &Exp2Ops = getActionDefinitionsBuilder({G_FEXP2, G_FLOG2}); if (ST.has16BitInsts()) Exp2Ops.legalFor({S32, S16}); else @@ -577,7 +577,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, Exp2Ops.clampScalar(0, MinScalarFPTy, S32); Exp2Ops.scalarize(0); - auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FLOG, G_FLOG10}); + auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FLOG, G_FLOG10, G_FPOW}); if (ST.has16BitInsts()) ExpOps.customFor({{S32}, {S16}}); else @@ -1365,6 +1365,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, return legalizeFlog(MI, B, numbers::ln2f / numbers::ln10f); case TargetOpcode::G_FEXP: return legalizeFExp(MI, B); + case TargetOpcode::G_FPOW: + return legalizeFPow(MI, B); case TargetOpcode::G_FFLOOR: return legalizeFFloor(MI, MRI, B); case TargetOpcode::G_BUILD_VECTOR: @@ -2099,6 +2101,42 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI, + MachineIRBuilder &B) const { + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + unsigned Flags = MI.getFlags(); + LLT Ty = B.getMRI()->getType(Dst); + B.setInstr(MI); + const LLT S16 = LLT::scalar(16); + const LLT S32 = LLT::scalar(32); + + if (Ty == S32) { + auto Log = B.buildFLog2(S32, Src0, Flags); + auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false) + .addUse(Log.getReg(0)) + .addUse(Src1) + .setMIFlags(Flags); + B.buildFExp2(Dst, Mul, Flags); + } else if (Ty == S16) { + // There's no f16 fmul_legacy, so we need to convert for it. + auto Log = B.buildFLog2(S16, Src0, Flags); + auto Ext0 = B.buildFPExt(S32, Log, Flags); + auto Ext1 = B.buildFPExt(S32, Src1, Flags); + auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false) + .addUse(Ext0.getReg(0)) + .addUse(Ext1.getReg(0)) + .setMIFlags(Flags); + + B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags); + } else + return false; + + MI.eraseFromParent(); + return true; +} + // Find a source register, ignoring any possible source modifiers. 
static Register stripAnySourceMods(Register OrigSrc, MachineRegisterInfo &MRI) { Register ModSrc = OrigSrc; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index c16fb9c74a911..54e756a38f964 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -82,6 +82,7 @@ class AMDGPULegalizerInfo : public LegalizerInfo { bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B, double Log2BaseInverted) const; bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll new file mode 100644 index 0000000000000..499b38f03e966 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -0,0 +1,619 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s + +define float @v_pow_f32(float %x, float %y) { +; GFX6-LABEL: v_pow_f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %pow = call float @llvm.pow.f32(float %x, float %y) + ret float %pow +} + +define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) { +; GFX6-LABEL: v_pow_v2f32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_log_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_exp_f32_e32 v1, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_v2f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f32_e32 v0, v0 +; GFX8-NEXT: v_log_f32_e32 v1, v1 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: v_exp_f32_e32 v1, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_v2f32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f32_e32 v0, v0 +; GFX9-NEXT: v_log_f32_e32 v1, v1 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: v_exp_f32_e32 v1, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define half @v_pow_f16(half %x, half %y) { +; GFX6-LABEL: v_pow_f16: +; 
GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f16_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: v_exp_f16_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f16_e32 v0, v0 +; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: v_exp_f16_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %pow = call half @llvm.pow.f16(half %x, half %y) + ret half %pow +} + +define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { +; GFX6-LABEL: v_pow_v2f16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_log_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_exp_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_v2f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_log_f16_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_exp_f16_e32 v1, v1 +; GFX8-NEXT: v_exp_f16_e32 v0, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f16_e32 v2, v0 +; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1 +; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX9-NEXT: 
v_and_or_b32 v0, v1, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y) + ret <2 x half> %pow +} + +define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { +; GFX6-LABEL: v_pow_v2f16_fneg_lhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_log_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_exp_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_v2f16_fneg_lhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX8-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_log_f16_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_exp_f16_e32 v1, v1 +; GFX8-NEXT: v_exp_f16_e32 v0, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_v2f16_fneg_lhs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GFX9-NEXT: v_log_f16_e32 v2, v0 +; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1 +; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX9-NEXT: v_exp_f16_e32 v1, v2 +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %x.fneg = fneg <2 x half> %x + %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y) + ret <2 x half> %pow +} + +define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { +; GFX6-LABEL: v_pow_v2f16_fneg_rhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX6-NEXT: v_or_b32_e32 v2, v3, v2 +; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX6-NEXT: 
v_lshrrev_b32_e32 v3, 16, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_log_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 +; GFX6-NEXT: v_exp_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_v2f16_fneg_rhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_log_f16_e32 v0, v0 +; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 +; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: v_exp_f16_e32 v1, v2 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_exp_f16_e32 v0, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_v2f16_fneg_rhs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f16_e32 v2, v0 +; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 +; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: v_exp_f16_e32 v1, v2 +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %y.fneg = fneg <2 x half> %y + %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg) + ret <2 x half> %pow +} + +define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { +; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v0, v0, v4 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: s_mov_b32 s4, 0x80008000 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX6-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_xor_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX6-NEXT: v_log_f32_e32 v2, v2 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v2, v3 +; GFX6-NEXT: 
v_exp_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s4, 0x80008000 +; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX8-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_log_f16_e32 v0, v0 +; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1 +; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 +; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: v_exp_f16_e32 v1, v2 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_exp_f16_e32 v0, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s4, 0x80008000 +; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_log_f16_e32 v2, v0 +; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 +; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: v_exp_f16_e32 v1, v2 +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %x.fneg = fneg <2 x half> %x + %y.fneg = fneg <2 x half> %y + %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg) + ret <2 x half> %pow +} + +; FIXME +; define double @v_pow_f64(double %x, double %y) { +; %pow = call double @llvm.pow.f64(double %x, double %y) +; ret double %pow +; } + +define float @v_pow_f32_fabs_lhs(float %x, float %y) { +; GFX6-LABEL: v_pow_f32_fabs_lhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_log_f32_e64 v0, |v0| +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_f32_fabs_lhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f32_e64 v0, |v0| +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_f32_fabs_lhs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f32_e64 v0, |v0| +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %fabs.x = call float @llvm.fabs.f32(float %x) + %pow = call float @llvm.pow.f32(float %fabs.x, float %y) + ret float %pow +} + +define float @v_pow_f32_fabs_rhs(float %x, float %y) { +; 
GFX6-LABEL: v_pow_f32_fabs_rhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1| +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_f32_fabs_rhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1| +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_f32_fabs_rhs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1| +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %fabs.y = call float @llvm.fabs.f32(float %y) + %pow = call float @llvm.pow.f32(float %x, float %fabs.y) + ret float %pow +} + +define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { +; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_log_f32_e64 v0, |v0| +; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1| +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f32_e64 v0, |v0| +; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1| +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f32_e64 v0, |v0| +; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1| +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %fabs.x = call float @llvm.fabs.f32(float %x) + %fabs.y = call float @llvm.fabs.f32(float %y) + %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y) + ret float %pow +} + +define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) { +; GFX6-LABEL: v_pow_f32_sgpr_vgpr: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_log_f32_e32 v1, s0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_pow_f32_sgpr_vgpr: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_log_f32_e32 v1, s0 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_pow_f32_sgpr_vgpr: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_log_f32_e32 v1, s0 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: ; return to shader part epilog + %pow = call float @llvm.pow.f32(float %x, float %y) + ret float %pow +} + +define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { +; GFX6-LABEL: v_pow_f32_vgpr_sgpr: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_pow_f32_vgpr_sgpr: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_log_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_pow_f32_vgpr_sgpr: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_log_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: ; return to shader part epilog + %pow = call float @llvm.pow.f32(float %x, float %y) + ret float %pow 
+} + +define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { +; GFX6-LABEL: v_pow_f32_sgpr_sgpr: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_log_f32_e32 v0, s0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_pow_f32_sgpr_sgpr: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_log_f32_e32 v0, s0 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_pow_f32_sgpr_sgpr: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_log_f32_e32 v0, s0 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: ; return to shader part epilog + %pow = call float @llvm.pow.f32(float %x, float %y) + ret float %pow +} + +define float @v_pow_f32_fneg_lhs(float %x, float %y) { +; GFX6-LABEL: v_pow_f32_fneg_lhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_log_f32_e64 v0, -v0 +; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_f32_fneg_lhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f32_e64 v0, -v0 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_f32_fneg_lhs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f32_e64 v0, -v0 +; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %neg.x = fneg float %x + %pow = call float @llvm.pow.f32(float %neg.x, float %y) + ret float %pow +} + +define float @v_pow_f32_fneg_rhs(float %x, float %y) { +; GFX6-LABEL: v_pow_f32_fneg_rhs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_log_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1 +; GFX6-NEXT: v_exp_f32_e32 v0, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_pow_f32_fneg_rhs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_log_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_pow_f32_fneg_rhs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_log_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1 +; GFX9-NEXT: v_exp_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %neg.y = fneg float %y + %pow = call float @llvm.pow.f32(float %x, float %neg.y) + ret float %pow +} + +declare half @llvm.pow.f16(half, half) +declare float @llvm.pow.f32(float, float) +declare double @llvm.pow.f64(double, double) + +declare half @llvm.fabs.f16(half) +declare float @llvm.fabs.f32(float) + +declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>) +declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir index a03b6e2259b77..5771719886250 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti 
-run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fpow_s32 @@ -7,11 +8,20 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - ; CHECK-LABEL: name: test_fpow_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[FPOW:%[0-9]+]]:_(s32) = G_FPOW [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[FPOW]](s32) + ; GFX6-LABEL: name: test_fpow_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]] + ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[COPY1]](s32) + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]] + ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX9-LABEL: name: test_fpow_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]] + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[COPY1]](s32) + ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]] + ; GFX9: $vgpr0 = COPY [[FEXP2_]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FPOW %0, %1 @@ -24,15 +34,32 @@ body: | bb.0.entry: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; CHECK-LABEL: name: test_fpow_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[FPOW:%[0-9]+]]:_(s32) = G_FPOW [[UV]], [[UV2]] - ; CHECK: [[FPOW1:%[0-9]+]]:_(s32) = G_FPOW [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPOW]](s32), [[FPOW1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-LABEL: name: test_fpow_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]] + ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[UV2]](s32) + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]] + ; GFX6: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]] + ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[UV3]](s32) + ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: test_fpow_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]] + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[UV2]](s32) + ; 
GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]] + ; GFX9: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]] + ; GFX9: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[UV3]](s32) + ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_FPOW %0, %1 @@ -45,18 +72,247 @@ body: | bb.0.entry: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 - ; CHECK-LABEL: name: test_fpow_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[FPOW:%[0-9]+]]:_(s32) = G_FPOW [[UV]], [[UV3]] - ; CHECK: [[FPOW1:%[0-9]+]]:_(s32) = G_FPOW [[UV1]], [[UV4]] - ; CHECK: [[FPOW2:%[0-9]+]]:_(s32) = G_FPOW [[UV2]], [[UV5]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FPOW]](s32), [[FPOW1]](s32), [[FPOW2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-LABEL: name: test_fpow_v3s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]] + ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[UV3]](s32) + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]] + ; GFX6: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]] + ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[UV4]](s32) + ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]] + ; GFX6: [[FLOG2_2:%[0-9]+]]:_(s32) = G_FLOG2 [[UV2]] + ; GFX6: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_2]](s32), [[UV5]](s32) + ; GFX6: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[INT2]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-LABEL: name: test_fpow_v3s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]] + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[UV3]](s32) + ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]] + ; GFX9: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]] + ; GFX9: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[UV4]](s32) + ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]] + ; GFX9: 
[[FLOG2_2:%[0-9]+]]:_(s32) = G_FLOG2 [[UV2]] + ; GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_2]](s32), [[UV5]](s32) + ; GFX9: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[INT2]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_FPOW %0, %1 $vgpr0_vgpr1_vgpr2 = COPY %2 ... + +--- +name: test_fpow_s32_flags +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: test_fpow_s32_flags + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = nnan nsz G_FLOG2 [[COPY]] + ; GFX6: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[COPY1]](s32) + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT]] + ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX9-LABEL: name: test_fpow_s32_flags + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = nnan nsz G_FLOG2 [[COPY]] + ; GFX9: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[COPY1]](s32) + ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT]] + ; GFX9: $vgpr0 = COPY [[FEXP2_]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = nnan nsz G_FPOW %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: test_fpow_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: test_fpow_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]] + ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[FPEXT1]](s32) + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_fpow_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[FLOG2_:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC]] + ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_]](s16) + ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32) + ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_FPOW %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
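+# Hand-written note (not from update_mir_test_checks.py): the v2s16 tests
+# below scalarize first, so each element repeats the same widen-to-f32
+# fmul_legacy sequence checked in test_fpow_s16 above.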
+ +--- +name: test_fpow_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: test_fpow_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]] + ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[FPEXT1]](s32) + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT2]] + ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[FPEXT3]](s32) + ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX9-LABEL: name: test_fpow_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[FLOG2_:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC]] + ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_]](s16) + ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32) + ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC]] + ; GFX9: [[FLOG2_1:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC1]] + ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_1]](s16) + ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX9: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT2]](s32), [[FPEXT3]](s32) + ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC1]] + 
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_FPOW %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: test_fpow_v2s16_flags +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: test_fpow_v2s16_flags + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = nnan nsz G_FLOG2 [[FPEXT]] + ; GFX6: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[FPEXT1]](s32) + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6: [[FLOG2_1:%[0-9]+]]:_(s32) = nnan nsz G_FLOG2 [[FPEXT2]] + ; GFX6: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[FPEXT3]](s32) + ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT1]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX9-LABEL: name: test_fpow_v2s16_flags + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[FLOG2_:%[0-9]+]]:_(s16) = nnan nsz G_FLOG2 [[TRUNC]] + ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[FLOG2_]](s16) + ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[TRUNC2]](s16) + ; GFX9: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32) + ; GFX9: 
[[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) + ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = nnan nsz G_FEXP2 [[FPTRUNC]] + ; GFX9: [[FLOG2_1:%[0-9]+]]:_(s16) = nnan nsz G_FLOG2 [[TRUNC1]] + ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[FLOG2_1]](s16) + ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[TRUNC3]](s16) + ; GFX9: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT2]](s32), [[FPEXT3]](s32) + ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) + ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s16) = nnan nsz G_FEXP2 [[FPTRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = nnan nsz G_FPOW %0, %1 + $vgpr0 = COPY %2 +...
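; Editor's sketch (hand-written, not part of the patch): legalizeFPow expands
; pow(x, y) as exp2(y * log2(x)). At the IR level, the f32 path is equivalent to:
;   %log = call float @llvm.log2.f32(float %x)
;   %mul = call float @llvm.amdgcn.fmul.legacy(float %log, float %y)
;   %pow = call float @llvm.exp2.f32(float %mul)
; fmul_legacy treats 0 * x as 0 even when x is inf or nan, so inputs such as
; pow(1.0, inf), where log2(1.0) == 0, still yield 1.0 rather than nan; the
; f16 path widens to f32 around the multiply because there is no f16
; fmul_legacy.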