diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 6bf710f03bd27..67901329e8410 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1388,6 +1388,14 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) { DefClamp->setImm(1); MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); MI.eraseFromParent(); + + // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac + // instruction, so we might as well convert it to the more flexible VOP3-only + // mad/fma form. + MachineFunction::iterator MBBI = Def->getParent()->getIterator(); + if (MachineInstr *NewMI = TII->convertToThreeAddress(MBBI, *Def, nullptr)) + Def->eraseFromParent(); + return true; } @@ -1526,6 +1534,14 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) { DefOMod->setImm(OMod); MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); MI.eraseFromParent(); + + // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac + // instruction, so we might as well convert it to the more flexible VOP3-only + // mad/fma form. + MachineFunction::iterator MBBI = Def->getParent()->getIterator(); + if (MachineInstr *NewMI = TII->convertToThreeAddress(MBBI, *Def, nullptr)) + Def->eraseFromParent(); + return true; } diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll index 40cc3b2b73989..ec9c22c34899d 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -70,7 +70,7 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: s_setpc_b64 -; CIVI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} +; CIVI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 { %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll index 7240a5c38c7de..6cf373254fe20 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -328,8 +328,7 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s ; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding -; VI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} -; CI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} +; CIVI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}} define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { %src0.hi = extractelement <2 x half> %src0, i32 1 %src1.hi = extractelement <2 x half> %src1, i32 1