diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 979f335d770c5..5f1a4c1b31d07 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -14,6 +14,7 @@ #include "SIMachineFunctionInfo.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" #define DEBUG_TYPE "si-fold-operands" using namespace llvm; @@ -340,6 +341,9 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl &FoldList, // Check if changing this to a v_mad_{f16, f32} instruction will allow us // to fold the operand. MI->setDesc(TII->get(NewOpc)); + if (!AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel) && + AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel)) + MI->addOperand(MachineOperand::CreateImm(0)); bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold); if (FoldAsMAD) { MI->untieRegOperand(OpNo); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0e4f2b02adb4a..c14b8df1f3902 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3464,6 +3464,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, getNamedOperand(MI, AMDGPU::OpName::src2_modifiers); const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); + const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel); if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 && !IsLegacy && @@ -3574,6 +3575,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .add(*Src2) .addImm(Clamp ? Clamp->getImm() : 0) .addImm(Omod ? Omod->getImm() : 0); + if (AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel)) + MIB.addImm(OpSel ? OpSel->getImm() : 0); updateLiveVariables(LV, MI, *MIB); if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *MIB); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 2671cc9e70ad9..6e13074aa38a1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2160,6 +2160,7 @@ class getAsmVOP3P { @@ -2182,7 +2183,7 @@ class getAsmVOP3OpSel _ArgVT, bit _EnableClamp = 0> { field Operand Src1ModSDWA = getSrcModSDWA.ret; + field bit IsMAI = 0; + field bit IsVOP3P = 0; + field bit IsDOT = 0; + field bit IsSingle = 0; + field bit IsWMMA = 0; + field bit HasDst = !ne(DstVT.Value, untyped.Value); field bit HasDst32 = HasDst; field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case @@ -2486,7 +2493,7 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field bit IsPacked = isPackedType.ret; field bit HasOpSel = IsPacked; - field bit HasOMod = !if(HasOpSel, 0, isFloatType.ret); + field bit HasOMod = !if(IsVOP3P, 0, isFloatType.ret); field bit HasSDWAOMod = isFloatType.ret; field bit HasModifiers = !or(isModifierType.ret, @@ -2508,12 +2515,6 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field bit HasExtSDWA9 = HasExtSDWA; field int NeedPatGen = PatGenMode.NoPattern; - field bit IsMAI = 0; - field bit IsVOP3P = 0; - field bit IsDOT = 0; - field bit IsSingle = 0; - field bit IsWMMA = 0; - field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods); @@ -2574,6 +2575,7 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field string AsmVOP3P = getAsmVOP3P.ret; field string AsmVOP3OpSel = getAsmVOP3OpSel.ret; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 1ba50d9402a2b..8d4676e859555 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -424,9 +424,9 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); - let InsVOP3Base = getIns64, 3, + let InsVOP3Base = getInsVOP3Base, 3, 0, HasModifiers, HasModifiers, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod>.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret; // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); let InsVOPDXDeferred = @@ -473,6 +473,9 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v def VOP_MAC_F16 : VOP_MAC ; def VOP_MAC_F16_t16 : VOP_MAC { let IsTrue16 = 1; + let HasOpSel = 1; + let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod, + HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret; let DstRC = VOPDstOperand; let DstRC64 = VOPDstOperand; let Src1RC32 = VGPRSrc_32_Lo128; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index feeef35357e61..72aeb2e129b4c 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -502,7 +502,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile, let HasSrc2 = 0; let HasSrc2Mods = 1; let AsmVOP3OpSel = !subst(", $src2_modifiers", "", - getAsmVOP3OpSel<3, HasClamp, + getAsmVOP3OpSel<3, HasClamp, HasOMod, HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret); let HasExtVOP3DPP = 0; @@ -774,7 +774,7 @@ class VOP3_DOT_Profile : VOP let InsVOP3OpSel = getInsVOP3OpSel.ret; - let AsmVOP3OpSel = getAsmVOP3OpSel.ret; + let AsmVOP3OpSel = getAsmVOP3OpSel.ret; } let SubtargetPredicate = isGFX11Plus in { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir index 42fd589ec74b6..4a1883f827b30 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir @@ -21,7 +21,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %6 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -48,7 +48,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %6 %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/commute-vop3.mir b/llvm/test/CodeGen/AMDGPU/commute-vop3.mir index 271a87cab25e2..bea113e44adc0 100644 --- a/llvm/test/CodeGen/AMDGPU/commute-vop3.mir +++ b/llvm/test/CodeGen/AMDGPU/commute-vop3.mir @@ -17,8 +17,8 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec - ; GFX9-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; GFX9-NEXT: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX9-NEXT: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec @@ -31,8 +31,8 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec - ; GFX10-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec ; GFX10-NEXT: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec @@ -44,8 +44,8 @@ body: | %3:vgpr_32 = V_XOR3_B32_e64 %0, %1, %2, implicit $exec %4:vgpr_32 = V_XOR3_B32_e64 %1, %0, %2, implicit $exec ; Insts with MayRaiseFPException do not get CSE - %5:vgpr_32 = V_MED3_F16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec - %6:vgpr_32 = V_MED3_F16_e64 0, %1, 0, %0, 0, %2, 0, 0, implicit $mode, implicit $exec + %5:vgpr_32 = V_MED3_F16_e64 0, %0, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec + %6:vgpr_32 = V_MED3_F16_e64 0, %1, 0, %0, 0, %2, 0, 0, 0, implicit $mode, implicit $exec %7:vgpr_32 = V_MAX3_I32_e64 %0, %1, %2, implicit $exec %8:vgpr_32 = V_MAX3_I32_e64 %1, %0, %2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll index 4e44a9177de13..b498d876e3762 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -3,7 +3,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX11-DENORM %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't @@ -129,6 +129,7 @@ define amdgpu_kernel void @fmul_x2_xn3_f32(ptr addrspace(1) %out, float %x, floa ; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0 ; VI-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0 ; GFX10-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0 +; GFX11-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0 ; GFX10-FLUSH: v_sub_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 { %x = bitcast i16 %x.arg to half @@ -155,6 +156,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16 ; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, v{{[0-9]+}} ; GFX10-FLUSH-DAG: v_add_f16_e32 [[MAD:v[0-9]+]], s{{[0-9]+}}, [[MUL2]] ; GFX10-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, s{{[0-9]+}} +; GFX11-DENORM-DAG: v_fmac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0 ; GCN-DAG: buffer_store_{{short|b16}} [[MUL2]] ; GCN-DAG: buffer_store_{{short|b16}} [[MAD]] @@ -177,6 +179,7 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f16(ptr addrspace(1) %out, i16 ; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}} ; GFX10-FLUSH-DAG: v_add_f16_e32 [[MAD:v[0-9]+]], s{{[0-9]+}}, [[MUL2]] ; GFX10-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, s{{[0-9]+}} +; GFX11-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, s{{[0-9]+}} ; GCN-DAG: buffer_store_{{short|b16}} [[MUL2]] ; GCN-DAG: buffer_store_{{short|b16}} [[MAD]] @@ -204,7 +207,9 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f16(ptr addrspace(1) %out, i16 ; GFX10-FLUSH: v_add_f16_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[MUL2]] ; GFX10-FLUSH: v_add_f16_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[MUL2]] ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, s{{[0-9]+}} +; GFX11-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, s{{[0-9]+}} ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, s{{[0-9]+}} +; GFX11-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, s{{[0-9]+}} define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 { %x = bitcast i16 %x.arg to half diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir index df75667e986f9..dba62081a4139 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir @@ -17,12 +17,12 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec - ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = COPY %0.sub0 %3 = V_MOV_B32_e32 1078523331, implicit $exec - %4 = V_FMAC_F16_t16_e64 0, killed %2, 0, %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec + %4 = V_FMAC_F16_t16_e64 0, killed %2, 0, %3, 0, killed %1, 0, 0, 0, implicit $mode, implicit $exec ... @@ -42,12 +42,12 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec - ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = COPY %0.sub0 %3 = V_MOV_B32_e32 1078523331, implicit $exec - %4 = V_FMAC_F16_t16_e64 0, %2, 0, killed %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec + %4 = V_FMAC_F16_t16_e64 0, %2, 0, killed %3, 0, killed %1, 0, 0, 0, implicit $mode, implicit $exec ... @@ -67,12 +67,12 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec - ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec %0 = IMPLICIT_DEF %1 = COPY %0.sub0 %2 = COPY %0.sub1 %3 = V_MOV_B32_e32 1078523331, implicit $exec - %4 = V_FMAC_F16_t16_e64 0, killed %1, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec + %4 = V_FMAC_F16_t16_e64 0, killed %1, 0, %2, 0, %3, 0, 0, 0, implicit $mode, implicit $exec ... --- @@ -89,12 +89,12 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec - ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 %0:vgpr_32 = COPY killed $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec - %2:vgpr_32 = V_FMAC_F16_t16_e64 0, 16384, 0, killed %0, 0, %1, 0, 0, implicit $mode, implicit $exec + %2:vgpr_32 = V_FMAC_F16_t16_e64 0, 16384, 0, killed %0, 0, %1, 0, 0, 0, implicit $mode, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/omod.ll b/llvm/test/CodeGen/AMDGPU/omod.ll index fcc3d6ec73697..daae5a7e98e03 100644 --- a/llvm/test/CodeGen/AMDGPU/omod.ll +++ b/llvm/test/CodeGen/AMDGPU/omod.ll @@ -388,6 +388,33 @@ define amdgpu_ps void @v_omod_mul2_f32(float %a) #0 { ret void } +define amdgpu_ps void @v_omod_mul2_med3(float %x, float %y, float %z) #0 { +; SI-LABEL: v_omod_mul2_med3: +; SI: ; %bb.0: +; SI-NEXT: v_med3_f32 v0, v0, v1, v2 mul:2 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: v_omod_mul2_med3: +; VI: ; %bb.0: +; VI-NEXT: v_med3_f32 v0, v0, v1, v2 mul:2 +; VI-NEXT: flat_store_dword v[0:1], v0 +; VI-NEXT: s_endpgm +; +; GFX11-LABEL: v_omod_mul2_med3: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_med3_f32 v0, v0, v1, v2 mul:2 +; GFX11-NEXT: global_store_b32 v[0:1], v0, off +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm + %fmed3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z) + %div2 = fmul float %fmed3, 2.0 + store float %div2, float addrspace(1)* undef + ret void +} + define amdgpu_ps void @v_omod_mul2_f64(double %a) #5 { ; SI-LABEL: v_omod_mul2_f64: ; SI: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir index 0a30d60ae3124..825e4d8e2b31b 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir @@ -258,7 +258,7 @@ body: | ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... @@ -278,7 +278,7 @@ body: | ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... @@ -298,7 +298,7 @@ body: | ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... @@ -318,6 +318,6 @@ body: | ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 991ef34807e85..d236874de5669 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -1675,6 +1675,9 @@ v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp // GFX11: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 +// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] + v_div_fixup_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] @@ -2248,6 +2251,9 @@ v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp // GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 +// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + v_fma_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] @@ -3208,6 +3214,9 @@ v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp // GFX11: encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_max3_f16 v5, v255, s2, s105 mul:2 +// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] + v_max3_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] @@ -3874,6 +3883,9 @@ v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp // GFX11: encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 +// GFX11: encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] + v_med3_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] @@ -4144,6 +4156,9 @@ v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp // GFX11: encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 +// GFX11: encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] + v_min3_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s index d8fc1e8fce4d3..fab4042e70cdf 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s @@ -9282,6 +9282,9 @@ v_min3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] v_min3_f16 v5, v1, v2, v3 clamp // CHECK: [0x05,0x80,0xf4,0xd1,0x01,0x05,0x0e,0x04] +v_min3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 +// CHECK: [0x05,0x78,0xf4,0xd1,0x01,0x05,0x0e,0x0c] + v_min3_i16 v5, v1, v2, v3 // CHECK: [0x05,0x00,0xf5,0xd1,0x01,0x05,0x0e,0x04] @@ -9840,6 +9843,9 @@ v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] v_max3_f16 v5, v1, v2, v3 clamp // CHECK: [0x05,0x80,0xf7,0xd1,0x01,0x05,0x0e,0x04] +v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 +// CHECK: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x0c] + v_max3_i16 v5, v1, v2, v3 // CHECK: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04] @@ -10398,6 +10404,9 @@ v_med3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] v_med3_f16 v5, v1, v2, v3 clamp // CHECK: [0x05,0x80,0xfa,0xd1,0x01,0x05,0x0e,0x04] +v_med3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 +// CHECK: [0x05,0x78,0xfa,0xd1,0x01,0x05,0x0e,0x0c] + v_med3_i16 v5, v1, v2, v3 // CHECK: [0x05,0x00,0xfb,0xd1,0x01,0x05,0x0e,0x04] @@ -11982,6 +11991,9 @@ v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] v_mad_f16 v5, v1, v2, v3 clamp // CHECK: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] +v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:4 +// CHECK: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x14] + v_mad_u16 v5, v1, v2, v3 // CHECK: [0x05,0x00,0x04,0xd2,0x01,0x05,0x0e,0x04] @@ -12546,6 +12558,9 @@ v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] v_fma_f16 v5, v1, v2, v3 clamp // CHECK: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] +v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] clamp mul:2 +// CHECK: [0x05,0xf8,0x06,0xd2,0x01,0x05,0x0e,0x0c] + v_div_fixup_f16 v5, v1, v2, v3 // CHECK: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0x04] @@ -12759,6 +12774,9 @@ v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // CHECK: [0x05,0x78,0x07,0xd2,0x01,0x05,0x0e,0x04] +v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 +// CHECK: [0x05,0x10,0x07,0xd2,0xf0,0xf8,0xc0,0x4b] + v_div_fixup_f16 v5, v1, v2, v3 clamp // CHECK: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index afbda0e321aa1..1db00580d789c 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -1479,6 +1479,9 @@ # GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# CHECK: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b + # GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00 @@ -1920,6 +1923,9 @@ # GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# CHECK: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 + # GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00 @@ -2803,6 +2809,9 @@ # GFX11: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# CHECK: v_max3_f16 v5, v255, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] +0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09 + # GFX11: v_max3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00 @@ -3469,6 +3478,9 @@ # GFX11: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# CHECK: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] +0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b + # GFX11: v_med3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00 @@ -3739,6 +3751,9 @@ # GFX11: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# CHECK: v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] +0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11 + # GFX11: v_min3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt index af8e0df1dee14..e3ed9778f6fb4 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt @@ -16230,6 +16230,9 @@ # CHECK: v_min3_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xf4,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x80,0xf4,0xd1,0x01,0x05,0x0e,0x04 +# CHECK: v_min3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 ; encoding: [0x05,0x78,0xf4,0xd1,0x01,0x05,0x0e,0x0c] +0x05,0x78,0xf4,0xd1,0x01,0x05,0x0e,0x0c + # CHECK: v_min3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf5,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x00,0xf5,0xd1,0x01,0x05,0x0e,0x04 @@ -16698,6 +16701,9 @@ # CHECK: v_max3_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xf7,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x80,0xf7,0xd1,0x01,0x05,0x0e,0x04 +# CHECK: v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 ; encoding: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x0c] +0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x0c + # CHECK: v_max3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04 @@ -17166,6 +17172,9 @@ # CHECK: v_med3_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xfa,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x80,0xfa,0xd1,0x01,0x05,0x0e,0x04 +# CHECK: v_med3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 ; encoding: [0x05,0x78,0xfa,0xd1,0x01,0x05,0x0e,0x0c] +0x05,0x78,0xfa,0xd1,0x01,0x05,0x0e,0x0c + # CHECK: v_med3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xfb,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x00,0xfb,0xd1,0x01,0x05,0x0e,0x04 @@ -19074,6 +19083,9 @@ # CHECK: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04 +# CHECK: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x07,0xd2,0xf0,0xf8,0xc0,0x4b] +0x05,0x10,0x07,0xd2,0xf0,0xf8,0xc0,0x4b + # CHECK: v_interp_p1ll_f16 v5, v2, attr0.x ; encoding: [0x05,0x00,0x74,0xd2,0x00,0x04,0x02,0x00] 0x05,0x00,0x74,0xd2,0x00,0x04,0x02,0x00 @@ -22206,6 +22218,9 @@ # CHECK: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04 +# CHECK: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] clamp mul:2 ; encoding: [0x05,0xf8,0x06,0xd2,0x01,0x05,0x0e,0x0c] +0x05,0xf8,0x06,0xd2,0x01,0x05,0x0e,0x0c + # CHECK: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04