diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index d48916670112c..69dc78d33c838 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -29,6 +29,8 @@ static bool fnegFoldsIntoMI(const MachineInstr &MI) { case AMDGPU::G_FMAXNUM: case AMDGPU::G_FMINNUM_IEEE: case AMDGPU::G_FMAXNUM_IEEE: + case AMDGPU::G_FMINIMUM: + case AMDGPU::G_FMAXIMUM: case AMDGPU::G_FSIN: case AMDGPU::G_FPEXT: case AMDGPU::G_INTRINSIC_TRUNC: @@ -174,6 +176,10 @@ static unsigned inverseMinMax(unsigned Opc) { return AMDGPU::G_FMINNUM_IEEE; case AMDGPU::G_FMINNUM_IEEE: return AMDGPU::G_FMAXNUM_IEEE; + case AMDGPU::G_FMAXIMUM: + return AMDGPU::G_FMINIMUM; + case AMDGPU::G_FMINIMUM: + return AMDGPU::G_FMAXIMUM; case AMDGPU::G_AMDGPU_FMAX_LEGACY: return AMDGPU::G_AMDGPU_FMIN_LEGACY; case AMDGPU::G_AMDGPU_FMIN_LEGACY: @@ -207,6 +213,8 @@ bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI, case AMDGPU::G_FMAXNUM: case AMDGPU::G_FMINNUM_IEEE: case AMDGPU::G_FMAXNUM_IEEE: + case AMDGPU::G_FMINIMUM: + case AMDGPU::G_FMAXIMUM: case AMDGPU::G_AMDGPU_FMIN_LEGACY: case AMDGPU::G_AMDGPU_FMAX_LEGACY: // 0 doesn't have a negated inline immediate. 
@@ -304,6 +312,8 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI, case AMDGPU::G_FMAXNUM: case AMDGPU::G_FMINNUM_IEEE: case AMDGPU::G_FMAXNUM_IEEE: + case AMDGPU::G_FMINIMUM: + case AMDGPU::G_FMAXIMUM: case AMDGPU::G_AMDGPU_FMIN_LEGACY: case AMDGPU::G_AMDGPU_FMAX_LEGACY: { NegateOperand(MatchInfo->getOperand(1)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index fcbdf51b03c1f..9d7443012e3da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -585,6 +585,8 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) { case ISD::FMAXNUM: case ISD::FMINNUM_IEEE: case ISD::FMAXNUM_IEEE: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: case ISD::SELECT: case ISD::FSIN: case ISD::FTRUNC: @@ -4572,6 +4574,10 @@ static unsigned inverseMinMax(unsigned Opc) { return ISD::FMINNUM_IEEE; case ISD::FMINNUM_IEEE: return ISD::FMAXNUM_IEEE; + case ISD::FMAXIMUM: + return ISD::FMINIMUM; + case ISD::FMINIMUM: + return ISD::FMAXIMUM; case AMDGPUISD::FMAX_LEGACY: return AMDGPUISD::FMIN_LEGACY; case AMDGPUISD::FMIN_LEGACY: @@ -4695,6 +4701,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, case ISD::FMINNUM: case ISD::FMAXNUM_IEEE: case ISD::FMINNUM_IEEE: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: case AMDGPUISD::FMAX_LEGACY: case AMDGPUISD::FMIN_LEGACY: { // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y) @@ -5305,6 +5313,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FMED3) NODE_NAME_CASE(SMED3) NODE_NAME_CASE(UMED3) + NODE_NAME_CASE(FMAXIMUM3) + NODE_NAME_CASE(FMINIMUM3) NODE_NAME_CASE(FDOT2) NODE_NAME_CASE(URECIP) NODE_NAME_CASE(DIV_SCALE) @@ -5759,6 +5769,8 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, case AMDGPUISD::FMED3: case AMDGPUISD::FMIN3: case AMDGPUISD::FMAX3: + case AMDGPUISD::FMINIMUM3: + case AMDGPUISD::FMAXIMUM3: case AMDGPUISD::FMAD_FTZ: { if (SNaN) 
return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 6841067e31b3b..827fb106b5519 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -449,6 +449,8 @@ enum NodeType : unsigned { FMED3, SMED3, UMED3, + FMAXIMUM3, + FMINIMUM3, FDOT2, URECIP, DIV_SCALE, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index fd38739876c4d..82f58ea38fd0a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -170,6 +170,11 @@ def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp, [/*SDNPCommutative, SDNPAssociative*/] >; +// out = max(a, b, c) a, b and c are floats. Operation is IEEE 754-2019 compliant. +def AMDGPUfmaximum3 : SDNode<"AMDGPUISD::FMAXIMUM3", SDTFPTernaryOp, + [/*SDNPCommutative, SDNPAssociative*/] +>; + // out = max(a, b, c) a, b, and c are signed ints def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp, [/*SDNPCommutative, SDNPAssociative*/] @@ -185,6 +190,11 @@ def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp, [/*SDNPCommutative, SDNPAssociative*/] >; +// out = min(a, b, c) a, b and c are floats. Operation is IEEE 754-2019 compliant. 
+def AMDGPUfminimum3 : SDNode<"AMDGPUISD::FMINIMUM3", SDTFPTernaryOp, + [/*SDNPCommutative, SDNPAssociative*/] +>; + // out = min(a, b, c) a, b and c are signed ints def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp, [/*SDNPCommutative, SDNPAssociative*/] diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index d2d09a0b1fc54..121026aca6035 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -252,6 +252,8 @@ def umin_oneuse : HasOneUseBinOp; def fminnum_oneuse : HasOneUseBinOp; def fmaxnum_oneuse : HasOneUseBinOp; +def fminimum_oneuse : HasOneUseBinOp; +def fmaximum_oneuse : HasOneUseBinOp; def fminnum_ieee_oneuse : HasOneUseBinOp; def fmaxnum_ieee_oneuse : HasOneUseBinOp; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 2cf804e3348e8..489b4f5a8d86a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1959,20 +1959,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .widenScalarToNextPow2(0) .scalarize(0); - getActionDefinitionsBuilder({ - // TODO: Verify V_BFI_B32 is generated from expanded bit ops - G_FCOPYSIGN, + getActionDefinitionsBuilder( + {// TODO: Verify V_BFI_B32 is generated from expanded bit ops + G_FCOPYSIGN, - G_ATOMIC_CMPXCHG_WITH_SUCCESS, - G_ATOMICRMW_NAND, - G_ATOMICRMW_FSUB, - G_READ_REGISTER, - G_WRITE_REGISTER, + G_ATOMIC_CMPXCHG_WITH_SUCCESS, G_ATOMICRMW_NAND, G_ATOMICRMW_FSUB, + G_READ_REGISTER, G_WRITE_REGISTER, - G_SADDO, G_SSUBO, + G_SADDO, G_SSUBO}) + .lower(); - // TODO: Implement - G_FMINIMUM, G_FMAXIMUM}).lower(); + if (ST.hasIEEEMinMax()) { + getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}) + .legalFor(FPTypesPK16) + .clampMaxNumElements(0, S16, 2) + .scalarize(0); + } else { + // TODO: Implement + getActionDefinitionsBuilder({G_FMINIMUM, 
G_FMAXIMUM}).lower(); + } getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET}) .lower(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 62996a3b3fb79..df8e0c9400678 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3727,14 +3727,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_INTRINSIC_ROUNDEVEN: case AMDGPU::G_FMINNUM: case AMDGPU::G_FMAXNUM: + case AMDGPU::G_FMINIMUM: + case AMDGPU::G_FMAXIMUM: case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_STRICT_FADD: case AMDGPU::G_STRICT_FSUB: case AMDGPU::G_STRICT_FMUL: case AMDGPU::G_STRICT_FMA: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if (Subtarget.hasSALUFloatInsts() && (Size == 32 || Size == 16) && - isSALUMapping(MI)) + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = Ty.getSizeInBits(); + if (Subtarget.hasSALUFloatInsts() && Ty.isScalar() && + (Size == 32 || Size == 16) && isSALUMapping(MI)) return getDefaultMappingSOP(MI); return getDefaultMappingVOP(MI); } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 2b74a6f34708a..ac864325230f8 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1218,6 +1218,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // \returns true if the target has IEEE kernel descriptor mode bit bool hasIEEEMode() const { return getGeneration() < GFX12; } + // \returns true if the target has IEEE fminimum/fmaximum instructions + bool hasIEEEMinMax() const { return getGeneration() >= GFX12; } + // \returns true if the target has WG_RR_MODE kernel descriptor mode bit bool hasRrWGMode() const { return getGeneration() >= GFX12; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 
85cc3cfec19cd..f37f2d1a4b688 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -763,6 +763,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, if (Subtarget->hasMad64_32()) setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom); + if (Subtarget->hasIEEEMinMax()) + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, + {MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, {MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16, MVT::v2i16, MVT::v2f16, MVT::i128}, @@ -800,6 +804,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, ISD::FMAXNUM, ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, + ISD::FMINIMUM, + ISD::FMAXIMUM, ISD::FMA, ISD::SMIN, ISD::SMAX, @@ -11786,10 +11792,14 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op, case ISD::FMAXNUM: case ISD::FMINNUM_IEEE: case ISD::FMAXNUM_IEEE: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: case AMDGPUISD::CLAMP: case AMDGPUISD::FMED3: case AMDGPUISD::FMAX3: - case AMDGPUISD::FMIN3: { + case AMDGPUISD::FMIN3: + case AMDGPUISD::FMAXIMUM3: + case AMDGPUISD::FMINIMUM3: { // FIXME: Shouldn't treat the generic operations different based these. // However, we aren't really required to flush the result from // minnum/maxnum.. 
@@ -11943,7 +11953,9 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF, case AMDGPU::G_FMINNUM: case AMDGPU::G_FMAXNUM: case AMDGPU::G_FMINNUM_IEEE: - case AMDGPU::G_FMAXNUM_IEEE: { + case AMDGPU::G_FMAXNUM_IEEE: + case AMDGPU::G_FMINIMUM: + case AMDGPU::G_FMAXIMUM: { if (Subtarget->supportsMinMaxDenormModes() || // FIXME: denormalsEnabledForType is broken for dynamic denormalsEnabledForType(MRI.getType(Reg), MF)) @@ -12131,6 +12143,8 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { case ISD::FMAXNUM: case ISD::FMAXNUM_IEEE: return AMDGPUISD::FMAX3; + case ISD::FMAXIMUM: + return AMDGPUISD::FMAXIMUM3; case ISD::SMAX: return AMDGPUISD::SMAX3; case ISD::UMAX: @@ -12138,6 +12152,8 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { case ISD::FMINNUM: case ISD::FMINNUM_IEEE: return AMDGPUISD::FMIN3; + case ISD::FMINIMUM: + return AMDGPUISD::FMINIMUM3; case ISD::SMIN: return AMDGPUISD::SMIN3; case ISD::UMIN: @@ -12497,7 +12513,9 @@ SDValue SITargetLowering::performExtractVectorEltCombine( case ISD::FMAXNUM: case ISD::FMINNUM: case ISD::FMAXNUM_IEEE: - case ISD::FMINNUM_IEEE: { + case ISD::FMINNUM_IEEE: + case ISD::FMAXIMUM: + case ISD::FMINIMUM: { SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec.getOperand(0), Idx); SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, @@ -13759,6 +13777,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case ISD::FMINNUM: case ISD::FMAXNUM_IEEE: case ISD::FMINNUM_IEEE: + case ISD::FMAXIMUM: + case ISD::FMINIMUM: case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d4e4526795f3b..8a226a321af09 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5255,11 +5255,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64; case AMDGPU::S_MIN_F32: return 
AMDGPU::V_MIN_F32_e64; case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64; + case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64; + case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64; case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64; case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64; case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64; case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64; case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64; + case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64; + case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64; case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64; case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64; case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64; @@ -7101,6 +7105,26 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, Inst.eraseFromParent(); return; } + case AMDGPU::S_MINIMUM_F32: + case AMDGPU::S_MAXIMUM_F32: + case AMDGPU::S_MINIMUM_F16: + case AMDGPU::S_MAXIMUM_F16: { + const DebugLoc &DL = Inst.getDebugLoc(); + Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst) + .addImm(0) // src0_modifiers + .add(Inst.getOperand(1)) + .addImm(0) // src1_modifiers + .add(Inst.getOperand(2)) + .addImm(0) // clamp + .addImm(0); // omod + MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst); + + legalizeOperands(*NewInstr, MDT); + addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist); + Inst.eraseFromParent(); + return; + } } if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 9362fe5d9678b..f9bc623abcd04 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3441,6 +3441,12 @@ defm : Int16Med3Pat; defm : Int16Med3Pat; } // End Predicates = [isGFX9Plus] +let 
OtherPredicates = [isGFX12Plus] in { +def : FPMinMaxPat, fminimum_oneuse>; +def : FPMinMaxPat, fmaximum_oneuse>; +def : FPMinMaxPat, fminimum_oneuse>; +def : FPMinMaxPat, fmaximum_oneuse>; +} // Convert a floating-point power of 2 to the integer exponent. def FPPow2ToExponentXForm : SDNodeXForm; + def S_MAXIMUM_F32 : SOP2_F32_Inst<"s_maximum_f32", fmaximum>; + def S_MINIMUM_F16 : SOP2_F16_Inst<"s_minimum_f16", fminimum>; + def S_MAXIMUM_F16 : SOP2_F16_Inst<"s_maximum_f16", fmaximum>; +} + //===----------------------------------------------------------------------===// // SOPK Instructions //===----------------------------------------------------------------------===// @@ -2017,6 +2026,10 @@ defm S_MIN_NUM_F32 : SOP2_Real_Renamed_gfx12<0x042, S_MIN_F32, "s_min_num_f32">; defm S_MAX_NUM_F32 : SOP2_Real_Renamed_gfx12<0x043, S_MAX_F32, "s_max_num_f32">; defm S_MIN_NUM_F16 : SOP2_Real_Renamed_gfx12<0x04b, S_MIN_F16, "s_min_num_f16">; defm S_MAX_NUM_F16 : SOP2_Real_Renamed_gfx12<0x04c, S_MAX_F16, "s_max_num_f16">; +defm S_MINIMUM_F32 : SOP2_Real_gfx12<0x04f>; +defm S_MAXIMUM_F32 : SOP2_Real_gfx12<0x050>; +defm S_MINIMUM_F16 : SOP2_Real_gfx12<0x051>; +defm S_MAXIMUM_F16 : SOP2_Real_gfx12<0x052>; defm S_ADD_CO_U32 : SOP2_Real_Renamed_gfx12<0x000, S_ADD_U32, "s_add_co_u32">; defm S_SUB_CO_U32 : SOP2_Real_Renamed_gfx12<0x001, S_SUB_U32, "s_sub_co_u32">; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 617773b34ae98..685c9ac6a2be4 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -161,6 +161,19 @@ defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF, mulhu defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF>; defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF, mulhs>; } // End SchedRW = [WriteIntMul] + +let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { +defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile, DivergentBinFrag>; 
+defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile, DivergentBinFrag>; +defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile, DivergentBinFrag>; +defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile, DivergentBinFrag>; + +let SchedRW = [WriteDoubleAdd] in { +defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile, fminimum>; +defm V_MAXIMUM_F64 : VOP3Inst <"v_maximum_f64", VOP3_Profile, fmaximum>; +} // End SchedRW = [WriteDoubleAdd] +} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 + } // End isReMaterializable = 1 let Uses = [MODE, VCC, EXEC] in { @@ -211,6 +224,11 @@ let mayRaiseFPException = 0 in { defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile, AMDGPUfmed3>; } // End mayRaiseFPException = 0 +let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { + defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile, AMDGPUfminimum3>; + defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile, AMDGPUfmaximum3>; +} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 + let isCommutable = 1 in { defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile>; defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile>; @@ -555,6 +573,11 @@ defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile, AMDGPUsmax3>; defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile, AMDGPUumax3>; +let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { + defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile, AMDGPUfminimum3>; + defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile, AMDGPUfmaximum3>; +} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 + defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile>; defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile>; @@ -805,6 +828,13 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile>; } // End SubtargetPredicate = isGFX11Plus +let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { + defm 
V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile>; + defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile>; + defm V_MAXIMUMMINIMUM_F16 : VOP3Inst<"v_maximumminimum_f16", VOP3_Profile>; + defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile>; +} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 + let SubtargetPredicate = HasDot9Insts, IsDOT=1 in { defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile, int_amdgcn_fdot2_f16_f16>; defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile, int_amdgcn_fdot2_bf16_bf16>; @@ -862,14 +892,28 @@ defm V_MIN3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x229, "V_MIN3_F32", defm V_MAX3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x22a, "V_MAX3_F32", "v_max3_num_f32">; defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16", "v_min3_num_f16">; defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">; +defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>; +defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>; +defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>; +defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>; defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">; defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">; defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">; defm V_MAXMIN_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">; defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16", "v_minmax_num_f16">; defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">; +defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>; +defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>; +defm V_MINIMUMMAXIMUM_F16 : 
VOP3Only_Realtriple_t16_gfx12<0x26e>; +defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>; defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">; defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">; +defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>; +defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>; +defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>; +defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>; +defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>; +defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>; //===----------------------------------------------------------------------===// // GFX11, GFX12 diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index eb8990238268d..d3cefb339d9e7 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -108,6 +108,11 @@ defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3P_Profile, umin>; defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile, smax>; defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile, umax>; + +let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { +defm V_PK_MAXIMUM_F16 : VOP3PInst<"v_pk_maximum_f16", VOP3P_Profile, fmaximum>; +defm V_PK_MINIMUM_F16 : VOP3PInst<"v_pk_minimum_f16", VOP3P_Profile, fminimum>; +} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 } defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile>; @@ -1003,6 +1008,9 @@ multiclass VOP3P_Real_with_name_gfx12 op, defm V_PK_MIN_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1b, "V_PK_MIN_F16", "v_pk_min_num_f16">; defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_max_num_f16">; +defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>; +defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>; + //===----------------------------------------------------------------------===// // GFX11 
//===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll new file mode 100644 index 0000000000000..4fe2a4ad2a2fb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll @@ -0,0 +1,311 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL %s + +define amdgpu_ps float @test_fmaximum_f32_vv(float %a, float %b) { +; GCN-LABEL: test_fmaximum_f32_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f32 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %val = call float @llvm.maximum.f32(float %a, float %b) + ret float %val +} + +define amdgpu_ps float @test_fmaximum_f32_ss(float inreg %a, float inreg %b) { +; GCN-LABEL: test_fmaximum_f32_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_maximum_f32 s0, s0, s1 +; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog + %val = call float @llvm.maximum.f32(float %a, float %b) + ret float %val +} + +define amdgpu_ps float @test_fmaximum_f32_vs(float %a, float inreg %b) { +; GCN-LABEL: test_fmaximum_f32_vs: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f32 v0, v0, s0 +; GCN-NEXT: ; return to shader part epilog + %val = call float @llvm.maximum.f32(float %a, float %b) + ret float %val +} + +define amdgpu_ps float @test_fmaximum_nnan_f32(float %a, float %b) { +; GCN-LABEL: test_fmaximum_nnan_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f32 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %val = call nnan float @llvm.maximum.f32(float %a, float %b) + ret float %val +} + +define amdgpu_ps <2 x float> @test_fmaximum_v2f32(<2 x float> %a, <2 x float> %b) { +; GCN-LABEL: 
test_fmaximum_v2f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f32 v0, v0, v2 +; GCN-NEXT: v_maximum_f32 v1, v1, v3 +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) + ret <2 x float> %val +} + +define amdgpu_ps <2 x float> @test_fmaximum_v2f32_ss(<2 x float> inreg %a, <2 x float> inreg %b) { +; GCN-LABEL: test_fmaximum_v2f32_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_maximum_f32 s0, s0, s2 +; GCN-NEXT: s_maximum_f32 s1, s1, s3 +; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) + ret <2 x float> %val +} + +define amdgpu_ps <3 x float> @test_fmaximum_v3f32(<3 x float> %a, <3 x float> %b) { +; GCN-LABEL: test_fmaximum_v3f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f32 v0, v0, v3 +; GCN-NEXT: v_maximum_f32 v1, v1, v4 +; GCN-NEXT: v_maximum_f32 v2, v2, v5 +; GCN-NEXT: ; return to shader part epilog + %val = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) + ret <3 x float> %val +} + +define amdgpu_ps <4 x float> @test_fmaximum_v4f32(<4 x float> %a, <4 x float> %b) { +; GCN-LABEL: test_fmaximum_v4f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f32 v0, v0, v4 +; GCN-NEXT: v_maximum_f32 v1, v1, v5 +; GCN-NEXT: v_maximum_f32 v2, v2, v6 +; GCN-NEXT: v_maximum_f32 v3, v3, v7 +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %val +} + +define amdgpu_ps <16 x float> @test_fmaximum_v16f32(<16 x float> %a, <16 x float> %b) { +; GCN-LABEL: test_fmaximum_v16f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f32 v0, v0, v16 +; GCN-NEXT: v_maximum_f32 v1, v1, v17 +; GCN-NEXT: v_maximum_f32 v2, v2, v18 +; GCN-NEXT: v_maximum_f32 v3, v3, v19 +; GCN-NEXT: v_maximum_f32 v4, v4, v20 +; GCN-NEXT: v_maximum_f32 v5, v5, v21 +; GCN-NEXT: v_maximum_f32 v6, 
v6, v22 +; GCN-NEXT: v_maximum_f32 v7, v7, v23 +; GCN-NEXT: v_maximum_f32 v8, v8, v24 +; GCN-NEXT: v_maximum_f32 v9, v9, v25 +; GCN-NEXT: v_maximum_f32 v10, v10, v26 +; GCN-NEXT: v_maximum_f32 v11, v11, v27 +; GCN-NEXT: v_maximum_f32 v12, v12, v28 +; GCN-NEXT: v_maximum_f32 v13, v13, v29 +; GCN-NEXT: v_maximum_f32 v14, v14, v30 +; GCN-NEXT: v_maximum_f32 v15, v15, v31 +; GCN-NEXT: ; return to shader part epilog + %val = call <16 x float> @llvm.maximum.v16f32(<16 x float> %a, <16 x float> %b) + ret <16 x float> %val +} + +define amdgpu_ps half @test_fmaximum_f16_vv(half %a, half %b) { +; GCN-LABEL: test_fmaximum_f16_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f16 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %val = call half @llvm.maximum.f16(half %a, half %b) + ret half %val +} + +define amdgpu_ps half @test_fmaximum_f16_ss(half inreg %a, half inreg %b) { +; GCN-LABEL: test_fmaximum_f16_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_maximum_f16 s0, s0, s1 +; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog + %val = call half @llvm.maximum.f16(half %a, half %b) + ret half %val +} + +define amdgpu_ps <2 x half> @test_fmaximum_v2f16_vv(<2 x half> %a, <2 x half> %b) { +; GCN-LABEL: test_fmaximum_v2f16_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %val +} + +define amdgpu_ps <2 x half> @test_fmaximum_v2f16_ss(<2 x half> inreg %a, <2 x half> inreg %b) { +; GCN-LABEL: test_fmaximum_v2f16_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_maximum_f16 v0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %val +} + +define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b) { +; GCN-LABEL: test_fmaximum_v3f16_vv: +; GCN: ; %bb.0: +; GCN-NEXT: 
v_pk_maximum_f16 v0, v0, v2 +; GCN-NEXT: v_maximum_f16 v1, v1, v3 +; GCN-NEXT: ; return to shader part epilog + %val = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b) + ret <3 x half> %val +} + +define amdgpu_ps <3 x half> @test_fmaximum_v3f16_ss(<3 x half> inreg %a, <3 x half> inreg %b) { +; GCN-LABEL: test_fmaximum_v3f16_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_maximum_f16 v0, s0, s2 +; GCN-NEXT: s_maximum_f16 s0, s1, s3 +; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: ; return to shader part epilog + %val = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b) + ret <3 x half> %val +} + +define amdgpu_ps <4 x half> @test_fmaximum_v4f16(<4 x half> %a, <4 x half> %b) { +; GCN-LABEL: test_fmaximum_v4f16: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GCN-NEXT: v_pk_maximum_f16 v1, v1, v3 +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %val +} + +define amdgpu_ps <4 x half> @test_fmaximum_v4f16_ss(<4 x half> inreg %a, <4 x half> inreg %b) { +; GCN-LABEL: test_fmaximum_v4f16_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_maximum_f16 v0, s0, s2 +; GCN-NEXT: v_pk_maximum_f16 v1, s1, s3 +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %val +} + +define amdgpu_ps <2 x float> @test_fmaximum_f64_vv(double %a, double %b) { +; GCN-LABEL: test_fmaximum_f64_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GCN-NEXT: ; return to shader part epilog + %val = call double @llvm.maximum.f64(double %a, double %b) + %ret = bitcast double %val to <2 x float> + ret <2 x float> %ret +} + +define amdgpu_ps <2 x float> @test_fmaximum_f64_ss(double inreg %a, double inreg %b) { +; GCN-LABEL: test_fmaximum_f64_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3] +; GCN-NEXT: ; return to shader part 
epilog + %val = call double @llvm.maximum.f64(double %a, double %b) + %ret = bitcast double %val to <2 x float> + ret <2 x float> %ret +} + +define amdgpu_ps <4 x float> @test_fmaximum_v2f64_ss(<2 x double> inreg %a, <2 x double> inreg %b) { +; GCN-LABEL: test_fmaximum_v2f64_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5] +; GCN-NEXT: v_maximum_f64 v[2:3], s[2:3], s[6:7] +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) + %ret = bitcast <2 x double> %val to <4 x float> + ret <4 x float> %ret +} + +define amdgpu_ps <8 x float> @test_fmaximum_v4f64(<4 x double> %a, <4 x double> %b) { +; GCN-LABEL: test_fmaximum_v4f64: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9] +; GCN-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11] +; GCN-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13] +; GCN-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15] +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x double> @llvm.maximum.v4f64(<4 x double> %a, <4 x double> %b) + %ret = bitcast <4 x double> %val to <8 x float> + ret <8 x float> %ret +} + +define amdgpu_ps <8 x float> @test_fmaximum_v4f64_ss(<4 x double> inreg %a, <4 x double> inreg %b) { +; GCN-LABEL: test_fmaximum_v4f64_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9] +; GCN-NEXT: v_maximum_f64 v[2:3], s[2:3], s[10:11] +; GCN-NEXT: v_maximum_f64 v[4:5], s[4:5], s[12:13] +; GCN-NEXT: v_maximum_f64 v[6:7], s[6:7], s[14:15] +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x double> @llvm.maximum.v4f64(<4 x double> %a, <4 x double> %b) + %ret = bitcast <4 x double> %val to <8 x float> + ret <8 x float> %ret +} + +define amdgpu_kernel void @fmaximum_f32_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) { +; GCN-LABEL: fmaximum_f32_move_to_valu: +; GCN: ; %bb.0: +; GCN-NEXT: s_clause 0x1 +; GCN-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 +; GCN-NEXT: s_load_b64 
s[0:1], s[0:1], 0x34 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: global_load_b32 v1, v0, s[6:7] th:TH_LOAD_RT_NT +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: global_load_b32 v2, v0, s[0:1] th:TH_LOAD_RT_NT +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_maximum_f32 v1, v1, v2 +; GCN-NEXT: global_store_b32 v0, v1, s[4:5] +; GCN-NEXT: s_nop 0 +; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GCN-NEXT: s_endpgm + %a = load volatile float, ptr addrspace(1) %aptr, align 4 + %b = load volatile float, ptr addrspace(1) %bptr, align 4 + %v = call float @llvm.maximum.f32(float %a, float %b) + store float %v, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) { +; GCN-LABEL: fmaximum_f16_move_to_valu: +; GCN: ; %bb.0: +; GCN-NEXT: s_clause 0x1 +; GCN-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 +; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: global_load_u16 v1, v0, s[6:7] th:TH_LOAD_RT_NT +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: global_load_u16 v2, v0, s[0:1] th:TH_LOAD_RT_NT +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_maximum_f16 v1, v1, v2 +; GCN-NEXT: global_store_b16 v0, v1, s[4:5] +; GCN-NEXT: s_nop 0 +; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GCN-NEXT: s_endpgm + %a = load volatile half, ptr addrspace(1) %aptr, align 4 + %b = load volatile half, ptr addrspace(1) %bptr, align 4 + %v = call half @llvm.maximum.f16(half %a, half %b) + store half %v, ptr addrspace(1) %out, align 4 + ret void +} + +declare float @llvm.maximum.f32(float, float) +declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>) +declare <3 x float> @llvm.maximum.v3f32(<3 x float>, <3 x float>) +declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) +declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>) +declare half 
@llvm.maximum.f16(half, half) +declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>) +declare <3 x half> @llvm.maximum.v3f16(<3 x half>, <3 x half>) +declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>) +declare double @llvm.maximum.f64(double, double) +declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX12-GISEL: {{.*}} +; GFX12-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll new file mode 100644 index 0000000000000..2d057e6feac41 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s + +; GCN-LABEL: {{^}}test_fmaximum3_olt_0_f32: +; GCN: buffer_load_b32 [[REGC:v[0-9]+]] +; GCN: buffer_load_b32 [[REGB:v[0-9]+]] +; GCN: buffer_load_b32 [[REGA:v[0-9]+]] +; GCN: v_maximum3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_b32 [[RESULT]], +define amdgpu_kernel void @test_fmaximum3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile float, ptr addrspace(1) %aptr, align 4 + %b = load volatile float, ptr addrspace(1) %bptr, align 4 + %c = load volatile float, ptr addrspace(1) %cptr, align 4 + %f0 = call float @llvm.maximum.f32(float %a, float %b) + %f1 = call float @llvm.maximum.f32(float %f0, float %c) + store float %f1, ptr addrspace(1) %out, align 4 + ret void +} + +; Commute operand of second fmaximum +; GCN-LABEL: {{^}}test_fmaximum3_olt_1_f32: +; GCN: buffer_load_b32 [[REGB:v[0-9]+]] +; GCN: buffer_load_b32 [[REGA:v[0-9]+]] +; GCN: buffer_load_b32 [[REGC:v[0-9]+]] +; GCN: v_maximum3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: 
buffer_store_b32 [[RESULT]], +define amdgpu_kernel void @test_fmaximum3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile float, ptr addrspace(1) %aptr, align 4 + %b = load volatile float, ptr addrspace(1) %bptr, align 4 + %c = load volatile float, ptr addrspace(1) %cptr, align 4 + %f0 = call float @llvm.maximum.f32(float %a, float %b) + %f1 = call float @llvm.maximum.f32(float %c, float %f0) + store float %f1, ptr addrspace(1) %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_fmaximum3_olt_0_f16: +; GCN: buffer_load_u16 [[REGC:v[0-9]+]] +; GCN: buffer_load_u16 [[REGB:v[0-9]+]] +; GCN: buffer_load_u16 [[REGA:v[0-9]+]] +; GCN: v_maximum3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_b16 [[RESULT]], +define amdgpu_kernel void @test_fmaximum3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile half, ptr addrspace(1) %aptr, align 2 + %b = load volatile half, ptr addrspace(1) %bptr, align 2 + %c = load volatile half, ptr addrspace(1) %cptr, align 2 + %f0 = call half @llvm.maximum.f16(half %a, half %b) + %f1 = call half @llvm.maximum.f16(half %f0, half %c) + store half %f1, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}test_fmaximum3_olt_1_f16: +; GCN: buffer_load_u16 [[REGA:v[0-9]+]] +; GCN: buffer_load_u16 [[REGB:v[0-9]+]] +; GCN: buffer_load_u16 [[REGC:v[0-9]+]] +; GCN: v_maximum3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]] +; GCN: buffer_store_b16 [[RESULT]], +define amdgpu_kernel void @test_fmaximum3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile half, ptr addrspace(1) %aptr, align 2 + %b = load volatile half, ptr addrspace(1) %bptr, align 2 + %c = load volatile half, ptr addrspace(1) %cptr, align 2 + %f0 = call half @llvm.maximum.f16(half %a, half %b) + %f1 = call half 
@llvm.maximum.f16(half %c, half %f0) + store half %f1, ptr addrspace(1) %out, align 2 + ret void +} + +; Negative test: performMinMaxCombine() must not fold chained vector maximum ops into maximum3, +; since there is no packed fmaximum3 instruction. +; GCN-LABEL: {{^}}no_fmaximum3_v2f16: +; GCN: v_pk_maximum_f16 v0, v0, v1 +; GCN: v_pk_maximum_f16 v0, v2, v0 +; GCN: v_pk_maximum_f16 v0, v0, v3 +; GCN-NEXT: s_setpc_b64 +define <2 x half> @no_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) { +entry: + %max = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) + %max1 = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %max) + %res = call <2 x half> @llvm.maximum.v2f16(<2 x half> %max1, <2 x half> %d) + ret <2 x half> %res +} + +; GCN-LABEL: {{^}}no_fmaximum3_olt_0_f64: +; GCN-COUNT-2: v_maximum_f64 +define amdgpu_kernel void @no_fmaximum3_olt_0_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile double, ptr addrspace(1) %aptr, align 4 + %b = load volatile double, ptr addrspace(1) %bptr, align 4 + %c = load volatile double, ptr addrspace(1) %cptr, align 4 + %f0 = call double @llvm.maximum.f64(double %a, double %b) + %f1 = call double @llvm.maximum.f64(double %f0, double %c) + store double %f1, ptr addrspace(1) %out, align 4 + ret void +} + +declare double @llvm.maximum.f64(double, double) +declare float @llvm.maximum.f32(float, float) +declare half @llvm.maximum.f16(half, half) +declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>) diff --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll new file mode 100644 index 0000000000000..b63a4fa40b591 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll @@ -0,0 +1,311 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope 
-check-prefixes=GCN,GFX12-SDAG %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL %s + +define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) { +; GCN-LABEL: test_fminimum_f32_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f32 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %val = call float @llvm.minimum.f32(float %a, float %b) + ret float %val +} + +define amdgpu_ps float @test_fminimum_f32_ss(float inreg %a, float inreg %b) { +; GCN-LABEL: test_fminimum_f32_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_minimum_f32 s0, s0, s1 +; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog + %val = call float @llvm.minimum.f32(float %a, float %b) + ret float %val +} + +define amdgpu_ps float @test_fminimum_f32_vs(float %a, float inreg %b) { +; GCN-LABEL: test_fminimum_f32_vs: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f32 v0, v0, s0 +; GCN-NEXT: ; return to shader part epilog + %val = call float @llvm.minimum.f32(float %a, float %b) + ret float %val +} + +define amdgpu_ps float @test_fminimum_nnan_f32(float %a, float %b) { +; GCN-LABEL: test_fminimum_nnan_f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f32 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %val = call nnan float @llvm.minimum.f32(float %a, float %b) + ret float %val +} + +define amdgpu_ps <2 x float> @test_fminimum_v2f32(<2 x float> %a, <2 x float> %b) { +; GCN-LABEL: test_fminimum_v2f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f32 v0, v0, v2 +; GCN-NEXT: v_minimum_f32 v1, v1, v3 +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) + ret <2 x float> %val +} + +define amdgpu_ps <2 x float> @test_fminimum_v2f32_ss(<2 x float> inreg %a, <2 x float> inreg %b) { +; GCN-LABEL: test_fminimum_v2f32_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_minimum_f32 s0, s0, s2 +; GCN-NEXT: 
s_minimum_f32 s1, s1, s3 +; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) + ret <2 x float> %val +} + +define amdgpu_ps <3 x float> @test_fminimum_v3f32(<3 x float> %a, <3 x float> %b) { +; GCN-LABEL: test_fminimum_v3f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f32 v0, v0, v3 +; GCN-NEXT: v_minimum_f32 v1, v1, v4 +; GCN-NEXT: v_minimum_f32 v2, v2, v5 +; GCN-NEXT: ; return to shader part epilog + %val = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b) + ret <3 x float> %val +} + +define amdgpu_ps <4 x float> @test_fminimum_v4f32(<4 x float> %a, <4 x float> %b) { +; GCN-LABEL: test_fminimum_v4f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f32 v0, v0, v4 +; GCN-NEXT: v_minimum_f32 v1, v1, v5 +; GCN-NEXT: v_minimum_f32 v2, v2, v6 +; GCN-NEXT: v_minimum_f32 v3, v3, v7 +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %val +} + +define amdgpu_ps <16 x float> @test_fminimum_v16f32(<16 x float> %a, <16 x float> %b) { +; GCN-LABEL: test_fminimum_v16f32: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f32 v0, v0, v16 +; GCN-NEXT: v_minimum_f32 v1, v1, v17 +; GCN-NEXT: v_minimum_f32 v2, v2, v18 +; GCN-NEXT: v_minimum_f32 v3, v3, v19 +; GCN-NEXT: v_minimum_f32 v4, v4, v20 +; GCN-NEXT: v_minimum_f32 v5, v5, v21 +; GCN-NEXT: v_minimum_f32 v6, v6, v22 +; GCN-NEXT: v_minimum_f32 v7, v7, v23 +; GCN-NEXT: v_minimum_f32 v8, v8, v24 +; GCN-NEXT: v_minimum_f32 v9, v9, v25 +; GCN-NEXT: v_minimum_f32 v10, v10, v26 +; GCN-NEXT: v_minimum_f32 v11, v11, v27 +; GCN-NEXT: v_minimum_f32 v12, v12, v28 +; GCN-NEXT: v_minimum_f32 v13, v13, v29 +; GCN-NEXT: v_minimum_f32 v14, v14, v30 +; GCN-NEXT: v_minimum_f32 v15, v15, v31 +; GCN-NEXT: ; return to shader part epilog + %val = call <16 x float> @llvm.minimum.v16f32(<16 x 
float> %a, <16 x float> %b) + ret <16 x float> %val +} + +define amdgpu_ps half @test_fminimum_f16_vv(half %a, half %b) { +; GCN-LABEL: test_fminimum_f16_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f16 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %val = call half @llvm.minimum.f16(half %a, half %b) + ret half %val +} + +define amdgpu_ps half @test_fminimum_f16_ss(half inreg %a, half inreg %b) { +; GCN-LABEL: test_fminimum_f16_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_minimum_f16 s0, s0, s1 +; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: ; return to shader part epilog + %val = call half @llvm.minimum.f16(half %a, half %b) + ret half %val +} + +define amdgpu_ps <2 x half> @test_fminimum_v2f16_vv(<2 x half> %a, <2 x half> %b) { +; GCN-LABEL: test_fminimum_v2f16_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %val +} + +define amdgpu_ps <2 x half> @test_fminimum_v2f16_ss(<2 x half> inreg %a, <2 x half> inreg %b) { +; GCN-LABEL: test_fminimum_v2f16_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_minimum_f16 v0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %val +} + +define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b) { +; GCN-LABEL: test_fminimum_v3f16_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GCN-NEXT: v_minimum_f16 v1, v1, v3 +; GCN-NEXT: ; return to shader part epilog + %val = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b) + ret <3 x half> %val +} + +define amdgpu_ps <3 x half> @test_fminimum_v3f16_ss(<3 x half> inreg %a, <3 x half> inreg %b) { +; GCN-LABEL: test_fminimum_v3f16_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_minimum_f16 v0, s0, s2 +; GCN-NEXT: s_minimum_f16 s0, s1, s3 +; GCN-NEXT: s_delay_alu 
instid0(SALU_CYCLE_3) +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: ; return to shader part epilog + %val = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b) + ret <3 x half> %val +} + +define amdgpu_ps <4 x half> @test_fminimum_v4f16(<4 x half> %a, <4 x half> %b) { +; GCN-LABEL: test_fminimum_v4f16: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GCN-NEXT: v_pk_minimum_f16 v1, v1, v3 +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %val +} + +define amdgpu_ps <4 x half> @test_fminimum_v4f16_ss(<4 x half> inreg %a, <4 x half> inreg %b) { +; GCN-LABEL: test_fminimum_v4f16_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_pk_minimum_f16 v0, s0, s2 +; GCN-NEXT: v_pk_minimum_f16 v1, s1, s3 +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %val +} + +define amdgpu_ps <2 x float> @test_fminimum_f64_vv(double %a, double %b) { +; GCN-LABEL: test_fminimum_f64_vv: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GCN-NEXT: ; return to shader part epilog + %val = call double @llvm.minimum.f64(double %a, double %b) + %ret = bitcast double %val to <2 x float> + ret <2 x float> %ret +} + +define amdgpu_ps <2 x float> @test_fminimum_f64_ss(double inreg %a, double inreg %b) { +; GCN-LABEL: test_fminimum_f64_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3] +; GCN-NEXT: ; return to shader part epilog + %val = call double @llvm.minimum.f64(double %a, double %b) + %ret = bitcast double %val to <2 x float> + ret <2 x float> %ret +} + +define amdgpu_ps <4 x float> @test_fminimum_v2f64_ss(<2 x double> inreg %a, <2 x double> inreg %b) { +; GCN-LABEL: test_fminimum_v2f64_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5] +; GCN-NEXT: v_minimum_f64 v[2:3], s[2:3], s[6:7] +; GCN-NEXT: ; return to shader part epilog + %val = call <2 x double> 
@llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b) + %ret = bitcast <2 x double> %val to <4 x float> + ret <4 x float> %ret +} + +define amdgpu_ps <8 x float> @test_fminimum_v4f64(<4 x double> %a, <4 x double> %b) { +; GCN-LABEL: test_fminimum_v4f64: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9] +; GCN-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11] +; GCN-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13] +; GCN-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15] +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x double> @llvm.minimum.v4f64(<4 x double> %a, <4 x double> %b) + %ret = bitcast <4 x double> %val to <8 x float> + ret <8 x float> %ret +} + +define amdgpu_ps <8 x float> @test_fminimum_v4f64_ss(<4 x double> inreg %a, <4 x double> inreg %b) { +; GCN-LABEL: test_fminimum_v4f64_ss: +; GCN: ; %bb.0: +; GCN-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9] +; GCN-NEXT: v_minimum_f64 v[2:3], s[2:3], s[10:11] +; GCN-NEXT: v_minimum_f64 v[4:5], s[4:5], s[12:13] +; GCN-NEXT: v_minimum_f64 v[6:7], s[6:7], s[14:15] +; GCN-NEXT: ; return to shader part epilog + %val = call <4 x double> @llvm.minimum.v4f64(<4 x double> %a, <4 x double> %b) + %ret = bitcast <4 x double> %val to <8 x float> + ret <8 x float> %ret +} + +define amdgpu_kernel void @fminimumi_f32_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) { +; GCN-LABEL: fminimumi_f32_move_to_valu: +; GCN: ; %bb.0: +; GCN-NEXT: s_clause 0x1 +; GCN-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 +; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: global_load_b32 v1, v0, s[6:7] th:TH_LOAD_RT_NT +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: global_load_b32 v2, v0, s[0:1] th:TH_LOAD_RT_NT +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_minimum_f32 v1, v1, v2 +; GCN-NEXT: global_store_b32 v0, v1, s[4:5] +; GCN-NEXT: s_nop 0 +; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GCN-NEXT: s_endpgm + %a = load volatile 
float, ptr addrspace(1) %aptr, align 4 + %b = load volatile float, ptr addrspace(1) %bptr, align 4 + %v = call float @llvm.minimum.f32(float %a, float %b) + store float %v, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) { +; GCN-LABEL: fminimum_f16_move_to_valu: +; GCN: ; %bb.0: +; GCN-NEXT: s_clause 0x1 +; GCN-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 +; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: global_load_u16 v1, v0, s[6:7] th:TH_LOAD_RT_NT +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: global_load_u16 v2, v0, s[0:1] th:TH_LOAD_RT_NT +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_minimum_f16 v1, v1, v2 +; GCN-NEXT: global_store_b16 v0, v1, s[4:5] +; GCN-NEXT: s_nop 0 +; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GCN-NEXT: s_endpgm + %a = load volatile half, ptr addrspace(1) %aptr, align 4 + %b = load volatile half, ptr addrspace(1) %bptr, align 4 + %v = call half @llvm.minimum.f16(half %a, half %b) + store half %v, ptr addrspace(1) %out, align 4 + ret void +} + +declare float @llvm.minimum.f32(float, float) +declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>) +declare <3 x float> @llvm.minimum.v3f32(<3 x float>, <3 x float>) +declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) +declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>) +declare half @llvm.minimum.f16(half, half) +declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>) +declare <3 x half> @llvm.minimum.v3f16(<3 x half>, <3 x half>) +declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>) +declare double @llvm.minimum.f64(double, double) +declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GFX12-GISEL: {{.*}} +; GFX12-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll new file mode 100644 index 0000000000000..e28b324032157 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s + +; GCN-LABEL: {{^}}test_fminimum3_olt_0_f32: +; GCN: buffer_load_b32 [[REGC:v[0-9]+]] +; GCN: buffer_load_b32 [[REGB:v[0-9]+]] +; GCN: buffer_load_b32 [[REGA:v[0-9]+]] +; GCN: v_minimum3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_b32 [[RESULT]], +define amdgpu_kernel void @test_fminimum3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile float, ptr addrspace(1) %aptr, align 4 + %b = load volatile float, ptr addrspace(1) %bptr, align 4 + %c = load volatile float, ptr addrspace(1) %cptr, align 4 + %f0 = call float @llvm.minimum.f32(float %a, float %b) + %f1 = call float @llvm.minimum.f32(float %f0, float %c) + store float %f1, ptr addrspace(1) %out, align 4 + ret void +} + +; Commute operand of second fminimum +; GCN-LABEL: {{^}}test_fminimum3_olt_1_f32: +; GCN: buffer_load_b32 [[REGB:v[0-9]+]] +; GCN: buffer_load_b32 [[REGA:v[0-9]+]] +; GCN: buffer_load_b32 [[REGC:v[0-9]+]] +; GCN: v_minimum3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_b32 [[RESULT]], +define amdgpu_kernel void @test_fminimum3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile float, ptr addrspace(1) %aptr, align 4 + %b = load volatile float, ptr addrspace(1) %bptr, align 4 + %c = load volatile float, ptr addrspace(1) %cptr, align 4 + %f0 = call float @llvm.minimum.f32(float %a, float %b) + %f1 = call float @llvm.minimum.f32(float %c, float %f0) + 
store float %f1, ptr addrspace(1) %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_fminimum3_olt_0_f16: +; GCN: buffer_load_u16 [[REGC:v[0-9]+]] +; GCN: buffer_load_u16 [[REGB:v[0-9]+]] +; GCN: buffer_load_u16 [[REGA:v[0-9]+]] +; GCN: v_minimum3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_b16 [[RESULT]], +define amdgpu_kernel void @test_fminimum3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile half, ptr addrspace(1) %aptr, align 2 + %b = load volatile half, ptr addrspace(1) %bptr, align 2 + %c = load volatile half, ptr addrspace(1) %cptr, align 2 + %f0 = call half @llvm.minimum.f16(half %a, half %b) + %f1 = call half @llvm.minimum.f16(half %f0, half %c) + store half %f1, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}test_fminimum3_olt_1_f16: +; GCN: buffer_load_u16 [[REGA:v[0-9]+]] +; GCN: buffer_load_u16 [[REGB:v[0-9]+]] +; GCN: buffer_load_u16 [[REGC:v[0-9]+]] +; GCN: v_minimum3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]] +; GCN: buffer_store_b16 [[RESULT]], +define amdgpu_kernel void @test_fminimum3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile half, ptr addrspace(1) %aptr, align 2 + %b = load volatile half, ptr addrspace(1) %bptr, align 2 + %c = load volatile half, ptr addrspace(1) %cptr, align 2 + %f0 = call half @llvm.minimum.f16(half %a, half %b) + %f1 = call half @llvm.minimum.f16(half %c, half %f0) + store half %f1, ptr addrspace(1) %out, align 2 + ret void +} + +; Negative test: performMinMaxCombine() must not fold chained vector minimum ops into minimum3, +; since there is no packed fminimum3 instruction. 
+; GCN-LABEL: {{^}}no_fminimum3_v2f16: +; GCN: v_pk_minimum_f16 v0, v0, v1 +; GCN: v_pk_minimum_f16 v0, v2, v0 +; GCN: v_pk_minimum_f16 v0, v0, v3 +; GCN-NEXT: s_setpc_b64 +define <2 x half> @no_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) { +entry: + %min = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) + %min1 = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %min) + %res = call <2 x half> @llvm.minimum.v2f16(<2 x half> %min1, <2 x half> %d) + ret <2 x half> %res +} + +; GCN-LABEL: {{^}}no_fminimum3_olt_0_f64: +; GCN-COUNT-2: v_minimum_f64 +define amdgpu_kernel void @no_fminimum3_olt_0_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) { + %a = load volatile double, ptr addrspace(1) %aptr, align 4 + %b = load volatile double, ptr addrspace(1) %bptr, align 4 + %c = load volatile double, ptr addrspace(1) %cptr, align 4 + %f0 = call double @llvm.minimum.f64(double %a, double %b) + %f1 = call double @llvm.minimum.f64(double %f0, double %c) + store double %f1, ptr addrspace(1) %out, align 4 + ret void +} + +declare double @llvm.minimum.f64(double, double) +declare float @llvm.minimum.f32(float, float) +declare half @llvm.minimum.f16(half, half) +declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>) diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines-gfx1200.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines-gfx1200.ll new file mode 100644 index 0000000000000..368503725f684 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines-gfx1200.ll @@ -0,0 +1,270 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SDAG %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-GISEL %s + +; 
-------------------------------------------------------------------------------- +; fminimum tests +; -------------------------------------------------------------------------------- + +; GCN-LABEL: {{^}}v_fneg_minimum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: global_load_b32 [[B:v[0-9]+]] +; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]] +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep + %min = call float @llvm.minimum.f32(float %a, float %b) + %fneg = fneg float %min + store float %fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_self_minimum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]] +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_self_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %min = call float @llvm.minimum.f32(float %a, float %a) + %min.fneg = fneg float %min + store float %min.fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_posk_minimum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0 +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] 
+define void @v_fneg_posk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %min = call float @llvm.minimum.f32(float %a, float 4.0) + %fneg = fneg float %min + store float %fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_negk_minimum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0 +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_negk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %min = call float @llvm.minimum.f32(float %a, float -4.0) + %fneg = fneg float %min + store float %fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_0_minimum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], [[A]], 0 +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_0_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %min = call float @llvm.minimum.f32(float %a, float 0.0) + %fneg = fneg float %min + store float %fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: 
{{^}}v_fneg_0_minimum_foldable_use_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: global_load_b32 [[B:v[0-9]+]] +; GCN: v_minimum_f32 [[MIN:v[0-9]+]], [[A]], 0 +; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]] +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_0_minimum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep + %min = call float @llvm.minimum.f32(float %a, float 0.0) + %fneg = fneg float %min + %mul = fmul float %fneg, %b + store float %mul, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_minimum_multi_use_minimum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: global_load_b32 [[B:v[0-9]+]] +; GCN: v_maximum_f32 [[MAX0:v[0-9]+]], -[[A]], -[[B]] +; GCN-SDAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]] +; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0 +; GCN: global_store_b32 v[{{[0-9:]+}}], [[MAX0]] +; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]] +define void @v_fneg_minimum_multi_use_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep + %min = call float @llvm.minimum.f32(float 
%a, float %b) + %fneg = fneg float %min + %use1 = fmul float %min, 4.0 + store volatile float %fneg, ptr addrspace(1) %out + store volatile float %use1, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------------------- +; fmaximum tests +; -------------------------------------------------------------------------------- + +; GCN-LABEL: {{^}}v_fneg_maximum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: global_load_b32 [[B:v[0-9]+]] +; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]] +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep + %max = call float @llvm.maximum.f32(float %a, float %b) + %fneg = fneg float %max + store float %fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_self_maximum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]] +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_self_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %max = call float @llvm.maximum.f32(float %a, float %a) + %max.fneg = fneg float %max + store float %max.fneg, ptr addrspace(1) %out.gep + ret void
+} + +; GCN-LABEL: {{^}}v_fneg_posk_maximum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0 +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_posk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %max = call float @llvm.maximum.f32(float %a, float 4.0) + %fneg = fneg float %max + store float %fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_negk_maximum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0 +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_negk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %max = call float @llvm.maximum.f32(float %a, float -4.0) + %fneg = fneg float %max + store float %fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_0_maximum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], [[A]], 0 +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_0_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep +
%max = call float @llvm.maximum.f32(float %a, float 0.0) + %fneg = fneg float %max + store float %fneg, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_0_maximum_foldable_use_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: global_load_b32 [[B:v[0-9]+]] +; GCN: v_maximum_f32 [[MAX:v[0-9]+]], [[A]], 0 +; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]] +; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]] +define void @v_fneg_0_maximum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep + %max = call float @llvm.maximum.f32(float %a, float 0.0) + %fneg = fneg float %max + %mul = fmul float %fneg, %b + store float %mul, ptr addrspace(1) %out.gep + ret void +} + +; GCN-LABEL: {{^}}v_fneg_maximum_multi_use_maximum_f32: +; GCN: global_load_b32 [[A:v[0-9]+]] +; GCN: global_load_b32 [[B:v[0-9]+]] +; GCN: v_minimum_f32 [[MAX0:v[0-9]+]], -[[A]], -[[B]] +; GCN-SDAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]] +; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0 +; GCN: global_store_b32 v[{{[0-9:]+}}], [[MAX0]] +; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]] +define void @v_fneg_maximum_multi_use_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64
%tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep + %max = call float @llvm.maximum.f32(float %a, float %b) + %fneg = fneg float %max + %use1 = fmul float %max, 4.0 + store volatile float %fneg, ptr addrspace(1) %out + store volatile float %use1, ptr addrspace(1) %out + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() +declare float @llvm.minimum.f32(float, float) +declare float @llvm.maximum.f32(float, float) diff --git a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll new file mode 100644 index 0000000000000..e73ce7a4826c0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,SDAG %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GISEL %s + +define amdgpu_ps float @test_minmax_f32(float %a, float %b, float %c) { +; GFX12-LABEL: test_minmax_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_maximumminimum_f32 v0, v0, v1, v2 +; GFX12-NEXT: ; return to shader part epilog + %max = call float @llvm.maximum.f32(float %a, float %b) + %minmax = call float @llvm.minimum.f32(float %max, float %c) + ret float %minmax +} + +define amdgpu_ps void @s_test_minmax_f32(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) { +; SDAG-LABEL: s_test_minmax_f32: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_maximum_f32 s0, s0, s1 +; SDAG-NEXT: s_mov_b32 s5, s4 +; SDAG-NEXT: s_mov_b32 s4, s3 +; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; SDAG-NEXT: s_minimum_f32 s0, s0, s2 +; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 +; SDAG-NEXT: global_store_b32 v0, v1, s[4:5] +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; SDAG-NEXT:
s_endpgm +; +; GISEL-LABEL: s_test_minmax_f32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_maximum_f32 s0, s0, s1 +; GISEL-NEXT: s_mov_b32 s6, s3 +; GISEL-NEXT: s_mov_b32 s7, s4 +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: s_minimum_f32 s0, s0, s2 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: global_store_b32 v1, v0, s[6:7] +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-NEXT: s_endpgm + %smax = call float @llvm.maximum.f32(float %a, float %b) + %sminmax = call float @llvm.minimum.f32(float %smax, float %c) + store float %sminmax, ptr addrspace(1) %out + ret void +} + +define amdgpu_ps float @test_minmax_commuted_f32(float %a, float %b, float %c) { +; GFX12-LABEL: test_minmax_commuted_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_maximumminimum_f32 v0, v0, v1, v2 +; GFX12-NEXT: ; return to shader part epilog + %max = call float @llvm.maximum.f32(float %a, float %b) + %minmax = call float @llvm.minimum.f32(float %c, float %max) + ret float %minmax +} + +define amdgpu_ps float @test_maxmin_f32(float %a, float %b, float %c) { +; GFX12-LABEL: test_maxmin_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_minimummaximum_f32 v0, v0, v1, v2 +; GFX12-NEXT: ; return to shader part epilog + %min = call float @llvm.minimum.f32(float %a, float %b) + %maxmin = call float @llvm.maximum.f32(float %min, float %c) + ret float %maxmin +} + +define amdgpu_ps float @test_maxmin_commuted_f32(float %a, float %b, float %c) { +; GFX12-LABEL: test_maxmin_commuted_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_minimummaximum_f32 v0, v0, v1, v2 +; GFX12-NEXT: ; return to shader part epilog + %min = call float @llvm.minimum.f32(float %a, float %b) + %maxmin = call float @llvm.maximum.f32(float %c, float %min) + ret float %maxmin +} + +define amdgpu_ps half @test_minmax_f16(half %a, half %b, half %c) { +; GFX12-LABEL: test_minmax_f16: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 +; GFX12-NEXT: ; return 
to shader part epilog + %max = call half @llvm.maximum.f16(half %a, half %b) + %minmax = call half @llvm.minimum.f16(half %max, half %c) + ret half %minmax +} + +define amdgpu_ps half @test_minmax_commuted_f16(half %a, half %b, half %c) { +; GFX12-LABEL: test_minmax_commuted_f16: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_maximumminimum_f16 v0, v0, v1, v2 +; GFX12-NEXT: ; return to shader part epilog + %max = call half @llvm.maximum.f16(half %a, half %b) + %minmax = call half @llvm.minimum.f16(half %c, half %max) + ret half %minmax +} + +define amdgpu_ps half @test_maxmin_commuted_f16(half %a, half %b, half %c) { +; GFX12-LABEL: test_maxmin_commuted_f16: +; GFX12: ; %bb.0: +; GFX12-NEXT: v_minimummaximum_f16 v0, v0, v1, v2 +; GFX12-NEXT: ; return to shader part epilog + %min = call half @llvm.minimum.f16(half %a, half %b) + %maxmin = call half @llvm.maximum.f16(half %c, half %min) + ret half %maxmin +} + +define amdgpu_ps void @s_test_minmax_f16(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) { +; SDAG-LABEL: s_test_minmax_f16: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_maximum_f16 s0, s0, s1 +; SDAG-NEXT: s_mov_b32 s5, s4 +; SDAG-NEXT: s_mov_b32 s4, s3 +; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; SDAG-NEXT: s_minimum_f16 s0, s0, s2 +; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 +; SDAG-NEXT: global_store_b16 v0, v1, s[4:5] +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: s_test_minmax_f16: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_maximum_f16 s0, s0, s1 +; GISEL-NEXT: s_mov_b32 s6, s3 +; GISEL-NEXT: s_mov_b32 s7, s4 +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: s_minimum_f16 s0, s0, s2 +; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: global_store_b16 v1, v0, s[6:7] +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-NEXT: s_endpgm + %smax = call half 
@llvm.maximum.f16(half %a, half %b) + %sminmax = call half @llvm.minimum.f16(half %smax, half %c) + store half %sminmax, ptr addrspace(1) %out + ret void +} + +declare half @llvm.minimum.f16(half, half) +declare half @llvm.maximum.f16(half, half) +declare float @llvm.minimum.f32(float, float) +declare float @llvm.maximum.f32(float, float) diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s index 1a898bebde57f..d1d9208724181 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s @@ -483,6 +483,108 @@ s_max_num_f32 s5, 0x3f717273, s2 s_max_num_f32 s5, s1, s105 // GFX12: encoding: [0x01,0x69,0x85,0xa1] +s_minimum_f32 s5, s1, s2 +// GFX12: encoding: [0x01,0x02,0x85,0xa7] + +s_minimum_f32 s105, s1, s2 +// GFX12: encoding: [0x01,0x02,0xe9,0xa7] + +s_minimum_f32 s5, s105, s2 +// GFX12: encoding: [0x69,0x02,0x85,0xa7] + +s_minimum_f32 s5, s103, s2 +// GFX12: encoding: [0x67,0x02,0x85,0xa7] + +s_minimum_f32 s5, vcc_lo, s2 +// GFX12: encoding: [0x6a,0x02,0x85,0xa7] + +s_minimum_f32 s5, vcc_hi, s2 +// GFX12: encoding: [0x6b,0x02,0x85,0xa7] + +s_minimum_f32 s5, ttmp11, s2 +// GFX12: encoding: [0x77,0x02,0x85,0xa7] + +s_minimum_f32 s5, m0, s2 +// GFX12: encoding: [0x7d,0x02,0x85,0xa7] + +s_minimum_f32 s5, exec_lo, s2 +// GFX12: encoding: [0x7e,0x02,0x85,0xa7] + +s_minimum_f32 s5, exec_hi, s2 +// GFX12: encoding: [0x7f,0x02,0x85,0xa7] + +s_minimum_f32 s5, 0, s2 +// GFX12: encoding: [0x80,0x02,0x85,0xa7] + +s_minimum_f32 s5, -1, s2 +// GFX12: encoding: [0xc1,0x02,0x85,0xa7] + +s_minimum_f32 s5, 0.5, s2 +// GFX12: encoding: [0xf0,0x02,0x85,0xa7] + +s_minimum_f32 s5, -4.0, s2 +// GFX12: encoding: [0xf7,0x02,0x85,0xa7] + +s_minimum_f32 s5, 0xaf123456, s2 +// GFX12: encoding: [0xff,0x02,0x85,0xa7,0x56,0x34,0x12,0xaf] + +s_minimum_f32 s5, 0x3f717273, s2 +// GFX12: encoding: [0xff,0x02,0x85,0xa7,0x73,0x72,0x71,0x3f] + +s_minimum_f32 s5, s1, s105 +// GFX12: encoding: [0x01,0x69,0x85,0xa7] + +s_maximum_f32 s5, s1, 
s2 +// GFX12: encoding: [0x01,0x02,0x05,0xa8] + +s_maximum_f32 s105, s1, s2 +// GFX12: encoding: [0x01,0x02,0x69,0xa8] + +s_maximum_f32 s5, s105, s2 +// GFX12: encoding: [0x69,0x02,0x05,0xa8] + +s_maximum_f32 s5, s103, s2 +// GFX12: encoding: [0x67,0x02,0x05,0xa8] + +s_maximum_f32 s5, vcc_lo, s2 +// GFX12: encoding: [0x6a,0x02,0x05,0xa8] + +s_maximum_f32 s5, vcc_hi, s2 +// GFX12: encoding: [0x6b,0x02,0x05,0xa8] + +s_maximum_f32 s5, ttmp11, s2 +// GFX12: encoding: [0x77,0x02,0x05,0xa8] + +s_maximum_f32 s5, m0, s2 +// GFX12: encoding: [0x7d,0x02,0x05,0xa8] + +s_maximum_f32 s5, exec_lo, s2 +// GFX12: encoding: [0x7e,0x02,0x05,0xa8] + +s_maximum_f32 s5, exec_hi, s2 +// GFX12: encoding: [0x7f,0x02,0x05,0xa8] + +s_maximum_f32 s5, 0, s2 +// GFX12: encoding: [0x80,0x02,0x05,0xa8] + +s_maximum_f32 s5, -1, s2 +// GFX12: encoding: [0xc1,0x02,0x05,0xa8] + +s_maximum_f32 s5, 0.5, s2 +// GFX12: encoding: [0xf0,0x02,0x05,0xa8] + +s_maximum_f32 s5, -4.0, s2 +// GFX12: encoding: [0xf7,0x02,0x05,0xa8] + +s_maximum_f32 s5, 0xaf123456, s2 +// GFX12: encoding: [0xff,0x02,0x05,0xa8,0x56,0x34,0x12,0xaf] + +s_maximum_f32 s5, 0x3f717273, s2 +// GFX12: encoding: [0xff,0x02,0x05,0xa8,0x73,0x72,0x71,0x3f] + +s_maximum_f32 s5, s1, s105 +// GFX12: encoding: [0x01,0x69,0x05,0xa8] + s_fmac_f32 s5, s1, s2 // GFX12: encoding: [0x01,0x02,0x85,0xa3] @@ -942,6 +1044,102 @@ s_min_num_f16 s5, 0x3456, s2 s_min_num_f16 s5, s1, s105 // GFX12: encoding: [0x01,0x69,0x85,0xa5] +s_maximum_f16 s5, s1, s2 +// GFX12: encoding: [0x01,0x02,0x05,0xa9] + +s_maximum_f16 s105, s1, s2 +// GFX12: encoding: [0x01,0x02,0x69,0xa9] + +s_maximum_f16 s5, s105, s2 +// GFX12: encoding: [0x69,0x02,0x05,0xa9] + +s_maximum_f16 s5, s101, s2 +// GFX12: encoding: [0x65,0x02,0x05,0xa9] + +s_maximum_f16 s5, vcc_lo, s2 +// GFX12: encoding: [0x6a,0x02,0x05,0xa9] + +s_maximum_f16 s5, vcc_hi, s2 +// GFX12: encoding: [0x6b,0x02,0x05,0xa9] + +s_maximum_f16 s5, m0, s2 +// GFX12: encoding: [0x7d,0x02,0x05,0xa9] + +s_maximum_f16 s5, exec_lo, s2 
+// GFX12: encoding: [0x7e,0x02,0x05,0xa9] + +s_maximum_f16 s5, exec_hi, s2 +// GFX12: encoding: [0x7f,0x02,0x05,0xa9] + +s_maximum_f16 s5, 0, s2 +// GFX12: encoding: [0x80,0x02,0x05,0xa9] + +s_maximum_f16 s5, -1, s2 +// GFX12: encoding: [0xc1,0x02,0x05,0xa9] + +s_maximum_f16 s5, 0.5, s2 +// GFX12: encoding: [0xf0,0x02,0x05,0xa9] + +s_maximum_f16 s5, -4.0, s2 +// GFX12: encoding: [0xf7,0x02,0x05,0xa9] + +s_maximum_f16 s5, 0xfe0b, s2 +// GFX12: encoding: [0xff,0x02,0x05,0xa9,0x0b,0xfe,0x00,0x00] + +s_maximum_f16 s5, 0x3456, s2 +// GFX12: encoding: [0xff,0x02,0x05,0xa9,0x56,0x34,0x00,0x00] + +s_maximum_f16 s5, s1, s105 +// GFX12: encoding: [0x01,0x69,0x05,0xa9] + +s_minimum_f16 s5, s1, s2 +// GFX12: encoding: [0x01,0x02,0x85,0xa8] + +s_minimum_f16 s105, s1, s2 +// GFX12: encoding: [0x01,0x02,0xe9,0xa8] + +s_minimum_f16 s5, s105, s2 +// GFX12: encoding: [0x69,0x02,0x85,0xa8] + +s_minimum_f16 s5, s101, s2 +// GFX12: encoding: [0x65,0x02,0x85,0xa8] + +s_minimum_f16 s5, vcc_lo, s2 +// GFX12: encoding: [0x6a,0x02,0x85,0xa8] + +s_minimum_f16 s5, vcc_hi, s2 +// GFX12: encoding: [0x6b,0x02,0x85,0xa8] + +s_minimum_f16 s5, m0, s2 +// GFX12: encoding: [0x7d,0x02,0x85,0xa8] + +s_minimum_f16 s5, exec_lo, s2 +// GFX12: encoding: [0x7e,0x02,0x85,0xa8] + +s_minimum_f16 s5, exec_hi, s2 +// GFX12: encoding: [0x7f,0x02,0x85,0xa8] + +s_minimum_f16 s5, 0, s2 +// GFX12: encoding: [0x80,0x02,0x85,0xa8] + +s_minimum_f16 s5, -1, s2 +// GFX12: encoding: [0xc1,0x02,0x85,0xa8] + +s_minimum_f16 s5, 0.5, s2 +// GFX12: encoding: [0xf0,0x02,0x85,0xa8] + +s_minimum_f16 s5, -4.0, s2 +// GFX12: encoding: [0xf7,0x02,0x85,0xa8] + +s_minimum_f16 s5, 0xfe0b, s2 +// GFX12: encoding: [0xff,0x02,0x85,0xa8,0x0b,0xfe,0x00,0x00] + +s_minimum_f16 s5, 0x3456, s2 +// GFX12: encoding: [0xff,0x02,0x85,0xa8,0x56,0x34,0x00,0x00] + +s_minimum_f16 s5, s1, s105 +// GFX12: encoding: [0x01,0x69,0x85,0xa8] + s_add_co_u32 s0, s1, s2 // GFX12: encoding: [0x01,0x02,0x00,0x80] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s 
b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 71b2c442460f3..e97e4b8c7241c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -5981,3 +5981,633 @@ v_xor_b16 v5, src_scc, vcc_lo v_xor_b16 v255, 0xfe0b, vcc_hi // GFX12: encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_minimum_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x01,0x05,0x02,0x00] + +v_minimum_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xff,0xff,0x03,0x00] + +v_minimum_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x01,0x04,0x00,0x00] + +v_minimum_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x69,0xd2,0x00,0x00] + +v_minimum_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x6a,0xf6,0x00,0x00] + +v_minimum_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_minimum_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7b,0xfa,0x01,0x00] + +v_minimum_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7d,0xe0,0x01,0x00] + +v_minimum_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7e,0x82,0x01,0x00] + +v_minimum_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x65,0xd7,0x7f,0xf8,0x00,0x00] + +v_minimum_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xc1,0xfe,0x00,0x00] + +v_minimum_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xf0,0xfa,0x00,0x40] + +v_minimum_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x65,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x65,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_maximum_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x01,0x05,0x02,0x00] + +v_maximum_f32 v5, v255, v255 +// GFX12: encoding: 
[0x05,0x00,0x66,0xd7,0xff,0xff,0x03,0x00] + +v_maximum_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x01,0x04,0x00,0x00] + +v_maximum_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x69,0xd2,0x00,0x00] + +v_maximum_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x6a,0xf6,0x00,0x00] + +v_maximum_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_maximum_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7b,0xfa,0x01,0x00] + +v_maximum_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7d,0xe0,0x01,0x00] + +v_maximum_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x66,0xd7,0x7f,0xf8,0x00,0x00] + +v_maximum_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0xc1,0xfe,0x00,0x00] + +v_maximum_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0xf0,0xfa,0x00,0x40] + +v_maximum_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x66,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x66,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_minimum_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00] + +v_minimum_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00] + +v_minimum_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00] + +v_minimum_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00] + +v_minimum_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00] + +v_minimum_f16 v5, vcc_hi, 0xaf12 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00] + +v_minimum_f16 v5, ttmp15, src_scc +// GFX12: 
encoding: [0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00] + +v_minimum_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00] + +v_minimum_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00] + +v_minimum_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00] + +v_minimum_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00] + +v_minimum_f16 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40] + +v_minimum_f16 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f16 v255, -|0xaf12|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00] + +v_minimum_f16 v205, v201, v200 +// GFX12: encoding: [0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00] + +v_maximum_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00] + +v_maximum_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00] + +v_maximum_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00] + +v_maximum_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00] + +v_maximum_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00] + +v_maximum_f16 v5, vcc_hi, 0xaf12 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00] + +v_maximum_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00] + +v_maximum_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00] + +v_maximum_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x68,0xd7,0x7f,0xf8,0x00,0x00] + +v_maximum_f16 v5, null, exec_lo +// GFX12: encoding: 
[0x05,0x00,0x68,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xc1,0xfe,0x00,0x00] + +v_maximum_f16 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xf0,0xfa,0x00,0x40] + +v_maximum_f16 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x68,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f16 v255, -|0xaf12|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x68,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00] + +v_maximum_f16 v205, v201, v200 +// GFX12: encoding: [0xcd,0x00,0x68,0xd7,0xc9,0x91,0x03,0x00] + +v_minimum_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x01,0x07,0x02,0x00] + +v_minimum_f64 v[5:6], v[254:255], v[254:255] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0xfe,0xfd,0x03,0x00] + +v_minimum_f64 v[5:6], s[6:7], s[4:5] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x06,0x08,0x00,0x00] + +v_minimum_f64 v[5:6], s[104:105], s[104:105] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x68,0xd0,0x00,0x00] + +v_minimum_f64 v[5:6], vcc, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xf4,0x00,0x00] + +v_minimum_f64 v[5:6], vcc, 0xaf121234 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf] + +v_minimum_f64 v[5:6], ttmp[14:15], src_scc +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7a,0xfa,0x01,0x00] + +v_minimum_f64 v[5:6], vcc, 0.5 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xe0,0x01,0x00] + +v_minimum_f64 v[5:6], exec, -1 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7e,0x82,0x01,0x00] + +v_minimum_f64 v[5:6], |exec|, null +// GFX12: encoding: [0x05,0x01,0x41,0xd7,0x7e,0xf8,0x00,0x00] + +v_minimum_f64 v[5:6], null, exec +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f64 v[5:6], -1, exec +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0xc1,0xfc,0x00,0x00] + +v_minimum_f64 v[5:6], 0.5, -vcc +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0xf0,0xd4,0x00,0x40] + +v_minimum_f64 v[5:6], -src_scc, |vcc| +// GFX12: encoding: 
[0x05,0x02,0x41,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f64 v[254:255], -|2|, -|vcc| +// GFX12: encoding: [0xfe,0x03,0x41,0xd7,0x82,0xd4,0x00,0x60] + +v_maximum_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x01,0x07,0x02,0x00] + +v_maximum_f64 v[5:6], v[254:255], v[254:255] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xfe,0xfd,0x03,0x00] + +v_maximum_f64 v[5:6], s[6:7], s[4:5] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x06,0x08,0x00,0x00] + +v_maximum_f64 v[5:6], s[104:105], s[104:105] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x68,0xd0,0x00,0x00] + +v_maximum_f64 v[5:6], vcc, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xf4,0x00,0x00] + +v_maximum_f64 v[5:6], vcc, 0xaf121234 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf] + +v_maximum_f64 v[5:6], ttmp[14:15], src_scc +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x7a,0xfa,0x01,0x00] + +v_maximum_f64 v[5:6], vcc, 0.5 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xe0,0x01,0x00] + +v_maximum_f64 v[5:6], exec, -1 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f64 v[5:6], |exec|, null +// GFX12: encoding: [0x05,0x01,0x42,0xd7,0x7e,0xf8,0x00,0x00] + +v_maximum_f64 v[5:6], null, exec +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f64 v[5:6], -1, exec +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xc1,0xfc,0x00,0x00] + +v_maximum_f64 v[5:6], 0.5, -vcc +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xf0,0xd4,0x00,0x40] + +v_maximum_f64 v[5:6], -src_scc, |vcc| +// GFX12: encoding: [0x05,0x02,0x42,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f64 v[254:255], -|2|, -|vcc| +// GFX12: encoding: [0xfe,0x03,0x42,0xd7,0x82,0xd4,0x00,0x60] + +v_minimum3_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x01,0x05,0x0e,0x00] + +v_minimum3_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0xff,0x05,0xa4,0x01] + +v_minimum3_f32 v5, s1, v255, exec_hi +// GFX12: encoding: 
[0x05,0x00,0x2d,0xd6,0x01,0xfe,0xff,0x01] + +v_minimum3_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimum3_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimum3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minimum3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2d,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimum3_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimum3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2d,0xd6,0x7e,0x82,0xad,0x01] + +v_minimum3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2d,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimum3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x2d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minimum3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2d,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimum3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minimum3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x2d,0xd6,0xfd,0xd4,0x04,0x33] + +v_minimum3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x2d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x01,0x05,0x0e,0x00] + +v_maximum3_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0xff,0x05,0xa4,0x01] + +v_maximum3_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x01,0xfe,0xff,0x01] + +v_maximum3_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximum3_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximum3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: 
encoding: [0x05,0x00,0x2e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2e,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximum3_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximum3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2e,0xd6,0x7e,0x82,0xad,0x01] + +v_maximum3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2e,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximum3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x2e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2e,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximum3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_maximum3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x2e,0xd6,0xfd,0xd4,0x04,0x33] + +v_maximum3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x2e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minimum3_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x01,0x05,0x0e,0x00] + +v_minimum3_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0xff,0x05,0xa4,0x01] + +v_minimum3_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x01,0xfe,0xff,0x01] + +v_minimum3_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimum3_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimum3_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_minimum3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimum3_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimum3_f16 v5, 
|exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2f,0xd6,0x7e,0x82,0xad,0x01] + +v_minimum3_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimum3_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x2f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_minimum3_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimum3_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0xf0,0xfa,0xc0,0x43] + +v_minimum3_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x2f,0xd6,0xfd,0xd4,0x04,0x23] + +v_minimum3_f16 v255, -|0xaf12|, -|vcc_hi|, null clamp +// GFX12: encoding: [0xff,0x83,0x2f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00] + +v_maximum3_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x01,0x05,0x0e,0x00] + +v_maximum3_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0xff,0x05,0xa4,0x01] + +v_maximum3_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x01,0xfe,0xff,0x01] + +v_maximum3_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximum3_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximum3_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_maximum3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x30,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximum3_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximum3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x30,0xd6,0x7e,0x82,0xad,0x01] + +v_maximum3_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x30,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximum3_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x30,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + 
+v_maximum3_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x30,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximum3_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0xf0,0xfa,0xc0,0x43] + +v_maximum3_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x30,0xd6,0xfd,0xd4,0x04,0x23] + +v_maximumminimum_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x01,0x05,0x0e,0x00] + +v_maximumminimum_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0xff,0x05,0xa4,0x01] + +v_maximumminimum_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x01,0xfe,0xff,0x01] + +v_maximumminimum_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximumminimum_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximumminimum_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6d,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximumminimum_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximumminimum_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6d,0xd6,0x7e,0x82,0xad,0x01] + +v_maximumminimum_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6d,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximumminimum_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x6d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6d,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximumminimum_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_maximumminimum_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x6d,0xd6,0xfd,0xd4,0x04,0x33] + +v_maximumminimum_f32 v255, -|0xaf123456|, -|vcc_hi|, null 
clamp div:2 +// GFX12: encoding: [0xff,0x83,0x6d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x01,0x05,0x0e,0x00] + +v_minimummaximum_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0xff,0x05,0xa4,0x01] + +v_minimummaximum_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x01,0xfe,0xff,0x01] + +v_minimummaximum_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimummaximum_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimummaximum_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6c,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimummaximum_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimummaximum_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6c,0xd6,0x7e,0x82,0xad,0x01] + +v_minimummaximum_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6c,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimummaximum_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x6c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6c,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimummaximum_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minimummaximum_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x6c,0xd6,0xfd,0xd4,0x04,0x33] + +v_minimummaximum_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x6c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x01,0x05,0x0e,0x00] + +v_maximumminimum_f16 
v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0xff,0x05,0xa4,0x01] + +v_maximumminimum_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x01,0xfe,0xff,0x01] + +v_maximumminimum_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximumminimum_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximumminimum_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_maximumminimum_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximumminimum_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximumminimum_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6f,0xd6,0x7e,0x82,0xad,0x01] + +v_maximumminimum_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximumminimum_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x6f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_maximumminimum_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximumminimum_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0xf0,0xfa,0xc0,0x43] + +v_maximumminimum_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x6f,0xd6,0xfd,0xd4,0x04,0x23] + +v_maximumminimum_f16 v255, -|0xaf12|, -|vcc_hi|, null clamp +// GFX12: encoding: [0xff,0x83,0x6f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00] + +v_minimummaximum_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x01,0x05,0x0e,0x00] + +v_minimummaximum_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0xff,0x05,0xa4,0x01] + +v_minimummaximum_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x01,0xfe,0xff,0x01] + +v_minimummaximum_f16 v5, s105, s105, exec_lo +// GFX12: 
encoding: [0x05,0x00,0x6e,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimummaximum_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimummaximum_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_minimummaximum_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6e,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimummaximum_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimummaximum_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6e,0xd6,0x7e,0x82,0xad,0x01] + +v_minimummaximum_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6e,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimummaximum_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x6e,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_minimummaximum_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6e,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimummaximum_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0xf0,0xfa,0xc0,0x43] + +v_minimummaximum_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x6e,0xd6,0xfd,0xd4,0x04,0x23] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index 63087442c4648..26f63102df950 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -4693,3 +4693,504 @@ v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2, v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf // GFX12: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff] + +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_minimum_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_minimum_f32 v5, v1, v2 
row_mirror +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_minimum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_minimum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_minimum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_maximum_f32 v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shl:1 +// 
GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_maximum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_maximum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_maximum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_minimum_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_minimum_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_minimum_f16 v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_shr:1 +// GFX12: 
[0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_minimum_f16 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_minimum_f16 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_minimum_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_maximum_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_maximum_f16 v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_ror:1 +// GFX12: 
[0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_maximum_f16 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_maximum_f16 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_maximum_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimum3_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimum3_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimum3_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimum3_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimum3_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: 
[0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_minimum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximum3_f32 v5, -|v1|, 
-|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maximum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_minimum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimum3_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimum3_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimum3_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimum3_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimum3_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: 
[0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimum3_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_minimum3_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_maximum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximum3_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximum3_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximum3_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximum3_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximum3_f16 v5, -|v1|, v2, -|-1| 
row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_maximum3_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_maximum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximumminimum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximumminimum_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximumminimum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maximumminimum_f32 v5, v1, -|v2|, -|0.5| 
mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimummaximum_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimummaximum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimummaximum_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimummaximum_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimummaximum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimummaximum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_minimummaximum_f32 v5, v1, -|v2|, -|0.5| mul:4 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximumminimum_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximumminimum_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximumminimum_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_maximumminimum_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimummaximum_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimummaximum_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimummaximum_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimummaximum_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimummaximum_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimummaximum_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_minimummaximum_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_minimummaximum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index 82807aca0e7bf..de294b1ff2a22 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -2966,3 +2966,339 @@ v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4 v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05] + +v_minimum_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_minimum_f32 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x65,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_minimum_f32 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x65,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_minimum_f32 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x65,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_maximum_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x66,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_maximum_f32 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x66,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_maximum_f32 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x66,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_maximum_f32 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x66,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_minimum_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x67,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_minimum_f16 v5, |v1|, -v2 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x67,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_minimum_f16 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x67,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_minimum_f16 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x67,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_maximum_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_maximum_f16 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x68,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_maximum_f16 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x68,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_maximum_f16 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x68,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_minimum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x2d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x2d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x2d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, -|v1|, -|v2|, 
null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x2d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x2d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x2d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x2d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_maximum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x2e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x2e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x2e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x2e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x2e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: 
[0x05,0x06,0x2e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x2e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minimum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x2f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x2f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x2f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x2f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x2f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x2f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x2f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_maximum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + 
+v_maximum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x30,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x30,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x30,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x30,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x30,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x30,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x30,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_maximumminimum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x6d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minimummaximum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX12: [0x05,0x01,0x6c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x6c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_maximumminimum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, -v1, v2, |exec_lo| 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x6f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_minimummaximum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, -|v1|, v2, -|-1| 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6e,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6e,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x6e,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s index 7a4687b34f8f8..9a21f7a2eb560 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s @@ -1250,3 +1250,93 @@ v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] v_pk_sub_u16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] clamp // GFX12: [0xff,0xd0,0x0b,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00] + +v_pk_minimum_f16 v5, v1, v2 +// GFX12: [0x05,0x40,0x1d,0xcc,0x01,0x05,0x02,0x18] + +v_pk_minimum_f16 v5, v255, v255 +// GFX12: [0x05,0x40,0x1d,0xcc,0xff,0xff,0x03,0x18] + +v_pk_minimum_f16 v5, s1, s2 +// GFX12: [0x05,0x40,0x1d,0xcc,0x01,0x04,0x00,0x18] + +v_pk_minimum_f16 v5, s105, s105 +// GFX12: [0x05,0x40,0x1d,0xcc,0x69,0xd2,0x00,0x18] + +v_pk_minimum_f16 v5, vcc_lo, ttmp15 +// GFX12: [0x05,0x40,0x1d,0xcc,0x6a,0xf6,0x00,0x18] + +v_pk_minimum_f16 v5, vcc_hi, 0xfe0b +// GFX12: [0x05,0x40,0x1d,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00] + +v_pk_minimum_f16 v5, ttmp15, src_scc +// GFX12: [0x05,0x40,0x1d,0xcc,0x7b,0xfa,0x01,0x18] + +v_pk_minimum_f16 v5, m0, 0.5 +// GFX12: [0x05,0x40,0x1d,0xcc,0x7d,0xe0,0x01,0x18] + +v_pk_minimum_f16 v5, exec_lo, -1 +// GFX12: [0x05,0x40,0x1d,0xcc,0x7e,0x82,0x01,0x18] + +v_pk_minimum_f16 v5, exec_hi, null +// GFX12: [0x05,0x40,0x1d,0xcc,0x7f,0xf8,0x00,0x18] + +v_pk_minimum_f16 v5, null, exec_lo +// GFX12: [0x05,0x40,0x1d,0xcc,0x7c,0xfc,0x00,0x18] + +v_pk_minimum_f16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] +// GFX12: [0x05,0x59,0x1d,0xcc,0xc1,0xfe,0x00,0x20] + 
+v_pk_minimum_f16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1] +// GFX12: [0x05,0x42,0x1d,0xcc,0xf0,0xfa,0x00,0x58] + +v_pk_minimum_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0] +// GFX12: [0x05,0x48,0x1d,0xcc,0xfd,0xd4,0x00,0x10] + +v_pk_minimum_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp +// GFX12: [0xff,0xd3,0x1d,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00] + +v_pk_maximum_f16 v5, v1, v2 +// GFX12: [0x05,0x40,0x1e,0xcc,0x01,0x05,0x02,0x18] + +v_pk_maximum_f16 v5, v255, v255 +// GFX12: [0x05,0x40,0x1e,0xcc,0xff,0xff,0x03,0x18] + +v_pk_maximum_f16 v5, s1, s2 +// GFX12: [0x05,0x40,0x1e,0xcc,0x01,0x04,0x00,0x18] + +v_pk_maximum_f16 v5, s105, s105 +// GFX12: [0x05,0x40,0x1e,0xcc,0x69,0xd2,0x00,0x18] + +v_pk_maximum_f16 v5, vcc_lo, ttmp15 +// GFX12: [0x05,0x40,0x1e,0xcc,0x6a,0xf6,0x00,0x18] + +v_pk_maximum_f16 v5, vcc_hi, 0xfe0b +// GFX12: [0x05,0x40,0x1e,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00] + +v_pk_maximum_f16 v5, ttmp15, src_scc +// GFX12: [0x05,0x40,0x1e,0xcc,0x7b,0xfa,0x01,0x18] + +v_pk_maximum_f16 v5, m0, 0.5 +// GFX12: [0x05,0x40,0x1e,0xcc,0x7d,0xe0,0x01,0x18] + +v_pk_maximum_f16 v5, exec_lo, -1 +// GFX12: [0x05,0x40,0x1e,0xcc,0x7e,0x82,0x01,0x18] + +v_pk_maximum_f16 v5, exec_hi, null +// GFX12: [0x05,0x40,0x1e,0xcc,0x7f,0xf8,0x00,0x18] + +v_pk_maximum_f16 v5, null, exec_lo +// GFX12: [0x05,0x40,0x1e,0xcc,0x7c,0xfc,0x00,0x18] + +v_pk_maximum_f16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] +// GFX12: [0x05,0x59,0x1e,0xcc,0xc1,0xfe,0x00,0x20] + +v_pk_maximum_f16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1] neg_lo:[0,1] neg_hi:[0,1] +// GFX12: [0x05,0x42,0x1e,0xcc,0xf0,0xfa,0x00,0x58] + +v_pk_maximum_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] neg_hi:[0,0] +// GFX12: [0x05,0x48,0x1e,0xcc,0xfd,0xd4,0x00,0x10] + +v_pk_maximum_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp +// 
GFX12: [0xff,0xd3,0x1e,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt index c177af7a6e50b..297b6b083550c 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt @@ -484,6 +484,108 @@ # GFX12: s_max_num_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa1] 0x01,0x69,0x85,0xa1 +# GFX12: s_minimum_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa7] +0x01,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0xa7] +0x01,0x02,0xe9,0xa7 + +# GFX12: s_minimum_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x85,0xa7] +0x69,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, s103, s2 ; encoding: [0x67,0x02,0x85,0xa7] +0x67,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x85,0xa7] +0x6a,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x85,0xa7] +0x6b,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, ttmp11, s2 ; encoding: [0x77,0x02,0x85,0xa7] +0x77,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x85,0xa7] +0x7d,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x85,0xa7] +0x7e,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x85,0xa7] +0x7f,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0xa7] +0x80,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0xa7] +0xc1,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x85,0xa7] +0xf0,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x85,0xa7] +0xf7,0x02,0x85,0xa7 + +# GFX12: s_minimum_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x85,0xa7,0x56,0x34,0x12,0xaf] +0xff,0x02,0x85,0xa7,0x56,0x34,0x12,0xaf + +# GFX12: s_minimum_f32 s5, 0x3f717273, s2 ; encoding: 
[0xff,0x02,0x85,0xa7,0x73,0x72,0x71,0x3f] +0xff,0x02,0x85,0xa7,0x73,0x72,0x71,0x3f + +# GFX12: s_minimum_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa7] +0x01,0x69,0x85,0xa7 + +# GFX12: s_maximum_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa8] +0x01,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa8] +0x01,0x02,0x69,0xa8 + +# GFX12: s_maximum_f32 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa8] +0x69,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, s103, s2 ; encoding: [0x67,0x02,0x05,0xa8] +0x67,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa8] +0x6a,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa8] +0x6b,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, ttmp11, s2 ; encoding: [0x77,0x02,0x05,0xa8] +0x77,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa8] +0x7d,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa8] +0x7e,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa8] +0x7f,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa8] +0x80,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa8] +0xc1,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa8] +0xf0,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa8] +0xf7,0x02,0x05,0xa8 + +# GFX12: s_maximum_f32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x05,0xa8,0x56,0x34,0x12,0xaf] +0xff,0x02,0x05,0xa8,0x56,0x34,0x12,0xaf + +# GFX12: s_maximum_f32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0xa8,0x73,0x72,0x71,0x3f] +0xff,0x02,0x05,0xa8,0x73,0x72,0x71,0x3f + +# GFX12: s_maximum_f32 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa8] +0x01,0x69,0x05,0xa8 + # GFX12: s_fmac_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa3] 0x01,0x02,0x85,0xa3 @@ -943,6 +1045,102 @@ # GFX12: 
s_min_num_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa5] 0x01,0x69,0x85,0xa5 +# GFX12: s_maximum_f16 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa9] +0x01,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0xa9] +0x01,0x02,0x69,0xa9 + +# GFX12: s_maximum_f16 s5, s105, s2 ; encoding: [0x69,0x02,0x05,0xa9] +0x69,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, s101, s2 ; encoding: [0x65,0x02,0x05,0xa9] +0x65,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x05,0xa9] +0x6a,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x05,0xa9] +0x6b,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, m0, s2 ; encoding: [0x7d,0x02,0x05,0xa9] +0x7d,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x05,0xa9] +0x7e,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x05,0xa9] +0x7f,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0xa9] +0x80,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0xa9] +0xc1,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x05,0xa9] +0xf0,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x05,0xa9] +0xf7,0x02,0x05,0xa9 + +# GFX12: s_maximum_f16 s5, 0xfe0b, s2 ; encoding: [0xff,0x02,0x05,0xa9,0x0b,0xfe,0x00,0x00] +0xff,0x02,0x05,0xa9,0x0b,0xfe,0x00,0x00 + +# GFX12: s_maximum_f16 s5, 0x3456, s2 ; encoding: [0xff,0x02,0x05,0xa9,0x56,0x34,0x00,0x00] +0xff,0x02,0x05,0xa9,0x56,0x34,0x00,0x00 + +# GFX12: s_maximum_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x05,0xa9] +0x01,0x69,0x05,0xa9 + +# GFX12: s_minimum_f16 s5, s1, s2 ; encoding: [0x01,0x02,0x85,0xa8] +0x01,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0xa8] +0x01,0x02,0xe9,0xa8 + +# GFX12: s_minimum_f16 s5, s105, s2 ; encoding: [0x69,0x02,0x85,0xa8] +0x69,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, s101, s2 ; encoding: 
[0x65,0x02,0x85,0xa8] +0x65,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, vcc_lo, s2 ; encoding: [0x6a,0x02,0x85,0xa8] +0x6a,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, vcc_hi, s2 ; encoding: [0x6b,0x02,0x85,0xa8] +0x6b,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, m0, s2 ; encoding: [0x7d,0x02,0x85,0xa8] +0x7d,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, exec_lo, s2 ; encoding: [0x7e,0x02,0x85,0xa8] +0x7e,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, exec_hi, s2 ; encoding: [0x7f,0x02,0x85,0xa8] +0x7f,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0xa8] +0x80,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0xa8] +0xc1,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, 0.5, s2 ; encoding: [0xf0,0x02,0x85,0xa8] +0xf0,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, -4.0, s2 ; encoding: [0xf7,0x02,0x85,0xa8] +0xf7,0x02,0x85,0xa8 + +# GFX12: s_minimum_f16 s5, 0xfe0b, s2 ; encoding: [0xff,0x02,0x85,0xa8,0x0b,0xfe,0x00,0x00] +0xff,0x02,0x85,0xa8,0x0b,0xfe,0x00,0x00 + +# GFX12: s_minimum_f16 s5, 0x3456, s2 ; encoding: [0xff,0x02,0x85,0xa8,0x56,0x34,0x00,0x00] +0xff,0x02,0x85,0xa8,0x56,0x34,0x00,0x00 + +# GFX12: s_minimum_f16 s5, s1, s105 ; encoding: [0x01,0x69,0x85,0xa8] +0x01,0x69,0x85,0xa8 + # GFX12: s_absdiff_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x83] 0x01,0x02,0x7f,0x83 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index 7f4f142d5bc1c..8ff8c2c4c4f6a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -5495,3 +5495,632 @@ # GFX12: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX12: v_minimum_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x65,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] 
+0xff,0x03,0x65,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf + +# GFX12: v_minimum_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x65,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x65,0xd7,0xc1,0xfe,0x00,0x00 + +# GFX12: v_minimum_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x65,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x65,0xd7,0xfd,0xd4,0x00,0x20 + +# GFX12: v_minimum_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x65,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x65,0xd7,0xf0,0xfa,0x00,0x40 + +# GFX12: v_minimum_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x65,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x65,0xd7,0x7e,0x82,0x01,0x00 + +# GFX12: v_minimum_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x65,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x65,0xd7,0x7d,0xe0,0x01,0x00 + +# GFX12: v_minimum_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x65,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x65,0xd7,0x7c,0xfc,0x00,0x00 + +# GFX12: v_minimum_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x65,0xd7,0x01,0x04,0x00,0x00 + +# GFX12: v_minimum_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x65,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x65,0xd7,0x69,0xd2,0x00,0x00 + +# GFX12: v_minimum_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x65,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x65,0xd7,0x7b,0xfa,0x01,0x00 + +# GFX12: v_minimum_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x65,0xd7,0x01,0x05,0x02,0x00 + +# GFX12: v_minimum_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x65,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x65,0xd7,0xff,0xff,0x03,0x00 + +# GFX12: v_minimum_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x65,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x65,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX12: v_minimum_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x65,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x65,0xd7,0x6a,0xf6,0x00,0x00 + +# GFX12: v_minimum_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x65,0xd7,0x7f,0xf8,0x00,0x00] 
+0x05,0x01,0x65,0xd7,0x7f,0xf8,0x00,0x00 + +# GFX12: v_maximum_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x66,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x66,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf + +# GFX12: v_maximum_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x66,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x66,0xd7,0xc1,0xfe,0x00,0x00 + +# GFX12: v_maximum_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x66,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x66,0xd7,0xfd,0xd4,0x00,0x20 + +# GFX12: v_maximum_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x66,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x66,0xd7,0xf0,0xfa,0x00,0x40 + +# GFX12: v_maximum_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x66,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x66,0xd7,0x7e,0x82,0x01,0x00 + +# GFX12: v_maximum_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x66,0xd7,0x7d,0xe0,0x01,0x00 + +# GFX12: v_maximum_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x66,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x66,0xd7,0x7c,0xfc,0x00,0x00 + +# GFX12: v_maximum_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x66,0xd7,0x01,0x04,0x00,0x00 + +# GFX12: v_maximum_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x66,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x66,0xd7,0x69,0xd2,0x00,0x00 + +# GFX12: v_maximum_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x66,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x66,0xd7,0x7b,0xfa,0x01,0x00 + +# GFX12: v_maximum_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x66,0xd7,0x01,0x05,0x02,0x00 + +# GFX12: v_maximum_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x66,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x66,0xd7,0xff,0xff,0x03,0x00 + +# GFX12: v_maximum_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x66,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x66,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX12: v_maximum_f32 v5, vcc_lo, ttmp15 ; encoding: 
[0x05,0x00,0x66,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x66,0xd7,0x6a,0xf6,0x00,0x00 + +# GFX12: v_maximum_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x66,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x66,0xd7,0x7f,0xf8,0x00,0x00 + +# GFX12: v_minimum_f16 v205, v201, v200 ; encoding: [0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00] +0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00 + +# GFX12: v_minimum_f16 v255, -|0xaf12|, -|vcc_hi| ; encoding: [0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00] +0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00 + +# GFX12: v_minimum_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00 + +# GFX12: v_minimum_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20 + +# GFX12: v_minimum_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40 + +# GFX12: v_minimum_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00 + +# GFX12: v_minimum_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00 + +# GFX12: v_minimum_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00 + +# GFX12: v_minimum_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00 + +# GFX12: v_minimum_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00 + +# GFX12: v_minimum_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00 + +# GFX12: v_minimum_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00 + +# GFX12: v_minimum_f16 v5, v255, v255 ; encoding: 
[0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00 + +# GFX12: v_minimum_f16 v5, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00] +0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00 + +# GFX12: v_minimum_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00 + +# GFX12: v_minimum_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00 + +# GFX12: v_maximum_f16 v205, v201, v200 ; encoding: [0xcd,0x00,0x68,0xd7,0xc9,0x91,0x03,0x00] +0xcd,0x00,0x68,0xd7,0xc9,0x91,0x03,0x00 + +# GFX12: v_maximum_f16 v255, -|0xaf12|, -|vcc_hi| ; encoding: [0xff,0x03,0x68,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00] +0xff,0x03,0x68,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00 + +# GFX12: v_maximum_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x68,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x68,0xd7,0xc1,0xfe,0x00,0x00 + +# GFX12: v_maximum_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x68,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x68,0xd7,0xfd,0xd4,0x00,0x20 + +# GFX12: v_maximum_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x68,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x68,0xd7,0xf0,0xfa,0x00,0x40 + +# GFX12: v_maximum_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00 + +# GFX12: v_maximum_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00 + +# GFX12: v_maximum_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x68,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x68,0xd7,0x7c,0xfc,0x00,0x00 + +# GFX12: v_maximum_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00 + +# GFX12: v_maximum_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00 + +# GFX12: v_maximum_f16 v5, ttmp15, 
src_scc ; encoding: [0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00 + +# GFX12: v_maximum_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00 + +# GFX12: v_maximum_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00 + +# GFX12: v_maximum_f16 v5, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00] +0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00 + +# GFX12: v_maximum_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00 + +# GFX12: v_maximum_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x68,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x68,0xd7,0x7f,0xf8,0x00,0x00 + +# GFX12: v_minimum_f64 v[254:255], -|2|, -|vcc| ; encoding: [0xfe,0x03,0x41,0xd7,0x82,0xd4,0x00,0x60] +0xfe,0x03,0x41,0xd7,0x82,0xd4,0x00,0x60 + +# GFX12: v_minimum_f64 v[5:6], -1, exec ; encoding: [0x05,0x00,0x41,0xd7,0xc1,0xfc,0x00,0x00] +0x05,0x00,0x41,0xd7,0xc1,0xfc,0x00,0x00 + +# GFX12: v_minimum_f64 v[5:6], -src_scc, |vcc| ; encoding: [0x05,0x02,0x41,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x41,0xd7,0xfd,0xd4,0x00,0x20 + +# GFX12: v_minimum_f64 v[5:6], 0.5, -vcc ; encoding: [0x05,0x00,0x41,0xd7,0xf0,0xd4,0x00,0x40] +0x05,0x00,0x41,0xd7,0xf0,0xd4,0x00,0x40 + +# GFX12: v_minimum_f64 v[5:6], exec, -1 ; encoding: [0x05,0x00,0x41,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x41,0xd7,0x7e,0x82,0x01,0x00 + +# GFX12: v_minimum_f64 v[5:6], null, exec ; encoding: [0x05,0x00,0x41,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x41,0xd7,0x7c,0xfc,0x00,0x00 + +# GFX12: v_minimum_f64 v[5:6], s[104:105], s[104:105] ; encoding: [0x05,0x00,0x41,0xd7,0x68,0xd0,0x00,0x00] +0x05,0x00,0x41,0xd7,0x68,0xd0,0x00,0x00 + +# GFX12: v_minimum_f64 v[5:6], s[6:7], s[4:5] ; encoding: [0x05,0x00,0x41,0xd7,0x06,0x08,0x00,0x00] +0x05,0x00,0x41,0xd7,0x06,0x08,0x00,0x00 + +# GFX12: v_minimum_f64 
v[5:6], ttmp[14:15], src_scc ; encoding: [0x05,0x00,0x41,0xd7,0x7a,0xfa,0x01,0x00] +0x05,0x00,0x41,0xd7,0x7a,0xfa,0x01,0x00 + +# GFX12: v_minimum_f64 v[5:6], v[1:2], v[3:4] ; encoding: [0x05,0x00,0x41,0xd7,0x01,0x07,0x02,0x00] +0x05,0x00,0x41,0xd7,0x01,0x07,0x02,0x00 + +# GFX12: v_minimum_f64 v[5:6], v[254:255], v[254:255] ; encoding: [0x05,0x00,0x41,0xd7,0xfe,0xfd,0x03,0x00] +0x05,0x00,0x41,0xd7,0xfe,0xfd,0x03,0x00 + +# GFX12: v_minimum_f64 v[5:6], vcc, 0.5 ; encoding: [0x05,0x00,0x41,0xd7,0x6a,0xe0,0x01,0x00] +0x05,0x00,0x41,0xd7,0x6a,0xe0,0x01,0x00 + +# GFX12: v_minimum_f64 v[5:6], vcc, 0xaf121234 ; encoding: [0x05,0x00,0x41,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf] +0x05,0x00,0x41,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf + +# GFX12: v_minimum_f64 v[5:6], vcc, ttmp[14:15] ; encoding: [0x05,0x00,0x41,0xd7,0x6a,0xf4,0x00,0x00] +0x05,0x00,0x41,0xd7,0x6a,0xf4,0x00,0x00 + +# GFX12: v_minimum_f64 v[5:6], |exec|, null ; encoding: [0x05,0x01,0x41,0xd7,0x7e,0xf8,0x00,0x00] +0x05,0x01,0x41,0xd7,0x7e,0xf8,0x00,0x00 + +# GFX12: v_maximum_f64 v[254:255], -|2|, -|vcc| ; encoding: [0xfe,0x03,0x42,0xd7,0x82,0xd4,0x00,0x60] +0xfe,0x03,0x42,0xd7,0x82,0xd4,0x00,0x60 + +# GFX12: v_maximum_f64 v[5:6], -1, exec ; encoding: [0x05,0x00,0x42,0xd7,0xc1,0xfc,0x00,0x00] +0x05,0x00,0x42,0xd7,0xc1,0xfc,0x00,0x00 + +# GFX12: v_maximum_f64 v[5:6], -src_scc, |vcc| ; encoding: [0x05,0x02,0x42,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x42,0xd7,0xfd,0xd4,0x00,0x20 + +# GFX12: v_maximum_f64 v[5:6], 0.5, -vcc ; encoding: [0x05,0x00,0x42,0xd7,0xf0,0xd4,0x00,0x40] +0x05,0x00,0x42,0xd7,0xf0,0xd4,0x00,0x40 + +# GFX12: v_maximum_f64 v[5:6], exec, -1 ; encoding: [0x05,0x00,0x42,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x42,0xd7,0x7e,0x82,0x01,0x00 + +# GFX12: v_maximum_f64 v[5:6], null, exec ; encoding: [0x05,0x00,0x42,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x42,0xd7,0x7c,0xfc,0x00,0x00 + +# GFX12: v_maximum_f64 v[5:6], s[104:105], s[104:105] ; encoding: [0x05,0x00,0x42,0xd7,0x68,0xd0,0x00,0x00] 
+0x05,0x00,0x42,0xd7,0x68,0xd0,0x00,0x00 + +# GFX12: v_maximum_f64 v[5:6], s[6:7], s[4:5] ; encoding: [0x05,0x00,0x42,0xd7,0x06,0x08,0x00,0x00] +0x05,0x00,0x42,0xd7,0x06,0x08,0x00,0x00 + +# GFX12: v_maximum_f64 v[5:6], ttmp[14:15], src_scc ; encoding: [0x05,0x00,0x42,0xd7,0x7a,0xfa,0x01,0x00] +0x05,0x00,0x42,0xd7,0x7a,0xfa,0x01,0x00 + +# GFX12: v_maximum_f64 v[5:6], v[1:2], v[3:4] ; encoding: [0x05,0x00,0x42,0xd7,0x01,0x07,0x02,0x00] +0x05,0x00,0x42,0xd7,0x01,0x07,0x02,0x00 + +# GFX12: v_maximum_f64 v[5:6], v[254:255], v[254:255] ; encoding: [0x05,0x00,0x42,0xd7,0xfe,0xfd,0x03,0x00] +0x05,0x00,0x42,0xd7,0xfe,0xfd,0x03,0x00 + +# GFX12: v_maximum_f64 v[5:6], vcc, 0.5 ; encoding: [0x05,0x00,0x42,0xd7,0x6a,0xe0,0x01,0x00] +0x05,0x00,0x42,0xd7,0x6a,0xe0,0x01,0x00 + +# GFX12: v_maximum_f64 v[5:6], vcc, 0xaf121234 ; encoding: [0x05,0x00,0x42,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf] +0x05,0x00,0x42,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf + +# GFX12: v_maximum_f64 v[5:6], vcc, ttmp[14:15] ; encoding: [0x05,0x00,0x42,0xd7,0x6a,0xf4,0x00,0x00] +0x05,0x00,0x42,0xd7,0x6a,0xf4,0x00,0x00 + +# GFX12: v_maximum_f64 v[5:6], |exec|, null ; encoding: [0x05,0x01,0x42,0xd7,0x7e,0xf8,0x00,0x00] +0x05,0x01,0x42,0xd7,0x7e,0xf8,0x00,0x00 + +# GFX12: v_maximum3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x2e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x2e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf + +# GFX12: v_maximum3_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x2e,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x2e,0xd6,0xc1,0xfe,0xf4,0xc3 + +# GFX12: v_maximum3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x2e,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x2e,0xd6,0xfd,0xd4,0x04,0x33 + +# GFX12: v_maximum3_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x2e,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x2e,0xd6,0x7f,0xf8,0xa8,0xa1 + +# GFX12: v_maximum3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: 
[0x05,0x07,0x2e,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x2e,0xd6,0x7b,0xfa,0xed,0xe1 + +# GFX12: v_maximum3_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x2e,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x2e,0xd6,0xf0,0xfa,0xc0,0x4b + +# GFX12: v_maximum3_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x2e,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x2e,0xd6,0x7d,0xe0,0xf5,0x01 + +# GFX12: v_maximum3_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x2e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x2e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf + +# GFX12: v_maximum3_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x2e,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x2e,0xd6,0x01,0xfe,0xff,0x01 + +# GFX12: v_maximum3_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x2e,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x2e,0xd6,0x69,0xd2,0xf8,0x01 + +# GFX12: v_maximum3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x2e,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x2e,0xd6,0x01,0x05,0x0e,0x00 + +# GFX12: v_maximum3_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x2e,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x2e,0xd6,0xff,0x05,0xa4,0x01 + +# GFX12: v_maximum3_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x2e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf + +# GFX12: v_maximum3_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x2e,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x2e,0xd6,0x6a,0xf6,0x0c,0x04 + +# GFX12: v_maximum3_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x2e,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x2e,0xd6,0x7e,0x82,0xad,0x01 + +# GFX12: v_minimum3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x2d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x2d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf + +# GFX12: v_minimum3_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x2d,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x2d,0xd6,0xc1,0xfe,0xf4,0xc3 + +# GFX12: 
v_minimum3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x2d,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x2d,0xd6,0xfd,0xd4,0x04,0x33 + +# GFX12: v_minimum3_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x2d,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x2d,0xd6,0x7f,0xf8,0xa8,0xa1 + +# GFX12: v_minimum3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x2d,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x2d,0xd6,0x7b,0xfa,0xed,0xe1 + +# GFX12: v_minimum3_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x2d,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x2d,0xd6,0xf0,0xfa,0xc0,0x4b + +# GFX12: v_minimum3_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x2d,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x2d,0xd6,0x7d,0xe0,0xf5,0x01 + +# GFX12: v_minimum3_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x2d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x2d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf + +# GFX12: v_minimum3_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x2d,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x2d,0xd6,0x01,0xfe,0xff,0x01 + +# GFX12: v_minimum3_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x2d,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x2d,0xd6,0x69,0xd2,0xf8,0x01 + +# GFX12: v_minimum3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x2d,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x2d,0xd6,0x01,0x05,0x0e,0x00 + +# GFX12: v_minimum3_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x2d,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x2d,0xd6,0xff,0x05,0xa4,0x01 + +# GFX12: v_minimum3_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x2d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf + +# GFX12: v_minimum3_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x2d,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x2d,0xd6,0x6a,0xf6,0x0c,0x04 + +# GFX12: v_minimum3_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x2d,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x2d,0xd6,0x7e,0x82,0xad,0x01 + +# GFX12: 
v_maximum3_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x30,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x30,0xd6,0xc1,0xfe,0xf4,0xc3 + +# GFX12: v_maximum3_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x30,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x02,0x30,0xd6,0xfd,0xd4,0x04,0x23 + +# GFX12: v_maximum3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x30,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x30,0xd6,0x7f,0xf8,0xa8,0xa1 + +# GFX12: v_maximum3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x30,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x30,0xd6,0x7b,0xfa,0xed,0xe1 + +# GFX12: v_maximum3_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x30,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x00,0x30,0xd6,0xf0,0xfa,0xc0,0x43 + +# GFX12: v_maximum3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x30,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x30,0xd6,0x7d,0xe0,0xf5,0x01 + +# GFX12: v_maximum3_f16 v5, null, exec_lo, -|0xaf12| ; encoding: [0x05,0x04,0x30,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] +0x05,0x04,0x30,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00 + +# GFX12: v_maximum3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x30,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x30,0xd6,0x01,0xfe,0xff,0x01 + +# GFX12: v_maximum3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x30,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x30,0xd6,0x69,0xd2,0xf8,0x01 + +# GFX12: v_maximum3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x30,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x30,0xd6,0x01,0x05,0x0e,0x00 + +# GFX12: v_maximum3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x30,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x30,0xd6,0xff,0x05,0xa4,0x01 + +# GFX12: v_maximum3_f16 v5, vcc_hi, 0xaf12, v255 ; encoding: [0x05,0x00,0x30,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] +0x05,0x00,0x30,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00 + +# GFX12: v_maximum3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x30,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x30,0xd6,0x6a,0xf6,0x0c,0x04 + +# GFX12: v_maximum3_f16 
v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x30,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x30,0xd6,0x7e,0x82,0xad,0x01 + +# GFX12: v_minimum3_f16 v255, -|0xaf12|, -|vcc_hi|, null clamp ; encoding: [0xff,0x83,0x2f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00] +0xff,0x83,0x2f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00 + +# GFX12: v_minimum3_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x2f,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x2f,0xd6,0xc1,0xfe,0xf4,0xc3 + +# GFX12: v_minimum3_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x2f,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x02,0x2f,0xd6,0xfd,0xd4,0x04,0x23 + +# GFX12: v_minimum3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x2f,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x2f,0xd6,0x7f,0xf8,0xa8,0xa1 + +# GFX12: v_minimum3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x2f,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x2f,0xd6,0x7b,0xfa,0xed,0xe1 + +# GFX12: v_minimum3_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x2f,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x00,0x2f,0xd6,0xf0,0xfa,0xc0,0x43 + +# GFX12: v_minimum3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x2f,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x2f,0xd6,0x7d,0xe0,0xf5,0x01 + +# GFX12: v_minimum3_f16 v5, null, exec_lo, -|0xaf12| ; encoding: [0x05,0x04,0x2f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] +0x05,0x04,0x2f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00 + +# GFX12: v_minimum3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x2f,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x2f,0xd6,0x01,0xfe,0xff,0x01 + +# GFX12: v_minimum3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x2f,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x2f,0xd6,0x69,0xd2,0xf8,0x01 + +# GFX12: v_minimum3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x2f,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x2f,0xd6,0x01,0x05,0x0e,0x00 + +# GFX12: v_minimum3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x2f,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x2f,0xd6,0xff,0x05,0xa4,0x01 + +# GFX12: 
v_minimum3_f16 v5, vcc_hi, 0xaf12, v255 ; encoding: [0x05,0x00,0x2f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] +0x05,0x00,0x2f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00 + +# GFX12: v_minimum3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x2f,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x2f,0xd6,0x6a,0xf6,0x0c,0x04 + +# GFX12: v_minimum3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x2f,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x2f,0xd6,0x7e,0x82,0xad,0x01 + +# GFX12: v_maximumminimum_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x6d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf + +# GFX12: v_maximumminimum_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x6d,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x6d,0xd6,0xc1,0xfe,0xf4,0xc3 + +# GFX12: v_maximumminimum_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x6d,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x6d,0xd6,0xfd,0xd4,0x04,0x33 + +# GFX12: v_maximumminimum_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x6d,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x6d,0xd6,0x7f,0xf8,0xa8,0xa1 + +# GFX12: v_maximumminimum_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x6d,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x6d,0xd6,0x7b,0xfa,0xed,0xe1 + +# GFX12: v_maximumminimum_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x6d,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x6d,0xd6,0xf0,0xfa,0xc0,0x4b + +# GFX12: v_maximumminimum_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x6d,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x6d,0xd6,0x7d,0xe0,0xf5,0x01 + +# GFX12: v_maximumminimum_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x6d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x6d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf + +# GFX12: v_maximumminimum_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x6d,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x6d,0xd6,0x01,0xfe,0xff,0x01 + +# GFX12: 
v_maximumminimum_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x6d,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x6d,0xd6,0x69,0xd2,0xf8,0x01 + +# GFX12: v_maximumminimum_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x6d,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x6d,0xd6,0x01,0x05,0x0e,0x00 + +# GFX12: v_maximumminimum_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x6d,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x6d,0xd6,0xff,0x05,0xa4,0x01 + +# GFX12: v_maximumminimum_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x6d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x6d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf + +# GFX12: v_maximumminimum_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x6d,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x6d,0xd6,0x6a,0xf6,0x0c,0x04 + +# GFX12: v_maximumminimum_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x6d,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x6d,0xd6,0x7e,0x82,0xad,0x01 + +# GFX12: v_minimummaximum_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x6c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x6c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf + +# GFX12: v_minimummaximum_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x6c,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x6c,0xd6,0xc1,0xfe,0xf4,0xc3 + +# GFX12: v_minimummaximum_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x6c,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x6c,0xd6,0xfd,0xd4,0x04,0x33 + +# GFX12: v_minimummaximum_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x6c,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x6c,0xd6,0x7f,0xf8,0xa8,0xa1 + +# GFX12: v_minimummaximum_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x6c,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x6c,0xd6,0x7b,0xfa,0xed,0xe1 + +# GFX12: v_minimummaximum_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x6c,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x6c,0xd6,0xf0,0xfa,0xc0,0x4b + +# GFX12: v_minimummaximum_f32 v5, m0, 0.5, m0 ; 
encoding: [0x05,0x00,0x6c,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x6c,0xd6,0x7d,0xe0,0xf5,0x01 + +# GFX12: v_minimummaximum_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x6c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x6c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf + +# GFX12: v_minimummaximum_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x6c,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x6c,0xd6,0x01,0xfe,0xff,0x01 + +# GFX12: v_minimummaximum_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x6c,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x6c,0xd6,0x69,0xd2,0xf8,0x01 + +# GFX12: v_minimummaximum_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x6c,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x6c,0xd6,0x01,0x05,0x0e,0x00 + +# GFX12: v_minimummaximum_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x6c,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x6c,0xd6,0xff,0x05,0xa4,0x01 + +# GFX12: v_minimummaximum_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x6c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x6c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf + +# GFX12: v_minimummaximum_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x6c,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x6c,0xd6,0x6a,0xf6,0x0c,0x04 + +# GFX12: v_minimummaximum_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x6c,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x6c,0xd6,0x7e,0x82,0xad,0x01 + +# GFX12: v_maximumminimum_f16 v255, -|0xaf12|, -|vcc_hi|, null clamp ; encoding: [0xff,0x83,0x6f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00] +0xff,0x83,0x6f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00 + +# GFX12: v_maximumminimum_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x6f,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x6f,0xd6,0xc1,0xfe,0xf4,0xc3 + +# GFX12: v_maximumminimum_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x6f,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x02,0x6f,0xd6,0xfd,0xd4,0x04,0x23 + +# GFX12: v_maximumminimum_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: 
[0x05,0x05,0x6f,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x6f,0xd6,0x7f,0xf8,0xa8,0xa1 + +# GFX12: v_maximumminimum_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x6f,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x6f,0xd6,0x7b,0xfa,0xed,0xe1 + +# GFX12: v_maximumminimum_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x6f,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x00,0x6f,0xd6,0xf0,0xfa,0xc0,0x43 + +# GFX12: v_maximumminimum_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x6f,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x6f,0xd6,0x7d,0xe0,0xf5,0x01 + +# GFX12: v_maximumminimum_f16 v5, null, exec_lo, -|0xaf12| ; encoding: [0x05,0x04,0x6f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] +0x05,0x04,0x6f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00 + +# GFX12: v_maximumminimum_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x6f,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x6f,0xd6,0x01,0xfe,0xff,0x01 + +# GFX12: v_maximumminimum_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x6f,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x6f,0xd6,0x69,0xd2,0xf8,0x01 + +# GFX12: v_maximumminimum_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x6f,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x6f,0xd6,0x01,0x05,0x0e,0x00 + +# GFX12: v_maximumminimum_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x6f,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x6f,0xd6,0xff,0x05,0xa4,0x01 + +# GFX12: v_maximumminimum_f16 v5, vcc_hi, 0xaf12, v255 ; encoding: [0x05,0x00,0x6f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] +0x05,0x00,0x6f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00 + +# GFX12: v_maximumminimum_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x6f,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x6f,0xd6,0x6a,0xf6,0x0c,0x04 + +# GFX12: v_maximumminimum_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x6f,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x6f,0xd6,0x7e,0x82,0xad,0x01 + +# GFX12: v_minimummaximum_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x6e,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x6e,0xd6,0xc1,0xfe,0xf4,0xc3 + +# GFX12: 
v_minimummaximum_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x6e,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x02,0x6e,0xd6,0xfd,0xd4,0x04,0x23 + +# GFX12: v_minimummaximum_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x6e,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x6e,0xd6,0x7f,0xf8,0xa8,0xa1 + +# GFX12: v_minimummaximum_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x6e,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x6e,0xd6,0x7b,0xfa,0xed,0xe1 + +# GFX12: v_minimummaximum_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x6e,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x00,0x6e,0xd6,0xf0,0xfa,0xc0,0x43 + +# GFX12: v_minimummaximum_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x6e,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x6e,0xd6,0x7d,0xe0,0xf5,0x01 + +# GFX12: v_minimummaximum_f16 v5, null, exec_lo, -|0xaf12| ; encoding: [0x05,0x04,0x6e,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] +0x05,0x04,0x6e,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00 + +# GFX12: v_minimummaximum_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x6e,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x6e,0xd6,0x01,0xfe,0xff,0x01 + +# GFX12: v_minimummaximum_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x6e,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x6e,0xd6,0x69,0xd2,0xf8,0x01 + +# GFX12: v_minimummaximum_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x6e,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x6e,0xd6,0x01,0x05,0x0e,0x00 + +# GFX12: v_minimummaximum_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x6e,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x6e,0xd6,0xff,0x05,0xa4,0x01 + +# GFX12: v_minimummaximum_f16 v5, vcc_hi, 0xaf12, v255 ; encoding: [0x05,0x00,0x6e,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] +0x05,0x00,0x6e,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00 + +# GFX12: v_minimummaximum_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x6e,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x6e,0xd6,0x6a,0xf6,0x0c,0x04 + +# GFX12: v_minimummaximum_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: 
[0x05,0x01,0x6e,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x6e,0xd6,0x7e,0x82,0xad,0x01 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 40204c0dfdf4d..69f61c7eb8030 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -4110,3 +4110,507 @@ # GFX12: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] 0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 + +# GFX12: v_minimum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 + +# GFX12: v_minimum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
+0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX12: v_minimum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 + +# GFX12: v_maximum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 
+0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 + +# GFX12: v_maximum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX12: v_maximum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 + +# GFX12: v_minimum_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 + +# GFX12: v_minimum_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX12: v_minimum_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 + +# GFX12: v_maximum_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 + +# GFX12: v_maximum_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX12: v_maximum_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 + +# GFX12: v_maximum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 + +# GFX12: v_maximum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 
+0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff + +# GFX12: v_maximum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 + +# GFX12: v_minimum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 
+0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff + +# GFX12: v_minimum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 + +# GFX12: v_maximum3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
+0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff + +# GFX12: v_maximum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 + +# GFX12: v_minimum3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 
+0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff + +# GFX12: v_minimum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 
+0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff + +# GFX12: v_maximumminimum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v255, -|v255|, -|v255|, 
-|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff + +# GFX12: v_minimummaximum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 
+0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, 
v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff + +# GFX12: v_maximumminimum_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff + +# GFX12: v_minimummaximum_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] 
+0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index 904ec7c1347f2..a7f0183016147 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -2629,3 +2629,338 @@ # GFX12: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] 0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92 +# GFX12: v_maximum_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x66,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +0xff,0x03,0x66,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 + +# GFX12: v_maximum_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x66,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +0x05,0x02,0x66,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x66,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x66,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +0x05,0x01,0x66,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x65,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +0xff,0x03,0x65,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 + +# GFX12: v_minimum_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x65,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +0x05,0x02,0x65,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x65,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x65,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x65,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +0x05,0x01,0x65,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x68,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +0xff,0x03,0x68,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 + +# GFX12: v_maximum_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x68,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +0x05,0x02,0x68,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x68,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x68,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x68,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +0x05,0x01,0x68,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x67,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +0xff,0x03,0x67,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 + +# GFX12: v_minimum_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x67,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +0x05,0x02,0x67,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x67,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x67,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +0x05,0x01,0x67,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 
+ +# GFX12: v_maximum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x2e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +0xff,0x87,0x2e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 + +# GFX12: v_maximum3_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x2e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +0x05,0x04,0x2e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x2e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +0x05,0x03,0x2e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x2e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +0x05,0x05,0x2e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x2e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +0x05,0x06,0x2e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +0x05,0x02,0x2e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x00,0x2e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp 
v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +0x05,0x01,0x2e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x2d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +0xff,0x87,0x2d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 + +# GFX12: v_minimum3_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x2d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +0x05,0x04,0x2d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x2d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +0x05,0x03,0x2d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x2d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +0x05,0x05,0x2d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x2d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +0x05,0x06,0x2d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +0x05,0x02,0x2d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, s105 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x00,0x2d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +0x05,0x01,0x2d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x30,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +0xff,0x87,0x30,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 + +# GFX12: v_maximum3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x30,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +0x05,0x04,0x30,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x30,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +0x05,0x03,0x30,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x05,0x30,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +0x05,0x05,0x30,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x30,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +0x05,0x06,0x30,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x30,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +0x05,0x02,0x30,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x30,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +0x05,0x01,0x30,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x2f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 
+0xff,0x87,0x2f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 + +# GFX12: v_minimum3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x2f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +0x05,0x04,0x2f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x2f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +0x05,0x03,0x2f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x2f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +0x05,0x05,0x2f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +0x05,0x06,0x2f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +0x05,0x02,0x2f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 + +# GFX12: 
v_minimum3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +0x05,0x01,0x2f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x6d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +0xff,0x87,0x6d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x6d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +0x05,0x04,0x6d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x6d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +0x05,0x03,0x6d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x6d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +0x05,0x05,0x6d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x6d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +0x05,0x06,0x6d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x6d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +0x05,0x02,0x6d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x6d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x00,0x6d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +0x05,0x01,0x6d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x6c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +0xff,0x87,0x6c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x6c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +0x05,0x04,0x6c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x6c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +0x05,0x03,0x6c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x05,0x6c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +0x05,0x05,0x6c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x6c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +0x05,0x06,0x6c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x6c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +0x05,0x02,0x6c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x00,0x6c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +0x05,0x01,0x6c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x87,0x6f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +0xff,0x87,0x6f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x6f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +0x05,0x04,0x6f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x6f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +0x05,0x03,0x6f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x6f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +0x05,0x05,0x6f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +0x05,0x06,0x6f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x6f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +0x05,0x02,0x6f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x00,0x6f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_maximumminimum_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +0x05,0x01,0x6f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x6e,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +0xff,0x87,0x6e,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x6e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +0x05,0x04,0x6e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x6e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +0x05,0x03,0x6e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x6e,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +0x05,0x05,0x6e,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +0x05,0x06,0x6e,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x6e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +0x05,0x02,0x6e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x00,0x6e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x6e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# GFX12: v_minimummaximum_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x6e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +0x05,0x01,0x6e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt index d07516b21cc61..373cd71261449 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt @@ -1251,3 +1251,93 @@ # GFX12: v_pk_sub_u16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] clamp ; encoding: [0xff,0xd0,0x0b,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00] 0xff,0xd0,0x0b,0xcc,0xff,0xd6,0x00,0x08,0x0b,0xfe,0x00,0x00 + +# GFX12: v_pk_maximum_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x1e,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00] +0xff,0xd3,0x1e,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00 + +# GFX12: 
v_pk_maximum_f16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x1e,0xcc,0xc1,0xfe,0x00,0x20] +0x05,0x59,0x1e,0xcc,0xc1,0xfe,0x00,0x20 + +# GFX12: v_pk_maximum_f16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x1e,0xcc,0xf0,0xfa,0x00,0x58] +0x05,0x42,0x1e,0xcc,0xf0,0xfa,0x00,0x58 + +# GFX12: v_pk_maximum_f16 v5, exec_hi, null ; encoding: [0x05,0x40,0x1e,0xcc,0x7f,0xf8,0x00,0x18] +0x05,0x40,0x1e,0xcc,0x7f,0xf8,0x00,0x18 + +# GFX12: v_pk_maximum_f16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x1e,0xcc,0x7e,0x82,0x01,0x18] +0x05,0x40,0x1e,0xcc,0x7e,0x82,0x01,0x18 + +# GFX12: v_pk_maximum_f16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x1e,0xcc,0x7d,0xe0,0x01,0x18] +0x05,0x40,0x1e,0xcc,0x7d,0xe0,0x01,0x18 + +# GFX12: v_pk_maximum_f16 v5, null, exec_lo ; encoding: [0x05,0x40,0x1e,0xcc,0x7c,0xfc,0x00,0x18] +0x05,0x40,0x1e,0xcc,0x7c,0xfc,0x00,0x18 + +# GFX12: v_pk_maximum_f16 v5, s1, s2 ; encoding: [0x05,0x40,0x1e,0xcc,0x01,0x04,0x00,0x18] +0x05,0x40,0x1e,0xcc,0x01,0x04,0x00,0x18 + +# GFX12: v_pk_maximum_f16 v5, s105, s105 ; encoding: [0x05,0x40,0x1e,0xcc,0x69,0xd2,0x00,0x18] +0x05,0x40,0x1e,0xcc,0x69,0xd2,0x00,0x18 + +# GFX12: v_pk_maximum_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x1e,0xcc,0xfd,0xd4,0x00,0x10] +0x05,0x48,0x1e,0xcc,0xfd,0xd4,0x00,0x10 + +# GFX12: v_pk_maximum_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x1e,0xcc,0x7b,0xfa,0x01,0x18] +0x05,0x40,0x1e,0xcc,0x7b,0xfa,0x01,0x18 + +# GFX12: v_pk_maximum_f16 v5, v1, v2 ; encoding: [0x05,0x40,0x1e,0xcc,0x01,0x05,0x02,0x18] +0x05,0x40,0x1e,0xcc,0x01,0x05,0x02,0x18 + +# GFX12: v_pk_maximum_f16 v5, v255, v255 ; encoding: [0x05,0x40,0x1e,0xcc,0xff,0xff,0x03,0x18] +0x05,0x40,0x1e,0xcc,0xff,0xff,0x03,0x18 + +# GFX12: v_pk_maximum_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x40,0x1e,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00] +0x05,0x40,0x1e,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00 + +# GFX12: v_pk_maximum_f16 v5, vcc_lo, 
ttmp15 ; encoding: [0x05,0x40,0x1e,0xcc,0x6a,0xf6,0x00,0x18] +0x05,0x40,0x1e,0xcc,0x6a,0xf6,0x00,0x18 + +# GFX12: v_pk_minimum_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp ; encoding: [0xff,0xd3,0x1d,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00] +0xff,0xd3,0x1d,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00 + +# GFX12: v_pk_minimum_f16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[1,0] ; encoding: [0x05,0x59,0x1d,0xcc,0xc1,0xfe,0x00,0x20] +0x05,0x59,0x1d,0xcc,0xc1,0xfe,0x00,0x20 + +# GFX12: v_pk_minimum_f16 v5, 0.5, m0 neg_lo:[0,1] neg_hi:[0,1] ; encoding: [0x05,0x42,0x1d,0xcc,0xf0,0xfa,0x00,0x58] +0x05,0x42,0x1d,0xcc,0xf0,0xfa,0x00,0x58 + +# GFX12: v_pk_minimum_f16 v5, exec_hi, null ; encoding: [0x05,0x40,0x1d,0xcc,0x7f,0xf8,0x00,0x18] +0x05,0x40,0x1d,0xcc,0x7f,0xf8,0x00,0x18 + +# GFX12: v_pk_minimum_f16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x1d,0xcc,0x7e,0x82,0x01,0x18] +0x05,0x40,0x1d,0xcc,0x7e,0x82,0x01,0x18 + +# GFX12: v_pk_minimum_f16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x1d,0xcc,0x7d,0xe0,0x01,0x18] +0x05,0x40,0x1d,0xcc,0x7d,0xe0,0x01,0x18 + +# GFX12: v_pk_minimum_f16 v5, null, exec_lo ; encoding: [0x05,0x40,0x1d,0xcc,0x7c,0xfc,0x00,0x18] +0x05,0x40,0x1d,0xcc,0x7c,0xfc,0x00,0x18 + +# GFX12: v_pk_minimum_f16 v5, s1, s2 ; encoding: [0x05,0x40,0x1d,0xcc,0x01,0x04,0x00,0x18] +0x05,0x40,0x1d,0xcc,0x01,0x04,0x00,0x18 + +# GFX12: v_pk_minimum_f16 v5, s105, s105 ; encoding: [0x05,0x40,0x1d,0xcc,0x69,0xd2,0x00,0x18] +0x05,0x40,0x1d,0xcc,0x69,0xd2,0x00,0x18 + +# GFX12: v_pk_minimum_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x1d,0xcc,0xfd,0xd4,0x00,0x10] +0x05,0x48,0x1d,0xcc,0xfd,0xd4,0x00,0x10 + +# GFX12: v_pk_minimum_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x1d,0xcc,0x7b,0xfa,0x01,0x18] +0x05,0x40,0x1d,0xcc,0x7b,0xfa,0x01,0x18 + +# GFX12: v_pk_minimum_f16 v5, v1, v2 ; encoding: [0x05,0x40,0x1d,0xcc,0x01,0x05,0x02,0x18] +0x05,0x40,0x1d,0xcc,0x01,0x05,0x02,0x18 + +# 
GFX12: v_pk_minimum_f16 v5, v255, v255 ; encoding: [0x05,0x40,0x1d,0xcc,0xff,0xff,0x03,0x18] +0x05,0x40,0x1d,0xcc,0xff,0xff,0x03,0x18 + +# GFX12: v_pk_minimum_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x40,0x1d,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00] +0x05,0x40,0x1d,0xcc,0x6b,0xfe,0x01,0x18,0x0b,0xfe,0x00,0x00 + +# GFX12: v_pk_minimum_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x40,0x1d,0xcc,0x6a,0xf6,0x00,0x18] +0x05,0x40,0x1d,0xcc,0x6a,0xf6,0x00,0x18