diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 3866723521147..c4ec7a7befd47 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -172,6 +172,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { ImmTyWaitEXP, ImmTyWaitVAVDst, ImmTyWaitVMVSrc, + ImmTyByteSel, }; // Immediate operand kind. @@ -410,6 +411,9 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } bool isNegLo() const { return isImmTy(ImmTyNegLo); } bool isNegHi() const { return isImmTy(ImmTyNegHi); } + bool isByteSel() const { + return isImmTy(ImmTyByteSel) && isUInt<2>(getImm()); + } bool isRegOrImm() const { return isReg() || isImm(); @@ -1139,6 +1143,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { case ImmTyWaitEXP: OS << "WaitEXP"; break; case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break; case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break; + case ImmTyByteSel: OS << "ByteSel" ; break; } // clang-format on } @@ -8644,6 +8649,13 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, } } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) { + assert(AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)); + Inst.addOperand(Inst.getOperand(0)); + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyByteSel); + } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); @@ -8680,8 +8692,8 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || - Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 || - Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) { + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods Inst.addOperand(Inst.getOperand(0)); } @@ -8692,7 +8704,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || - Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) { + Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) { assert(!IsPacked); Inst.addOperand(Inst.getOperand(0)); } @@ -9207,10 +9223,11 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, Inst.addOperand(Inst.getOperand(0)); } - bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || - Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 || - Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || - Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12; + bool IsVOP3CvtSrDpp = + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12; if (IsVOP3CvtSrDpp) { if (Src2ModIdx == static_cast(Inst.getNumOperands())) { Inst.addOperand(MCOperand::createImm(0)); @@ -9243,6 +9260,11 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, llvm_unreachable("unhandled operand type"); } } + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyByteSel); + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index dc1bf92771b44..8fd36b84a00cd 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -869,10 +869,6 @@ void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { if (VDstInIdx != -1) insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); - if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || - MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12) - insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2); - unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { @@ -902,10 +898,6 @@ void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { if (VDstInIdx != -1) insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); - if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || - MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12) - insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2); - unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 5090b0a07da4b..91733c2933b4c 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -409,6 +409,11 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) { DPPInst.addImm(NegHiOpr->getImm()); } + auto *ByteSelOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::byte_sel); + if (ByteSelOpr && + AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::byte_sel)) { + DPPInst.addImm(ByteSelOpr->getImm()); + } } DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl)); DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask)); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index b6a95906bc45c..883b6c4407fe5 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -1806,4 +1806,14 @@ void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo, O << ' ' << formatDec(Imm); } +void AMDGPUInstPrinter::printByteSel(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint8_t Imm = MI->getOperand(OpNo).getImm(); + if (!Imm) + return; + + O << " byte_sel:" << formatDec(Imm); +} + #include "AMDGPUGenAsmWriter.inc" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index c801eaf1111e2..d6d7fd34b68cc 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -186,6 +186,8 @@ class AMDGPUInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O); void printExpTgt(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printByteSel(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); public: static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f1afbcc060b26..bf6cfe90ebfbe 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1123,6 +1123,8 @@ def WaitEXP : NamedIntOperand; def WaitVAVDst : NamedIntOperand; def WaitVMVSrc : NamedIntOperand; +def ByteSel : NamedIntOperand; + class KImmFPOperand : ImmOperand { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_KIMM"#vt.Size; @@ -1700,9 +1702,9 @@ class getIns64 { + bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32, + bit HasByteSel = 0> { string dst = !if(HasDst, !if(!eq(DstVT.Size, 1), "$sdst", @@ -2058,6 +2061,7 @@ class getAsmVOP3Base _ArgVT, bit _EnableClamp = 0> { field bit IsSWMMAC = 0; field bit IsFP8 = 0; + field bit IsFP8DstByteSel = 0; field bit HasDst = !ne(DstVT.Value, untyped.Value); field bit HasDst32 = HasDst; @@ -2401,7 +2406,7 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field string AsmDPP8 = getAsmDPP8.ret; field string AsmVOP3Base = getAsmVOP3Base.ret; + HasModifiers, DstVT, IsFP8DstByteSel>.ret; field string Asm64 = AsmVOP3Base; field string AsmVOP3P = getAsmVOP3P.ret; field string AsmVOP3OpSel = getAsmVOP3OpSel, HasSrc2FloatMods>.ret>.ret); } +class VOP3_CVT_SR_F8_ByteSel_Profile : + VOP3_Profile> { + let IsFP8DstByteSel = 1; + let HasClamp = 0; + defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel); + let Ins64 = !con(getIns64.ret, + bytesel); + let InsVOP3Base = !con( + getInsVOP3Base.ret, + bytesel); +} + def IsPow2Plus1: PatLeaf<(i32 imm), [{ uint32_t V = N->getZExtValue(); return isPowerOf2_32(V - 1); @@ -645,12 +661,17 @@ let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>; defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>; + + let SubtargetPredicate = isGFX12Plus in { + defm V_CVT_SR_FP8_F32_gfx12 : VOP3Inst<"v_cvt_sr_fp8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile>; + defm V_CVT_SR_BF8_F32_gfx12 : VOP3Inst<"v_cvt_sr_bf8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile>; + } } // These instructions have non-standard use of op_sel. In particular they are // using op_sel bits 2 and 3 while only having two sources. Therefore dummy // src2 is used to hold the op_sel value. - let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in { + let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX940Plus in { defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>; defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>; } @@ -667,15 +688,28 @@ class Cvt_SR_F8_F32_Pat index, VOP3_Pseudo inst> !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0) >; +class Cvt_SR_F8_ByteSel_Pat : GCNPat< + (i32 (node (VOP3Mods SrcVT:$src0, i32:$src0_modifiers), (VOP3Mods i32:$src1, i32:$src1_modifiers), + i32:$old, timm:$byte_sel)), + (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $old, (as_i32timm $byte_sel)) +>; + let OtherPredicates = [HasFP8ConversionInsts] in { foreach Index = [0, -1] in { def : Cvt_PK_F8_F32_Pat; def : Cvt_PK_F8_F32_Pat; } -foreach Index = [0, 1, 2, 3] in { - def : Cvt_SR_F8_F32_Pat; - def : Cvt_SR_F8_F32_Pat; +let SubtargetPredicate = isGFX940Plus in { + foreach Index = [0, 1, 2, 3] in { + def : Cvt_SR_F8_F32_Pat; + def : Cvt_SR_F8_F32_Pat; + } +} + +let SubtargetPredicate = isGFX12Plus in { + def : Cvt_SR_F8_ByteSel_Pat; + def : Cvt_SR_F8_ByteSel_Pat; } } @@ -1040,8 +1074,8 @@ defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>; defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_gfx12<0x369>; defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36a>; -defm V_CVT_SR_FP8_F32 : VOP3Only_Realtriple_gfx12<0x36b>; -defm V_CVT_SR_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36c>; +defm V_CVT_SR_FP8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36b, "V_CVT_SR_FP8_F32_gfx12", "v_cvt_sr_fp8_f32" >; +defm V_CVT_SR_BF8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36c, "V_CVT_SR_BF8_F32_gfx12", "v_cvt_sr_bf8_f32">; //===----------------------------------------------------------------------===// // GFX11, GFX12 diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index da16178cb58bd..7cdb5cbfe297d 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -311,6 +311,14 @@ class VOP3FP8OpSel_gfx11_gfx12 op, VOPProfile p> : VOP3e_gfx10 { let Inst{12} = !if(p.HasSrc0, src0_modifiers{3}, 0); } + class VOP3FP8OpSel_dst_bytesel_gfx11_gfx12 op, VOPProfile p> : VOP3e_gfx10 { + bits<2> byte_sel; + + let Inst{11} = 0; // op_sel0 + let Inst{12} = 0; // op_sel1 + let Inst{14-13} = byte_sel; // op_sel2/3 + } + class VOP3DotOpSel_gfx11_gfx12 op, VOPProfile p> : VOP3OpSel_gfx11_gfx12{ let Inst{11} = ?; let Inst{12} = ?; @@ -741,6 +749,7 @@ class VOP3_DPPe_Common_Base op, VOPProfile P> : Enc96 { bits<3> src2_modifiers; bits<1> clamp; bits<2> omod; + bits<2> byte_sel; let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); @@ -748,8 +757,8 @@ class VOP3_DPPe_Common_Base op, VOPProfile P> : Enc96 { // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs. let Inst{11} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{2}, 0),?); let Inst{12} = !if(P.HasOpSel,!if(P.HasSrc1Mods, src1_modifiers{2}, !if((P.IsFP8), src0_modifiers{3}, 0)), ?); - let Inst{13} = !if(P.HasOpSel,!if(P.HasSrc2Mods, src2_modifiers{2}, 0),?); - let Inst{14} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{3}, 0),?); + let Inst{13} = !if(P.HasOpSel,!if(P.HasSrc2Mods, src2_modifiers{2}, 0),!if(P.IsFP8DstByteSel, byte_sel{0}, ?)); + let Inst{14} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{3}, 0),!if(P.IsFP8DstByteSel, byte_sel{1}, ?)); let Inst{15} = !if(P.HasClamp, clamp, 0); let Inst{25-16} = op; let Inst{31-26} = 0x35; @@ -1388,7 +1397,11 @@ multiclass VOP3_Real_Base op, string opName = NAME, bit isSingle = 0> { defvar ps = !cast(opName#"_e64"); let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { - if ps.Pfl.HasOpSel then { + if ps.Pfl.IsFP8DstByteSel then { + def _e64#Gen.Suffix : + VOP3_Real_Gen, + VOP3FP8OpSel_dst_bytesel_gfx11_gfx12; + } if ps.Pfl.HasOpSel then { def _e64#Gen.Suffix : VOP3_Real_Gen, VOP3OpSel_gfx11_gfx12; @@ -1419,6 +1432,10 @@ multiclass VOP3_Real_with_name op, string opName, def _e64#Gen.Suffix : VOP3_Real_Gen, VOP3FP8OpSel_gfx11_gfx12; + } else if ps.Pfl.IsFP8DstByteSel then { + def _e64#Gen.Suffix : + VOP3_Real_Gen, + VOP3FP8OpSel_dst_bytesel_gfx11_gfx12; } else if ps.Pfl.HasOpSel then { def _e64#Gen.Suffix : VOP3_Real_Gen, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll index e21d61036375a..ffedde9416bb2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll @@ -97,9 +97,7 @@ define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr a define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) { ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte1: ; GFX12: ; %bb.0: -; GFX12-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0] +; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: global_store_b32 v[3:4], v2, off ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -114,9 +112,7 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr a define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) { ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte2: ; GFX12: ; %bb.0: -; GFX12-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1] +; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX12-NEXT: global_store_b32 v[3:4], v2, off ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll index 9b8fdf9017045..7662a3b78dea2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll @@ -385,7 +385,7 @@ define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0] +; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -409,7 +409,7 @@ define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1] +; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:2 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -433,7 +433,7 @@ define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1] +; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:3 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -479,7 +479,7 @@ define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0] +; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -503,7 +503,7 @@ define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1] +; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:2 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -527,7 +527,7 @@ define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1] +; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:3 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index be9edc3e019e3..b0854881d4287 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -1126,6 +1126,18 @@ v_cvt_sr_fp8_f32 v10, s2, v5 v_cvt_sr_fp8_f32 v5, -|v255|, v4 // GFX12: encoding: [0x05,0x01,0x6b,0xd7,0xff,0x09,0x02,0x20] +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 +// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6b,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 +// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x6b,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 +// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x6b,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 +// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x6b,0xd7,0x02,0x07,0x02,0x00] + v_cvt_sr_bf8_f32 v1, v2, v3 // GFX12: encoding: [0x01,0x00,0x6c,0xd7,0x02,0x07,0x02,0x00] @@ -1135,6 +1147,18 @@ v_cvt_sr_bf8_f32 v10, s2, v5 v_cvt_sr_bf8_f32 v5, -|v255|, v4 // GFX12: encoding: [0x05,0x01,0x6c,0xd7,0xff,0x09,0x02,0x20] +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 +// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6c,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 +// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x6c,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 +// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x6c,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 +// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x6c,0xd7,0x02,0x07,0x02,0x00] + v_cvt_pk_i16_f32 v5, v1, v2 // GFX12: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index d0e309adce410..16cd8d5aa5e9d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -1192,6 +1192,18 @@ v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 // GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd // GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] @@ -1219,6 +1231,18 @@ v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 // GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index 25b13ac62e4a4..d6ef14cff5fa8 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -698,6 +698,18 @@ v_cvt_sr_fp8_f32_e64_dpp v5, -v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cvt_sr_fp8_f32_e64_dpp v255, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] // GFX12: encoding: [0xff,0x01,0x6b,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x20,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x40,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x60,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: encoding: [0x05,0x00,0x6c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -710,6 +722,18 @@ v_cvt_sr_bf8_f32_e64_dpp v5, -v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cvt_sr_bf8_f32_e64_dpp v255, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] // GFX12: encoding: [0xff,0x01,0x6c,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x20,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x40,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x60,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_err.s index 55c5fcabea732..31ed577ac0a23 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_err.s @@ -101,3 +101,8 @@ v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 1] // GFX12: error: invalid op_sel operand // GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 1] // GFX12-NEXT:{{^}} ^ + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:4 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-NEXT:{{^}}v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:4 +// GFX12-NEXT:{{^}} ^ diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index 6acaa81527207..2c911777ef97f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -1020,6 +1020,15 @@ # GFX12: v_cvt_sr_fp8_f32 v5, -|v255|, v4 ; encoding: [0x05,0x01,0x6b,0xd7,0xff,0x09,0x02,0x20] 0x05,0x01,0x6b,0xd7,0xff,0x09,0x02,0x20 +# GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x6b,0xd7,0x02,0x07,0x02,0x00] +0x01,0x20,0x6b,0xd7,0x02,0x07,0x02,0x00 + +# GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x6b,0xd7,0x02,0x07,0x02,0x00] +0x01,0x40,0x6b,0xd7,0x02,0x07,0x02,0x00 + +# GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x6b,0xd7,0x02,0x07,0x02,0x00] +0x01,0x60,0x6b,0xd7,0x02,0x07,0x02,0x00 + # GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6c,0xd7,0x02,0x07,0x02,0x00] 0x01,0x00,0x6c,0xd7,0x02,0x07,0x02,0x00 @@ -1029,6 +1038,15 @@ # GFX12: v_cvt_sr_bf8_f32 v5, -|v255|, v4 ; encoding: [0x05,0x01,0x6c,0xd7,0xff,0x09,0x02,0x20] 0x05,0x01,0x6c,0xd7,0xff,0x09,0x02,0x20 +# GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x6c,0xd7,0x02,0x07,0x02,0x00] +0x01,0x20,0x6c,0xd7,0x02,0x07,0x02,0x00 + +# GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x6c,0xd7,0x02,0x07,0x02,0x00] +0x01,0x40,0x6c,0xd7,0x02,0x07,0x02,0x00 + +# GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x6c,0xd7,0x02,0x07,0x02,0x00] +0x01,0x60,0x6c,0xd7,0x02,0x07,0x02,0x00 + # GFX12: v_cvt_pk_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 0771e6449b62b..f9b6c1b73ddc4 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -945,6 +945,15 @@ # GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] 0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff + +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff + +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff + # GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed @@ -972,6 +981,15 @@ # GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] 0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff + +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff + +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff + # GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index a836adafb31e4..eedc6d4910878 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -570,6 +570,15 @@ # GFX12: v_cvt_sr_fp8_f32_e64_dpp v255, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x01,0x6b,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] 0xff,0x01,0x6b,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00 +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x20,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] +0x01,0x20,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05 + +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x40,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] +0x01,0x40,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05 + +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x60,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] +0x01,0x60,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05 + # GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x6c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -582,6 +591,15 @@ # GFX12: v_cvt_sr_bf8_f32_e64_dpp v255, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x01,0x6c,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] 0xff,0x01,0x6c,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00 +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x20,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] +0x01,0x20,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05 + +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x40,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] +0x01,0x40,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05 + +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x60,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] +0x01,0x60,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05 + # GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05