diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 225e781588668..a94da992b3385 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -314,8 +314,9 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); } - bool isRegOrInlineImmWithFP16InputMods() const { - return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); + template bool isRegOrInlineImmWithFP16InputMods() const { + return isRegOrInline( + IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16); } bool isRegOrInlineImmWithFP32InputMods() const { @@ -8151,7 +8152,7 @@ ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { // Determines which bit DST_OP_SEL occupies in the op_sel operand according to // the number of src operands present, then copies that bit into src0_modifiers. -void cvtVOP3DstOpSelOnly(MCInst &Inst) { +static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) { int Opc = Inst.getOpcode(); int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); if (OpSelIdx == -1) @@ -8168,23 +8169,34 @@ void cvtVOP3DstOpSelOnly(MCInst &Inst) { unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); - if ((OpSel & (1 << SrcNum)) != 0) { - int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); - uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); - Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); + int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); + if (DstIdx == -1) + return; + + const MCOperand &DstOp = Inst.getOperand(DstIdx); + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); + uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); + if (DstOp.isReg() && + MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) { + if (AMDGPU::isHi(DstOp.getReg(), MRI)) + ModVal |= SISrcMods::DST_OP_SEL; + } else { + if ((OpSel & (1 << SrcNum)) != 0) + ModVal |= SISrcMods::DST_OP_SEL; } + Inst.getOperand(ModIdx).setImm(ModVal); } void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { cvtVOP3P(Inst, Operands); - cvtVOP3DstOpSelOnly(Inst); + cvtVOP3DstOpSelOnly(Inst, *getMRI()); } void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx) { cvtVOP3P(Inst, Operands, OptionalIdx); - cvtVOP3DstOpSelOnly(Inst); + cvtVOP3DstOpSelOnly(Inst, *getMRI()); } static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { @@ -8433,8 +8445,17 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, uint32_t ModVal = 0; - if ((OpSel & (1 << J)) != 0) - ModVal |= SISrcMods::OP_SEL_0; + const MCOperand &SrcOp = Inst.getOperand(OpIdx); + if (SrcOp.isReg() && getMRI() + ->getRegClass(AMDGPU::VGPR_16RegClassID) + .contains(SrcOp.getReg())) { + bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI()); + if (VGPRSuffixIsHi) + ModVal |= SISrcMods::OP_SEL_0; + } else { + if ((OpSel & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_0; + } if ((OpSelHi & (1 << J)) != 0) ModVal |= SISrcMods::OP_SEL_1; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index fba9eb53c8a8b..85377d07c52dc 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -913,6 +913,41 @@ static VOPModifiers collectVOPModifiers(const MCInst &MI, return Modifiers; } +// Instructions decode the op_sel/suffix bits into the src_modifier +// operands. Copy those bits into the src operands for true16 VGPRs. +void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const { + const unsigned Opc = MI.getOpcode(); + const MCRegisterClass &ConversionRC = + MRI.getRegClass(AMDGPU::VGPR_16RegClassID); + constexpr std::array, 4> OpAndOpMods = { + {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers, + SISrcMods::OP_SEL_0}, + {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers, + SISrcMods::OP_SEL_0}, + {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers, + SISrcMods::OP_SEL_0}, + {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers, + SISrcMods::DST_OP_SEL}}}; + for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName); + int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName); + if (OpIdx == -1 || OpModsIdx == -1) + continue; + MCOperand &Op = MI.getOperand(OpIdx); + if (!Op.isReg()) + continue; + if (!ConversionRC.contains(Op.getReg())) + continue; + unsigned OpEnc = MRI.getEncodingValue(Op.getReg()); + const MCOperand &OpMods = MI.getOperand(OpModsIdx); + unsigned ModVal = OpMods.getImm(); + if (ModVal & OpSelMask) { // isHi + unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK; + Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1)); + } + } +} + // MAC opcodes have special old and src2 operands. // src2 is tied to dst, while old is not tied (but assumed to be). bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const { @@ -968,6 +1003,7 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { + convertTrue16OpSel(MI); auto Mods = collectVOPModifiers(MI); insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), AMDGPU::OpName::op_sel); @@ -991,6 +1027,8 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { if (isMacDPP(MI)) convertMacDPPInst(MI); + convertTrue16OpSel(MI); + int VDstInIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); if (VDstInIdx != -1) diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 5a89b30f6fb36..02feaf553c0c4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -203,6 +203,7 @@ class AMDGPUDisassembler : public MCDisassembler { DecodeStatus convertVOP3PDPPInst(MCInst &MI) const; DecodeStatus convertVOPCDPPInst(MCInst &MI) const; void convertMacDPPInst(MCInst &MI) const; + void convertTrue16OpSel(MCInst &MI) const; enum OpWidthTy { OPW32, diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index a812cdc61500c..8bf05682cbe7e 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -756,14 +756,14 @@ void SIFoldOperands::foldOperand( int UseOpIdx, SmallVectorImpl &FoldList, SmallVectorImpl &CopiesToReplace) const { - const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx); + const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx); - if (!isUseSafeToFold(*UseMI, UseOp)) + if (!isUseSafeToFold(*UseMI, *UseOp)) return; // FIXME: Fold operands with subregs. - if (UseOp.isReg() && OpToFold.isReg() && - (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)) + if (UseOp->isReg() && OpToFold.isReg() && + (UseOp->isImplicit() || UseOp->getSubReg() != AMDGPU::NoSubRegister)) return; // Special case for REG_SEQUENCE: We can't fold literals into @@ -859,7 +859,6 @@ void SIFoldOperands::foldOperand( if (MovOp == AMDGPU::COPY) return; - UseMI->setDesc(TII->get(MovOp)); MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin(); MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end(); while (ImpOpI != ImpOpE) { @@ -867,6 +866,19 @@ void SIFoldOperands::foldOperand( ImpOpI++; UseMI->removeOperand(UseMI->getOperandNo(Tmp)); } + UseMI->setDesc(TII->get(MovOp)); + + if (MovOp == AMDGPU::V_MOV_B16_t16_e64) { + const auto &SrcOp = UseMI->getOperand(UseOpIdx); + MachineOperand NewSrcOp(SrcOp); + MachineFunction *MF = UseMI->getParent()->getParent(); + UseMI->removeOperand(1); + UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers + UseMI->addOperand(NewSrcOp); // src0 + UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // op_sel + UseOpIdx = 2; + UseOp = &UseMI->getOperand(UseOpIdx); + } CopiesToReplace.push_back(UseMI); } else { if (UseMI->isCopy() && OpToFold.isReg() && @@ -1027,7 +1039,7 @@ void SIFoldOperands::foldOperand( // Don't fold into target independent nodes. Target independent opcodes // don't have defined register classes. - if (UseDesc.isVariadic() || UseOp.isImplicit() || + if (UseDesc.isVariadic() || UseOp->isImplicit() || UseDesc.operands()[UseOpIdx].RegClass == -1) return; } @@ -1062,17 +1074,17 @@ void SIFoldOperands::foldOperand( TRI->getRegClass(FoldDesc.operands()[0].RegClass); // Split 64-bit constants into 32-bits for folding. - if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) { - Register UseReg = UseOp.getReg(); + if (UseOp->getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) { + Register UseReg = UseOp->getReg(); const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg); if (AMDGPU::getRegBitWidth(*UseRC) != 64) return; APInt Imm(64, OpToFold.getImm()); - if (UseOp.getSubReg() == AMDGPU::sub0) { + if (UseOp->getSubReg() == AMDGPU::sub0) { Imm = Imm.getLoBits(32); } else { - assert(UseOp.getSubReg() == AMDGPU::sub1); + assert(UseOp->getSubReg() == AMDGPU::sub1); Imm = Imm.getHiBits(32); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 7edec5a7a5505..22599773d562c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1148,7 +1148,13 @@ def FPT16InputModsMatchClass : FPInputModsMatchClass<16> { def FP32InputModsMatchClass : FPInputModsMatchClass<32>; def FP64InputModsMatchClass : FPInputModsMatchClass<64>; -def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>; +class FP16VCSrcInputModsMatchClass + : FPVCSrcInputModsMatchClass<16> { + let Name = !if(IsFake16, "RegOrInlineImmWithFPFake16InputMods", + "RegOrInlineImmWithFPT16InputMods"); + let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" # + !if(IsFake16, "true", "false") # ">"; +} def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>; class InputMods : Operand { @@ -1166,7 +1172,8 @@ def FPT16InputMods : FPInputMods; def FP32InputMods : FPInputMods; def FP64InputMods : FPInputMods; -def FP16VCSrcInputMods : FPInputMods; +class FP16VCSrcInputMods + : FPInputMods>; def FP32VCSrcInputMods : FPInputMods; class IntInputModsMatchClass : AsmOperandClass { @@ -1653,11 +1660,11 @@ class getSrcModDPP_t16 { } // Return type of input modifiers operand for specified input operand for DPP -class getSrcModVOP3DPP { +class getSrcModVOP3DPP { Operand ret = !if (VT.isFP, !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), - FP16VCSrcInputMods, FP32VCSrcInputMods), + FP16VCSrcInputMods, FP32VCSrcInputMods), Int32VCSrcInputMods); } @@ -2450,6 +2457,10 @@ class VOP_PAT_GEN : VOPProfile : VOPProfile { let IsTrue16 = 1; let IsRealTrue16 = 1; + + let HasOpSel = 1; + let HasModifiers = 1; // All instructions at least have OpSel. + // Most DstVT are 16-bit, but not all. let DstRC = getVALUDstForVT.ret; let DstRC64 = getVALUDstForVT.ret; @@ -2461,6 +2472,10 @@ class VOPProfile_True16 : VOPProfile { let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; + let Src0VOP3DPP = VGPRSrc_16; + let Src0ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; let DstRC64 = getVALUDstForVT.ret; let Src0RC64 = getVOP3SrcForVT.ret; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index c9dbe02037ef2..aabb6c2906211 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1235,6 +1235,12 @@ def VGPRSrc_16_Lo128 : RegisterOperand { let EncoderMethod = "getMachineOpValueT16Lo128"; } +// True 16 operands. +def VGPRSrc_16 : RegisterOperand { + let DecoderMethod = "DecodeVGPR_16RegisterClass"; + let EncoderMethod = "getMachineOpValueT16"; +} + //===----------------------------------------------------------------------===// // ASrc_* Operands with an AccVGPR //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir index 84da311108ce3..014534ab79fe6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir @@ -50,7 +50,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]] - ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]] ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]] ; @@ -88,7 +88,7 @@ body: | ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]] ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; @@ -127,7 +127,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]] - ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]] ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir index 30975a8937db6..dcf9e169f586b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir @@ -59,7 +59,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]] - ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]] ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]] ; @@ -97,7 +97,7 @@ body: | ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]] ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; @@ -136,7 +136,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]] - ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]] ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]] ; diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir index 7767aa54c8151..9ae5f559e860a 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir @@ -66,7 +66,7 @@ body: | ; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec ; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; REAL16-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec + ; REAL16-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec ; ; FAKE16-LABEL: name: ceil_f16 ; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF @@ -87,7 +87,7 @@ body: | ; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec ; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec + ; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec ; ; FAKE16-LABEL: name: floor_f16 ; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1-fake16.s new file mode 100644 index 0000000000000..1871a41ec5983 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1-fake16.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s + +v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] + +v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_floor_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s index 9a65c6687f3f8..701a72597e45d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] @@ -42,46 +42,52 @@ v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 f v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] -v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +v_ceil_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +v_ceil_f16_e64_dpp v5.l, v1.h quad_perm:[3,2,1,0] +// GFX11: [0x05,0x08,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ceil_f16_e64_dpp v5.h, v1.l quad_perm:[3,2,1,0] +// GFX11: [0x05,0x40,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ceil_f16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -v_ceil_f16_e64_dpp v5, v1 row_mirror +v_ceil_f16_e64_dpp v5.l, v1.l row_mirror // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 row_half_mirror +v_ceil_f16_e64_dpp v5.l, v1.l row_half_mirror // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 row_shl:1 +v_ceil_f16_e64_dpp v5.l, v1.l row_shl:1 // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 row_shl:15 +v_ceil_f16_e64_dpp v5.l, v1.l row_shl:15 // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 row_shr:1 +v_ceil_f16_e64_dpp v5.l, v1.l row_shr:1 // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 row_shr:15 +v_ceil_f16_e64_dpp v5.l, v1.l row_shr:15 // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 row_ror:1 +v_ceil_f16_e64_dpp v5.l, v1.l row_ror:1 // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 row_ror:15 +v_ceil_f16_e64_dpp v5.l, v1.l row_ror:15 // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +v_ceil_f16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +v_ceil_f16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_ceil_f16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] -v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +v_ceil_f16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] @@ -1512,46 +1518,46 @@ v_ffbl_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi v_ffbl_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] -v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +v_floor_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +v_floor_f16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -v_floor_f16_e64_dpp v5, v1 row_mirror +v_floor_f16_e64_dpp v5.l, v1.l row_mirror // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_half_mirror +v_floor_f16_e64_dpp v5.l, v1.l row_half_mirror // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_shl:1 +v_floor_f16_e64_dpp v5.l, v1.l row_shl:1 // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_shl:15 +v_floor_f16_e64_dpp v5.l, v1.l row_shl:15 // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_shr:1 +v_floor_f16_e64_dpp v5.l, v1.l row_shr:1 // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_shr:15 +v_floor_f16_e64_dpp v5.l, v1.l row_shr:15 // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_ror:1 +v_floor_f16_e64_dpp v5.l, v1.l row_ror:1 // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_ror:15 +v_floor_f16_e64_dpp v5.l, v1.l row_ror:15 // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +v_floor_f16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +v_floor_f16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_floor_f16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] -v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +v_floor_f16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1-fake16.s new file mode 100644 index 0000000000000..1bef1fe215acd --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1-fake16.s @@ -0,0 +1,25 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s index 3897b82785f65..043e0f9334ad8 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] @@ -9,16 +9,22 @@ v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +v_ceil_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +v_ceil_f16_e64_dpp v5.l, v1.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v5.h, v1.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x40,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_ceil_f16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: [0x05,0x00,0xdc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +v_ceil_f16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x81,0xdc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] @@ -375,16 +381,16 @@ v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_ffbl_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +v_floor_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +v_floor_f16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_floor_f16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: [0x05,0x00,0xdb,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +v_floor_f16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x81,0xdb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt index cf29efa5ff56b..fe5084539aede 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt @@ -1,4 +1,5 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s # GFX11: v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff @@ -42,48 +43,74 @@ # GFX11: v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.h, v1.h op_sel:[1,1] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x48,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0x05,0x48,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] 0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 -# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.h op_sel:[1,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0x05,0x08,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11-REAL16: v_ceil_f16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11-REAL16: v_ceil_f16_e64_dpp v255.h, -|v255.l| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0xff,0xc1,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + # GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff @@ -1302,48 +1329,74 @@ # GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 -# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.h, v1.h op_sel:[1,1] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x48,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0x05,0x48,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] 0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 -# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.h op_sel:[1,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0x05,0x08,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11-REAL16: v_floor_f16_e64_dpp v255.l, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11-REAL16: v_floor_f16_e64_dpp v255.h, -|v255.l| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0xff,0xc1,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + # GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt index bfda6d10c2f6d..c1b500e495fe7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt @@ -1,4 +1,5 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s # GFX11: v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 @@ -6,18 +7,34 @@ # GFX11: v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.h, v1.h op_sel:[1,1] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0x05,0x48,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] 0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 -# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX11-REAL16: v_ceil_f16_e64_dpp v5.l, v1.h op_sel:[1,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0x05,0x08,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11-REAL16: v_ceil_f16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 +# GFX11-REAL16: v_ceil_f16_e64_dpp v255.h, -|v255.l| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0xff,0xc1,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + # GFX11: v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 @@ -288,18 +305,34 @@ # GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 -# GFX11: v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.h, v1.h op_sel:[1,1] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0x05,0x48,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] 0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 -# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX11-REAL16: v_floor_f16_e64_dpp v5.l, v1.h op_sel:[1,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0x05,0x08,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11-REAL16: v_floor_f16_e64_dpp v255.l, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 +# GFX11-REAL16: v_floor_f16_e64_dpp v255.h, -|v255.l| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# COM: GFX11-FAKE16: warning: invalid instruction encoding +0xff,0xc1,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + # GFX11: v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05