diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1e07e8deb560fc..e16fed445b9f96 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2141,9 +2141,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
           "Can't encode literal as exact 64-bit floating-point operand. "
           "Low 32-bits will be set to zero");
+          Val &= 0xffffffff00000000u;
         }
 
-        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
+        Inst.addOperand(MCOperand::createImm(Val));
         setImmKindLiteral();
         return;
       }
@@ -2242,7 +2243,13 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
       return;
     }
 
-    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
+    // Shift in the unsigned domain: left-shifting a negative int64_t is
+    // undefined behavior before C++20.
+    Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum)
+              ? static_cast<int64_t>(static_cast<uint64_t>(Val) << 32)
+              : Lo_32(Val);
+
+    Inst.addOperand(MCOperand::createImm(Val));
     setImmKindLiteral();
     return;
 
@@ -4309,7 +4316,19 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
       continue;
 
     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
-      uint32_t Value = static_cast<uint32_t>(MO.getImm());
+      uint64_t Value = static_cast<uint64_t>(MO.getImm());
+      bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
+                    AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
+      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
+
+      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
+        Error(getLitLoc(Operands), "invalid operand for instruction");
+        return false;
+      }
+
+      if (IsFP64 && IsValid32Op)
+        Value = Hi_32(Value);
+
       if (NumLiterals == 0 || LiteralValue != Value) {
         LiteralValue = Value;
         ++NumLiterals;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index a504a5e86760bd..d74fd0b3a9ea74 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -378,6 +378,18 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
   return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
 }
 
+// Decode a 9-bit VSrc_f64 source operand. Passes ImmWidth = 64 and
+// IsFP = true to decodeSrcOp so that a 32-bit literal is expanded into the
+// high half of the 64-bit immediate.
+static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
+                                           uint64_t Addr,
+                                           const MCDisassembler *Decoder) {
+  assert(Imm < (1 << 9) && "9-bit encoding");
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(
+      Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
+}
+
 static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm,
                                                  uint64_t Addr,
                                                  const MCDisassembler *Decoder) {
@@ -1219,7 +1231,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
   return MCOperand::createImm(Literal);
 }
 
-MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
+MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
   // For now all literal constants are supposed to be unsigned integer
   // ToDo: deal with signed/unsigned 64-bit integer constants
   // ToDo: deal with float/double constants
@@ -1229,9 +1241,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
                         Twine(Bytes.size()));
     }
     HasLiteral = true;
-    Literal = eatBytes<uint32_t>(Bytes);
+    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
+    if (ExtendFP64)
+      Literal64 <<= 32;
   }
-  return MCOperand::createImm(Literal);
+  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
 }
 
 MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
@@ -1448,7 +1462,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
 
 MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                           bool MandatoryLiteral,
-                                          unsigned ImmWidth) const {
+                                          unsigned ImmWidth, bool IsFP) const {
   using namespace AMDGPU::EncValues;
 
   assert(Val < 1024); // enum10
@@ -1460,13 +1474,15 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
     return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                    : getVgprClassId(Width), Val - VGPR_MIN);
   }
-  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth);
+  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
+                            IsFP);
 }
 
 MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
                                                  unsigned Val,
                                                  bool MandatoryLiteral,
-                                                 unsigned ImmWidth) const {
+                                                 unsigned ImmWidth,
+                                                 bool IsFP) const {
   // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
   // decoded earlier.
   assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
@@ -1494,7 +1510,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
       // Keep a sentinel value for deferred setting
       return MCOperand::createImm(LITERAL_CONST);
     else
-      return decodeLiteralConstant();
+      return decodeLiteralConstant(IsFP && ImmWidth == 64);
   }
 
   switch (Width) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 5f3b277d577ff7..91b73b593d6161 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -97,6 +97,7 @@ class AMDGPUDisassembler : public MCDisassembler {
   const unsigned TargetMaxInstBytes;
   mutable ArrayRef<uint8_t> Bytes;
   mutable uint32_t Literal;
+  mutable uint64_t Literal64;
   mutable bool HasLiteral;
   mutable std::optional<bool> EnableWavefrontSize32;
 
@@ -229,15 +230,15 @@ class AMDGPUDisassembler : public MCDisassembler {
   static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm);
 
   MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
-  MCOperand decodeLiteralConstant() const;
+  MCOperand decodeLiteralConstant(bool ExtendFP64) const;
 
   MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
-                        bool MandatoryLiteral = false,
-                        unsigned ImmWidth = 0) const;
+                        bool MandatoryLiteral = false, unsigned ImmWidth = 0,
+                        bool IsFP = false) const;
 
   MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
                                bool MandatoryLiteral = false,
-                               unsigned ImmWidth = 0) const;
+                               unsigned ImmWidth = 0, bool IsFP = false) const;
 
   MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
   MCOperand decodeSpecialReg32(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index ad4c48a8d65581..9459ee088dddee 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -426,7 +426,7 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
 
 void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
                                          const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
+                                         raw_ostream &O, bool IsFP) {
   int64_t SImm = static_cast<int64_t>(Imm);
   if (SImm >= -16 && SImm <= 64) {
     O << SImm;
@@ -454,7 +454,10 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
   else if (Imm == 0x3fc45f306dc9c882 &&
            STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
     O << "0.15915494309189532";
-  else {
+  else if (IsFP) {
+    assert(AMDGPU::isValid32BitLiteral(Imm, true));
+    O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
+  } else {
     assert(isUInt<32>(Imm) || isInt<32>(Imm));
 
     // In rare situations, we will have a 32-bit literal in a 64-bit
@@ -605,11 +608,13 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
       printImmediate32(Op.getImm(), STI, O);
       break;
     case AMDGPU::OPERAND_REG_IMM_INT64:
-    case AMDGPU::OPERAND_REG_IMM_FP64:
     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+      printImmediate64(Op.getImm(), STI, O, false);
+      break;
+    case AMDGPU::OPERAND_REG_IMM_FP64:
     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
-      printImmediate64(Op.getImm(), STI, O);
+      printImmediate64(Op.getImm(), STI, O, true);
       break;
     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -671,7 +676,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
       if (RCBits == 32)
         printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
       else if (RCBits == 64)
-        printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O);
+        printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true);
       else
         llvm_unreachable("Invalid register class size");
     }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 3b14faab136b35..dc83547a4afe04 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -91,7 +91,7 @@ class AMDGPUInstPrinter : public MCInstPrinter {
   void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
                         raw_ostream &O);
   void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
+                        raw_ostream &O, bool IsFP);
   void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O);
   void printRegularOperand(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 21243f80e05549..d93f747bf6f0a6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -411,7 +411,12 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
     } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
       llvm_unreachable("Must be immediate or expr");
 
+    // For an FP64 literal operand the emitted dword is the high half of the
+    // 64-bit immediate.
+    if (Desc.operands()[i].OperandType == AMDGPU::OPERAND_REG_IMM_FP64)
+      Imm = Hi_32(Imm);
+
     support::endian::write<uint32_t>(CB, Imm, llvm::endianness::little);
 
     // Only one literal value allowed
     break;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index c3c5bfae405aa4..ea06e85fb400c1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1263,7 +1263,9 @@ def VSrc_f32 : RegOrF32 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_v2b16 : RegOrV2B16 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_v2f16 : RegOrV2F16 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_b64 : RegOrB64 <"VS_64", "OPERAND_REG_IMM">;
-def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM">;
+def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrc_f64";
+}
 def VSrc_v2b32 : RegOrV2B32 <"VS_64", "OPERAND_REG_IMM">;
 def VSrc_v2f32 : RegOrV2F32 <"VS_64", "OPERAND_REG_IMM">;
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6d0ad763d9e6cc..d123b384a27d4c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2519,6 +2519,16 @@ bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
   return Lo16 == Hi16;
 }
 
+// Returns true if Val is acceptable as a 32-bit literal. With IsFP64 set the
+// low 32 bits of Val must be zero; otherwise Val must fit in 32 bits as either
+// an unsigned or a signed integer.
+bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
+  if (IsFP64)
+    return !(Val & 0xffffffffu);
+
+  return isUInt<32>(Val) || isInt<32>(Val);
+}
+
 bool isArgPassedInSGPR(const Argument *A) {
   const Function *F = A->getParent();
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 297a69f54d6372..bb2964f592f66b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1290,6 +1290,9 @@ bool isInlinableIntLiteralV216(int32_t Literal);
 LLVM_READNONE
 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
 
+LLVM_READNONE
+bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
+
 bool isArgPassedInSGPR(const Argument *Arg);
 
 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);