diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index fd9c045e06804..f4d78d9dedefc 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2436,17 +2436,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: - if (isSafeTruncation(Val, 32) && - AMDGPU::isInlinableLiteral32(static_cast(Val), - AsmParser->hasInv2PiInlineImm())) { - Inst.addOperand(MCOperand::createImm(Val)); - return; - } - [[fallthrough]]; - case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: - - Inst.addOperand(MCOperand::createImm(Lo_32(Val))); + Inst.addOperand(MCOperand::createImm(Val)); return; case AMDGPU::OPERAND_REG_IMM_INT64: @@ -2494,77 +2485,27 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: - if (isSafeTruncation(Val, 16) && - AMDGPU::isInlinableIntLiteral(static_cast(Val))) { - Inst.addOperand(MCOperand::createImm(Lo_32(Val))); - return; - } - - Inst.addOperand(MCOperand::createImm(Val & 0xffff)); - return; - case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_IMM_FP16: - if (isSafeTruncation(Val, 16) && - AMDGPU::isInlinableLiteralFP16(static_cast(Val), - AsmParser->hasInv2PiInlineImm())) { - Inst.addOperand(MCOperand::createImm(Val)); - return; - } - - Inst.addOperand(MCOperand::createImm(Val & 0xffff)); - return; - case AMDGPU::OPERAND_REG_IMM_BF16: case AMDGPU::OPERAND_REG_INLINE_C_BF16: - if (isSafeTruncation(Val, 16) && - AMDGPU::isInlinableLiteralBF16(static_cast(Val), - AsmParser->hasInv2PiInlineImm())) { - Inst.addOperand(MCOperand::createImm(Val)); - return; - } - - Inst.addOperand(MCOperand::createImm(Val & 0xffff)); - return; - - case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: { - assert(isSafeTruncation(Val, 16)); - assert(AMDGPU::isInlinableIntLiteral(static_cast(Val))); - Inst.addOperand(MCOperand::createImm(Val)); - return; - } - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { - assert(isSafeTruncation(Val, 16)); - assert(AMDGPU::isInlinableLiteralFP16(static_cast(Val), - AsmParser->hasInv2PiInlineImm())); - - Inst.addOperand(MCOperand::createImm(Val)); - return; - } - - case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: { - assert(isSafeTruncation(Val, 16)); - assert(AMDGPU::isInlinableLiteralBF16(static_cast(Val), - AsmParser->hasInv2PiInlineImm())); - - Inst.addOperand(MCOperand::createImm(Val)); - return; - } - + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: case AMDGPU::OPERAND_KIMM32: - Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); - return; case AMDGPU::OPERAND_KIMM16: - Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); + Inst.addOperand(MCOperand::createImm(Val)); return; + case AMDGPU::OPERAND_KIMM64: if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64) Val <<= 32; Inst.addOperand(MCOperand::createImm(Val)); return; + default: - llvm_unreachable("invalid operand size"); + llvm_unreachable("invalid operand type"); } } @@ -4830,7 +4771,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst, unsigned NumExprs = 0; unsigned NumLiterals = 0; - uint64_t LiteralValue; + int64_t LiteralValue; for (int OpIdx : OpIndices) { if (OpIdx == -1) break; @@ -4839,7 +4780,9 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst, // Exclude special imm operands (like that used by s_set_gpr_idx_on) if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { - uint64_t Value = static_cast(MO.getImm()); + auto OpType = static_cast( + Desc.operands()[OpIdx].OperandType); + int64_t Value = encode32BitLiteral(MO.getImm(), OpType); if (NumLiterals == 0 || LiteralValue != Value) { LiteralValue = Value; ++NumLiterals; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index fd65f95334f75..bf212bbca934c 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -464,8 +464,9 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI, assert(STI.hasFeature(AMDGPU::Feature64BitLiterals)); support::endian::write(CB, Imm, llvm::endianness::little); } else { - if (Desc.operands()[i].OperandType == AMDGPU::OPERAND_REG_IMM_FP64) - Imm = Hi_32(Imm); + auto OpType = + static_cast(Desc.operands()[i].OperandType); + Imm = AMDGPU::encode32BitLiteral(Imm, OpType); support::endian::write(CB, Imm, llvm::endianness::little); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index faae1fee342af..c80302e03beea 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3157,6 +3157,34 @@ bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { return isUInt<32>(Val) || isInt<32>(Val); } +int64_t encode32BitLiteral(int64_t Imm, OperandType Type) { + switch (Type) { + default: + break; + case OPERAND_REG_IMM_BF16: + case OPERAND_REG_IMM_FP16: + case OPERAND_REG_INLINE_C_BF16: + case OPERAND_REG_INLINE_C_FP16: + return Imm & 0xffff; + case OPERAND_INLINE_SPLIT_BARRIER_INT32: + case OPERAND_REG_IMM_FP32: + case OPERAND_REG_IMM_INT32: + case OPERAND_REG_IMM_V2BF16: + case OPERAND_REG_IMM_V2FP16: + case OPERAND_REG_IMM_V2FP32: + case OPERAND_REG_IMM_V2INT16: + case OPERAND_REG_IMM_V2INT32: + case OPERAND_REG_INLINE_AC_FP32: + case OPERAND_REG_INLINE_AC_INT32: + case OPERAND_REG_INLINE_C_FP32: + case OPERAND_REG_INLINE_C_INT32: + return Lo_32(Imm); + case OPERAND_REG_IMM_FP64: + return Hi_32(Imm); + } + return Imm; +} + bool isArgPassedInSGPR(const Argument *A) { const Function *F = A->getParent(); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 3f8d43db5a48c..2def4509e7eed 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1718,6 +1718,9 @@ bool isInlinableLiteralV2F16(uint32_t Literal); LLVM_READNONE bool isValid32BitLiteral(uint64_t Val, bool IsFP64); +LLVM_READNONE +int64_t encode32BitLiteral(int64_t Imm, OperandType Type); + bool isArgPassedInSGPR(const Argument *Arg); bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo); diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10-vop3-literal.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10-vop3-literal.txt index 015ce3e963fb3..e6410a1b6b8a8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10-vop3-literal.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10-vop3-literal.txt @@ -51,7 +51,7 @@ # GFX10: v_add_nc_i16 v5, v1, 0xcdab ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xff,0x01,0x00,0xab,0xcd,0xff,0xff] 0x05,0x00,0x0d,0xd7,0x01,0xff,0x01,0x00,0xab,0xcd,0xff,0xff -# GFX10: v_ceil_f16_e64 v255, 0xabcd clamp ; encoding: [0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0xff,0xff] +# GFX10: v_ceil_f16_e64 v255, 0xabcd clamp ; encoding: [0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0x00,0x00] 0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0xff,0xff # GFX10: v_min_u16 v5, v1, 0xabcd ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0xcd,0xab,0xff,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8-literal16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8-literal16.txt index c5751de810d90..d2da087a44743 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx8-literal16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8-literal16.txt @@ -34,17 +34,17 @@ 0xff 0x06 0x02 0x3e 0x00 0x01 0x00 0x00 # non-zero unused bits in constant -# VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x01,0x00] +# VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x00,0x00] 0xff 0x06 0x02 0x3e 0x41 0x00 0x01 0x00 -# VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x00,0x01] +# VI: v_add_f16_e32 v1, 0x41, v3 ; encoding: [0xff,0x06,0x02,0x3e,0x41,0x00,0x00,0x00] 0xff 0x06 0x02 0x3e 0x41 0x00 0x00 0x01 # FIXME: This should be able to round trip with literal after instruction # VI: v_add_f16_e32 v1, 0, v3 ; encoding: [0x80,0x06,0x02,0x3e] 0xff 0x06 0x02 0x3e 0x00 0x00 0x00 0x00 -# VI: v_add_f16_e32 v1, 0xffcd, v3 ; encoding: [0xff,0x06,0x02,0x3e,0xcd,0xff,0xff,0xff] +# VI: v_add_f16_e32 v1, 0xffcd, v3 ; encoding: [0xff,0x06,0x02,0x3e,0xcd,0xff,0x00,0x00] 0xff 0x06 0x02 0x3e 0xcd 0xff 0xff 0xff # VI: v_mul_lo_u16_e32 v2, 0xffcd, v2 ; encoding: [0xff,0x04,0x04,0x52,0xcd,0xff,0xff,0xff]