Skip to content

Commit

Permalink
[AMDGPU] Support shared literals in FMAMK/FMAAK
Browse files Browse the repository at this point in the history
These instructions should allow src0 to be a literal with the same
value as the mandatory other literal. Enable it by introducing an
operand that defers adding its value to the MI when decoding till
the mandatory literal is parsed.

Reviewed By: dp, foad

Differential Revision: https://reviews.llvm.org/D111067

Change-Id: I22b0ae0d35bad17b6f976808e48bffe9a6af70b7
  • Loading branch information
Sisyph committed Oct 11, 2021
1 parent b41cfbf commit b4b7e60
Show file tree
Hide file tree
Showing 19 changed files with 309 additions and 82 deletions.
46 changes: 34 additions & 12 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1542,7 +1542,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateOpSel(const MCInst &Inst);
bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
bool validateVccOperand(unsigned Reg) const;
bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
Expand Down Expand Up @@ -1715,6 +1715,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
switch (OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
Expand All @@ -1723,6 +1724,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_KIMM32:
return &APFloat::IEEEsingle();
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
Expand All @@ -1732,6 +1734,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
return &APFloat::IEEEdouble();
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
Expand All @@ -1742,6 +1745,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_KIMM16:
return &APFloat::IEEEhalf();
default:
llvm_unreachable("unsupported fp type");
Expand Down Expand Up @@ -2017,12 +2021,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo

case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
Expand All @@ -2036,7 +2042,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32: {
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
Expand All @@ -2062,6 +2070,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
Expand Down Expand Up @@ -2101,6 +2110,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo

case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
Expand Down Expand Up @@ -2128,6 +2138,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
Inst.addOperand(MCOperand::createImm(Val));
return;
}
case AMDGPU::OPERAND_KIMM32:
Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
setImmKindNone();
return;
case AMDGPU::OPERAND_KIMM16:
Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
setImmKindNone();
return;
default:
llvm_unreachable("invalid operand size");
}
Expand Down Expand Up @@ -3250,7 +3268,8 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
SIInstrFlags::SDWA)) {
// Check special imm operands (used by madmk, etc)
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
++ConstantBusUseCount;
++NumLiterals;
LiteralSize = 4;
}

SmallDenseSet<unsigned> SGPRsUsed;
Expand Down Expand Up @@ -3290,7 +3309,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,

// An instruction may use only one literal.
// This has been validated on the previous step.
// See validateVOP3Literal.
// See validateVOPLiteral.
// This literal may be used as more than one operand.
// If all these operands are of the same size,
// this literal counts as one scalar value.
Expand Down Expand Up @@ -3981,26 +4000,29 @@ bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
(FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}

// VOP3 literal is only allowed in GFX10+ and only one can be used
bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
const OperandVector &Operands) {
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
const OperandVector &Operands) {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
ImmIdx == -1)
return true;

const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};

unsigned NumExprs = 0;
unsigned NumLiterals = 0;
uint32_t LiteralValue;

for (int OpIdx : OpIndices) {
if (OpIdx == -1) break;
if (OpIdx == -1)
continue;

const MCOperand &MO = Inst.getOperand(OpIdx);
if (!MO.isImm() && !MO.isExpr())
Expand Down Expand Up @@ -4030,7 +4052,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
if (!NumLiterals)
return true;

if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
Error(getLitLoc(Operands), "literal operands are not supported");
return false;
}
Expand Down Expand Up @@ -4202,7 +4224,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"only one literal operand is allowed");
return false;
}
if (!validateVOP3Literal(Inst, Operands)) {
if (!validateVOPLiteral(Inst, Operands)) {
return false;
}
if (!validateConstantBusLimitations(Inst, Operands)) {
Expand Down
76 changes: 73 additions & 3 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;
Expand Down Expand Up @@ -264,6 +265,34 @@ static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst,
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
}

static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(
Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
}

static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(
Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
const MCRegisterInfo *MRI) {
if (OpIdx < 0)
Expand Down Expand Up @@ -626,6 +655,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}

int ImmLitIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
if (Res && ImmLitIdx != -1)
Res = convertFMAanyK(MI, ImmLitIdx);

// if the opcode was not recognized we'll assume a Size of 4 bytes
// (unless there are fewer bytes left)
Size = Res ? (MaxInstBytesNum - Bytes.size())
Expand Down Expand Up @@ -810,6 +844,24 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
int ImmLitIdx) const {
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
unsigned DescNumOps = Desc.getNumOperands();
assert(DescNumOps == MI.getNumOperands());
for (unsigned I = 0; I < DescNumOps; ++I) {
auto &Op = MI.getOperand(I);
auto OpType = Desc.OpInfo[I].OperandType;
bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
IsDeferredOp)
Op.setImm(Literal);
}
return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
return getContext().getRegisterInfo()->
getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
Expand Down Expand Up @@ -1019,6 +1071,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
return decodeDstOp(OPW512, Val);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
if (HasLiteral) {
if (Literal != Val)
return errOperand(Val, "More than one unique literal is illegal");
}
HasLiteral = true;
Literal = Val;
return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
Expand Down Expand Up @@ -1232,7 +1296,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral) const {
using namespace AMDGPU::EncValues;

assert(Val < 1024); // enum10
Expand Down Expand Up @@ -1261,8 +1326,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
return decodeFPImmed(Width, Val);

if (Val == LITERAL_CONST)
return decodeLiteralConstant();
if (Val == LITERAL_CONST) {
if (MandatoryLiteral)
// Keep a sentinel value for deferred setting
return MCOperand::createImm(LITERAL_CONST);
else
return decodeLiteralConstant();
}

switch (Width) {
case OPW32:
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class AMDGPUDisassembler : public MCDisassembler {
DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;

DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
DecodeStatus convertSDWAInst(MCInst &MI) const;
DecodeStatus convertDPP8Inst(MCInst &MI) const;
DecodeStatus convertMIMGInst(MCInst &MI) const;
Expand Down Expand Up @@ -150,9 +151,11 @@ class AMDGPUDisassembler : public MCDisassembler {

static MCOperand decodeIntImmed(unsigned Imm);
static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
MCOperand decodeLiteralConstant() const;

MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral = false) const;
MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
Expand All @@ -631,6 +632,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
printImmediate16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_IMM_V2INT16:
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
switch (OpInfo.OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
Expand All @@ -255,6 +256,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
Expand All @@ -277,6 +279,9 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
uint32_t Encoding = getLit16Encoding(Lo16, STI);
return Encoding;
}
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
return MO.getImm();
default:
llvm_unreachable("invalid operand size");
}
Expand Down Expand Up @@ -341,7 +346,13 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
(bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
return;

// Check for additional literals in SRC0/1/2 (Op 1/2/3)
// Do not print literals from SISrc Operands for insts with mandatory literals
int ImmLitIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
if (ImmLitIdx != -1)
return;

// Check for additional literals
for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {

// Check if this operand should be encoded as [SV]Src
Expand Down
Loading

0 comments on commit b4b7e60

Please sign in to comment.