[AMDGPU] SDWA: Add assembler support for GFX9
Summary:
Added separate pseudo and real instructions for GFX9 SDWA instructions.
Currently supported only in the assembler.
Depends D32493

Reviewers: vpykhtin, artem.tamazov

Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye

Differential Revision: https://reviews.llvm.org/D33132

llvm-svn: 303620
SamWot committed May 23, 2017
1 parent 657188a commit f7659d7
Showing 14 changed files with 820 additions and 233 deletions.
20 changes: 16 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -425,7 +425,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
FeatureFastFMAF32, FeatureDPP,
FeatureFastFMAF32, FeatureSDWA, FeatureDPP,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts
]
>;
@@ -534,10 +534,12 @@ def AMDGPUAsmVariants {
int VOP3_ID = 1;
string SDWA = "SDWA";
int SDWA_ID = 2;
string SDWA9 = "SDWA9";
int SDWA9_ID = 3;
string DPP = "DPP";
int DPP_ID = 3;
int DPP_ID = 4;
string Disable = "Disable";
int Disable_ID = 4;
int Disable_ID = 5;
}

def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
@@ -555,6 +557,12 @@ def SDWAAsmParserVariant : AsmParserVariant {
let Name = AMDGPUAsmVariants.SDWA;
}

def SDWA9AsmParserVariant : AsmParserVariant {
let Variant = AMDGPUAsmVariants.SDWA9_ID;
let Name = AMDGPUAsmVariants.SDWA9;
}


def DPPAsmParserVariant : AsmParserVariant {
let Variant = AMDGPUAsmVariants.DPP_ID;
let Name = AMDGPUAsmVariants.DPP;
@@ -567,6 +575,7 @@ def AMDGPU : Target {
let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
VOP3AsmParserVariant,
SDWAAsmParserVariant,
SDWA9AsmParserVariant,
DPPAsmParserVariant];
let AssemblyWriters = [AMDGPUAsmWriter];
}
@@ -607,7 +616,10 @@ def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<"FeatureVOP3P">;

def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureSDWA">;
AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;

def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureSDWA,FeatureGFX9">;

def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;
83 changes: 53 additions & 30 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -881,6 +881,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return AMDGPU::isVI(getSTI());
}

bool isGFX9() const {
return AMDGPU::isGFX9(getSTI());
}

bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
@@ -989,7 +993,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
bool isSGPR(unsigned Reg);

public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
@@ -1042,9 +1045,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
uint64_t BasicInstType);
uint64_t BasicInstType, bool skipVcc = false);
};

struct OptionalOperand {
@@ -1966,7 +1970,8 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
}

if (isForcedSDWA()) {
static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA};
static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
AMDGPUAsmVariants::SDWA9};
return makeArrayRef(Variants);
}

@@ -1977,7 +1982,7 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {

static const unsigned Variants[] = {
AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP
AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
};

return makeArrayRef(Variants);
@@ -2000,14 +2005,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
return AMDGPU::NoRegister;
}

bool AMDGPUAsmParser::isSGPR(unsigned Reg) {
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
Reg == AMDGPU::SCC;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
@@ -2049,7 +2046,8 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
if (MO.isImm()) {
return !isInlineConstant(Inst, OpIdx);
}
return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg()));
return !MO.isReg() ||
isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}

bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) {
@@ -2060,7 +2058,8 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) {
if (Desc.TSFlags &
(SIInstrFlags::VOPC |
SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) {
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
SIInstrFlags::SDWA)) {

// Check special imm operands (used by madmk, etc)
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
@@ -4151,14 +4150,19 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
cvtSDWA(Inst, Operands, SIInstrFlags::VOPC);
cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
uint64_t BasicInstType) {
uint64_t BasicInstType, bool skipVcc) {
using namespace llvm::AMDGPU::SDWA;
OptionalImmIndexMap OptionalIdx;
bool skippedVcc = false;

unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
@@ -4168,43 +4172,63 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,

for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if ((BasicInstType == SIInstrFlags::VOPC ||
BasicInstType == SIInstrFlags::VOP2)&&
Op.isReg() &&
Op.Reg.RegNo == AMDGPU::VCC) {
// VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
// Skip it.
continue;
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
// VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
// Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
// or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
// Skip VCC only if we didn't skip it on previous iteration.
if (BasicInstType == SIInstrFlags::VOP2 &&
(Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
skippedVcc = true;
continue;
} else if (BasicInstType == SIInstrFlags::VOPC &&
Inst.getNumOperands() == 0) {
skippedVcc = true;
continue;
}
}
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithInputModsOperands(Inst, 2);
} else if (Op.isImm()) {
// Handle optional arguments
OptionalIdx[Op.getImmTy()] = I;
} else {
llvm_unreachable("Invalid operand type");
}
skippedVcc = false;
}

addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);

if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
// V_NOP_sdwa_vi has no optional sdwa arguments
switch (BasicInstType) {
case SIInstrFlags::VOP1:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
if (isGFX9() &&
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
break;

case SIInstrFlags::VOP2:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
if (isGFX9() &&
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;

case SIInstrFlags::VOPC:
if (isVI()) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
@@ -4220,10 +4244,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
auto it = Inst.begin();
std::advance(
it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
Inst.insert(it, Inst.getOperand(0)); // src2 = dst
}

}

/// Force static initialization.
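The operand-count checks in the new skipVcc handling are easier to see in isolation. A minimal standalone sketch, assuming the operand counts given in the comments of the diff above (this is not the parser code itself):

// skipVcc is passed as true by cvtSdwaVOP2b (always) and by cvtSdwaVOPC on VI.
// NumOpsSoFar stands in for Inst.getNumOperands() at the moment the "vcc"
// token is reached:
//   VOP2b: 1 right after the dst ("v_add_i32_sdwa v1, vcc, v2, v3"), or
//          5 after the dst plus two sources with modifiers (the trailing vcc
//          in "v_addc_u32_sdwa v1, vcc, v2, v3, vcc").
//   VOPC (VI): 0, i.e. "vcc" is the leading operand of a v_cmp_*_sdwa.
bool shouldSkipVccToken(bool SkipVcc, bool SkippedLastIteration,
                        bool IsVOP2, bool IsVOPC, unsigned NumOpsSoFar) {
  if (!SkipVcc || SkippedLastIteration)
    return false;
  if (IsVOP2)
    return NumOpsSoFar == 1 || NumOpsSoFar == 5;
  if (IsVOPC)
    return NumOpsSoFar == 0;
  return false;
}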
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -52,6 +52,18 @@ class AMDGPUMCCodeEmitter : public MCCodeEmitter {
return 0;
}

virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
return 0;
}

virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
return 0;
}

protected:
uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
void verifyInstructionPredicates(const MCInst &MI,
42 changes: 42 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -69,6 +69,14 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;

unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;

unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
};

} // end anonymous namespace
@@ -319,6 +327,40 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
return getMachineOpValue(MI, MO, Fixups, STI);
}

unsigned
SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t RegEnc = 0;

const MCOperand &MO = MI.getOperand(OpNo);

unsigned Reg = MO.getReg();
RegEnc |= MRI.getEncodingValue(Reg);
RegEnc &= SDWA9_SRC_REG_MASK;
if (AMDGPU::isSGPR(Reg, &MRI)) {
RegEnc |= SDWA9_SRC_SGPR_MASK;
}
return RegEnc;
}

unsigned
SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t RegEnc = 0;

const MCOperand &MO = MI.getOperand(OpNo);

unsigned Reg = MO.getReg();
if (Reg != AMDGPU::VCC) {
RegEnc |= MRI.getEncodingValue(Reg);
RegEnc &= SDWA9_VOPC_DST_REG_MASK;
RegEnc |= SDWA9_VOPC_DST_VCC_MASK;
}
return RegEnc;
}

uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/SIDefines.h
@@ -118,6 +118,10 @@ namespace AMDGPU {
// Operand for source modifiers for VOP instructions
OPERAND_INPUT_MODS,

// Operand for GFX9 SDWA instructions
OPERAND_SDWA9_SRC,
OPERAND_SDWA9_VOPC_DST,

/// Operand with 32-bit immediate that uses the constant bus.
OPERAND_KIMM32,
OPERAND_KIMM16
@@ -160,7 +164,8 @@ namespace AMDGPUAsmVariants {
DEFAULT = 0,
VOP3 = 1,
SDWA = 2,
DPP = 3
SDWA9 = 3,
DPP = 4
};
}

@@ -294,6 +299,11 @@ enum DstUnused {
UNUSED_PRESERVE = 2,
};

#define SDWA9_SRC_SGPR_MASK 0x100
#define SDWA9_SRC_REG_MASK 0xFF
#define SDWA9_VOPC_DST_VCC_MASK 0x80
#define SDWA9_VOPC_DST_REG_MASK 0x7F

} // namespace SDWA
} // namespace AMDGPU
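
The four masks describe the two new GFX9 SDWA operand fields. A minimal standalone sketch of the bit layout, mirroring the new getSDWA9SrcEncoding/getSDWA9VopcDstEncoding hooks above; the helper names and the plain-integer signatures are illustrative, not LLVM API:

#include <cstdint>

// Source operand: bits 7:0 carry the register encoding (what
// MRI.getEncodingValue() returns, masked to 8 bits); bit 8 is set when the
// source is an SGPR.
uint32_t encodeSdwa9Src(uint32_t HwEnc, bool IsSGPR) {
  uint32_t Enc = HwEnc & 0xFF;          // SDWA9_SRC_REG_MASK
  if (IsSGPR)
    Enc |= 0x100;                       // SDWA9_SRC_SGPR_MASK
  return Enc;
}

// VOPC destination: all zeros means VCC; otherwise bits 6:0 carry the SGPR
// encoding and bit 7 is set to mark an explicit SGPR destination.
uint32_t encodeSdwa9VopcDst(uint32_t HwEnc, bool IsVCC) {
  if (IsVCC)
    return 0;
  return (HwEnc & 0x7F)                 // SDWA9_VOPC_DST_REG_MASK
         | 0x80;                        // SDWA9_VOPC_DST_VCC_MASK
}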

