Skip to content

Commit

Permalink
[AMDGPU] SDWA: several fixes for V_CVT and VOPC instructions
Browse files Browse the repository at this point in the history
Summary:
1. Instruction V_CVT_U32_F32 allow omod operand (see SIInstrInfo.td:1435). In fact this operand shouldn't be allowed here. This fix checks if SDWA pseudo instruction has OMod operand and then copy it.
2. There were several problems with support of VOPC instructions in SDWA peephole pass.

Reviewers: tstellar, arsenm, vpykhtin, airlied, kzhuravl

Subscribers: wdng, nhaehnle, yaxunl, dstuttard, tpr, sarnex, t-tye

Differential Revision: https://reviews.llvm.org/D34626

llvm-svn: 306413
  • Loading branch information
SamWot committed Jun 27, 2017
1 parent 0bd79f4 commit a179d25
Show file tree
Hide file tree
Showing 8 changed files with 490 additions and 34 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Expand Up @@ -262,8 +262,8 @@ def FeatureSDWAMac : SubtargetFeature<"sdwa-mav",
"Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
>;

def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc",
"HasSDWAClampVOPC",
def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
"HasSDWAOutModsVOPC",
"true",
"Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
>;
Expand Down Expand Up @@ -452,7 +452,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP
]
>;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Expand Up @@ -128,7 +128,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasSDWAScalar(false),
HasSDWASdst(false),
HasSDWAMac(false),
HasSDWAClampVOPC(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Expand Up @@ -153,7 +153,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
bool HasSDWAScalar;
bool HasSDWASdst;
bool HasSDWAMac;
bool HasSDWAClampVOPC;
bool HasSDWAOutModsVOPC;
bool HasDPP;
bool FlatAddressSpace;
bool FlatInstOffsets;
Expand Down Expand Up @@ -452,8 +452,8 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
return HasSDWAMac;
}

bool hasSDWAClampVOPC() const {
return HasSDWAClampVOPC;
bool hasSDWAOutModsVOPC() const {
return HasSDWAOutModsVOPC;
}

/// \brief Returns the offset in bytes from the start of the input buffer
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Expand Up @@ -626,7 +626,9 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
using namespace AMDGPU::SDWA;

if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
// XXX: static_cast<int> is needed to avoid stupid warning:
// compare with unsigned is always true
if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
Val <= SDWA9EncValues::SRC_VGPR_MAX) {
return createRegOperand(getVgprClassId(Width),
Val - SDWA9EncValues::SRC_VGPR_MIN);
Expand Down
14 changes: 9 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -2444,8 +2444,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}

int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
if ( DstIdx == -1)
DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst);

const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };

Expand Down Expand Up @@ -2488,14 +2486,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
return false;
}
} else if (!ST.hasSDWAClampVOPC()) {
} else if (!ST.hasSDWAOutModsVOPC()) {
// No clamp allowed on GFX9 for VOPC
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
if (Clamp != nullptr &&
(!Clamp->isImm() || Clamp->getImm() != 0)) {
if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
return false;
}

// No omod allowed on GFX9 for VOPC
const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
return false;
}
}
}
}
Expand Down
44 changes: 24 additions & 20 deletions llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
Expand Up @@ -627,10 +627,13 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
return false;
}

if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
if (!ST.hasSDWAOutModsVOPC() &&
(TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
return false;

} else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
} else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
!TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
return false;
}

Expand All @@ -649,25 +652,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
assert(SDWAOpcode != -1);

// Copy dst, if it is present in original then should also be present in SDWA
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (!Dst && !TII->isVOPC(MI))
return false;

const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);

// Create SDWA version of instruction MI and initialize its operands
MachineInstrBuilder SDWAInst =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);

// Copy dst, if it is present in original then should also be present in SDWA
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
} else {
Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
} else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
assert(Dst &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
SDWAInst.add(*Dst);
} else {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
}

// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
Expand Down Expand Up @@ -714,20 +716,22 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
}

// Copy omod if present, initialize otherwise if needed
MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
if (OMod) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
SDWAInst.add(*OMod);
} else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
SDWAInst.addImm(0);
if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
if (OMod) {
SDWAInst.add(*OMod);
} else {
SDWAInst.addImm(0);
}
}

// Initialize dst_sel and dst_unused if present
if (Dst) {
assert(
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
// Initialize dst_sel if present
if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
}

// Initialize dst_unused if present
if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
}

Expand Down

0 comments on commit a179d25

Please sign in to comment.