[AMDGPU][True16] Support VOP3 source DPP operands. (#80892)
kosarev committed Feb 8, 2024
1 parent 92eaf03 commit 7d19dc5
Showing 15 changed files with 410 additions and 109 deletions.
43 changes: 32 additions & 11 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -314,8 +314,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
}

bool isRegOrInlineImmWithFP16InputMods() const {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
return isRegOrInline(
IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
}

bool isRegOrInlineImmWithFP32InputMods() const {
@@ -8151,7 +8152,7 @@ ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {

// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into src0_modifiers.
void cvtVOP3DstOpSelOnly(MCInst &Inst) {
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
int Opc = Inst.getOpcode();
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
if (OpSelIdx == -1)
@@ -8168,23 +8169,34 @@ void cvtVOP3DstOpSelOnly(MCInst &Inst) {

unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

if ((OpSel & (1 << SrcNum)) != 0) {
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
if (DstIdx == -1)
return;

const MCOperand &DstOp = Inst.getOperand(DstIdx);
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
if (DstOp.isReg() &&
MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
if (AMDGPU::isHi(DstOp.getReg(), MRI))
ModVal |= SISrcMods::DST_OP_SEL;
} else {
if ((OpSel & (1 << SrcNum)) != 0)
ModVal |= SISrcMods::DST_OP_SEL;
}
Inst.getOperand(ModIdx).setImm(ModVal);
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands) {
cvtVOP3P(Inst, Operands);
cvtVOP3DstOpSelOnly(Inst);
cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx) {
cvtVOP3P(Inst, Operands, OptionalIdx);
cvtVOP3DstOpSelOnly(Inst);
cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
@@ -8433,8 +8445,17 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,

uint32_t ModVal = 0;

if ((OpSel & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_0;
const MCOperand &SrcOp = Inst.getOperand(OpIdx);
if (SrcOp.isReg() && getMRI()
->getRegClass(AMDGPU::VGPR_16RegClassID)
.contains(SrcOp.getReg())) {
bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
if (VGPRSuffixIsHi)
ModVal |= SISrcMods::OP_SEL_0;
} else {
if ((OpSel & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_0;
}

if ((OpSelHi & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_1;
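The parser change above boils down to one rule: for a true16 VGPR_16 source or destination, the half selected by the register name (v1.l vs. v1.h) drives the op_sel modifier bit, while non-true16 operands keep taking the bit from the parsed op_sel immediate. A minimal standalone sketch of that rule, with assumed modifier-bit values (the real constants live in SIDefines.h):

// Sketch only -- mirrors the decision made in cvtVOP3P/cvtVOP3DstOpSelOnly
// above with plain booleans instead of MCInst operands. Bit positions are
// assumptions for illustration; the real values are defined in SIDefines.h.
#include <cstdint>

namespace opsel_sketch {
constexpr uint32_t OP_SEL_0 = 1u << 2;   // assumed src op_sel bit
constexpr uint32_t DST_OP_SEL = 1u << 3; // assumed dst op_sel bit

uint32_t srcModifierBits(bool IsTrue16VGPR, bool RegNamesHiHalf,
                         uint32_t OpSelImm, unsigned SrcIdx) {
  uint32_t ModVal = 0;
  if (IsTrue16VGPR) {
    if (RegNamesHiHalf)            // e.g. v1.h -> high 16 bits
      ModVal |= OP_SEL_0;
  } else if (OpSelImm & (1u << SrcIdx)) {
    ModVal |= OP_SEL_0;            // legacy path: explicit op_sel operand
  }
  return ModVal;
}
} // namespace opsel_sketch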
38 changes: 38 additions & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -913,6 +913,41 @@ static VOPModifiers collectVOPModifiers(const MCInst &MI,
return Modifiers;
}

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
const unsigned Opc = MI.getOpcode();
const MCRegisterClass &ConversionRC =
MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
SISrcMods::OP_SEL_0},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
SISrcMods::OP_SEL_0},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
SISrcMods::OP_SEL_0},
{AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
SISrcMods::DST_OP_SEL}}};
for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
if (OpIdx == -1 || OpModsIdx == -1)
continue;
MCOperand &Op = MI.getOperand(OpIdx);
if (!Op.isReg())
continue;
if (!ConversionRC.contains(Op.getReg()))
continue;
unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
const MCOperand &OpMods = MI.getOperand(OpModsIdx);
unsigned ModVal = OpMods.getImm();
if (ModVal & OpSelMask) { // isHi
unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
}
}
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
@@ -968,6 +1003,7 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
convertTrue16OpSel(MI);
auto Mods = collectVOPModifiers(MI);
insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
AMDGPU::OpName::op_sel);
@@ -991,6 +1027,8 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);

convertTrue16OpSel(MI);

int VDstInIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
if (VDstInIdx != -1)
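On the decoder side, convertTrue16OpSel inverts that mapping: when an operand's modifier carries the relevant op_sel mask and the operand decodes to a VGPR_16 register, the operand is rewritten to its high-half sibling. A rough sketch of the index arithmetic, assuming the VGPR_16 class enumerates registers as lo0, hi0, lo1, hi1, ...:

// Sketch only: models the per-operand walk in convertTrue16OpSel with plain
// integers. RegIdx is the 32-bit VGPR index recovered from the encoding
// (REG_IDX_MASK already applied); the enumeration order is an assumption.
struct SketchOperand {
  bool IsVGPR16;     // operand decoded into the VGPR_16 register class
  unsigned RegIdx;   // which 32-bit VGPR the encoding names
  unsigned ModBits;  // decoded srcN_modifiers immediate
};

unsigned pickTrue16Register(const SketchOperand &Op, unsigned OpSelMask) {
  if (!Op.IsVGPR16)
    return Op.RegIdx;                       // left untouched, as in the loop above
  bool IsHi = (Op.ModBits & OpSelMask) != 0;
  return Op.RegIdx * 2 + (IsHi ? 1 : 0);    // index into the VGPR_16 class
}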
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -203,6 +203,7 @@ class AMDGPUDisassembler : public MCDisassembler {
DecodeStatus convertVOP3PDPPInst(MCInst &MI) const;
DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
void convertMacDPPInst(MCInst &MI) const;
void convertTrue16OpSel(MCInst &MI) const;

enum OpWidthTy {
OPW32,
32 changes: 22 additions & 10 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -756,14 +756,14 @@ void SIFoldOperands::foldOperand(
int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx);

if (!isUseSafeToFold(*UseMI, UseOp))
if (!isUseSafeToFold(*UseMI, *UseOp))
return;

// FIXME: Fold operands with subregs.
if (UseOp.isReg() && OpToFold.isReg() &&
(UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister))
if (UseOp->isReg() && OpToFold.isReg() &&
(UseOp->isImplicit() || UseOp->getSubReg() != AMDGPU::NoSubRegister))
return;

// Special case for REG_SEQUENCE: We can't fold literals into
@@ -859,14 +859,26 @@ void SIFoldOperands::foldOperand(
if (MovOp == AMDGPU::COPY)
return;

UseMI->setDesc(TII->get(MovOp));
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
while (ImpOpI != ImpOpE) {
MachineInstr::mop_iterator Tmp = ImpOpI;
ImpOpI++;
UseMI->removeOperand(UseMI->getOperandNo(Tmp));
}
UseMI->setDesc(TII->get(MovOp));

if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
MachineOperand NewSrcOp(SrcOp);
MachineFunction *MF = UseMI->getParent()->getParent();
UseMI->removeOperand(1);
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers
UseMI->addOperand(NewSrcOp); // src0
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // op_sel
UseOpIdx = 2;
UseOp = &UseMI->getOperand(UseOpIdx);
}
CopiesToReplace.push_back(UseMI);
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
@@ -1027,7 +1039,7 @@ void SIFoldOperands::foldOperand(

// Don't fold into target independent nodes. Target independent opcodes
// don't have defined register classes.
if (UseDesc.isVariadic() || UseOp.isImplicit() ||
if (UseDesc.isVariadic() || UseOp->isImplicit() ||
UseDesc.operands()[UseOpIdx].RegClass == -1)
return;
}
@@ -1062,17 +1074,17 @@ void SIFoldOperands::foldOperand(
TRI->getRegClass(FoldDesc.operands()[0].RegClass);

// Split 64-bit constants into 32-bits for folding.
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) {
Register UseReg = UseOp.getReg();
if (UseOp->getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) {
Register UseReg = UseOp->getReg();
const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
if (AMDGPU::getRegBitWidth(*UseRC) != 64)
return;

APInt Imm(64, OpToFold.getImm());
if (UseOp.getSubReg() == AMDGPU::sub0) {
if (UseOp->getSubReg() == AMDGPU::sub0) {
Imm = Imm.getLoBits(32);
} else {
assert(UseOp.getSubReg() == AMDGPU::sub1);
assert(UseOp->getSubReg() == AMDGPU::sub1);
Imm = Imm.getHiBits(32);
}

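The SIFoldOperands change switches UseOp to a pointer because the operand list may be rebuilt: when the fold turns a COPY into V_MOV_B16_t16_e64, the instruction gains src0_modifiers and op_sel immediates around the source, so UseOpIdx moves to 2 and UseOp must be refreshed. Sketched below with a plain vector standing in for the MachineInstr operand list (the dst / src0_modifiers / src0 / op_sel order follows the code above):

// Illustration only: the operand reshuffle performed for V_MOV_B16_t16_e64,
// modeled on a vector of strings instead of MachineOperands.
// Input layout (COPY-like):  { dst, src }
// Output layout (t16 mov):   { dst, src0_modifiers, src0, op_sel }
#include <string>
#include <vector>

unsigned rewriteToT16Mov(std::vector<std::string> &Ops) {
  std::string Src = Ops.at(1);
  Ops.erase(Ops.begin() + 1);     // drop the old bare source
  Ops.push_back("imm:0");         // src0_modifiers
  Ops.push_back(Src);             // src0
  Ops.push_back("imm:0");         // op_sel
  return 2;                       // the new UseOpIdx pointing at src0
}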
23 changes: 19 additions & 4 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1148,7 +1148,13 @@ def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
class FP16VCSrcInputModsMatchClass<bit IsFake16>
: FPVCSrcInputModsMatchClass<16> {
let Name = !if(IsFake16, "RegOrInlineImmWithFPFake16InputMods",
"RegOrInlineImmWithFPT16InputMods");
let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" #
!if(IsFake16, "true", "false") # ">";
}
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;

class InputMods <AsmOperandClass matchClass> : Operand <i32> {
Expand All @@ -1166,7 +1172,8 @@ def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
class FP16VCSrcInputMods<bit IsFake16>
: FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;

class IntInputModsMatchClass <int opSize> : AsmOperandClass {
Expand Down Expand Up @@ -1653,11 +1660,11 @@ class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
}

// Return type of input modifiers operand for specified input operand for DPP
class getSrcModVOP3DPP <ValueType VT> {
class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> {
Operand ret =
!if (VT.isFP,
!if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
FP16VCSrcInputMods, FP32VCSrcInputMods),
FP16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
Int32VCSrcInputMods);
}

@@ -2450,6 +2457,10 @@ class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;

let HasOpSel = 1;
let HasModifiers = 1; // All instructions at least have OpSel.

// Most DstVT are 16-bit, but not all.
let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
let DstRC64 = getVALUDstForVT<DstVT>.ret;
@@ -2461,6 +2472,10 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src0ModVOP3DPP = getSrcModVOP3DPP<Src0VT, 0 /*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;

let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
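In SIInstrInfo.td, getSrcModVOP3DPP now takes an IsFake16 bit and forwards it to FP16VCSrcInputMods, so true16 f16/bf16 sources get the VS_16-based predicate (isRegOrInlineImmWithFP16InputMods<false>) while fake16 keeps the VS_32 one. The selection, paraphrased in C++ purely for readability (covering the common value types; names match the defs above, this is not real LLVM API):

// Readability paraphrase of getSrcModVOP3DPP<VT, IsFake16>; sketch only.
#include <string>

enum class SketchVT { F16, BF16, F32, I32 };

std::string srcModVOP3DPP(SketchVT VT, bool IsFake16) {
  switch (VT) {
  case SketchVT::F16:
  case SketchVT::BF16:  // 16-bit FP picks the t16/fake16-aware class
    return IsFake16 ? "FP16VCSrcInputMods<1 /*IsFake16*/>"
                    : "FP16VCSrcInputMods<0 /*IsFake16*/>";
  case SketchVT::F32:   // other FP keeps FP32VCSrcInputMods
    return "FP32VCSrcInputMods";
  default:              // non-FP falls through to the integer class
    return "Int32VCSrcInputMods";
  }
}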
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1235,6 +1235,12 @@ def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
let EncoderMethod = "getMachineOpValueT16Lo128";
}

// True 16 operands.
def VGPRSrc_16 : RegisterOperand<VGPR_16> {
let DecoderMethod = "DecodeVGPR_16RegisterClass";
let EncoderMethod = "getMachineOpValueT16";
}

//===----------------------------------------------------------------------===//
// ASrc_* Operands with an AccVGPR
//===----------------------------------------------------------------------===//
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir
@@ -50,7 +50,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
@@ -88,7 +88,7 @@ body: |
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
;
@@ -127,7 +127,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
@@ -59,7 +59,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
@@ -97,7 +97,7 @@ body: |
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
;
@@ -136,7 +136,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
@@ -66,7 +66,7 @@ body: |
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
;
; FAKE16-LABEL: name: ceil_f16
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -87,7 +87,7 @@ body: |
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
;
; FAKE16-LABEL: name: floor_f16
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
