48 changes: 42 additions & 6 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
ImmTyInstOffset,
ImmTyOffset0,
ImmTyOffset1,
ImmTyDLC,
ImmTyGLC,
ImmTySLC,
ImmTyTFE,
Expand Down Expand Up @@ -314,6 +315,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isDLC() const { return isImmTy(ImmTyDLC); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
Expand Down Expand Up @@ -676,6 +678,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
case ImmTyInstOffset: OS << "InstOffset"; break;
case ImmTyOffset0: OS << "Offset0"; break;
case ImmTyOffset1: OS << "Offset1"; break;
case ImmTyDLC: OS << "DLC"; break;
case ImmTyGLC: OS << "GLC"; break;
case ImmTySLC: OS << "SLC"; break;
case ImmTyTFE: OS << "TFE"; break;
Expand Down Expand Up @@ -1184,6 +1187,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

AMDGPUOperand::Ptr defaultDLC() const;
AMDGPUOperand::Ptr defaultGLC() const;
AMDGPUOperand::Ptr defaultSLC() const;

Expand Down Expand Up @@ -2303,13 +2307,26 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
}

if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
if (TSFlags & SIInstrFlags::FLAT) {
// FIXME: Produces error without correct column reported.
auto OpNum =
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
auto Opcode = Inst.getOpcode();
auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);

const auto &Op = Inst.getOperand(OpNum);
if (Op.getImm() != 0)
if (!hasFlatOffsets() && Op.getImm() != 0)
return Match_InvalidOperand;

// GFX10: Address offset is 12-bit signed byte offset. Must be positive for
// FLAT segment. For FLAT segment MSB is ignored and forced to zero.
if (isGFX10()) {
if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
if (!isInt<12>(Op.getImm()))
return Match_InvalidOperand;
} else {
if (!isUInt<11>(Op.getImm()))
return Match_InvalidOperand;
}
}
}

return Match_Success;
Expand Down Expand Up @@ -3887,6 +3904,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
}
}

if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
return MatchOperand_ParseFail;

Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
return MatchOperand_Success;
}
Expand Down Expand Up @@ -5101,6 +5121,10 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
// mubuf
//===----------------------------------------------------------------------===//

// Builds the default DLC operand: an immediate with value 0, a
// default-constructed source location, and immediate type ImmTyDLC.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
  // No source text backs a defaulted operand, so the location is left empty.
  const SMLoc NoLoc = SMLoc();
  const auto Kind = AMDGPUOperand::ImmTyDLC;
  return AMDGPUOperand::CreateImm(this, 0, NoLoc, Kind);
}

// Builds the default GLC operand: an immediate with value 0, a
// default-constructed source location, and immediate type ImmTyGLC.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  // No source text backs a defaulted operand, so the location is left empty.
  const SMLoc NoLoc = SMLoc();
  const auto Kind = AMDGPUOperand::ImmTyGLC;
  return AMDGPUOperand::CreateImm(this, 0, NoLoc, Kind);
}
Expand Down Expand Up @@ -5177,6 +5201,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}

if (isGFX10())
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}

void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
Expand Down Expand Up @@ -5214,6 +5241,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);

if (isGFX10())
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -5249,8 +5279,12 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
}
}

bool IsGFX10 = isGFX10();

addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
if (IsGFX10)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
Expand Down Expand Up @@ -5353,6 +5387,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
{"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
{"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
Expand Down Expand Up @@ -5581,7 +5616,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
}

// Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
// Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
// it has src2 register operand that is tied to dst operand
// we don't allow modifiers for this operand in assembler so src2_modifiers
// should be 0.
Expand Down Expand Up @@ -6031,7 +6066,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
break;

case SIInstrFlags::VOPC:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
Expand Down
585 changes: 383 additions & 202 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td

Large diffs are not rendered by default.

264 changes: 238 additions & 26 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td

Large diffs are not rendered by default.

16 changes: 14 additions & 2 deletions llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,14 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
}

// Writes operand OpNo of MI to O as a sign-extended decimal immediate.
// The sign-extension width depends on the subtarget: 12 bits when
// AMDGPU::isGFX10(STI) holds, 13 bits otherwise.
void AMDGPUInstPrinter::printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
// NOTE(review): as written, this statement prints a 13-bit sign-extended
// value unconditionally, and the subtarget check below then prints the
// operand a second time. This text appears to come from a rendered diff, so
// this is probably the removed pre-change line -- confirm against the real
// file before relying on it.
O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
// GFX10: Address offset is 12-bit signed byte offset.
if (AMDGPU::isGFX10(STI)) {
O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
} else {
// All other subtargets keep the 13-bit interpretation.
O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
}
}

void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
Expand Down Expand Up @@ -128,7 +134,7 @@ void AMDGPUInstPrinter::printOffsetS13(const MCInst *MI, unsigned OpNo,
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << ((OpNo == 0)? "offset:" : " offset:");
printS13ImmDecOperand(MI, OpNo, O);
printS13ImmDecOperand(MI, OpNo, STI, O);
}
}

Expand Down Expand Up @@ -173,6 +179,12 @@ void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "gds");
}

// Prints the "dlc" named-bit operand at index OpNo of MI to O, but only when
// the subtarget is GFX10; on every other subtarget nothing is written.
void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  // Guard: skip the operand entirely on subtargets that are not GFX10.
  if (!AMDGPU::isGFX10(STI))
    return;

  printNamedBit(MI, OpNo, O, "dlc");
}

void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "glc");
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
Expand All @@ -67,6 +68,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
const MCSubtargetInfo &STI, raw_ostream &O);
void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@ static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
// Atomics don't have a GLC, so omit the field if it is not there.
if (Glc)
NewGlob->addOperand(MF, *Glc);

MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
if (DLC)
NewGlob->addOperand(MF, *DLC);

NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
// _D16 have a vdst_in operand, copy it in.
MachineOperand *VDstInOp = TII->getNamedOperand(MI,
Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,24 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,

// Do a 64-bit pointer add.
if (ST.flatScratchIsPointer()) {
if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
.addReg(FlatScrInitHi)
.addImm(0);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
addReg(FlatScrInitLo).
addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
(31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
addReg(FlatScrInitHi).
addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
(31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
return;
}

BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
Expand All @@ -80,6 +98,8 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
return;
}

assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);

// Copy the size in bytes.
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitHi, RegState::Kill);
Expand Down Expand Up @@ -423,6 +443,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
.addReg(Rsrc01)
.addImm(EncodedOffset) // offset
.addImm(0) // glc
.addImm(0) // dlc
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
.addMemOperand(MMO);
return;
Expand Down Expand Up @@ -463,6 +484,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
.addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) // offset
.addImm(0) // glc
.addImm(0) // dlc
.addMemOperand(MMO)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4166,6 +4166,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
getNamedOperand(MI, AMDGPU::OpName::glc)) {
MIB.addImm(GLC->getImm());
}
if (const MachineOperand *DLC =
getNamedOperand(MI, AMDGPU::OpName::dlc)) {
MIB.addImm(DLC->getImm());
}

MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,7 @@ def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;

def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
Expand Down
15 changes: 11 additions & 4 deletions llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
bool GLC1;
bool SLC0;
bool SLC1;
bool DLC0;
bool DLC1;
bool UseST64;
SmallVector<MachineInstr *, 8> InstsToMove;
};
Expand Down Expand Up @@ -323,7 +325,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
return (EltOffset0 + CI.Width0 == EltOffset1 ||
EltOffset1 + CI.Width1 == EltOffset0) &&
CI.GLC0 == CI.GLC1 &&
CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
}

Expand Down Expand Up @@ -637,6 +639,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
}
CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
}

// Check both offsets fit in the reduced range.
Expand Down Expand Up @@ -857,6 +861,7 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.GLC0) // glc
.addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});

std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
Expand Down Expand Up @@ -909,6 +914,7 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
.addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});

std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
Expand Down Expand Up @@ -1088,9 +1094,10 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset0, CI.Offset1)) // offset
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
.addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});

moveInstsAfter(MIB, CI.InstsToMove);
Expand Down
10 changes: 7 additions & 3 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
.addImm(0) // dlc
.cloneMemRefs(*MI);

const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
Expand Down Expand Up @@ -639,6 +640,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
.addImm(0) // dlc
.addMemOperand(NewMMO);

if (NumSubRegs > 1)
Expand Down Expand Up @@ -769,6 +771,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
.addReg(MFI->getScratchRSrcReg()) // sbase
.addReg(OffsetReg, RegState::Kill) // soff
.addImm(0) // glc
.addImm(0) // dlc
.addMemOperand(MMO);

continue;
Expand Down Expand Up @@ -928,9 +931,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,

auto MIB =
BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
.addReg(MFI->getScratchRSrcReg()) // sbase
.addReg(OffsetReg, RegState::Kill) // soff
.addImm(0) // glc
.addReg(MFI->getScratchRSrcReg()) // sbase
.addReg(OffsetReg, RegState::Kill) // soff
.addImm(0) // glc
.addImm(0) // dlc
.addMemOperand(MMO);

if (NumSubRegs > 1 && i == 0)
Expand Down
277 changes: 248 additions & 29 deletions llvm/lib/Target/AMDGPU/SMInstructions.td

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ body: |
; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:sgpr(p1) = COPY $sgpr2_sgpr3
%1:vgpr(p1) = COPY %0
%2:vgpr(s32) = G_IMPLICIT_DEF
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ body: |
; GCN-LABEL: name: implicit_def_s32
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = COPY $vgpr3_vgpr4
%1:vgpr(s32) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store 4, addrspace 1)
Expand All @@ -30,7 +30,7 @@ body: |
; GCN-LABEL: name: implicit_def_s64
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = COPY $vgpr3_vgpr4
%1:vgpr(s64) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store 8, addrspace 1)
Expand Down Expand Up @@ -60,7 +60,7 @@ body: |
; GCN-LABEL: name: implicit_def_p1
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
Expand All @@ -76,7 +76,7 @@ body: |
; GCN-LABEL: name: implicit_def_p3
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p3) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
Expand All @@ -92,7 +92,7 @@ body: |
; GCN-LABEL: name: implicit_def_p4
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p4) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
Expand Down
236 changes: 118 additions & 118 deletions llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir

Large diffs are not rendered by default.

316 changes: 158 additions & 158 deletions llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir

Large diffs are not rendered by default.

48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0:: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
Expand All @@ -55,10 +55,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -105,8 +105,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
Expand All @@ -117,10 +117,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
Expand Down Expand Up @@ -168,8 +168,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
Expand All @@ -180,10 +180,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -233,8 +233,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
Expand All @@ -245,10 +245,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -310,8 +310,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
Expand All @@ -322,10 +322,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -375,8 +375,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
Expand All @@ -387,10 +387,10 @@ body: |
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ body: |
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr4_vgpr5 = IMPLICIT_DEF
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr2 = IMPLICIT_DEF
$vgpr3 = IMPLICIT_DEF
$vgpr6 = IMPLICIT_DEF
$vgpr0 = V_ADD_I32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec
FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
%3 = FLAT_LOAD_DWORD %0, 4, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%3 = FLAT_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ body: |
%14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
%15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
%16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
S_ENDPGM 0
bb.2:
Expand Down Expand Up @@ -78,7 +78,7 @@ body: |
bb.8:
successors: %bb.10
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
%35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
%28:vreg_1 = COPY %35
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ body: |
bb.9:
successors: %bb.10(0x80000000)
%19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec
%22:sreg_64 = COPY $exec, implicit-def $exec
%23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ body: |
%23:vreg_128 = COPY killed %17
%24:sreg_64 = COPY killed %16
%25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec
%26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec
%29:vreg_128 = COPY killed %21
%29.sub0:vreg_128 = COPY %1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ body: |
%0 = COPY $sgpr2_sgpr3
%1 = COPY $vgpr2
%2 = COPY $vgpr3
%3 = S_LOAD_DWORDX8_IMM %0, 0, 0
%4 = S_LOAD_DWORDX4_IMM %0, 12, 0
%5 = S_LOAD_DWORDX8_IMM %0, 16, 0
%6 = S_LOAD_DWORDX4_IMM %0, 28, 0
%3 = S_LOAD_DWORDX8_IMM %0, 0, 0, 0
%4 = S_LOAD_DWORDX4_IMM %0, 12, 0, 0
%5 = S_LOAD_DWORDX8_IMM %0, 16, 0, 0
%6 = S_LOAD_DWORDX4_IMM %0, 28, 0, 0
undef %7.sub0 = S_MOV_B32 212739
%20 = COPY %7
%11 = COPY %20
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#
# GCN-LABEL: bb.6:
# GCN: successors: %bb.7(0x{{[0-9]+}}), %bb.18(0x{{[0-9]+}})
# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, implicit $exec
# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, implicit $exec
#

--- |
Expand Down Expand Up @@ -69,7 +69,7 @@ body: |
%10:sreg_64 = COPY killed %5
undef %11.sub2:sreg_128 = COPY %4
%11.sub3:sreg_128 = COPY %3
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, implicit $exec
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, implicit $exec
undef %13.sub1:vreg_128 = COPY %9.sub1
%13.sub2:vreg_128 = COPY %9.sub2
%14:sreg_64 = V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $exec
Expand Down Expand Up @@ -161,7 +161,7 @@ body: |
bb.18:
successors: %bb.7(0x80000000)
dead %59:vgpr_32 = V_FMA_F32 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $exec
dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, implicit $exec
dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, 0, implicit $exec
undef %66.sub1:vreg_128 = COPY %13.sub1
%66.sub2:vreg_128 = COPY %13.sub2
%67:sreg_64 = V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $exec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ body: |
%40:vgpr_32 = V_MAD_F32 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $exec
%41:vgpr_32 = V_MAD_F32 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $exec
%42:vgpr_32 = V_CVT_I32_F32_e32 killed %41, implicit $exec
%43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0 :: (dereferenceable invariant load 4)
%43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0, 0 :: (dereferenceable invariant load 4)
%45:vgpr_32 = V_MUL_LO_I32 killed %42, killed %43, implicit $exec
%46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec
%47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec
%50:sreg_64 = COPY $exec, implicit-def $exec
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc
Expand Down
96 changes: 48 additions & 48 deletions llvm/test/CodeGen/AMDGPU/collapse-endcf.mir

Large diffs are not rendered by default.

96 changes: 48 additions & 48 deletions llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ body: |
liveins: $sgpr0_sgpr1
%0 = COPY $sgpr0_sgpr1
%1 = S_LOAD_DWORDX2_IMM %0, 36, 0
%1 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%2 = COPY %1.sub1
%3 = COPY %1.sub0
%4 = S_MOV_B32 61440
Expand All @@ -54,7 +54,7 @@ body: |
%8 = S_MOV_B32 9999
%9 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc
%10 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -130,7 +130,7 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 36, 0
%4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%31 = V_ASHRREV_I32_e64 31, %3, implicit $exec
%32 = REG_SEQUENCE %3, 1, %31, 2
%33 = V_LSHLREV_B64 2, killed %32, implicit $exec
Expand All @@ -144,19 +144,19 @@ body: |
%34 = V_MOV_B32_e32 63, implicit $exec
%27 = V_AND_B32_e64 %26, %24, implicit $exec
FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %37, %27, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%28 = V_AND_B32_e64 %24, %26, implicit $exec
FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %37, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%29 = V_AND_B32_e32 %26, %24, implicit $exec
FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %37, %29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%30 = V_AND_B32_e64 %26, %26, implicit $exec
FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %37, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%31 = V_AND_B32_e64 %34, %34, implicit $exec
FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %37, %31, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
Expand Down Expand Up @@ -210,7 +210,7 @@ body: |
liveins: $sgpr0_sgpr1
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 36, 0
%4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%5 = S_MOV_B32 1
%6 = COPY %4.sub1
%7 = COPY %4.sub0
Expand All @@ -219,7 +219,7 @@ body: |
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
%12 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc
%13 = COPY %12
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -316,7 +316,7 @@ body: |
%2 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16 = REG_SEQUENCE %2, 1, %15, 2
%17 = V_LSHLREV_B64 2, killed %16, implicit $exec
Expand All @@ -332,34 +332,34 @@ body: |
%27 = S_MOV_B32 -4
%11 = V_LSHLREV_B32_e64 12, %10, implicit $exec
FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%12 = V_LSHLREV_B32_e64 %7, 12, implicit $exec
FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%13 = V_LSHL_B32_e64 %7, 12, implicit $exec
FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%14 = V_LSHL_B32_e64 12, %7, implicit $exec
FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%15 = V_LSHL_B32_e64 12, %24, implicit $exec
FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%22 = V_LSHL_B32_e64 %6, 12, implicit $exec
FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%23 = V_LSHL_B32_e64 %6, 32, implicit $exec
FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%25 = V_LSHL_B32_e32 %6, %6, implicit $exec
FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%26 = V_LSHLREV_B32_e32 11, %24, implicit $exec
FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%28 = V_LSHL_B32_e32 %27, %6, implicit $exec
FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
Expand Down Expand Up @@ -410,7 +410,7 @@ body: |
liveins: $sgpr0_sgpr1
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 36, 0
%4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%5 = S_MOV_B32 999123
%6 = COPY %4.sub1
%7 = COPY %4.sub0
Expand All @@ -419,7 +419,7 @@ body: |
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
%12 = S_ASHR_I32 killed %5, 12, implicit-def dead $scc
%13 = COPY %12
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -519,7 +519,7 @@ body: |
%2 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%3 = S_LOAD_DWORDX2_IMM %0, 36, 0
%3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16 = REG_SEQUENCE %2, 1, %15, 2
%17 = V_LSHLREV_B64 2, killed %16, implicit $exec
Expand All @@ -540,34 +540,34 @@ body: |
%35 = V_MOV_B32_e32 2, implicit $exec
%11 = V_ASHRREV_I32_e64 8, %10, implicit $exec
FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%12 = V_ASHRREV_I32_e64 %8, %10, implicit $exec
FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%13 = V_ASHR_I32_e64 %7, 3, implicit $exec
FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%14 = V_ASHR_I32_e64 7, %32, implicit $exec
FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%15 = V_ASHR_I32_e64 %27, %24, implicit $exec
FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%22 = V_ASHR_I32_e64 %6, 4, implicit $exec
FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%23 = V_ASHR_I32_e64 %6, %33, implicit $exec
FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%25 = V_ASHR_I32_e32 %34, %34, implicit $exec
FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%26 = V_ASHRREV_I32_e32 11, %10, implicit $exec
FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%28 = V_ASHR_I32_e32 %27, %35, implicit $exec
FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
Expand Down Expand Up @@ -618,7 +618,7 @@ body: |
liveins: $sgpr0_sgpr1
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 36, 0
%4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%5 = S_MOV_B32 -999123
%6 = COPY %4.sub1
%7 = COPY %4.sub0
Expand All @@ -627,7 +627,7 @@ body: |
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
%12 = S_LSHR_B32 killed %5, 12, implicit-def dead $scc
%13 = COPY %12
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -728,7 +728,7 @@ body: |
%2 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%3 = S_LOAD_DWORDX2_IMM %0, 36, 0
%3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16 = REG_SEQUENCE %2, 1, %15, 2
%17 = V_LSHLREV_B64 2, killed %16, implicit $exec
Expand All @@ -749,34 +749,34 @@ body: |
%35 = V_MOV_B32_e32 2, implicit $exec
%11 = V_LSHRREV_B32_e64 8, %10, implicit $exec
FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%12 = V_LSHRREV_B32_e64 %8, %10, implicit $exec
FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%13 = V_LSHR_B32_e64 %7, 3, implicit $exec
FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%14 = V_LSHR_B32_e64 7, %32, implicit $exec
FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%15 = V_LSHR_B32_e64 %27, %24, implicit $exec
FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%22 = V_LSHR_B32_e64 %6, 4, implicit $exec
FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%23 = V_LSHR_B32_e64 %6, %33, implicit $exec
FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%25 = V_LSHR_B32_e32 %34, %34, implicit $exec
FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%26 = V_LSHRREV_B32_e32 11, %10, implicit $exec
FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%28 = V_LSHR_B32_e32 %27, %35, implicit $exec
FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
Expand All @@ -800,7 +800,7 @@ body: |
bb.0:
%0 = V_MOV_B32_e32 0, implicit $exec
%2 = V_XOR_B32_e64 killed %0, undef %1, implicit $exec
FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ body: |
bb.3..lr.ph3410.preheader:
successors: %bb.4(0x80000000)
dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
%36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
%10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0
%10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0
S_NOP 0, implicit-def %4:sreg_128, implicit %10.sub1:sreg_128
S_CBRANCH_SCC0 %bb.3, implicit undef $scc
S_BRANCH %bb.1
Expand All @@ -26,7 +26,7 @@ body: |
S_BRANCH %bb.4
bb.3:
%10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0
%10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0
%7:sreg_32_xm0 = COPY %10.sub1:sreg_128
%8:sreg_32_xm0 = COPY %10.sub2:sreg_128
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/dead-lane.mir
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ body: |
%1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
%3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1
FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@ body: |
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %2, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %2, 0, 0, 0, 0, implicit $exec
dead %2:vgpr_32 = V_MAC_F32_e32 %0:vgpr_32, %1:vgpr_32, %2:vgpr_32, implicit $exec
S_ENDPGM 0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/dead_copy.mir
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ body: |
$vgpr10 = COPY killed $sgpr14, implicit $exec
$vgpr11 = COPY killed $sgpr15, implicit $exec
FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ body: |
; CHECK: dead %26:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $exec
; CHECK: dead %27:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $exec
; CHECK: dead %28:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF6]], 0, [[DEF3]], 0, 0, implicit $exec
; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, implicit $exec
; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1
Expand Down Expand Up @@ -129,7 +129,7 @@ body: |
%26:vgpr_32 = V_MAD_F32 0, %25, 0, %4, 0, %1, 0, 0, implicit $exec
%27:vgpr_32 = V_MAD_F32 0, %25, 0, %5, 0, %2, 0, 0, implicit $exec
%28:vgpr_32 = V_MAD_F32 0, %25, 0, %6, 0, %3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/endpgm-dce.mir
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
...
---
# GCN-LABEL: name: load_without_memoperand
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: S_ENDPGM 0
name: load_without_memoperand
tracksRegLiveness: true
Expand All @@ -41,15 +41,15 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
...
---
# GCN-LABEL: name: load_volatile
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
# GCN-NEXT: S_ENDPGM 0
name: load_volatile
tracksRegLiveness: true
Expand All @@ -65,15 +65,15 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
...
---
# GCN-LABEL: name: store
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
# GCN-NEXT: S_ENDPGM 0
name: store
tracksRegLiveness: true
Expand All @@ -86,7 +86,7 @@ body: |
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
---
Expand Down Expand Up @@ -297,12 +297,12 @@ body: |
S_ENDPGM 0
...

# GCN-LABEL: name: implicit_use_on_s_endpgm
# GCN-LABEL: name: implicit_use_on_S_ENDPGM 0
# GCN: V_ADD_I32
# GCN: COPY
# GCN: V_ADDC_U32
# GCN: S_ENDPGM 0, implicit %3
name: implicit_use_on_s_endpgm
name: implicit_use_on_S_ENDPGM 0
tracksRegLiveness: true

body: |
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,24 @@ body: |
%1 = COPY $sgpr4_sgpr5
%0 = COPY $vgpr0
%3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%3 = S_LOAD_DWORDX2_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %1, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%7 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%2 = V_MOV_B32_e32 0, implicit $exec
undef %12.sub0 = V_ADD_I32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec
%11 = COPY %4.sub1
%12.sub1 = V_ADDC_U32_e32 %11, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
%5 = FLAT_LOAD_DWORD %12, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1)
%5 = FLAT_LOAD_DWORD %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1)
undef %9.sub0 = V_ADD_I32_e32 %3.sub0, %7, implicit-def $vcc, implicit $exec
%8 = COPY %3.sub1
%9.sub1 = V_ADDC_U32_e32 %8, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
undef %13.sub0 = V_ADD_I32_e32 16, %12.sub0, implicit-def $vcc, implicit $exec
%13.sub1 = V_ADDC_U32_e32 %12.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
%6 = FLAT_LOAD_DWORD %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34)
%6 = FLAT_LOAD_DWORD %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34)
undef %10.sub0 = V_ADD_I32_e32 16, %9.sub0, implicit-def $vcc, implicit $exec
%10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
FLAT_STORE_DWORD %9, %5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2)
FLAT_STORE_DWORD %10, %6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4)
FLAT_STORE_DWORD %9, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2)
FLAT_STORE_DWORD %10, %6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4)
S_ENDPGM 0
...
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/AMDGPU/flat-offset-bug.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; Read-modify-write of the i32 one element (4 bytes) past %p, a pointer with
; no addrspace qualifier: load it, add 1, store it back to the same address.
; The gfx900 run expects the 4-byte displacement to show up as "offset:4" on
; both the flat load and the flat store; the gfx1010 run expects the address
; operand to be the last thing on each line, i.e. no offset operand.
; GCN-LABEL: flat_inst_offset:
; GFX9: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:4
; GFX9: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}} offset:4
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
; GFX10: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}{{$}}
define void @flat_inst_offset(i32* nocapture %p) {
%gep = getelementptr inbounds i32, i32* %p, i64 1
%load = load i32, i32* %gep, align 4
%inc = add nsw i32 %load, 1
store i32 %inc, i32* %gep, align 4
ret void
}

; Read-modify-write of the i32 one element (4 bytes) past %p, where %p is an
; addrspace(1) pointer: load it, add 1, store it back to the same address.
; The expectations use the prefix shared by both runs: the global load and the
; global store are each expected to carry the displacement as "offset:4".
; GCN-LABEL: global_inst_offset:
; GCN: global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off offset:4
; GCN: global_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:4
define void @global_inst_offset(i32 addrspace(1)* nocapture %p) {
%gep = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 1
%load = load i32, i32 addrspace(1)* %gep, align 4
%inc = add nsw i32 %load, 1
store i32 %inc, i32 addrspace(1)* %gep, align 4
ret void
}

; Loads the i16 at element index 4 of %arg (8 bytes past the pointer) and
; inserts it into lane 0 of a <2 x i16> whose lane 1 is the constant 0, then
; adds the vector to itself and stores it to %out.
; The gfx900 run expects a flat_load_short_d16 carrying the displacement as
; "offset:8"; the gfx1010 run expects the same opcode with the address operand
; last on the line, i.e. no offset operand.
; GCN-LABEL: load_i16_lo:
; GFX9: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_i16_lo(i16* %arg, <2 x i16>* %out) {
%gep = getelementptr inbounds i16, i16* %arg, i32 4
%ld = load i16, i16* %gep, align 2
%vec = insertelement <2 x i16> <i16 undef, i16 0>, i16 %ld, i32 0
%v = add <2 x i16> %vec, %vec
store <2 x i16> %v, <2 x i16>* %out, align 4
ret void
}

; Loads the i16 at element index 4 of %arg (8 bytes past the pointer) and
; inserts it into lane 1 of a <2 x i16> whose lane 0 is the constant 0, then
; adds the vector to itself and stores it to %out.
; The gfx900 run expects a flat_load_short_d16_hi carrying the displacement as
; "offset:8"; the gfx1010 run expects the same opcode with the address operand
; last on the line, i.e. no offset operand.
; GCN-LABEL: load_i16_hi:
; GFX9: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_i16_hi(i16* %arg, <2 x i16>* %out) {
%gep = getelementptr inbounds i16, i16* %arg, i32 4
%ld = load i16, i16* %gep, align 2
%vec = insertelement <2 x i16> <i16 0, i16 undef>, i16 %ld, i32 1
%v = add <2 x i16> %vec, %vec
store <2 x i16> %v, <2 x i16>* %out, align 4
ret void
}

; Loads the half at element index 4 of %arg (8 bytes past the pointer) and
; inserts it into lane 0 of a <2 x half> whose lane 1 is 0xH0000, then fadds
; the vector to itself and stores it to %out.
; The gfx900 run expects a flat_load_short_d16 carrying the displacement as
; "offset:8"; the gfx1010 run expects the same opcode with the address operand
; last on the line, i.e. no offset operand.
; GCN-LABEL: load_half_lo:
; GFX9: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_half_lo(half* %arg, <2 x half>* %out) {
%gep = getelementptr inbounds half, half* %arg, i32 4
%ld = load half, half* %gep, align 2
%vec = insertelement <2 x half> <half undef, half 0xH0000>, half %ld, i32 0
%v = fadd <2 x half> %vec, %vec
store <2 x half> %v, <2 x half>* %out, align 4
ret void
}

; Loads the half at element index 4 of %arg (8 bytes past the pointer) and
; inserts it into lane 1 of a <2 x half> whose lane 0 is 0xH0000, then fadds
; the vector to itself and stores it to %out.
; The gfx900 run expects a flat_load_short_d16_hi carrying the displacement as
; "offset:8"; the gfx1010 run expects the same opcode with the address operand
; last on the line, i.e. no offset operand.
; GCN-LABEL: load_half_hi:
; GFX9: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_half_hi(half* %arg, <2 x half>* %out) {
%gep = getelementptr inbounds half, half* %arg, i32 4
%ld = load half, half* %gep, align 2
%vec = insertelement <2 x half> <half 0xH0000, half undef>, half %ld, i32 1
%v = fadd <2 x half> %vec, %vec
store <2 x half> %v, <2 x half>* %out, align 4
ret void
}

; Loads the float at element index 4 of %arg (16 bytes past the pointer),
; fadds it to itself and stores the result to %out. No vector or d16 handling
; is involved here; this is a plain dword load.
; The gfx900 run expects a flat_load_dword carrying the displacement as
; "offset:16"; the gfx1010 run expects the same opcode with the address
; operand last on the line, i.e. no offset operand.
; GCN-LABEL: load_float_lo:
; GFX9: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:16{{$}}
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_float_lo(float* %arg, float* %out) {
%gep = getelementptr inbounds float, float* %arg, i32 4
%ld = load float, float* %gep, align 4
%v = fadd float %ld, %ld
store float %v, float* %out, align 4
ret void
}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ body: |
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr0_sgpr1
%2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0
%2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0, 0
%3:sreg_32_xm0 = S_MOV_B32 2
%4:vgpr_32 = V_LSHLREV_B32_e64 killed %3, %0, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%6:vreg_64 = REG_SEQUENCE killed %4, %subreg.sub0, killed %5, %subreg.sub1
%7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, implicit $exec
%7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, 0, implicit $exec
%8:sreg_32_xm0 = S_MOV_B32 65535
%9:vgpr_32 = COPY %8
%10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
Expand Down
78 changes: 39 additions & 39 deletions llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = V_MOV_B32_e32 1065353216, implicit $exec
%13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
Expand Down Expand Up @@ -222,13 +222,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 1065353216, implicit $exec
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
Expand Down Expand Up @@ -289,14 +289,14 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 1065353216, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
%16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
Expand Down Expand Up @@ -360,16 +360,16 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 1065353216, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
Expand Down Expand Up @@ -427,13 +427,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 1, implicit $exec
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
Expand Down Expand Up @@ -494,16 +494,16 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 -2, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
Expand Down Expand Up @@ -564,13 +564,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 15360, implicit $exec
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
Expand Down Expand Up @@ -631,13 +631,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = V_MOV_B32_e32 80886784, implicit $exec
%14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
Expand Down Expand Up @@ -697,13 +697,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = V_MOV_B32_e32 305413120, implicit $exec
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%6 = S_LOAD_DWORDX2_IMM %0, 13, 0
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
Expand All @@ -60,13 +60,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec
%26 = COPY %29
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -117,9 +117,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%6 = S_LOAD_DWORDX2_IMM %0, 13, 0
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
Expand All @@ -131,13 +131,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $exec
%26 = COPY %29
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -188,9 +188,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%6 = S_LOAD_DWORDX2_IMM %0, 13, 0
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
Expand All @@ -202,13 +202,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec
%26 = COPY %29
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand Down Expand Up @@ -259,9 +259,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%6 = S_LOAD_DWORDX2_IMM %0, 13, 0
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
%6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
Expand All @@ -273,13 +273,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $exec
%26 = COPY %29
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/fold-multiple.mir
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ body: |
%3 = S_LSHL_B32 %1, killed %1, implicit-def dead $scc
%4 = V_AND_B32_e64 killed %2, killed %3, implicit $exec
%5 = IMPLICIT_DEF
BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
110 changes: 55 additions & 55 deletions llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9, $vgpr10
BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, 0, implicit $exec
$vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec
S_ENDPGM 0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/hazard-inlineasm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ name: hazard-inlineasm

body: |
bb.0:
FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
INLINEASM &"v_mad_u64_u32 $0, $1, $2, $3, $4", 0, 2621450, def $vgpr26_vgpr27, 2818058, def dead $sgpr14_sgpr15, 589833, $sgpr12, 327689, killed $vgpr51, 2621449, $vgpr46_vgpr47
S_ENDPGM 0
...
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/hazard-kill.mir
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ body: |
liveins: $sgpr2, $sgpr3, $sgpr4
$sgpr6 = S_MOV_B32 killed $sgpr3
renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0
renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0, 0
$m0 = S_MOV_B32 killed renamable $sgpr4
dead renamable $sgpr0 = KILL undef renamable $sgpr2
renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $m0, implicit $exec
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: bb.0.entry:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0, $sgpr0_sgpr1
; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
; GCN: renamable $sgpr2 = COPY renamable $sgpr1
; GCN: renamable $sgpr4 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
; GCN: renamable $sgpr5 = S_MOV_B32 61440
Expand Down Expand Up @@ -101,7 +101,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1
; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
; GCN: $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5)
; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
; GCN: S_ENDPGM
entry:
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ body: |
bb.0 (%ir-block.2):
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
$vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
$vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec
$vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec
$vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
Original file line number Diff line number Diff line change
Expand Up @@ -230,28 +230,28 @@ name: vmem_gt_8dw_store

body: |
bb.0:
BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.1
bb.1:
FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
Expand Down Expand Up @@ -549,14 +549,14 @@ body: |
$flat_scr_lo = S_ADD_U32 $sgpr6, $sgpr9, implicit-def $scc
$flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc
DBG_VALUE $noreg, 2, !5, !11, debug-location !12
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5
$sgpr8 = S_MOV_B32 $sgpr5
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
$sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
S_ENDPGM 0
...
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ body: |
bb.0.entry:
liveins: $sgpr0_sgpr1
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
Expand All @@ -64,23 +64,23 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
S_BRANCH %bb.3
bb.2.if:
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
bb.3.done:
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
S_ENDPGM 0
...
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ body: |
%4.sub1 = COPY %3.sub0
undef %5.sub0 = COPY %4.sub1
%5.sub1 = COPY %4.sub0
FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%6 = IMPLICIT_DEF
undef %7.sub0_sub1 = COPY %6
%7.sub2 = COPY %3.sub0
FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%8 = IMPLICIT_DEF
undef %9.sub0_sub1_sub2 = COPY %8
%9.sub3 = COPY %3.sub0
FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
...
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s

declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup() #0

; GCN-LABEL: {{^}}test_s_get_waveid_in_workgroup:
; GFX10: global_store_dword
; GFX10: s_get_waveid_in_workgroup [[DEST:s[0-9]+]]
; GFX10: s_waitcnt lgkmcnt(0)
; GFX10: v_mov_b32_e32 [[VDEST:v[0-9]+]], [[DEST]]
; GFX10: global_store_dword v[{{[0-9:]+}}], [[VDEST]], off
; Kernel that writes the result of llvm.amdgcn.s.get.waveid.in.workgroup to %out.
define amdgpu_kernel void @test_s_get_waveid_in_workgroup(i32 addrspace(1)* %out) {
; Store to %out first so that %out is already loaded and its associated wait
; count is already inserted before the intrinsic call is emitted.
store i32 0, i32 addrspace(1)* %out
%v = call i32 @llvm.amdgcn.s.get.waveid.in.workgroup()
store i32 %v, i32 addrspace(1)* %out
ret void
}

attributes #0 = { nounwind }
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ body: |
successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000)
liveins: $vgpr0, $sgpr0_sgpr1
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$vgpr1 = V_ASHRREV_I32_e32 31, $vgpr0, implicit $exec
$vgpr1_vgpr2 = V_LSHL_B64 $vgpr0_vgpr1, 3, implicit $exec
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 0
S_WAITCNT 127
$vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
$vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
$vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
Expand All @@ -97,7 +97,7 @@ body: |
successors: %bb.2.exit(0x80000000)
liveins: $sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr1_vgpr2_vgpr3_vgpr4:0x00000003
$sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
dead $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
dead $vgpr0 = V_MOV_B32_e32 61440, implicit $exec
$sgpr4_sgpr5 = S_MOV_B64 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
Expand All @@ -30,7 +30,7 @@ body: |
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
Expand Down
Loading