21 changes: 7 additions & 14 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,16 +557,14 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
return false;

const unsigned SrcFlags = getUndefRegState(Src.isUndef());

// Note we could have mixed SGPR and VGPR destination banks for an SGPR
// source, and this relies on the fact that the same subregister indices are
// used for both.
ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
for (int I = 0, E = NumDst; I != E; ++I) {
MachineOperand &Dst = MI.getOperand(I);
BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
.addReg(SrcReg, SrcFlags, SubRegs[I]);
.addReg(SrcReg, 0, SubRegs[I]);

// Make sure the subregister index is valid for the source register.
SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
Expand Down Expand Up @@ -1696,11 +1694,6 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
return Ret;
}

// Selects a G_STORE. Runs initM0(I) first so that any M0 setup it decides is
// needed is emitted ahead of the store, then hands \p I to selectImpl together
// with the coverage info and returns selectImpl's result unchanged.
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
initM0(I);
return selectImpl(I, *CoverageInfo);
}

static int sizeToSubRegIndex(unsigned Size) {
switch (Size) {
case 32:
Expand Down Expand Up @@ -2226,19 +2219,20 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
}

// Conditionally emits an M0 initialization in front of \p I.
//
// Looks at the LLT of operand 1 of \p I (used here as the pointer type) and
// reads its address space. When that address space is LOCAL_ADDRESS or
// REGION_ADDRESS and the subtarget reports ldsRequiresM0Init(), an
// S_MOV_B32 writing the immediate -1 to M0 is built in I's parent block,
// using I as the insertion point and I's debug location. In every other case
// nothing is emitted.
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
// NOTE(review): BB is declared again inside the if-block below, which shadows
// this one; this outer declaration looks like the pre-change line of the
// diff -- confirm which of the two is meant to remain.
MachineBasicBlock *BB = I.getParent();

const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
unsigned AS = PtrTy.getAddressSpace();
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
STI.ldsRequiresM0Init()) {
MachineBasicBlock *BB = I.getParent();

// If DS instructions require M0 initialization, insert it before selecting.
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addImm(-1);
}
}

// Common selection path for the memory opcodes that the select() switch
// routes here (loads, stores and the atomic read-modify-write / cmpxchg
// cases). Calls initM0(I) so any required M0 setup is emitted before the
// instruction, then returns the result of selectImpl on \p I.
bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
MachineInstr &I) const {
initM0(I);
return selectImpl(I, *CoverageInfo);
}
Expand Down Expand Up @@ -2866,6 +2860,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return true;
return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
case TargetOpcode::G_ATOMIC_CMPXCHG:
case TargetOpcode::G_ATOMICRMW_XCHG:
case TargetOpcode::G_ATOMICRMW_ADD:
Expand All @@ -2882,13 +2877,11 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case AMDGPU::G_AMDGPU_ATOMIC_DEC:
case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
return selectG_LOAD_ATOMICRMW(I);
return selectG_LOAD_STORE_ATOMICRMW(I);
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
return selectG_AMDGPU_ATOMIC_CMPXCHG(I);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
case TargetOpcode::G_STORE:
return selectG_STORE(I);
case TargetOpcode::G_TRUNC:
return selectG_TRUNC(I);
case TargetOpcode::G_SEXT:
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;

void initM0(MachineInstr &I) const;
bool selectG_LOAD_ATOMICRMW(MachineInstr &I) const;
bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const;
bool selectG_AMDGPU_ATOMIC_CMPXCHG(MachineInstr &I) const;
bool selectG_STORE(MachineInstr &I) const;
bool selectG_SELECT(MachineInstr &I) const;
bool selectG_BRCOND(MachineInstr &I) const;
bool selectG_GLOBAL_VALUE(MachineInstr &I) const;
Expand Down
13 changes: 3 additions & 10 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1039,7 +1039,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(

// Return any unique registers used by \p MI at \p OpIndices that need to be
// handled in a waterfall loop. Returns these registers in \p
// SGPROperandRegs. Returns true if there are any operansd to handle and a
// SGPROperandRegs. Returns true if there are any operands to handle and a
// waterfall loop is necessary.
bool AMDGPURegisterBankInfo::collectWaterfallOperands(
SmallSet<Register, 4> &SGPROperandRegs, MachineInstr &MI,
Expand Down Expand Up @@ -2319,15 +2319,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(

setRegsToType(MRI, DefRegs, HalfTy);

B.buildInstr(Opc)
.addDef(DefRegs[0])
.addUse(Src0Regs[0])
.addUse(Src1Regs[0]);

B.buildInstr(Opc)
.addDef(DefRegs[1])
.addUse(Src0Regs[1])
.addUse(Src1Regs[1]);
B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]});
B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]});

MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
MI.eraseFromParent();
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4313,7 +4313,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
/// instructions in-between do not load or store, and have no side effects.
MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
const MachineRegisterInfo *MRI,
unsigned &FoldAsLoadDefReg,
Register &FoldAsLoadDefReg,
MachineInstr *&DefMI) const {
// Check whether we can move DefMI here.
DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
/// the machine instruction generated due to folding.
MachineInstr *optimizeLoadInstr(MachineInstr &MI,
const MachineRegisterInfo *MRI,
unsigned &FoldAsLoadDefReg,
Register &FoldAsLoadDefReg,
MachineInstr *&DefMI) const override;

std::pair<unsigned, unsigned>
Expand Down