14 changes: 4 additions & 10 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -382,7 +382,7 @@ void RISCVInstrInfo::copyPhysRegVector(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
const TargetRegisterClass *RegClass) const {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
RISCVVType::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
unsigned NF = RISCVRI::getNF(RegClass->TSFlags);

@@ -444,13 +444,7 @@ void RISCVInstrInfo::copyPhysRegVector(
return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
};
auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
uint16_t Encoding) {
MCRegister Reg = RISCV::V0 + Encoding;
if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1)
return Reg;
return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
};

while (I != NumRegs) {
// For non-segment copying, we only do this once as the registers are always
// aligned.
@@ -470,9 +464,9 @@

// Emit actual copying.
// For reversed copying, the encoding should be decreased.
MCRegister ActualSrcReg = FindRegWithEncoding(
MCRegister ActualSrcReg = TRI->findVRegWithEncoding(
RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
MCRegister ActualDstReg = FindRegWithEncoding(
MCRegister ActualDstReg = TRI->findVRegWithEncoding(
RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);

auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
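The file-local `FindRegWithEncoding` lambda removed above is replaced by a shared `RISCVRegisterInfo::findVRegWithEncoding` helper (defined in the next file). As a rough standalone model of what that lookup computes — plain C++ rather than LLVM's API, with illustrative names — the mapping from a raw 5-bit encoding plus a register class's LMUL back to a named register group looks like this:

```cpp
#include <cassert>
#include <cstdint>
#include <string>

// Model: RVV has 32 vector registers v0..v31. A register group at LMUL = N
// (N in {1, 2, 4, 8}) is named after its lowest member, whose encoding must
// be N-aligned, e.g. v8m4 covers v8..v11. The real helper instead returns an
// MCRegister via getMatchingSuperReg(RISCV::V0 + Encoding,
// RISCV::sub_vrm1_0, &RegClass).
std::string findVRegWithEncoding(unsigned LMul, uint16_t Encoding) {
  assert(Encoding < 32 && Encoding % LMul == 0 && "group must be aligned");
  std::string Name = "v" + std::to_string(Encoding);
  if (LMul > 1) // LMUL_1 registers are plain v0..v31, no suffix
    Name += "m" + std::to_string(LMul);
  return Name;
}
// findVRegWithEncoding(4, 8) == "v8m4"; findVRegWithEncoding(1, 3) == "v3".
```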
224 changes: 87 additions & 137 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -389,9 +389,25 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}

// Split a VSPILLx_Mx pseudo into multiple whole register stores separated by
// LMUL*VLENB bytes.
void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
static std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned>
getSpillReloadInfo(unsigned NumRemaining, uint16_t RegEncoding, bool IsSpill) {
if (NumRemaining >= 8 && RegEncoding % 8 == 0)
return {RISCVVType::LMUL_8, RISCV::VRM8RegClass,
IsSpill ? RISCV::VS8R_V : RISCV::VL8RE8_V};
if (NumRemaining >= 4 && RegEncoding % 4 == 0)
return {RISCVVType::LMUL_4, RISCV::VRM4RegClass,
IsSpill ? RISCV::VS4R_V : RISCV::VL4RE8_V};
if (NumRemaining >= 2 && RegEncoding % 2 == 0)
return {RISCVVType::LMUL_2, RISCV::VRM2RegClass,
IsSpill ? RISCV::VS2R_V : RISCV::VL2RE8_V};
return {RISCVVType::LMUL_1, RISCV::VRRegClass,
IsSpill ? RISCV::VS1R_V : RISCV::VL1RE8_V};
}

// Split a VSPILLx_Mx/VRELOADx_Mx pseudo into multiple whole register
// stores/loads separated by LMUL*VLENB bytes.
void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
bool IsSpill) const {
DebugLoc DL = II->getDebugLoc();
MachineBasicBlock &MBB = *II->getParent();
MachineFunction &MF = *MBB.getParent();
@@ -403,148 +419,75 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
unsigned NF = ZvlssegInfo->first;
unsigned LMUL = ZvlssegInfo->second;
assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
unsigned Opcode, SubRegIdx;
switch (LMUL) {
default:
llvm_unreachable("LMUL must be 1, 2, or 4.");
case 1:
Opcode = RISCV::VS1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
break;
case 2:
Opcode = RISCV::VS2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
break;
case 4:
Opcode = RISCV::VS4R_V;
SubRegIdx = RISCV::sub_vrm4_0;
break;
}
static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
"Unexpected subreg numbering");
static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
"Unexpected subreg numbering");
static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
"Unexpected subreg numbering");

Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
// Optimize for constant VLEN.
if (auto VLEN = STI.getRealVLen()) {
const int64_t VLENB = *VLEN / 8;
int64_t Offset = VLENB * LMUL;
STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
} else {
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
uint32_t ShiftAmount = Log2_32(LMUL);
if (ShiftAmount != 0)
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
.addReg(VL)
.addImm(ShiftAmount);
}
unsigned NumRegs = NF * LMUL;
assert(NumRegs <= 8 && "Invalid NF/LMUL combinations.");

Register SrcReg = II->getOperand(0).getReg();
Register Reg = II->getOperand(0).getReg();
uint16_t RegEncoding = TRI->getEncodingValue(Reg);
Register Base = II->getOperand(1).getReg();
bool IsBaseKill = II->getOperand(1).isKill();
Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);

auto *OldMMO = *(II->memoperands_begin());
LocationSize OldLoc = OldMMO->getSize();
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF);
auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize);
for (unsigned I = 0; I < NF; ++I) {
// Adding implicit-use of super register to describe we are using part of
// super register, that prevents machine verifier complaining when part of
// subreg is undef, see comment in MachineVerifier::checkLiveness for more
// detail.
BuildMI(MBB, II, DL, TII->get(Opcode))
.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I))
.addReg(Base, getKillRegState(I == NF - 1))
.addMemOperand(NewMMO)
.addReg(SrcReg, RegState::Implicit);
if (I != NF - 1)
TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);

Register VLENB = 0;
unsigned PreHandledNum = 0;
unsigned I = 0;
while (I != NumRegs) {
auto [LMulHandled, RegClass, Opcode] =
getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill);
auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled);
bool IsLast = I + RegNumHandled == NumRegs;
if (PreHandledNum) {
Register Step;
// Optimize for constant VLEN.
if (auto VLEN = STI.getRealVLen()) {
int64_t Offset = *VLEN / 8 * PreHandledNum;
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
} else {
if (!VLENB) {
VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
}
uint32_t ShiftAmount = Log2_32(PreHandledNum);
if (ShiftAmount == 0)
Step = VLENB;
else {
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
.addReg(VLENB, getKillRegState(IsLast))
.addImm(ShiftAmount);
}
}

BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
.addReg(VL, getKillRegState(I == NF - 2));
Base = NewBase;
}
II->eraseFromParent();
}
.addReg(Step, getKillRegState(Step != VLENB || IsLast));
Base = NewBase;
}

// Split a VSPILLx_Mx pseudo into multiple whole register loads separated by
// LMUL*VLENB bytes.
void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
DebugLoc DL = II->getDebugLoc();
MachineBasicBlock &MBB = *II->getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
MCRegister ActualReg = findVRegWithEncoding(RegClass, RegEncoding);
MachineInstrBuilder MIB =
BuildMI(MBB, II, DL, TII->get(Opcode))
.addReg(ActualReg, getDefRegState(!IsSpill))
.addReg(Base, getKillRegState(IsLast))
.addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
VRegSize * RegNumHandled));

auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
unsigned NF = ZvlssegInfo->first;
unsigned LMUL = ZvlssegInfo->second;
assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
unsigned Opcode, SubRegIdx;
switch (LMUL) {
default:
llvm_unreachable("LMUL must be 1, 2, or 4.");
case 1:
Opcode = RISCV::VL1RE8_V;
SubRegIdx = RISCV::sub_vrm1_0;
break;
case 2:
Opcode = RISCV::VL2RE8_V;
SubRegIdx = RISCV::sub_vrm2_0;
break;
case 4:
Opcode = RISCV::VL4RE8_V;
SubRegIdx = RISCV::sub_vrm4_0;
break;
}
static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
"Unexpected subreg numbering");
static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
"Unexpected subreg numbering");
static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
"Unexpected subreg numbering");

Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
// Optimize for constant VLEN.
if (auto VLEN = STI.getRealVLen()) {
const int64_t VLENB = *VLEN / 8;
int64_t Offset = VLENB * LMUL;
STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
} else {
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
uint32_t ShiftAmount = Log2_32(LMUL);
if (ShiftAmount != 0)
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
.addReg(VL)
.addImm(ShiftAmount);
}
// Adding implicit-use of super register to describe we are using part of
// super register, that prevents machine verifier complaining when part of
// subreg is undef, see comment in MachineVerifier::checkLiveness for more
// detail.
if (IsSpill)
MIB.addReg(Reg, RegState::Implicit);

Register DestReg = II->getOperand(0).getReg();
Register Base = II->getOperand(1).getReg();
bool IsBaseKill = II->getOperand(1).isKill();
Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
auto *OldMMO = *(II->memoperands_begin());
LocationSize OldLoc = OldMMO->getSize();
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF);
auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize);
for (unsigned I = 0; I < NF; ++I) {
BuildMI(MBB, II, DL, TII->get(Opcode),
TRI->getSubReg(DestReg, SubRegIdx + I))
.addReg(Base, getKillRegState(I == NF - 1))
.addMemOperand(NewMMO);
if (I != NF - 1)
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
.addReg(VL, getKillRegState(I == NF - 2));
Base = NewBase;
PreHandledNum = RegNumHandled;
RegEncoding += RegNumHandled;
I += RegNumHandled;
}
II->eraseFromParent();
}
@@ -635,9 +578,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}

// Handle spill/fill of synthetic register classes for segment operations to
// ensure correctness in the edge case one gets spilled. There are many
// possible optimizations here, but given the extreme rarity of such spills,
// we prefer simplicity of implementation for now.
// ensure correctness in the edge case one gets spilled.
switch (MI.getOpcode()) {
case RISCV::PseudoVSPILL2_M1:
case RISCV::PseudoVSPILL2_M2:
Expand All @@ -650,7 +591,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case RISCV::PseudoVSPILL6_M1:
case RISCV::PseudoVSPILL7_M1:
case RISCV::PseudoVSPILL8_M1:
lowerVSPILL(II);
lowerSegmentSpillReload(II, /*IsSpill=*/true);
return true;
case RISCV::PseudoVRELOAD2_M1:
case RISCV::PseudoVRELOAD2_M2:
Expand All @@ -663,7 +604,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case RISCV::PseudoVRELOAD6_M1:
case RISCV::PseudoVRELOAD7_M1:
case RISCV::PseudoVRELOAD8_M1:
lowerVRELOAD(II);
lowerSegmentSpillReload(II, /*IsSpill=*/false);
return true;
}

@@ -1052,3 +993,12 @@ bool RISCVRegisterInfo::getRegAllocationHints(

return BaseImplRetVal;
}

Register
RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass,
uint16_t Encoding) const {
MCRegister Reg = RISCV::V0 + Encoding;
if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1)
return Reg;
return getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
}
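To see how `getSpillReloadInfo` carves a segment spill into the largest aligned whole-register operations, here is a minimal standalone simulation of the loop in `lowerSegmentSpillReload` (plain C++, with registers modeled as bare encodings; the sample NF/LMUL/encoding values are illustrative):

```cpp
#include <cstdio>

// Mirrors getSpillReloadInfo's greedy choice: the largest LMUL chunk that
// both fits in the remaining registers and starts on an aligned encoding.
static unsigned pickLMul(unsigned NumRemaining, unsigned RegEncoding) {
  for (unsigned LMul = 8; LMul > 1; LMul /= 2)
    if (NumRemaining >= LMul && RegEncoding % LMul == 0)
      return LMul;
  return 1;
}

int main() {
  // Example: an NF=3, LMUL=2 segment, i.e. 6 registers starting at v10.
  unsigned NumRegs = 6, Encoding = 10;
  for (unsigned I = 0; I != NumRegs;) {
    unsigned LMul = pickLMul(NumRegs - I, Encoding);
    std::printf("store v%u..v%u as one LMUL=%u op\n", Encoding,
                Encoding + LMul - 1, LMul);
    Encoding += LMul;
    I += LMul;
  }
  // v10 is not 4-aligned, so this prints one LMUL=2 op (v10..v11) followed
  // by one LMUL=4 op (v12..v15): two memory operations instead of six.
}
```

The VLENB and kill-flag bookkeeping in the real loop then advances the base pointer by `PreHandledNum * VLENB` between chunks, which is why the lowered sequences in the test diff below lose their per-register pointer bumps.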
7 changes: 5 additions & 2 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -107,8 +107,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;

void lowerVSPILL(MachineBasicBlock::iterator II) const;
void lowerVRELOAD(MachineBasicBlock::iterator II) const;
void lowerSegmentSpillReload(MachineBasicBlock::iterator II,
bool IsSpill) const;

Register getFrameRegister(const MachineFunction &MF) const override;

@@ -144,6 +144,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;

Register findVRegWithEncoding(const TargetRegisterClass &RegClass,
uint16_t Encoding) const;

static bool isVRRegClass(const TargetRegisterClass *RC) {
return RISCVRI::isVRegClass(RC->TSFlags) &&
RISCVRI::getNF(RC->TSFlags) == 1;
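A hedged sketch of how the unified hook might be driven from a call site (hypothetical helper; the real dispatch is the opcode switch in `eliminateFrameIndex` above, and using `mayStore()` to distinguish spill from reload is an assumption about the pseudos' flags):

```cpp
// Hypothetical wrapper: the pseudo's opcode identifies a segment
// spill/reload, and a single hook now lowers both directions.
static bool lowerIfSegmentPseudo(const RISCVRegisterInfo &TRI,
                                 MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  if (!RISCV::isRVVSpillForZvlsseg(MI.getOpcode()))
    return false;
  TRI.lowerSegmentSpillReload(II, /*IsSpill=*/MI.mayStore());
  return true;
}
```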
20 changes: 2 additions & 18 deletions llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -40,15 +40,7 @@ define void @_Z3foov() {
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
; CHECK-NEXT: #APP
@@ -59,15 +51,7 @@
; CHECK-NEXT: addi a0, a0, 928
; CHECK-NEXT: vmsbc.vx v0, v8, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vl2r.v v14, (a0) # vscale x 16-byte Folded Reload
; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0