Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Promote the Pseudo Opcode of instructions that deduce the sign extension for extsw from 32 bits to 64 bits when eliminating the extsw instruction in PPCMIPeepholes optimization. #85451

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
10 changes: 6 additions & 4 deletions llvm/lib/Target/PowerPC/P10InstrResources.td
Original file line number Diff line number Diff line change
Expand Up @@ -825,15 +825,17 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
SRADI_rec,
SRAWI_rec
SRAWI_rec,
SRAWI8_rec
)>;

// Single crack instructions
// 4 Cycles ALU2 operations, 2 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
SRAW_rec
SRAW_rec,
SRAW8_rec
)>;

// 2-way crack instructions
Expand Down Expand Up @@ -926,7 +928,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
SETNBC, SETNBC8,
SETNBCR, SETNBCR8,
SRADI, SRADI_32,
SRAWI,
SRAWI, SRAWI8,
SUBFIC, SUBFIC8,
SUBFME, SUBFME8,
SUBFME8O, SUBFMEO,
Expand Down Expand Up @@ -1008,7 +1010,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
SLD,
SLW, SLW8,
SRAD,
SRAW,
SRAW, SRAW8,
SRD,
SRW, SRW8,
SUBF, SUBF8,
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/PowerPC/P9InstrResources.td
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instregex "F(N)?ABS(D|S)$"),
(instregex "FNEG(D|S)$"),
(instregex "FCPSGN(D|S)$"),
(instregex "SRAW(I)?$"),
(instregex "SRAW(8)?$"),
(instregex "SRAWI(8)?$"),
(instregex "ISEL(8)?$"),
RLDIMI,
XSIEXPDP,
Expand Down Expand Up @@ -1091,7 +1092,8 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
(instregex "RLD(I)?C(R|L)_rec$"),
(instregex "RLW(IMI|INM|NM)(8)?_rec$"),
(instregex "SLW(8)?_rec$"),
(instregex "SRAW(I)?_rec$"),
(instregex "SRAW(8)?_rec$"),
(instregex "SRAWI(8)?_rec$"),
(instregex "SRW(8)?_rec$"),
RLDICL_32_rec,
RLDIMI_rec
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/PowerPC/PPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,18 @@ def getAltVSXFMAOpcode : InstrMapping {
let ValueCols = [["1"]];
}

def get64BitInstrFromSignedExt32BitInstr : InstrMapping {
let FilterClass = "SExt32To64";
// Instructions with the same opcode.
let RowFields = ["Inst"];
// Instructions with the same Interpretation64Bit value form a column.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "form a column" mean?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let FilterClass = "SExt32To64"

means we will deal with the instruction which only has flag SExt32To64

let RowFields = ["Inst"];

means that if two instruction has the same Pseduo Opcode will be on the same row.

let ColFields = ["Interpretation64Bit"];

means that the instruction for 64bit is one column, 32-bit is another column.

it will generate something like this

int get64BitInstrFromSignedExt32BitInstr(uint16_t Opcode) {
static const uint16_t get64BitInstrFromSignedExt32BitInstrTable[][2] = {
  { PPC::ANDI_rec, PPC::ANDI8_rec },
  { PPC::EXTSB, PPC::EXTSB8 },
  { PPC::EXTSB8_32_64, PPC::EXTSB8 },
  { PPC::EXTSB_rec, PPC::EXTSB8_rec },
  { PPC::EXTSH, PPC::EXTSH8 },
  { PPC::EXTSH8_32_64, PPC::EXTSH8 },
  { PPC::EXTSH_rec, PPC::EXTSH8_rec },
  { PPC::EXTSW, PPC::EXTSW_32_64 },
  { PPC::EXTSW_rec, PPC::EXTSW_32_64_rec },
  { PPC::LBZ, PPC::LBZ8 },
  { PPC::LBZX, PPC::LBZX8 },
  { PPC::LHA, PPC::LHA8 },
  { PPC::LHAX, PPC::LHAX8 },
  { PPC::LHZ, PPC::LHZ8 },
  { PPC::LHZX, PPC::LHZX8 },
  { PPC::LI, PPC::LI8 },

let ColFields = ["Interpretation64Bit"];
// The key column are not the Interpretation64Bit-form instructions.
let KeyCol = ["0"];
// Value columns are the Interpretation64Bit-form instructions.
let ValueCols = [["1"]];
}

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,14 @@ defm SLW8 : XForm_6r<31, 24, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"slw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
defm SRW8 : XForm_6r<31, 536, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"srw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;

defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"sraw", "$RA, $RST, $RB", IIC_IntShift,
[]>, SExt32To64;

defm SRAWI8 : XForm_10rc<31, 824, (outs g8rc:$RA), (ins g8rc:$RST, u5imm:$RB),
"srawi", "$RA, $RST, $RB", IIC_IntShift, []>, SExt32To64;

} // Interpretation64Bit

// For fast-isel:
Expand Down
238 changes: 238 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5234,6 +5234,244 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;

// The `PromoteInstr32To64ForEmliEXTSW` function is recursive. The parameter
// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
// incremented only when `PromoteInstr32To64ForEmliEXTSW` calls itself more
// than once. This is done to prevent exponential recursion. The function will
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably only mention the recursion after explaining how we follow the register uses below.

// promote the instruction which defines the register `Reg` in the parameter
// from a 32-bit to a 64-bit instruction if needed. Additionally, all the used
// and defined registers in the instruction may also need to be promoted from
// 32-bit to 64-bit based on the promoted instruction description. If a used
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should mention the tablegen flag used to map PPC instructions and the opcodes defined by the issignorzeroextended().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I put the new adding comment in the code which is in the middle of the function instead of the description of the function.

// register is promoted to 64-bit, the instruction which defines the promoted
// register also needs to be promoted. After an instruction is promoted to 64
// bits, the defined register of the promoted instruction is also 64-bit. A
// defined register may be used by other instructions; in such cases,
// we need to extract the 32-bit register used by other
// non-promoted 32-bit instructions from the promoted 64-bit register.
void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,

MachineRegisterInfo *MRI,
unsigned BinOpDepth,
LiveVariables *LV) const {
MachineInstr *MI = MRI->getVRegDef(Reg);
if (!MI)
return;

unsigned Opcode = MI->getOpcode();
bool IsNonSignedExtInstrPromoted = false;
int NewOpcode = -1;

auto CheckAndSetNewOpcode = [&](int NewOpc) {
if (!IsNonSignedExtInstrPromoted) {
NewOpcode = NewOpc;
IsNonSignedExtInstrPromoted = true;
}
};

auto SetNewOpcode = [&](int NewOpc) {
NewOpcode = NewOpc;
IsNonSignedExtInstrPromoted = true;
};

switch (Opcode) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all the cases for opcode switch statements in PromoteInstr32To64ForEmliEXTSW coming from the union of definedBySignExtendingOp() and isSignExtended()/isSignOrZeroExtended()?

Copy link
Contributor Author

@diggerlin diggerlin May 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes,
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp#L1051
-->https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCInstrInfo.h#L617

bool isSignExtended(const unsigned Reg,
                      const MachineRegisterInfo *MRI) const {
    return isSignOrZeroExtended(Reg, 0, MRI).first;
  }

-->
PPCInstrInfo::isSignOrZeroExtended() in the PPCInstrInfo.cpp will call

-->
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp#L5253
definedBySignExtendingOp()

case PPC::OR:
CheckAndSetNewOpcode(PPC::OR8);
[[fallthrough]];
case PPC::ISEL:
CheckAndSetNewOpcode(PPC::ISEL8);
[[fallthrough]];
case PPC::OR8:
case PPC::PHI:
if (BinOpDepth < MAX_BINOP_DEPTH) {
unsigned OperandEnd = 3, OperandStride = 1;
if (Opcode == PPC::PHI) {
OperandEnd = MI->getNumOperands();
OperandStride = 2;
}

for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
assert(MI->getOperand(I).isReg() && "Operand must be register");
Register SrcReg = MI->getOperand(I).getReg();
PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
}

if (!IsNonSignedExtInstrPromoted)
return;
}
break;
case PPC::COPY: {
Register SrcReg = MI->getOperand(1).getReg();
const MachineFunction *MF = MI->getMF();
if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should copy the explanation comments for these conditions from issignorzeroextended() as well

PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
return;
}
// From here on everything is SVR4ABI
if (MI->getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock())
return;

if (SrcReg != PPC::X3) {
PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
return;
}
}
return;
case PPC::ORI:
CheckAndSetNewOpcode(PPC::ORI8);
[[fallthrough]];
case PPC::XORI:
CheckAndSetNewOpcode(PPC::XORI8);
[[fallthrough]];
case PPC::ORIS:
CheckAndSetNewOpcode(PPC::ORIS8);
[[fallthrough]];
case PPC::XORIS:
CheckAndSetNewOpcode(PPC::XORIS8);
[[fallthrough]];
case PPC::ORI8:
case PPC::XORI8:
case PPC::ORIS8:
case PPC::XORIS8: {
Register SrcReg = MI->getOperand(1).getReg();
PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
// If Opcode is PPC::ORI8, PPC::XORI8, PPC::ORIS8, or PPC::XORIS8,
// the instruction does not need to be promoted.
if (!IsNonSignedExtInstrPromoted)
return;
break;
}
case PPC::AND:
CheckAndSetNewOpcode(PPC::AND8);
[[fallthrough]];
case PPC::AND8: {
if (BinOpDepth < MAX_BINOP_DEPTH) {
Register SrcReg1 = MI->getOperand(1).getReg();
PromoteInstr32To64ForEmliEXTSW(SrcReg1, MRI, BinOpDepth, LV);
Register SrcReg2 = MI->getOperand(2).getReg();
PromoteInstr32To64ForEmliEXTSW(SrcReg2, MRI, BinOpDepth, LV);
// If Opcode is PPC::AND8, the instruction does not need to be promoted.
if (!IsNonSignedExtInstrPromoted)
return;
}
break;
}
case PPC::RLWINM:
SetNewOpcode(PPC::RLWINM8);
break;
case PPC::RLWINM_rec:
SetNewOpcode(PPC::RLWINM8_rec);
break;
case PPC::RLWNM:
SetNewOpcode(PPC ::RLWNM8);
break;
case PPC::RLWNM_rec:
SetNewOpcode(PPC::RLWNM8_rec);
break;
case PPC::ANDC_rec:
SetNewOpcode(PPC::ANDC8_rec);
break;
case PPC::ANDIS_rec:
SetNewOpcode(PPC::ANDIS8_rec);
break;
default:
break;
}

amy-kwan marked this conversation as resolved.
Show resolved Hide resolved
const PPCInstrInfo *TII =
MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
amy-kwan marked this conversation as resolved.
Show resolved Hide resolved
if ((definedBySignExtendingOp(Reg, MRI) && !TII->isZExt32To64(Opcode) &&
!isOpZeroOfSubwordPreincLoad(Opcode)) ||
IsNonSignedExtInstrPromoted) {

const TargetRegisterClass *RC = MRI->getRegClass(Reg);

if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
return;

if (!IsNonSignedExtInstrPromoted)
NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);

assert(NewOpcode != -1 &&
"Must have a 64-bit opcode to map the 32-bit opcode!");

const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
const MCInstrDesc &MCID = TII->get(NewOpcode);
const TargetRegisterClass *NewRC =
TRI->getRegClass(MCID.operands()[0].RegClass);

Register SrcReg = MI->getOperand(0).getReg();
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);

// If the register class of the defined register in the 32-bit instruction
// is the same as the register class of the defined register in the promoted
// 64-bit instruction, we do not need to promote the instruction.
if (NewRC == SrcRC)
return;

DebugLoc DL = MI->getDebugLoc();
auto MBB = MI->getParent();

// Since the pseudo-opcode of the instruction is promoted from 32-bit to
// 64-bit, if the operand reg class of the original instruction belongs to
// PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
// the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
// respectively.
DenseMap<unsigned, Register> PromoteRegs;
DenseMap<unsigned, Register> ReCalRegs;
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
MachineOperand &Operand = MI->getOperand(i);
if (Operand.isReg()) {
Register OperandReg = Operand.getReg();
if (!OperandReg.isVirtual())
continue;

const TargetRegisterClass *NewUsedRegRC =
TRI->getRegClass(MCID.operands()[i].RegClass);
const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
if (NewUsedRegRC != OrgRC &&
(OrgRC == &PPC::GPRCRegClass ||
OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
// Promote the used 32-bit register to 64-bit register.
Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
.addReg(TmpReg)
.addReg(OperandReg)
.addImm(PPC::sub_32);
PromoteRegs[i] = DstTmpReg;
ReCalRegs[i] = DstTmpReg;
} else {
ReCalRegs[i] = OperandReg;
}
}
}

Register NewDefinedReg = MRI->createVirtualRegister(NewRC);

BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
MachineBasicBlock::instr_iterator Iter(MI);
--Iter;
for (unsigned i = 1; i < MI->getNumOperands(); i++)
if (PromoteRegs.find(i) != PromoteRegs.end())
MachineInstrBuilder(*Iter->getMF(), Iter)
.addReg(PromoteRegs[i], RegState::Kill);
else
Iter->addOperand(MI->getOperand(i));

for (auto Iter = ReCalRegs.begin(); Iter != ReCalRegs.end(); Iter++)
LV->recomputeForSingleDefVirtReg(Iter->second);
MI->eraseFromParent();

// Demote the 64-bit defined regster to a 32-bit register.
BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
.addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
LV->recomputeForSingleDefVirtReg(NewDefinedReg);
return;
}
return;
}

// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is incremented
// only when isSignOrZeroExtended calls itself more than once. This is done to
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER
Expand Down Expand Up @@ -624,6 +625,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const MachineRegisterInfo *MRI) const {
return isSignOrZeroExtended(Reg, 0, MRI).second;
}
void PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
MachineRegisterInfo *MRI,
unsigned BinOpDepth,
LiveVariables *LV) const;

bool convertToImmediateForm(MachineInstr &MI,
SmallSet<Register, 4> &RegsToUpdate,
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,16 @@ bool PPCMIPeephole::simplifyCode() {
} else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
TII->isSignExtended(NarrowReg, MRI)) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
// sign-extended. but we are not sure whether a spill will occur
// during register allocation. All these instructions in the chain
// used to deduce sign extension to eliminate the 'extsw' will need to
// be promoted to 64-bit pseudo instructions when the 'extsw' is
// eliminated. If there is no promotion, it will use the 'stw' instead
// of 'std', and 'lwz' instead of 'ld' when spilling, since the
// register class is 32-bits. Consequently, the high 32-bit
// information will be lost.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might flow nicer if we put the what will happen if we don't promote first and then say how we promote after.

TII->PromoteInstr32To64ForEmliEXTSW(NarrowReg, MRI, 0, LV);

LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
Register TmpReg =
MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCScheduleP7.td
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ let SchedModel = P7Model in {
RLWNM, RLWNM8, RLWNM_rec, RLDIMI, RLDIMI_rec,
RLDICL_32, RLDICL_32_64, RLDICL_32_rec, RLDICR_32, RLWINM8_rec, RLWNM8_rec,
SLD, SLD_rec, SLW, SLW8, SLW_rec, SLW8_rec, SRD, SRD_rec, SRW, SRW8, SRW_rec,
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec,
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAWI8, SRAWI8_rec, SRAD, SRAD_rec, SRAW, SRAW_rec, SRAW8, SRAW8_rec,
SRADI_32, SUBFE, SUBFE8, SUBFE8O_rec, SUBFEO_rec
)>;

Expand Down
Loading
Loading