Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Promote the Pseudo Opcode of instructions that deduce the sign extension for extsw from 32 bits to 64 bits when eliminating the extsw instruction in PPCMIPeepholes optimization. #85451

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
10 changes: 6 additions & 4 deletions llvm/lib/Target/PowerPC/P10InstrResources.td
Original file line number Diff line number Diff line change
Expand Up @@ -825,15 +825,17 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
SRADI_rec,
SRAWI_rec
SRAWI_rec,
SRAWI8_rec
)>;

// Single crack instructions
// 4 Cycles ALU2 operations, 2 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
SRAW_rec
SRAW_rec,
SRAW8_rec
)>;

// 2-way crack instructions
Expand Down Expand Up @@ -926,7 +928,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
SETNBC, SETNBC8,
SETNBCR, SETNBCR8,
SRADI, SRADI_32,
SRAWI,
SRAWI, SRAWI8,
SUBFIC, SUBFIC8,
SUBFME, SUBFME8,
SUBFME8O, SUBFMEO,
Expand Down Expand Up @@ -1008,7 +1010,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
SLD,
SLW, SLW8,
SRAD,
SRAW,
SRAW, SRAW8,
SRD,
SRW, SRW8,
SUBF, SUBF8,
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/PowerPC/P9InstrResources.td
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instregex "F(N)?ABS(D|S)$"),
(instregex "FNEG(D|S)$"),
(instregex "FCPSGN(D|S)$"),
(instregex "SRAW(I)?$"),
(instregex "SRAW(8)?$"),
(instregex "SRAWI(8)?$"),
(instregex "ISEL(8)?$"),
RLDIMI,
XSIEXPDP,
Expand Down Expand Up @@ -1091,7 +1092,8 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
(instregex "RLD(I)?C(R|L)_rec$"),
(instregex "RLW(IMI|INM|NM)(8)?_rec$"),
(instregex "SLW(8)?_rec$"),
(instregex "SRAW(I)?_rec$"),
(instregex "SRAW(8)?_rec$"),
(instregex "SRAWI(8)?_rec$"),
(instregex "SRW(8)?_rec$"),
RLDICL_32_rec,
RLDIMI_rec
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/PowerPC/PPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,18 @@ def getAltVSXFMAOpcode : InstrMapping {
let ValueCols = [["1"]];
}

def get64BitInstrFromSignedExt32BitInstr : InstrMapping {
let FilterClass = "SExt32To64";
// Instructions with the same opcode.
let RowFields = ["Inst"];
// Instructions with the same Interpretation64Bit value form a column.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "form a column" mean?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let FilterClass = "SExt32To64"

means we will deal with the instruction which only has flag SExt32To64

let RowFields = ["Inst"];

means that if two instruction has the same Pseduo Opcode will be on the same row.

let ColFields = ["Interpretation64Bit"];

means that the instruction for 64bit is one column, 32-bit is another column.

it will generate something like this

int get64BitInstrFromSignedExt32BitInstr(uint16_t Opcode) {
static const uint16_t get64BitInstrFromSignedExt32BitInstrTable[][2] = {
  { PPC::ANDI_rec, PPC::ANDI8_rec },
  { PPC::EXTSB, PPC::EXTSB8 },
  { PPC::EXTSB8_32_64, PPC::EXTSB8 },
  { PPC::EXTSB_rec, PPC::EXTSB8_rec },
  { PPC::EXTSH, PPC::EXTSH8 },
  { PPC::EXTSH8_32_64, PPC::EXTSH8 },
  { PPC::EXTSH_rec, PPC::EXTSH8_rec },
  { PPC::EXTSW, PPC::EXTSW_32_64 },
  { PPC::EXTSW_rec, PPC::EXTSW_32_64_rec },
  { PPC::LBZ, PPC::LBZ8 },
  { PPC::LBZX, PPC::LBZX8 },
  { PPC::LHA, PPC::LHA8 },
  { PPC::LHAX, PPC::LHAX8 },
  { PPC::LHZ, PPC::LHZ8 },
  { PPC::LHZX, PPC::LHZX8 },
  { PPC::LI, PPC::LI8 },

let ColFields = ["Interpretation64Bit"];
// The key column are not the Interpretation64Bit-form instructions.
let KeyCol = ["0"];
// Value columns are the Interpretation64Bit-form instructions.
let ValueCols = [["1"]];
}

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,14 @@ defm SLW8 : XForm_6r<31, 24, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"slw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
defm SRW8 : XForm_6r<31, 536, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"srw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;

defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"sraw", "$RA, $RST, $RB", IIC_IntShift,
[(set i64:$RA, (PPCsra i64:$RST, i64:$RB))]>, SExt32To64;

defm SRAWI8 : XForm_10rc<31, 824, (outs g8rc:$RA), (ins g8rc:$RST, u5imm:$RB),
"srawi", "$RA, $RST, $RB", IIC_IntShift, []>, SExt32To64;

} // Interpretation64Bit

// For fast-isel:
Expand Down
212 changes: 212 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5234,6 +5234,218 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;

void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment to describe this function.

MachineRegisterInfo *MRI,
unsigned BinOpDepth,
LiveVariables *LV) const {
MachineInstr *MI = MRI->getVRegDef(Reg);
if (!MI)
return;

unsigned Opcode = MI->getOpcode();
bool IsReplaceInstr = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we update the name of this variable? Something like IsPromotedInstr?

int NewOpcode = -1;

auto SetNewOpcode = [&](int NewOpc) {
if (!IsReplaceInstr) {
NewOpcode = NewOpc;
IsReplaceInstr = true;
}
};

switch (Opcode) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all the cases for opcode switch statements in PromoteInstr32To64ForEmliEXTSW coming from the union of definedBySignExtendingOp() and isSignExtended()/isSignOrZeroExtended()?

Copy link
Contributor Author

@diggerlin diggerlin May 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes,
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp#L1051
-->https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCInstrInfo.h#L617

bool isSignExtended(const unsigned Reg,
                      const MachineRegisterInfo *MRI) const {
    return isSignOrZeroExtended(Reg, 0, MRI).first;
  }

-->
PPCInstrInfo::isSignOrZeroExtended() in the PPCInstrInfo.cpp will call

-->
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp#L5253
definedBySignExtendingOp()

case PPC::OR:
SetNewOpcode(PPC::OR8);
[[fallthrough]];
case PPC::ISEL:
SetNewOpcode(PPC::ISEL8);
[[fallthrough]];
case PPC::OR8:
case PPC::PHI:
if (BinOpDepth < MAX_BINOP_DEPTH) {
unsigned OperandEnd = 3, OperandStride = 1;
if (Opcode == PPC::PHI) {
OperandEnd = MI->getNumOperands();
OperandStride = 2;
}

for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to use < instead of != here.

assert(MI->getOperand(I).isReg() && "Operand must be register");
Register SrcReg = MI->getOperand(I).getReg();
replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
}

if (!IsReplaceInstr)
return;
}
break;
case PPC::COPY: {
Register SrcReg = MI->getOperand(1).getReg();
const MachineFunction *MF = MI->getMF();
if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should copy the explanation comments for these conditions from issignorzeroextended() as well

replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
return;
}
// From here on everything is SVR4ABI
if (MI->getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock())
return;

if (SrcReg != PPC::X3) {
replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
return;
}
}
return;
case PPC::ORI:
SetNewOpcode(PPC::ORI8);
[[fallthrough]];
case PPC::XORI:
SetNewOpcode(PPC::XORI8);
[[fallthrough]];
case PPC::ORIS:
SetNewOpcode(PPC::ORIS8);
[[fallthrough]];
case PPC::XORIS:
SetNewOpcode(PPC::XORIS8);
[[fallthrough]];
case PPC::ORI8:
case PPC::XORI8:
case PPC::ORIS8:
case PPC::XORIS8: {
Register SrcReg = MI->getOperand(1).getReg();
replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);

if (!IsReplaceInstr)
return;
break;
}
case PPC::AND:
SetNewOpcode(PPC::AND8);
[[fallthrough]];
case PPC::AND8: {
if (BinOpDepth < MAX_BINOP_DEPTH) {
Register SrcReg1 = MI->getOperand(1).getReg();
replaceInstrAfterElimExt32To64(SrcReg1, MRI, BinOpDepth, LV);
Register SrcReg2 = MI->getOperand(2).getReg();
replaceInstrAfterElimExt32To64(SrcReg2, MRI, BinOpDepth, LV);
if (!IsReplaceInstr)
return;
}
break;
}
case PPC::RLWINM:
SetNewOpcode(PPC::RLWINM8);
break;
case PPC::RLWINM_rec:
SetNewOpcode(PPC::RLWINM8_rec);
break;
case PPC::RLWNM:
SetNewOpcode(PPC ::RLWNM8);
break;
case PPC::RLWNM_rec:
SetNewOpcode(PPC::RLWNM8_rec);
break;
case PPC::ANDC_rec:
SetNewOpcode(PPC::ANDC8_rec);
break;
case PPC::ANDIS_rec:
SetNewOpcode(PPC::ANDIS8_rec);
break;
default:
break;
}

amy-kwan marked this conversation as resolved.
Show resolved Hide resolved
const PPCInstrInfo *TII =
MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
amy-kwan marked this conversation as resolved.
Show resolved Hide resolved
if ((definedBySignExtendingOp(Reg, MRI) && !TII->isZExt32To64(Opcode) &&
!isOpZeroOfSubwordPreincLoad(Opcode)) ||
IsReplaceInstr) {

const TargetRegisterClass *RC = MRI->getRegClass(Reg);

if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
return;

if (!IsReplaceInstr)
NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);

assert(NewOpcode != -1 &&
"Must have a 64-bit opcode to map the 32-bit opcode!");

const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
const MCInstrDesc &MCID = TII->get(NewOpcode);

Register SrcReg = MI->getOperand(0).getReg();
const TargetRegisterClass *NewRC =
TRI->getRegClass(MCID.operands()[0].RegClass);
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);

if (NewRC == SrcRC)
return;

DebugLoc DL = MI->getDebugLoc();
auto MBB = MI->getParent();

// Since the pseudo-opcode of the instruction is promoted from 32-bit to
// 64-bit, if the operand of the original instruction belongs to
// PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
// the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
// respectively.
DenseMap<unsigned, Register> PromoteRegs;
DenseMap<unsigned, Register> ReCalRegs;
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
MachineOperand &Operand = MI->getOperand(i);
if (Operand.isReg()) {
Register OperandReg = Operand.getReg();
if (!OperandReg.isVirtual())
continue;

const TargetRegisterClass *RC =
TRI->getRegClass(MCID.operands()[i].RegClass);
const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
if (RC != MRI->getRegClass(OperandReg) &&
(OrgRC == &PPC::GPRCRegClass ||
OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
Register TmpReg = MRI->createVirtualRegister(RC);
Register DstTmpReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
.addReg(TmpReg)
.addReg(OperandReg)
.addImm(PPC::sub_32);
PromoteRegs[i] = DstTmpReg;
ReCalRegs[i] = DstTmpReg;
} else {
ReCalRegs[i] = OperandReg;
}
}
}

Register NewReg = MRI->createVirtualRegister(NewRC);

BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewReg);
MachineBasicBlock::instr_iterator Iter(MI);
--Iter;
for (unsigned i = 1; i < MI->getNumOperands(); i++)
if (PromoteRegs.find(i) != PromoteRegs.end())
MachineInstrBuilder(*Iter->getMF(), Iter)
.addReg(PromoteRegs[i], RegState::Kill);
else
Iter->addOperand(MI->getOperand(i));

for (auto Iter = ReCalRegs.begin(); Iter != ReCalRegs.end(); Iter++)
LV->recomputeForSingleDefVirtReg(Iter->second);
MI->eraseFromParent();

BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
.addReg(NewReg, RegState::Kill, PPC::sub_32);
LV->recomputeForSingleDefVirtReg(NewReg);
return;
}
return;
}

// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is incremented
// only when isSignOrZeroExtended calls itself more than once. This is done to
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER
Expand Down Expand Up @@ -624,6 +625,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const MachineRegisterInfo *MRI) const {
return isSignOrZeroExtended(Reg, 0, MRI).second;
}
void replaceInstrAfterElimExt32To64(const Register &Reg,
MachineRegisterInfo *MRI,
unsigned BinOpDepth,
LiveVariables *LV) const;

bool convertToImmediateForm(MachineInstr &MI,
SmallSet<Register, 4> &RegsToUpdate,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,7 @@ bool PPCMIPeephole::simplifyCode() {
TII->isSignExtended(NarrowReg, MRI)) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);
kamaub marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A couple comments regarding this line:

  • This call does not make sense with the comment on 1052-1053. It would be better to put this call above the comment.
  • The naming of the function seems unclear. We can consider renaming the function from replaceInstrAfter to promoteInstrBefore, or something similar.
  • Add new documentation for the function.

LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
Register TmpReg =
MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCScheduleP7.td
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ let SchedModel = P7Model in {
RLWNM, RLWNM8, RLWNM_rec, RLDIMI, RLDIMI_rec,
RLDICL_32, RLDICL_32_64, RLDICL_32_rec, RLDICR_32, RLWINM8_rec, RLWNM8_rec,
SLD, SLD_rec, SLW, SLW8, SLW_rec, SLW8_rec, SRD, SRD_rec, SRW, SRW8, SRW_rec,
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec,
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAWI8, SRAWI8_rec, SRAD, SRAD_rec, SRAW, SRAW_rec, SRAW8, SRAW8_rec,
SRADI_32, SUBFE, SUBFE8, SUBFE8O_rec, SUBFEO_rec
)>;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ body: |
%3 = IMPLICIT_DEF
%2 = LI 170
%4 = RLWNM killed %1, %2, 20, 27
; CHECK: RLWINM killed %1, 10, 20, 27
; CHECK: RLWINM8 killed %6, 10, 20, 27
; CHECK-LATE: rlwinm 3, 3, 10, 20, 27
$x3 = EXTSW_32_64 %4
BLR8 implicit $lr8, implicit $rm, implicit $x3
Expand Down Expand Up @@ -604,7 +604,7 @@ body: |
%2 = LI 48
%5 = COPY %0.sub_32
%8 = SRW killed %5, killed %2
; CHECK: LI 0
; CHECK: LI8 0
; CHECK-LATE: li 3, 0
$x3 = EXTSW_32_64 %8
BLR8 implicit $lr8, implicit $rm, implicit $x3
Expand Down Expand Up @@ -722,7 +722,7 @@ body: |
%3 = COPY %0.sub_32
%4 = SRAW killed %3, killed %2, implicit-def dead $carry
; CHECK: LI 48
; CHECK: SRAW killed %3, killed %2, implicit-def dead $carry
; CHECK: SRAW8 killed %7, killed %9, implicit-def $carry, implicit-def dead $carry
; CHECK-LATE: sraw 3, 3, 4
%5 = EXTSW_32_64 killed %4
$x3 = COPY %5
Expand Down Expand Up @@ -779,7 +779,7 @@ body: |
%2 = LI 80
%3 = COPY %0.sub_32
%4 = SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
; CHECK: SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
; CHECK: SRAW8_rec killed %10, killed %12, implicit-def $carry, implicit-def $cr0, implicit-def dead $carry, implicit-def $cr0
; CHECK-LATE: sraw. 3, 3, 4
%5 = COPY killed $cr0
%6 = ISEL %2, %4, %5.sub_eq
Expand Down
Loading
Loading