Revert "[X86] Remove patterns for ADC/SBB with immediate 8 and optimi…
Browse files Browse the repository at this point in the history
…ze during MC lowering, NFCI"

This caused compiler assertions; see the comment on
https://reviews.llvm.org/D150107.

This also reverts the dependent follow-up change:

> [X86] Remove patterns for ADD/AND/OR/SUB/XOR/CMP with immediate 8 and optimize during MC lowering, NFCI
>
> This is follow-up of D150107.
>
> In addition, the function `X86::optimizeToFixedRegisterOrShortImmediateForm` can be
> shared with project bolt and eliminates the code in X86InstrRelaxTables.cpp.
>
> Differential Revision: https://reviews.llvm.org/D150949

This reverts commits 2ef8ae1 and 5586bc5.
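
As background: the reverted approach dropped the separate *ri8/*mi8 instruction patterns and instead shrank immediates late, while lowering to MCInst. A minimal standalone sketch of that idea, with made-up enum values standing in for the real X86 opcodes (this is not the LLVM API):

#include <cstdint>

enum Opcode { ADD32ri /* 32-bit immediate */, ADD32ri8 /* sign-extended 8-bit immediate */ };

// Shrink to the imm8 form when the value fits in a signed byte,
// mirroring what the reverted MC-lowering optimization did.
inline Opcode shrinkImmediate(Opcode Op, int64_t Imm) {
  return (Op == ADD32ri && Imm >= -128 && Imm <= 127) ? ADD32ri8 : Op;
}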
zmodem committed May 19, 2023
1 parent 218841a commit cb16b33
Showing 63 changed files with 735 additions and 467 deletions.
76 changes: 1 addition & 75 deletions llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp
@@ -370,7 +370,7 @@ bool X86::optimizeMOV(MCInst &MI, bool In64BitMode) {

/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
/// a short fixed-register form.
static bool optimizeToFixedRegisterForm(MCInst &MI) {
bool X86::optimizeToFixedRegisterForm(MCInst &MI) {
unsigned NewOpc;
switch (MI.getOpcode()) {
default:
@@ -424,77 +424,3 @@ static bool optimizeToFixedRegisterForm(MCInst &MI) {
MI.addOperand(Saved);
return true;
}

static bool optimizeToShortImmediateForm(MCInst &MI) {
unsigned NewOpc;
switch (MI.getOpcode()) {
default:
return false;
FROM_TO(ADC16mi, ADC16mi8)
FROM_TO(ADC16ri, ADC16ri8)
FROM_TO(ADC32mi, ADC32mi8)
FROM_TO(ADC32ri, ADC32ri8)
FROM_TO(ADC64mi32, ADC64mi8)
FROM_TO(ADC64ri32, ADC64ri8)
FROM_TO(SBB16mi, SBB16mi8)
FROM_TO(SBB16ri, SBB16ri8)
FROM_TO(SBB32mi, SBB32mi8)
FROM_TO(SBB32ri, SBB32ri8)
FROM_TO(SBB64mi32, SBB64mi8)
FROM_TO(SBB64ri32, SBB64ri8)
FROM_TO(ADD16mi, ADD16mi8)
FROM_TO(ADD16ri, ADD16ri8)
FROM_TO(ADD32mi, ADD32mi8)
FROM_TO(ADD32ri, ADD32ri8)
FROM_TO(ADD64mi32, ADD64mi8)
FROM_TO(ADD64ri32, ADD64ri8)
FROM_TO(AND16mi, AND16mi8)
FROM_TO(AND16ri, AND16ri8)
FROM_TO(AND32mi, AND32mi8)
FROM_TO(AND32ri, AND32ri8)
FROM_TO(AND64mi32, AND64mi8)
FROM_TO(AND64ri32, AND64ri8)
FROM_TO(OR16mi, OR16mi8)
FROM_TO(OR16ri, OR16ri8)
FROM_TO(OR32mi, OR32mi8)
FROM_TO(OR32ri, OR32ri8)
FROM_TO(OR64mi32, OR64mi8)
FROM_TO(OR64ri32, OR64ri8)
FROM_TO(SUB16mi, SUB16mi8)
FROM_TO(SUB16ri, SUB16ri8)
FROM_TO(SUB32mi, SUB32mi8)
FROM_TO(SUB32ri, SUB32ri8)
FROM_TO(SUB64mi32, SUB64mi8)
FROM_TO(SUB64ri32, SUB64ri8)
FROM_TO(XOR16mi, XOR16mi8)
FROM_TO(XOR16ri, XOR16ri8)
FROM_TO(XOR32mi, XOR32mi8)
FROM_TO(XOR32ri, XOR32ri8)
FROM_TO(XOR64mi32, XOR64mi8)
FROM_TO(XOR64ri32, XOR64ri8)
FROM_TO(CMP16mi, CMP16mi8)
FROM_TO(CMP16ri, CMP16ri8)
FROM_TO(CMP32mi, CMP32mi8)
FROM_TO(CMP32ri, CMP32ri8)
FROM_TO(CMP64mi32, CMP64mi8)
FROM_TO(CMP64ri32, CMP64ri8)
}
MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
if (LastOp.isExpr()) {
const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(LastOp.getExpr());
if (!SRE || SRE->getKind() != MCSymbolRefExpr::VK_X86_ABS8)
return false;
} else if (LastOp.isImm()) {
if (!isInt<8>(LastOp.getImm()))
return false;
}
MI.setOpcode(NewOpc);
return true;
}

bool X86::optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI) {
// We may optimize twice here.
bool ShortImm = optimizeToShortImmediateForm(MI);
bool FixedReg = optimizeToFixedRegisterForm(MI);
return ShortImm || FixedReg;
}
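
With the short-immediate path removed, only the accumulator rewrite described in the comment above survives (FOO $imm, %{al,ax,eax,rax} -> FOO $imm). A self-contained sketch of that selection rule, with illustrative enum names rather than the real X86:: opcode values:

// ADD EAX, imm32 has a dedicated encoding (opcode 0x05) with no ModRM byte,
// one byte shorter than the generic ADD r/m32, imm32 (0x81 /0) form.
enum Opc { ADD32ri, ADD32i32 };

inline Opc pickFixedRegisterForm(Opc Op, bool DstIsEAX) {
  return (Op == ADD32ri && DstIsEAX) ? ADD32i32 : Op;
}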
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h
@@ -22,7 +22,7 @@ bool optimizeVPCMPWithImmediateOneOrSix(MCInst &MI);
bool optimizeMOVSX(MCInst &MI);
bool optimizeINCDEC(MCInst &MI, bool In64BitMode);
bool optimizeMOV(MCInst &MI, bool In64BitMode);
bool optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI);
bool optimizeToFixedRegisterForm(MCInst &MI);
} // namespace X86
} // namespace llvm
#endif
24 changes: 12 additions & 12 deletions llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -285,15 +285,15 @@ X86CallFrameOptimization::classifyInstruction(
// The instructions we actually care about are movs onto the stack or special
// cases of constant-stores to stack
switch (MI->getOpcode()) {
case X86::AND16mi:
case X86::AND32mi:
case X86::AND64mi32: {
case X86::AND16mi8:
case X86::AND32mi8:
case X86::AND64mi8: {
const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
return ImmOp.getImm() == 0 ? Convert : Exit;
}
case X86::OR16mi:
case X86::OR32mi:
case X86::OR64mi32: {
case X86::OR16mi8:
case X86::OR32mi8:
case X86::OR64mi8: {
const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
return ImmOp.getImm() == -1 ? Convert : Exit;
}
@@ -512,12 +512,12 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
switch (Store->getOpcode()) {
default:
llvm_unreachable("Unexpected Opcode!");
case X86::AND16mi:
case X86::AND32mi:
case X86::AND64mi32:
case X86::OR16mi:
case X86::OR32mi:
case X86::OR64mi32:
case X86::AND16mi8:
case X86::AND32mi8:
case X86::AND64mi8:
case X86::OR16mi8:
case X86::OR32mi8:
case X86::OR64mi8:
case X86::MOV32mi:
case X86::MOV64mi32:
PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
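
The AND/OR special cases in classifyInstruction above work because an AND with 0 or an OR with -1 against a stack slot is just a constant store, which can then be folded into a push. A simplified model of that rule (not the real MachineInstr interface):

#include <cstdint>

enum class Action { Convert, Exit };

// AND mem, 0 writes 0; OR mem, -1 writes -1; anything else is not a plain
// constant store, so the transformation stops.
inline Action classifyMaskedStackStore(bool IsAnd, int64_t Imm) {
  if (IsAnd)
    return Imm == 0 ? Action::Convert : Action::Exit;
  return Imm == -1 ? Action::Convert : Action::Exit;
}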
9 changes: 5 additions & 4 deletions llvm/lib/Target/X86/X86DynAllocaExpander.cpp
@@ -189,10 +189,10 @@ void X86DynAllocaExpander::computeLowerings(MachineFunction &MF,
}
}

static unsigned getSubOpcode(bool Is64Bit) {
static unsigned getSubOpcode(bool Is64Bit, int64_t Amount) {
if (Is64Bit)
return X86::SUB64ri32;
return X86::SUB32ri;
return isInt<8>(Amount) ? X86::SUB64ri8 : X86::SUB64ri32;
return isInt<8>(Amount) ? X86::SUB32ri8 : X86::SUB32ri;
}

void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
@@ -242,7 +242,8 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
.addReg(RegA, RegState::Undef);
} else {
// Sub.
BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64BitAlloca)), StackPtr)
BuildMI(*MBB, I, DL,
TII->get(getSubOpcode(Is64BitAlloca, Amount)), StackPtr)
.addReg(StackPtr)
.addImm(Amount);
}
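
The isInt<8> checks re-added here (and elsewhere in this commit) matter for code size: the imm8 forms of SUB encode via opcode 0x83 with a one-byte immediate, while the imm32 forms use 0x81 with four immediate bytes. A standalone approximation of getSubOpcode, with the llvm::isInt<8> check written out explicitly and strings standing in for the opcode enum:

#include <cstdint>

inline bool fitsInSignedByte(int64_t V) { return V >= -128 && V <= 127; }

inline const char *subOpcodeFor(bool Is64Bit, int64_t Amount) {
  if (Is64Bit)
    return fitsInSignedByte(Amount) ? "SUB64ri8" : "SUB64ri32";
  return fitsInSignedByte(Amount) ? "SUB32ri8" : "SUB32ri";
}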
11 changes: 10 additions & 1 deletion llvm/lib/Target/X86/X86FastISel.cpp
@@ -1376,20 +1376,29 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
/// If we have a comparison with RHS as the RHS of the comparison, return an
/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
int64_t Val = RHSC->getSExtValue();
switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
default:
return 0;
case MVT::i8:
return X86::CMP8ri;
case MVT::i16:
if (isInt<8>(Val))
return X86::CMP16ri8;
return X86::CMP16ri;
case MVT::i32:
if (isInt<8>(Val))
return X86::CMP32ri8;
return X86::CMP32ri;
case MVT::i64:
if (isInt<8>(Val))
return X86::CMP64ri8;
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
// field.
return isInt<32>(RHSC->getSExtValue()) ? X86::CMP64ri32 : 0;
if (isInt<32>(Val))
return X86::CMP64ri32;
return 0;
}
}

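As a worked example of the restored opcode choice in X86ChooseCmpImmediateOpcode (illustrative values only):

//   VT = i16, RHS = 5          -> CMP16ri8
//   VT = i32, RHS = 300        -> CMP32ri   (too wide for a signed byte)
//   VT = i64, RHS = 100        -> CMP64ri8
//   VT = i64, RHS = 1LL << 40  -> 0         (not a sign-extended 32-bit immediate)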
9 changes: 7 additions & 2 deletions llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -186,9 +186,13 @@ FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
// Only convert instructions that we've verified are safe.
return nullptr;
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
case X86::ADD64ri8_DB:
case X86::ADD32ri:
case X86::ADD32ri8:
case X86::ADD32ri_DB:
case X86::ADD32ri8_DB:
if (!MI.getOperand(2).isImm()) {
// convertToThreeAddress will call getImm()
// which requires isImm() to be true
@@ -370,14 +374,15 @@ static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
const MachineOperand &Offset) {
bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
switch (LEAOpcode) {
default:
llvm_unreachable("Unexpected LEA instruction");
case X86::LEA32r:
case X86::LEA64_32r:
return X86::ADD32ri;
return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
case X86::LEA64r:
return X86::ADD64ri32;
return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
}
}

57 changes: 41 additions & 16 deletions llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -105,12 +105,28 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
(isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
if (IsLP64) {
if (isInt<8>(Imm))
return X86::SUB64ri8;
return X86::SUB64ri32;
} else {
if (isInt<8>(Imm))
return X86::SUB32ri8;
return X86::SUB32ri;
}
}

static unsigned getADDriOpcode(bool IsLP64) {
return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
if (IsLP64) {
if (isInt<8>(Imm))
return X86::ADD64ri8;
return X86::ADD64ri32;
} else {
if (isInt<8>(Imm))
return X86::ADD32ri8;
return X86::ADD32ri;
}
}

static unsigned getSUBrrOpcode(bool IsLP64) {
@@ -122,7 +138,14 @@ static unsigned getADDrrOpcode(bool IsLP64) {
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
if (IsLP64) {
if (isInt<8>(Imm))
return X86::AND64ri8;
return X86::AND64ri32;
}
if (isInt<8>(Imm))
return X86::AND32ri8;
return X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
@@ -340,8 +363,8 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
} else {
bool IsSub = Offset < 0;
uint64_t AbsOffset = IsSub ? -Offset : Offset;
const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
: getADDriOpcode(Uses64BitFramePtr);
const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
: getADDriOpcode(Uses64BitFramePtr, AbsOffset);
MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(AbsOffset);
@@ -377,8 +400,9 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
unsigned Opc = PI->getOpcode();
int Offset = 0;

if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
PI->getOperand(0).getReg() == StackPtr) {
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
PI->getOperand(0).getReg() == StackPtr){
assert(PI->getOperand(1).getReg() == StackPtr);
Offset = PI->getOperand(2).getImm();
} else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
@@ -389,7 +413,8 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
PI->getOperand(5).getReg() == X86::NoRegister) {
// For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
Offset = PI->getOperand(4).getImm();
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
PI->getOperand(0).getReg() == StackPtr) {
assert(PI->getOperand(1).getReg() == StackPtr);
Offset = -PI->getOperand(2).getImm();
@@ -808,7 +833,7 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
// save loop bound
{
const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset);
BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
.addReg(FinalStackProbed)
.addImm(BoundOffset)
@@ -1311,7 +1336,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,

{
const unsigned SUBOpc =
getSUBriOpcode(Uses64BitFramePtr);
getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
@@ -1342,7 +1367,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
.setMIFlag(MachineInstr::FrameSetup);

const unsigned SUBOpc =
getSUBriOpcode(Uses64BitFramePtr);
getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
@@ -1775,7 +1800,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.addImm(8)
.addUse(X86::NoRegister)
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
.addUse(X86::RSP)
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
@@ -2394,7 +2419,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
(Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
(Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
(Opc != X86::ADD64ri32 || !PI->getFlag(MachineInstr::FrameDestroy)))
(Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
break;
FirstCSPop = PI;
}
@@ -3768,7 +3793,7 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(

if (UsedReg == FramePtr) {
// ADD $offset, %ebp
unsigned ADDri = getADDriOpcode(false);
unsigned ADDri = getADDriOpcode(false, EndOffset);
BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
.addReg(FramePtr)
.addImm(EndOffset)