[SystemZ] Improve foldMemoryOperandImpl().
Swap the compare operands if the LHS is spilled, updating the CC masks of the
CC users accordingly. This is relatively straightforward, since the live-in
lists for the CC register can be assumed to be correct during register
allocation (thanks to 659efa2).

Also fold a spilled operand of an LOCR/SELR into an LOC(G).
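For the LOCR/SELR fold, selecting the other source operand amounts to
inverting the condition, which foldMemoryOperandImpl below expresses as
CCMask ^ CCValid. A small standalone sketch of why that XOR is an inversion
(again with illustrative mask values, not the real SystemZ.h constants):

#include <cassert>

// Illustrative values only: the three CC outcomes an integer compare can
// produce (equal / low / high), and the "valid" set covering all of them.
constexpr unsigned CCMASK_CMP_EQ = 1 << 3;
constexpr unsigned CCMASK_CMP_LT = 1 << 2;
constexpr unsigned CCMASK_CMP_GT = 1 << 1;
constexpr unsigned CCMASK_ICMP   = CCMASK_CMP_EQ | CCMASK_CMP_LT | CCMASK_CMP_GT;

int main() {
  unsigned CCValid = CCMASK_ICMP;
  unsigned CCMask  = CCMASK_CMP_EQ;     // e.g. "load/select on equal"
  unsigned Flipped = CCMask ^ CCValid;  // take the other value instead
  // Within the valid set, the XOR yields exactly the complementary condition:
  assert(Flipped == (CCMASK_CMP_LT | CCMASK_CMP_GT)); // "on not-equal"
  // Flipping twice gives back the original mask.
  assert((Flipped ^ CCValid) == CCMask);
  return 0;
}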

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D67437
JonPsson committed Mar 10, 2020
1 parent 14219aa commit 62ff996
Showing 7 changed files with 585 additions and 45 deletions.
23 changes: 3 additions & 20 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2190,15 +2190,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) {
return false;
}

// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
static unsigned reverseCCMask(unsigned CCMask) {
return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
(CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
(CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
(CCMask & SystemZ::CCMASK_CMP_UO));
}

// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed. In that case it's better to compare the
// result of the subtraction against zero.
@@ -2234,7 +2225,7 @@ static void adjustForFNeg(Comparison &C) {
SDNode *N = *I;
if (N->getOpcode() == ISD::FNEG) {
C.Op0 = SDValue(N, 0);
C.CCMask = reverseCCMask(C.CCMask);
C.CCMask = SystemZ::reverseCCMask(C.CCMask);
return;
}
}
@@ -2601,7 +2592,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,

if (shouldSwapCmpOperands(C)) {
std::swap(C.Op0, C.Op1);
C.CCMask = reverseCCMask(C.CCMask);
C.CCMask = SystemZ::reverseCCMask(C.CCMask);
}

adjustForTestUnderMask(DAG, DL, C);
@@ -6277,15 +6268,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
return false;

// Compute the effective CC mask for the new branch or select.
switch (CCMask) {
case SystemZ::CCMASK_CMP_EQ: break;
case SystemZ::CCMASK_CMP_NE: break;
case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
default: return false;
}
CCMask = SystemZ::reverseCCMask(CCMask);

// Return the updated CCReg link.
CCReg = IPM->getOperand(0);
51 changes: 49 additions & 2 deletions llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2807,6 +2807,10 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
let mayLoad = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let MemKey = mnemonic#cls;
let MemType = "target";
}

// Like CondUnaryRSY, but used for the raw assembly form. The condition-code
@@ -3211,6 +3215,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = !subst("loc", "sel", mnemonic);
let NumOpsValue = "2";
let OpKey = mnemonic#cls1;
let OpType = "reg";
}

// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
@@ -3252,6 +3258,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = mnemonic;
let NumOpsValue = "3";
let OpKey = mnemonic#cls1;
let OpType = "reg";
}

// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
@@ -4775,6 +4783,20 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
let hasNoSchedulingInfo = 1;
}

// Same as MemFoldPseudo but for Load On Condition with CC operands.
class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes,
AddressingMode mode>
: Pseudo<(outs cls:$R1),
(ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> {
let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls;
let OpType = "mem";
let MemKey = mnemonic#cls;
let MemType = "pseudo";
let mayLoad = 1;
let AccessBytes = bytes;
let hasNoSchedulingInfo = 1;
}

// Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm>
@@ -4813,6 +4835,8 @@ class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = !subst("loc", "sel", mnemonic);
let NumOpsValue = "2";
let OpKey = mnemonic#cls1;
let OpType = "reg";
}

// Like CondBinaryRRFa, but expanded after RA depending on the choice of
@@ -4826,6 +4850,8 @@ class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = mnemonic;
let NumOpsValue = "3";
let OpKey = mnemonic#cls1;
let OpType = "reg";
}

// Like CondBinaryRIE, but expanded after RA depending on the choice of
@@ -4842,8 +4868,9 @@ class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>

// Like CondUnaryRSY, but expanded after RA depending on the choice of
// register.
class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr20only>
class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
: Pseudo<(outs cls:$R1),
(ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
[(set cls:$R1,
@@ -4854,6 +4881,10 @@ class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
let mayLoad = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let MemKey = mnemonic#cls;
let MemType = "target";
}

// Like CondStoreRSY, but expanded after RA depending on the choice of
@@ -5066,6 +5097,22 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
}

multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode,
SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only> {
defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>;
def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
}

multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only> {
def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>;
def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
}

// Define an instruction that operates on two fixed-length blocks of memory,
// and associated pseudo instructions for operating on blocks of any size.
// The Sequence form uses a straight-line sequence of instructions and
115 changes: 96 additions & 19 deletions llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1150,14 +1150,31 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// commutable, try to change <INSN>R into <INSN>.
unsigned NumOps = MI.getNumExplicitOperands();
int MemOpcode = SystemZ::getMemOpcode(Opcode);
if (MemOpcode == -1)
return nullptr;

// Try to swap compare operands if possible.
bool NeedsCommute = false;
if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR ||
MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR) &&
OpNum == 0 && prepareCompareSwapOperands(MI))
NeedsCommute = true;

bool CCOperands = false;
if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR ||
MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) {
assert(MI.getNumOperands() == 6 && NumOps == 5 &&
"LOCR/SELR instruction operands corrupt?");
NumOps -= 2;
CCOperands = true;
}

// See if this is a 3-address instruction that is convertible to 2-address
// and suitable for folding below. Only try this with virtual registers
// and a provided VRM (during regalloc).
bool NeedsCommute = false;
if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
if (SystemZ::getTwoOperandOpcode(Opcode) != -1) {
if (VRM == nullptr)
MemOpcode = -1;
return nullptr;
else {
assert(NumOps == 3 && "Expected two source registers.");
Register DstReg = MI.getOperand(0).getReg();
@@ -1172,32 +1189,42 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
DstPhys == VRM->getPhys(SrcReg))
NeedsCommute = (OpNum == 1);
else
MemOpcode = -1;
return nullptr;
}
}

if (MemOpcode >= 0) {
if ((OpNum == NumOps - 1) || NeedsCommute) {
const MCInstrDesc &MemDesc = get(MemOpcode);
uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
assert(AccessBytes != 0 && "Size of access should be known");
assert(AccessBytes <= Size && "Access outside the frame index");
uint64_t Offset = Size - AccessBytes;
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
MI.getDebugLoc(), get(MemOpcode));
if ((OpNum == NumOps - 1) || NeedsCommute) {
const MCInstrDesc &MemDesc = get(MemOpcode);
uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
assert(AccessBytes != 0 && "Size of access should be known");
assert(AccessBytes <= Size && "Access outside the frame index");
uint64_t Offset = Size - AccessBytes;
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
MI.getDebugLoc(), get(MemOpcode));
if (MI.isCompare()) {
assert(NumOps == 2 && "Expected 2 register operands for a compare.");
MIB.add(MI.getOperand(NeedsCommute ? 1 : 0));
}
else {
MIB.add(MI.getOperand(0));
if (NeedsCommute)
MIB.add(MI.getOperand(2));
else
for (unsigned I = 1; I < OpNum; ++I)
MIB.add(MI.getOperand(I));
MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
transferDeadCC(&MI, MIB);
transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
return MIB;
}
MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
if (CCOperands) {
unsigned CCValid = MI.getOperand(NumOps).getImm();
unsigned CCMask = MI.getOperand(NumOps + 1).getImm();
MIB.addImm(CCValid);
MIB.addImm(NeedsCommute ? CCMask ^ CCValid : CCMask);
}
transferDeadCC(&MI, MIB);
transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
return MIB;
}

return nullptr;
@@ -1706,6 +1733,56 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
return 0;
}

bool SystemZInstrInfo::
prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const {
assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() &&
MBBI->getOperand(1).isReg() && !MBBI->mayLoad() &&
"Not a compare reg/reg.");

MachineBasicBlock *MBB = MBBI->getParent();
bool CCLive = true;
SmallVector<MachineInstr *, 4> CCUsers;
for (MachineBasicBlock::iterator Itr = std::next(MBBI);
Itr != MBB->end(); ++Itr) {
if (Itr->readsRegister(SystemZ::CC)) {
unsigned Flags = Itr->getDesc().TSFlags;
if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast))
CCUsers.push_back(&*Itr);
else
return false;
}
if (Itr->definesRegister(SystemZ::CC)) {
CCLive = false;
break;
}
}
if (CCLive) {
LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo());
LiveRegs.addLiveOuts(*MBB);
if (LiveRegs.contains(SystemZ::CC))
return false;
}

// Update all CC users.
for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) {
unsigned Flags = CCUsers[Idx]->getDesc().TSFlags;
unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ?
0 : CCUsers[Idx]->getNumExplicitOperands() - 2);
MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1);
unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm());
CCMaskMO.setImm(NewCCMask);
}

return true;
}

unsigned SystemZ::reverseCCMask(unsigned CCMask) {
return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
(CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
(CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
(CCMask & SystemZ::CCMASK_CMP_UO));
}

unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
if (!STI.hasLoadAndTrap())
return 0;
10 changes: 10 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -155,6 +155,10 @@ enum FusedCompareType {
namespace SystemZ {
int getTwoOperandOpcode(uint16_t Opcode);
int getTargetMemOpcode(uint16_t Opcode);

// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
unsigned reverseCCMask(unsigned CCMask);
}

class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -314,6 +318,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
SystemZII::FusedCompareType Type,
const MachineInstr *MI = nullptr) const;

// Try to find all CC users of the compare instruction (MBBI) and update
// all of them to maintain equivalent behavior after swapping the compare
// operands. Return false if not all users can be conclusively found and
// handled. The compare instruction is *not* changed.
bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const;

// If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
// operation exists, return the opcode for the latter, otherwise return 0.
unsigned getLoadAndTrap(unsigned Opcode) const;
8 changes: 4 additions & 4 deletions llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -492,7 +492,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
let isCommutable = 1 in {
// Expands to SELR or SELFHR or a branch-and-move sequence,
// depending on the choice of registers.
def SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>;
def SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>;
defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
@@ -525,13 +525,13 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
let isCommutable = 1 in {
// Expands to LOCR or LOCFHR or a branch-and-move sequence,
// depending on the choice of registers.
def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>;
def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>;
defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
}

// Load on condition. Matched via DAG pattern.
// Expands to LOC or LOCFH, depending on the choice of register.
def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>;
defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>;
defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;

// Store on condition. Expanded from CondStore* pseudos.
@@ -564,7 +564,7 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {

// Load on condition. Matched via DAG pattern.
defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>;
defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>;
defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>;

// Store on condition. Expanded from CondStore* pseudos.
defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
