Skip to content

Commit

Permalink
[AArch64][GlobalISel] Fixup <32b heterogeneous regbanks of G_PHIs jus…
Browse files Browse the repository at this point in the history
…t before selection.

Since all types <32b on gpr end up being assigned gpr32 regclasses, we can end
up with PHIs here which try to select between a gpr32 and an fpr16. Ideally RBS
shouldn't be selecting heterogeneous regbanks for operands if possible, but we
still need to be able to deal with it here.

To fix this, if we have a gpr-bank operand < 32b in size and at least one other
operand is on the fpr bank, then we add cross-bank copies to homogenize the
operand banks. For simplicity the bank that we choose to settle on is whatever
bank the def operand has. For example:

%endbb:
  %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
 =>
%bb2:
  ...
  %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
  ...
%endbb:
  %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2

Differential Revision: https://reviews.llvm.org/D75086

(cherry picked from commit 65f99b5)
  • Loading branch information
aemerson committed Apr 15, 2020
1 parent 2675c47 commit 05f5e2a
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 0 deletions.
93 changes: 93 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
Expand Up @@ -63,6 +63,7 @@ class AArch64InstructionSelector : public InstructionSelector {
// cache it here for each run of the selector.
ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
processPHIs(MF);
}

private:
Expand All @@ -77,6 +78,9 @@ class AArch64InstructionSelector : public InstructionSelector {
// An early selection function that runs before the selectImpl() call.
bool earlySelect(MachineInstr &I) const;

// Do some preprocessing of G_PHIs before we begin selection.
void processPHIs(MachineFunction &MF);

bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

/// Eliminate same-sized cross-bank copies into stores before selectImpl().
Expand Down Expand Up @@ -4755,6 +4759,95 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
}
}


// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
/// Rewrite the incoming values of the G_PHI \p MI so that every register
/// operand lives on the same register bank as the PHI's destination.
///
/// For each incoming register whose bank differs from the destination bank a
/// COPY of that register is built next to the register's definition, the copy
/// result is assigned the destination bank, and the PHI operand is rewritten
/// to use the copy instead.
///
/// \param MI  The G_PHI to fix up (asserted).
/// \param MRI Register info used to query and assign types and banks.
/// \param RBI Register bank info; not consulted by this helper.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank. Operand 0 is
  // the def; non-register operands are skipped.
  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
    MachineOperand &MO = MI.getOperand(OpIdx);
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB == DstRB)
      continue;

    // Insert a cross-bank copy right after the operand's definition.
    auto *OpDef = MRI.getVRegDef(OpReg);
    const LLT &Ty = MRI.getType(OpReg);
    MachineBasicBlock &OpDefBB = *OpDef->getParent();

    // Any instruction we insert must appear after all PHIs in the block for
    // the block to remain valid MIR. If the definition is itself a PHI that is
    // followed by further PHIs, move the insertion point past the whole PHI
    // group instead of splitting it.
    MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
    if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
      InsertPt = OpDefBB.getFirstNonPHI();

    MIB.setInsertPt(OpDefBB, InsertPt);
    auto Copy = MIB.buildCopy(Ty, OpReg);
    MRI.setRegBank(Copy.getReg(0), *DstRB);
    MO.setReg(Copy.getReg(0));
  }
}

/// Pre-selection pass over every G_PHI in \p MF that homogenizes the register
/// banks of narrow (<32 bit) scalar PHIs whose incoming values are split
/// between the GPR bank and another bank.
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Snapshot the G_PHIs first: the fixup below inserts new instructions, so we
  // must not be iterating over the blocks while it runs.
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &MBB : MF)
    for (auto &Inst : MBB)
      if (Inst.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&Inst);

  for (MachineInstr *Phi : Phis) {
    // Scalars narrower than 32 bits on the gpr bank are all given gpr32
    // register classes, so a PHI can end up choosing between a gpr32 and an
    // fpr16 value. Ideally register bank selection would not hand us mixed
    // banks, but we have to cope when it does.
    //
    // When a <32 bit PHI has at least one gpr-bank input and at least one
    // input on another bank, cross-bank copies are added so that all inputs
    // agree. For simplicity the bank settled on is the def's bank:
    //
    //   %endbb:
    //     %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    //  =>
    //   %bb2:
    //     ...
    //     %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //     ...
    //   %endbb:
    //     %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool SawGPR = false;
    bool SawOtherBank = false;
    for (unsigned Idx = 1, NumOps = Phi->getNumOperands(); Idx != NumOps;
         ++Idx) {
      const MachineOperand &Incoming = Phi->getOperand(Idx);
      // Block operands are interleaved with the values; ignore them.
      if (!Incoming.isReg())
        continue;

      const Register InReg = Incoming.getReg();
      const LLT InTy = MRI.getType(InReg);
      // Stop scanning at the first operand that is not a valid scalar
      // narrower than 32 bits.
      if (!InTy.isValid() || !InTy.isScalar() || InTy.getSizeInBits() >= 32)
        break;

      // Without an assigned bank there is nothing sensible to do; stop.
      const RegisterBank *Bank = MRI.getRegBankOrNull(InReg);
      if (!Bank)
        break;

      const bool OnGPR = Bank->getID() == AArch64::GPRRegBankID;
      SawGPR |= OnGPR;
      SawOtherBank |= !OnGPR;
    }

    // Mixed banks were observed: rewrite the inputs onto the def's bank.
    if (SawGPR && SawOtherBank)
      fixupPHIOpBanks(*Phi, MRI, RBI);
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
Expand Down
110 changes: 110 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
@@ -0,0 +1,110 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple aarch64--- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
---
name: test_loop_phi_fpr_to_gpr
alignment: 4
legalized: true
regBankSelected: true
selected: false
failedISel: false
tracksRegLiveness: true
liveins: []
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: test_loop_phi_fpr_to_gpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
; CHECK: bb.2:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[FCVTHSr]], %subreg.hsub
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK: STRHHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
; CHECK: B %bb.2
bb.0:
successors: %bb.1(0x80000000)
%0:gpr(s1) = G_IMPLICIT_DEF
%4:gpr(p0) = G_IMPLICIT_DEF
%8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
bb.1:
successors: %bb.2(0x80000000)
%6:gpr(s32) = G_IMPLICIT_DEF
%7:gpr(s32) = G_SELECT %0(s1), %6, %6
%1:gpr(s16) = G_TRUNC %7(s32)
bb.2:
successors: %bb.2(0x80000000)
%3:gpr(s16) = G_PHI %1(s16), %bb.1, %5(s16), %bb.2
%5:fpr(s16) = G_FPTRUNC %8(s32)
G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
G_BR %bb.2
...
---
name: test_loop_phi_gpr_to_fpr
alignment: 4
legalized: true
regBankSelected: true
selected: false
failedISel: false
tracksRegLiveness: true
liveins: []
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: test_loop_phi_gpr_to_fpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[CSELWr]]
; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
; CHECK: bb.2:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[PHI:%[0-9]+]]:fpr16 = PHI %7, %bb.2, [[COPY2]], %bb.1
; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
; CHECK: STRHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
; CHECK: B %bb.2
bb.0:
successors: %bb.1(0x80000000)
%0:gpr(s1) = G_IMPLICIT_DEF
%4:gpr(p0) = G_IMPLICIT_DEF
%8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
bb.1:
successors: %bb.2(0x80000000)
%6:gpr(s32) = G_IMPLICIT_DEF
%7:gpr(s32) = G_SELECT %0(s1), %6, %6
%1:gpr(s16) = G_TRUNC %7(s32)
bb.2:
successors: %bb.2(0x80000000)
%3:fpr(s16) = G_PHI %5(s16), %bb.2, %1(s16), %bb.1
%5:fpr(s16) = G_FPTRUNC %8(s32)
G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
G_BR %bb.2
...

0 comments on commit 05f5e2a

Please sign in to comment.