Skip to content

Commit

Permalink
[GlobalISel][AArch64] Make FP constraint checks consider possible use…
Browse files Browse the repository at this point in the history
…/def banks

In a few places in getInstrMapping, we check if use/def instructions for the
instruction we're mapping have floating point constraints.

We can improve this check and reduce the number of copies in GISel-compiled code
if we make a couple observations:

- For a def instruction, it only matters if the def instruction must always
  output a value stored on a FPR

- For a use instruction, it only matters if the use instruction must always
  only take in values stored in FPRs

This adds two new functions:

- onlyUsesFP
- onlyDefinesFP

Then we can use those when we're checking the uses/defs instead.

Without this patch, the load, unmerge, store, and select in the added test
would have unnecessary copies.

Differential Revision: https://reviews.llvm.org/D62426

llvm-svn: 361679
  • Loading branch information
Jessica Paquette committed May 24, 2019
1 parent 59f959f commit 97d668d
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 7 deletions.
42 changes: 35 additions & 7 deletions llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,35 @@ bool AArch64RegisterBankInfo::hasFPConstraints(
&AArch64::FPRRegBank;
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FCMP:
return true;
default:
break;
}
return hasFPConstraints(MI, MRI, TRI);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
case TargetOpcode::G_INSERT_VECTOR_ELT:
return true;
default:
break;
}
return hasFPConstraints(MI, MRI, TRI);
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
Expand Down Expand Up @@ -642,7 +671,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// assume this was a floating point load in the IR.
// If it was not, we would have had a bitcast before
// reaching that instruction.
if (hasFPConstraints(UseMI, MRI, TRI)) {
if (onlyUsesFP(UseMI, MRI, TRI)) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
Expand All @@ -655,7 +684,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
if (!VReg)
break;
MachineInstr *DefMI = MRI.getVRegDef(VReg);
if (hasFPConstraints(*DefMI, MRI, TRI))
if (onlyDefinesFP(*DefMI, MRI, TRI))
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
Expand Down Expand Up @@ -687,7 +716,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// fpr = G_FOO %z ...
if (any_of(
MRI.use_instructions(MI.getOperand(0).getReg()),
[&](MachineInstr &MI) { return hasFPConstraints(MI, MRI, TRI); }))
[&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
++NumFP;

// Check if the defs of the source values always produce floating point
Expand All @@ -707,7 +736,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned VReg = MI.getOperand(Idx).getReg();
MachineInstr *DefMI = MRI.getVRegDef(VReg);
if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
hasFPConstraints(*DefMI, MRI, TRI))
onlyDefinesFP(*DefMI, MRI, TRI))
++NumFP;
}

Expand All @@ -729,9 +758,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// UNMERGE into scalars from a vector should always use FPR.
// Likewise if any of the uses are FP instructions.
if (SrcTy.isVector() ||
any_of(
MRI.use_instructions(MI.getOperand(0).getReg()),
[&](MachineInstr &MI) { return hasFPConstraints(MI, MRI, TRI); })) {
any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
[&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
// Set the register bank of every operand to FPR.
for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
Idx < NumOperands; ++Idx)
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,14 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;

/// Returns true if the source registers of \p MI must all be FPRs.
bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;

/// Returns true if the destination register of \p MI must be a FPR.
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;

public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);

Expand Down
104 changes: 104 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect %s -o - | FileCheck %s

# Check that we correctly assign register banks based off of instructions which
# only use or only define FPRs.
#
# For example, G_SITOFP takes in a GPR, but only ever produces values on FPRs.
# Some instructions can have inputs/outputs on either FPRs or GPRs. If one of
# those instructions takes in the result of a G_SITOFP as a source, we should
# put that source on a FPR.
#
# Similarly, G_FPTOSI can only take in a value on a FPR. So, if the result of
# an instruction is consumed by a G_FPTOSI, we should put the instruction on
# FPRs.

---
name: load_only_uses_fp
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: load_only_uses_fp
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 2.000000e+00
; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load 4)
; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(uno), [[C]](s32), [[LOAD]]
; CHECK: $w0 = COPY [[FCMP]](s32)
; CHECK: RET_ReallyLR implicit $w0
%0:_(p0) = COPY $x0
%1:_(s32) = G_FCONSTANT float 2.0
%2:_(s32) = G_LOAD %0 :: (load 4)
%3:_(s32) = G_FCMP floatpred(uno), %1, %2
$w0 = COPY %3(s32)
RET_ReallyLR implicit $w0
...
---
name: unmerge_only_uses_fp

legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: unmerge_only_uses_fp
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY [[COPY]](s64)
; CHECK: [[UV:%[0-9]+]]:fpr(s32), [[UV1:%[0-9]+]]:fpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(uno), [[UV]](s32), [[UV1]]
; CHECK: $w0 = COPY [[FCMP]](s32)
; CHECK: RET_ReallyLR implicit $w0
%0:_(s64) = COPY $x0
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64)
%3:_(s32) = G_FCMP floatpred(uno), %1, %2
$w0 = COPY %3(s32)
RET_ReallyLR implicit $w0
...
---
name: store_defined_by_fp
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0, $w1
; CHECK-LABEL: name: store_defined_by_fp
; CHECK: liveins: $x0, $w1
; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1
; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
; CHECK: G_STORE [[SITOFP]](s32), [[COPY]](p0) :: (store 4)
%0:_(p0) = COPY $x0
%1:_(s32) = COPY $w1
%2:_(s32) = G_SITOFP %1
G_STORE %2, %0 :: (store 4)
...
---
name: select_defined_by_fp_using_fp
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
; CHECK-LABEL: name: select_defined_by_fp_using_fp
; CHECK: liveins: $w0, $w1, $w2
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC %2(s32)
; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1
; CHECK: [[COPY2:%[0-9]+]]:gpr(s32) = COPY $w2
; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
; CHECK: [[COPY4:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32)
; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[SITOFP]]
; CHECK: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32)
%0:_(s32) = COPY $w0
%1:_(s1) = G_TRUNC %3(s32)
%2:_(s32) = COPY $w1
%3:_(s32) = COPY $w2
%4:_(s32) = G_SITOFP %2
%6:_(s32) = G_SELECT %1(s1), %3, %4
%8:_(s32) = G_FPTOSI %6

0 comments on commit 97d668d

Please sign in to comment.