Skip to content

Commit

Permalink
[AArch64][GlobalISel] Add a simple cross-regclass copy optimization post-selection.
Browse files Browse the repository at this point in the history

This does some trivial cross-regclass folding, where we can either do some extra
constraining to eliminate the copy or modify uses to use a smaller regclass.

There are minor code size improvements on average.

Program                                       size.__text
                                              before         after           diff
tramp3d-v4/tramp3d-v4                         366000.00      366012.00       0.0%
mafft/pairlocalalign                          248196.00      248188.00      -0.0%
7zip/7zip-benchmark                           568612.00      568592.00      -0.0%
kimwitu++/kc                                  434704.00      434676.00      -0.0%
Bullet/bullet                                 456128.00      456096.00      -0.0%
sqlite3/sqlite3                               284136.00      284100.00      -0.0%
ClamAV/clamscan                               381492.00      381396.00      -0.0%
SPASS/SPASS                                   412052.00      411944.00      -0.0%
lencod/lencod                                 428060.00      427912.00      -0.0%
consumer-typeset/consumer-typeset             413148.00      411116.00      -0.5%
                           Geomean difference                               -0.1%

Differential Revision: https://reviews.llvm.org/D136793
  • Loading branch information
aemerson committed Nov 1, 2022
1 parent 3b82b4f commit 974cf71
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 1 deletion.
66 changes: 65 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
Expand Up @@ -14,12 +14,15 @@
#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

#define DEBUG_TYPE "aarch64-post-select-optimize"

Expand All @@ -42,6 +45,9 @@ class AArch64PostSelectOptimize : public MachineFunctionPass {

private:
bool optimizeNZCVDefs(MachineBasicBlock &MBB);
bool doPeepholeOpts(MachineBasicBlock &MBB);
/// Look for cross regclass copies that can be trivially eliminated.
bool foldSimpleCrossClassCopies(MachineInstr &MI);
};
} // end anonymous namespace

Expand Down Expand Up @@ -74,6 +80,62 @@ unsigned getNonFlagSettingVariant(unsigned Opc) {
}
}

/// Run the post-selection peephole folds over every instruction in \p MBB.
///
/// \returns true if at least one instruction in the block was folded.
bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
  bool AnyFolded = false;
  // A successful fold erases the visited instruction, so walk the block with
  // an early-increment range to keep the traversal valid across the erase.
  for (MachineInstr &Inst : make_early_inc_range(MBB))
    if (foldSimpleCrossClassCopies(Inst))
      AnyFolded = true;
  return AnyFolded;
}

/// Try to eliminate a COPY between two virtual registers whose register
/// classes differ but where one class is a superclass of the other.
///
/// On success every reference to the copy's destination is rewritten to the
/// copy's source and \p MI is erased.
///
/// \returns true if \p MI was folded away, false if it was left untouched.
bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
  // Only plain COPYs are handled, and only when the source operand carries no
  // sub-register index.
  if (!MI.isCopy() || MI.getOperand(1).getSubReg())
    return false;

  const Register SrcReg = MI.getOperand(1).getReg();
  const Register DstReg = MI.getOperand(0).getReg();

  // Both ends of the copy must be virtual registers.
  if (SrcReg.isPhysical() || DstReg.isPhysical())
    return false;

  auto &MRI = MI.getMF()->getRegInfo();
  const TargetRegisterClass *const SrcClass = MRI.getRegClass(SrcReg);
  const TargetRegisterClass *const DstClass = MRI.getRegClass(DstReg);

  // Same-class copies are not cross-class copies; nothing to do here.
  if (SrcClass == DstClass)
    return false;

  if (SrcClass->hasSubClass(DstClass)) {
    // The source class is a superclass of the destination class. When the
    // copy is the sole non-debug user of the source, the source register
    // itself can be narrowed to the destination class.
    if (!MRI.hasOneNonDBGUse(SrcReg))
      return false;

    // Refuse to narrow into an unusual class that only has a few registers:
    // the constraint is only applied if at least 25 registers remain.
    if (!MRI.constrainRegClass(SrcReg, DstClass, /* MinNumRegs */ 25))
      return false;
  } else if (!DstClass->hasSubClass(SrcClass)) {
    // Neither class is a superclass of the other.
    return false;
  }
  // Otherwise this is the inverse case: the destination class is a superclass
  // of the source class. No constraining is performed; the users of the
  // destination are simply redirected to the source register, which lives in
  // the smaller class.

  MRI.replaceRegWith(DstReg, SrcReg);
  MI.eraseFromParent();
  return true;
}

bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
// Consider the following code:
// FCMPSrr %0, %1, implicit-def $nzcv
Expand Down Expand Up @@ -178,8 +240,10 @@ bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
"Expected a selected MF");

bool Changed = false;
for (auto &BB : MF)
for (auto &BB : MF) {
Changed |= optimizeNZCVDefs(BB);
Changed |= doPeepholeOpts(BB);
}
return Changed;
}

Expand Down
116 changes: 116 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
@@ -0,0 +1,116 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

@x = external hidden local_unnamed_addr global i32*, align 8
define void @copy_from_larger_rc_def() { ret void }
define void @copy_from_larger_rc_def_multi_use() { ret void }
define void @copy_from_smaller_rc_def() { ret void }

...
---
name: copy_from_larger_rc_def
alignment: 4
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
- { reg: '$x2' }
body: |
bb.1:
liveins: $w1, $x0, $x2
; Show that if we're doing a copy from a large rc to a single user with a smaller rc
; then we just constrain the def instead.
; CHECK-LABEL: name: copy_from_larger_rc_def
; CHECK: liveins: $w1, $x0, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: %large_rc_def:gpr64common = UBFMXri [[COPY]], 61, 60
; CHECK-NEXT: %add:gpr64sp = ADDXri %large_rc_def, 3, 0
; CHECK-NEXT: $x0 = COPY %add
; CHECK-NEXT: RET_ReallyLR
%0:gpr64 = COPY $x0
%large_rc_def:gpr64 = UBFMXri %0, 61, 60
%constrain_copy:gpr64common = COPY %large_rc_def
; Even though ADDXri may not actually need to use gpr64common, just use it as an example.
%add:gpr64sp = ADDXri %constrain_copy, 3, 0
$x0 = COPY %add
RET_ReallyLR
...
---
name: copy_from_larger_rc_def_multi_use
alignment: 4
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
- { reg: '$x2' }
body: |
bb.1:
liveins: $w1, $x0, $x2
; Don't constrain def if the original def has multiple users.
; CHECK-LABEL: name: copy_from_larger_rc_def_multi_use
; CHECK: liveins: $w1, $x0, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: %large_rc_def:gpr64 = UBFMXri [[COPY]], 61, 60
; CHECK-NEXT: %constrain_copy:gpr64common = COPY %large_rc_def
; CHECK-NEXT: %add:gpr64sp = ADDXri %constrain_copy, 3, 0
; CHECK-NEXT: %add2:gpr64sp = ADDXri %constrain_copy, 3, 0
; CHECK-NEXT: $x0 = COPY %add
; CHECK-NEXT: $x1 = COPY %large_rc_def
; CHECK-NEXT: RET_ReallyLR
%0:gpr64 = COPY $x0
%large_rc_def:gpr64 = UBFMXri %0, 61, 60
%constrain_copy:gpr64common = COPY %large_rc_def
%add:gpr64sp = ADDXri %constrain_copy, 3, 0
%add2:gpr64sp = ADDXri %constrain_copy, 3, 0
$x0 = COPY %add
$x1 = COPY %large_rc_def
RET_ReallyLR
...
---
name: copy_from_smaller_rc_def
alignment: 4
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
- { reg: '$x2' }
body: |
bb.1:
liveins: $w1, $x0, $x2
; Show that if we're doing a copy from a small rc to a single user with a larger rc
; then we just use the smaller def instead of doing a copy.
; CHECK-LABEL: name: copy_from_smaller_rc_def
; CHECK: liveins: $w1, $x0, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
; CHECK-NEXT: %add:gpr64common = ADDXri [[COPY1]], 3, 0
; CHECK-NEXT: STRXui [[COPY1]], %add, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))
; CHECK-NEXT: RET_ReallyLR
%0:gpr64common = COPY $x0
%1:gpr64common = COPY $x1
%add:gpr64common = ADDXri %1, 3, 0
%copy:gpr64sp = COPY %add
STRXui %1, %copy, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))
RET_ReallyLR
...

0 comments on commit 974cf71

Please sign in to comment.