Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64][GlobalISel] Add a simple cross-regclass copy optimization post-selection.

This does some trivial cross-regclass folding, where we can either do some extra constraining to eliminate the copy or modify uses to use a smaller regclass.

There are minor code size improvements on average.

| Program | size.__text before | size.__text after | diff |
|---|---|---|---|
| tramp3d-v4/tramp3d-v4 | 366000.00 | 366012.00 | 0.0% |
| mafft/pairlocalalign | 248196.00 | 248188.00 | -0.0% |
| 7zip/7zip-benchmark | 568612.00 | 568592.00 | -0.0% |
| kimwitu++/kc | 434704.00 | 434676.00 | -0.0% |
| Bullet/bullet | 456128.00 | 456096.00 | -0.0% |
| sqlite3/sqlite3 | 284136.00 | 284100.00 | -0.0% |
| ClamAV/clamscan | 381492.00 | 381396.00 | -0.0% |
| SPASS/SPASS | 412052.00 | 411944.00 | -0.0% |
| lencod/lencod | 428060.00 | 427912.00 | -0.0% |
| consumer-typeset/consumer-typeset | 413148.00 | 411116.00 | -0.5% |
| Geomean difference | | | -0.1% |

Differential Revision: https://reviews.llvm.org/D136793
- Loading branch information
Showing
2 changed files
with
181 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
116 changes: 116 additions & 0 deletions
116
llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s | ||
--- | | ||
; Minimal LLVM IR module backing the MIR functions in this file. | ||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" | ||
|
||
; Global referenced by the STRXui operand in copy_from_smaller_rc_def. | ||
@x = external hidden local_unnamed_addr global i32*, align 8 | ||
; Empty IR stubs, one per MIR function defined below; the MIR bodies carry the actual test input. | ||
define void @copy_from_larger_rc_def() { ret void } | ||
define void @copy_from_larger_rc_def_multi_use() { ret void } | ||
define void @copy_from_smaller_rc_def() { ret void } | ||
|
||
... | ||
--- | ||
# Single-user case: the cross-register-class COPY should be removed by constraining the def. | ||
name: copy_from_larger_rc_def | ||
alignment: 4 | ||
legalized: true | ||
regBankSelected: true | ||
selected: true | ||
tracksRegLiveness: true | ||
liveins: | ||
- { reg: '$x0' } | ||
- { reg: '$w1' } | ||
- { reg: '$x2' } | ||
body: | | ||
bb.1: | ||
liveins: $w1, $x0, $x2 | ||
; Show that if we're doing a copy from a large register class (rc) to a single user | ||
; with a smaller rc, then we just constrain the def instead. | ||
; Expected result: %large_rc_def is re-classed from gpr64 to gpr64common, | ||
; %constrain_copy disappears, and ADDXri reads %large_rc_def directly. | ||
; CHECK-LABEL: name: copy_from_larger_rc_def | ||
; CHECK: liveins: $w1, $x0, $x2 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 | ||
; CHECK-NEXT: %large_rc_def:gpr64common = UBFMXri [[COPY]], 61, 60 | ||
; CHECK-NEXT: %add:gpr64sp = ADDXri %large_rc_def, 3, 0 | ||
; CHECK-NEXT: $x0 = COPY %add | ||
; CHECK-NEXT: RET_ReallyLR | ||
%0:gpr64 = COPY $x0 | ||
; Input: the def is in gpr64 and its only user is the COPY into gpr64common. | ||
%large_rc_def:gpr64 = UBFMXri %0, 61, 60 | ||
%constrain_copy:gpr64common = COPY %large_rc_def | ||
; Even though ADDXri may not actually need to use gpr64common, just use it as an example. | ||
%add:gpr64sp = ADDXri %constrain_copy, 3, 0 | ||
$x0 = COPY %add | ||
RET_ReallyLR | ||
... | ||
--- | ||
# Negative case: the def has more than one user, so the COPY must be left in place. | ||
name: copy_from_larger_rc_def_multi_use | ||
alignment: 4 | ||
legalized: true | ||
regBankSelected: true | ||
selected: true | ||
tracksRegLiveness: true | ||
liveins: | ||
- { reg: '$x0' } | ||
- { reg: '$w1' } | ||
- { reg: '$x2' } | ||
body: | | ||
bb.1: | ||
liveins: $w1, $x0, $x2 | ||
; Don't constrain def if the original def has multiple users. | ||
; Here %large_rc_def feeds both %constrain_copy and the copy into $x1, | ||
; so the expected output is the input unchanged. | ||
; CHECK-LABEL: name: copy_from_larger_rc_def_multi_use | ||
; CHECK: liveins: $w1, $x0, $x2 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 | ||
; CHECK-NEXT: %large_rc_def:gpr64 = UBFMXri [[COPY]], 61, 60 | ||
; CHECK-NEXT: %constrain_copy:gpr64common = COPY %large_rc_def | ||
; CHECK-NEXT: %add:gpr64sp = ADDXri %constrain_copy, 3, 0 | ||
; CHECK-NEXT: %add2:gpr64sp = ADDXri %constrain_copy, 3, 0 | ||
; CHECK-NEXT: $x0 = COPY %add | ||
; CHECK-NEXT: $x1 = COPY %large_rc_def | ||
; CHECK-NEXT: RET_ReallyLR | ||
%0:gpr64 = COPY $x0 | ||
%large_rc_def:gpr64 = UBFMXri %0, 61, 60 | ||
; First user of %large_rc_def. | ||
%constrain_copy:gpr64common = COPY %large_rc_def | ||
%add:gpr64sp = ADDXri %constrain_copy, 3, 0 | ||
%add2:gpr64sp = ADDXri %constrain_copy, 3, 0 | ||
$x0 = COPY %add | ||
; Second user of %large_rc_def; this is what makes the def multi-use. | ||
$x1 = COPY %large_rc_def | ||
RET_ReallyLR | ||
... | ||
--- | ||
# Small-to-large case: the COPY should be removed and its user rewritten to read the source vreg. | ||
name: copy_from_smaller_rc_def | ||
alignment: 4 | ||
legalized: true | ||
regBankSelected: true | ||
selected: true | ||
tracksRegLiveness: true | ||
# $x1 (not $w1) is declared live-in because the body reads the full 64-bit register. | ||
liveins: | ||
- { reg: '$x0' } | ||
- { reg: '$x1' } | ||
- { reg: '$x2' } | ||
body: | | ||
bb.1: | ||
liveins: $x0, $x1, $x2 | ||
; Show that if we're doing a copy from a small register class (rc) to a single user | ||
; with a larger rc, then we just use the smaller def instead of doing a copy. | ||
; Expected result: %copy disappears and STRXui reads %add directly. | ||
; CHECK-LABEL: name: copy_from_smaller_rc_def | ||
; CHECK: liveins: $x0, $x1, $x2 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 | ||
; CHECK-NEXT: %add:gpr64common = ADDXri [[COPY1]], 3, 0 | ||
; CHECK-NEXT: STRXui [[COPY1]], %add, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0)) | ||
; CHECK-NEXT: RET_ReallyLR | ||
%0:gpr64common = COPY $x0 | ||
%1:gpr64common = COPY $x1 | ||
%add:gpr64common = ADDXri %1, 3, 0 | ||
; Input: %add (gpr64common) is copied into gpr64sp solely for the store below. | ||
%copy:gpr64sp = COPY %add | ||
STRXui %1, %copy, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0)) | ||
RET_ReallyLR | ||
... |