Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Add test showing reassociation potential.
Add a test case where some operands of a reassociable expression are defined in an earlier block. This can appear in practice, e.g., when computing the final reduction value after vectorization.
- Loading branch information
Showing
1 changed file
with
159 additions
and
0 deletions.
There are no files selected for viewing
159 changes: 159 additions & 0 deletions
159
llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
# RUN: llc -mtriple=arm64-apple-ios -o - -run-pass=machine-combiner %s | FileCheck %s | ||
|
||
# Embedded LLVM IR module backing the MIR functions below. Each `define` is a | ||
# placeholder that only returns undef; the machine code under test comes from | ||
# the MIR document whose `name:` matches the IR function name. @use is declared | ||
# because bb.2 of both MIR functions references it through TCRETURNdi. | ||
--- | | ||
define float @reassoicate_different_block(ptr %a, i1 %c) { | ||
ret float undef | ||
} | ||
|
||
define float @no_reassociate_different_block(ptr %a, i1 %c) { | ||
ret float undef | ||
} | ||
|
||
declare void @use() | ||
|
||
|
||
... | ||
# FIXME: Should reassociate the serialized reduction in bb.1 to improve parallelism. | ||
# | ||
# bb.0 loads four fpr128 values (%0-%3) through the pointer copied from $x0 and | ||
# branches on %5 (copied from $w1). bb.1 sums them with three FADDv4f32 | ||
# instructions that form a serial chain (%6 feeds %7, %7 feeds %8), so every | ||
# leaf operand of the chain is defined in the earlier block. bb.2 forwards the | ||
# same four values to @use in $q0-$q3. | ||
# The expected bb.1 below matches the input instruction for instruction, i.e. | ||
# the machine-combiner pass currently leaves the chain as written. | ||
--- | ||
name: reassoicate_different_block | ||
alignment: 4 | ||
tracksRegLiveness: true | ||
body: | | ||
; CHECK-LABEL: name: reassoicate_different_block | ||
; CHECK: bb.0: | ||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) | ||
; CHECK-NEXT: liveins: $x0, $w1 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 | ||
; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4) | ||
; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4) | ||
; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4) | ||
; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4) | ||
; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2 | ||
; CHECK-NEXT: B %bb.1 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.1: | ||
; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr | ||
; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr | ||
; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr | ||
; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub | ||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]] | ||
; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr | ||
; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]] | ||
; CHECK-NEXT: RET_ReallyLR implicit $s0 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.2: | ||
; CHECK-NEXT: $q0 = COPY [[LDRQui]] | ||
; CHECK-NEXT: $q1 = COPY [[LDRQui2]] | ||
; CHECK-NEXT: $q2 = COPY [[LDRQui1]] | ||
; CHECK-NEXT: $q3 = COPY [[LDRQui3]] | ||
; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 | ||
bb.0: | ||
successors: %bb.1, %bb.2 | ||
liveins: $x0, $w1 | ||
%5:gpr32 = COPY $w1 | ||
%4:gpr64common = COPY $x0 | ||
%0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4) | ||
%1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4) | ||
%2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4) | ||
%3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4) | ||
TBZW %5, 0, %bb.2 | ||
B %bb.1 | ||
bb.1: | ||
%6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr | ||
%7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr | ||
%8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr | ||
%9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr | ||
%10:gpr64all = COPY %9.dsub | ||
%12:fpr64 = COPY %10 | ||
%11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr | ||
$s0 = COPY %11 | ||
RET_ReallyLR implicit $s0 | ||
bb.2: | ||
$q0 = COPY %0 | ||
$q1 = COPY %2 | ||
$q2 = COPY %1 | ||
$q3 = COPY %3 | ||
TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 | ||
... | ||
# Reassociation of the reduction in bb.1 is not profitable, because LDRQui3 has a | ||
# much larger latency than the other loads. | ||
# | ||
# The first three fpr128 values (%0-%2) are loaded directly through the pointer | ||
# copied from $x0. The fourth (%3, captured as LDRQui3 in the expected output) | ||
# is loaded through an address produced by two dependent LDRXui loads (%6, then | ||
# %7), so it sits at the end of a chain of three loads. The FADDv4f32 chain in | ||
# bb.1 already consumes %3 in its last add, and the expected output keeps the | ||
# chain exactly as written in the input. | ||
--- | ||
name: no_reassociate_different_block | ||
alignment: 4 | ||
tracksRegLiveness: true | ||
body: | | ||
; CHECK-LABEL: name: no_reassociate_different_block | ||
; CHECK: bb.0: | ||
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) | ||
; CHECK-NEXT: liveins: $x0, $w1 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 | ||
; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4) | ||
; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4) | ||
; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4) | ||
; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY1]], 8 :: (load (s64)) | ||
; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64common = LDRXui killed [[LDRXui]], 0 :: (load (s64)) | ||
; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui killed [[LDRXui1]], 0 :: (load (s128), align 4) | ||
; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2 | ||
; CHECK-NEXT: B %bb.1 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.1: | ||
; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr | ||
; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr | ||
; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr | ||
; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub | ||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]] | ||
; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr | ||
; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]] | ||
; CHECK-NEXT: RET_ReallyLR implicit $s0 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.2: | ||
; CHECK-NEXT: $q0 = COPY [[LDRQui]] | ||
; CHECK-NEXT: $q1 = COPY [[LDRQui2]] | ||
; CHECK-NEXT: $q2 = COPY [[LDRQui1]] | ||
; CHECK-NEXT: $q3 = COPY [[LDRQui3]] | ||
; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 | ||
bb.0: | ||
successors: %bb.1, %bb.2 | ||
liveins: $x0, $w1 | ||
%5:gpr32 = COPY $w1 | ||
%4:gpr64common = COPY $x0 | ||
%0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4) | ||
%1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4) | ||
%2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4) | ||
%6:gpr64common = LDRXui %4, 8 :: (load (s64)) | ||
%7:gpr64common = LDRXui killed %6, 0 :: (load (s64)) | ||
%3:fpr128 = LDRQui killed %7, 0 :: (load (s128), align 4) | ||
TBZW %5, 0, %bb.2 | ||
B %bb.1 | ||
bb.1: | ||
%8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr | ||
%9:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %8, implicit $fpcr | ||
%10:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %9, implicit $fpcr | ||
%11:fpr128 = nofpexcept FADDPv4f32 %10, %10, implicit $fpcr | ||
%12:gpr64all = COPY %11.dsub | ||
%14:fpr64 = COPY %12 | ||
%13:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %14, implicit $fpcr | ||
$s0 = COPY %13 | ||
RET_ReallyLR implicit $s0 | ||
bb.2: | ||
$q0 = COPY %0 | ||
$q1 = COPY %2 | ||
$q2 = COPY %1 | ||
$q3 = COPY %3 | ||
TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 | ||
... | ||