-
Notifications
You must be signed in to change notification settings - Fork 11.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix late rematerialization operands check
D106408 enables rematerialization of instructions with virtual register uses. That has uncovered a bug in the allUsesAvailableAt implementation: https://bugs.llvm.org/show_bug.cgi?id=51516. In the majority of cases canRematerializeAt() is called to check if an instruction can be rematerialized before the given UseIdx. However, SplitEditor::enterIntvAtEnd() calls it to rematerialize an instruction at the end of a block, passing LIS.getMBBEndIdx() into the check. In the testcase from the bug it attempted to rematerialize ADDXri after STRXui in bb.17. The use operand %55 of the ADD is killed by the STRX, but that goes undetected by the check because it adjusts the passed UseIdx to the reg slot, which is before the kill; the value is, however, dead at the index actually passed to the check. This change uses the later of the passed UseIdx and its reg slot. This is correct because if we are checking the availability of operands before an instruction, that instruction cannot be the one defining those operands; and if we are checking for late rematerialization, we are really interested in whether the operands live past the instruction. The bug is not exploitable without D106408, but the fix is needed to reland the reverted D106408. Differential Revision: https://reviews.llvm.org/D108475
- Loading branch information
Showing
3 changed files
with
323 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s | ||
|
||
# Check that we spill %31 and do not rematerialize it since the use operand | ||
# of ADDXri is killed by the STRXui in this block. | ||
|
||
# CHECK-LABEL: name: test | ||
# CHECK: bb.17: | ||
# CHECK: STRXui | ||
# CHECK: LDRXui | ||
# CHECK: bb.18: | ||
|
||
--- | ||
name: test | ||
tracksRegLiveness: true | ||
stack: | ||
- { id: 0, type: variable-sized, offset: 0, alignment: 32, | ||
stack-id: default } | ||
body: | | ||
bb.0.entry: | ||
successors: %bb.2(0x40000000), %bb.1(0x40000000) | ||
liveins: $x0 | ||
%22:gpr64common = COPY $x0 | ||
CBZW $wzr, %bb.2 | ||
bb.1: | ||
successors: %bb.3(0x80000000) | ||
%1:gpr64 = COPY $xzr | ||
B %bb.3 | ||
bb.2: | ||
successors: %bb.3(0x80000000) | ||
$x0 = IMPLICIT_DEF | ||
%1:gpr64 = COPY $x0 | ||
bb.3: | ||
successors: %bb.4(0x30000000), %bb.5(0x50000000) | ||
%2:gpr64common = COPY %1 | ||
CBNZX %1, %bb.5 | ||
B %bb.4 | ||
bb.4: | ||
successors: %bb.5(0x80000000) | ||
%31:gpr64common = SUBXri $sp, 288, 0 | ||
$sp = ANDXri %31, 7930 | ||
%2:gpr64common = COPY $xzr | ||
bb.5: | ||
successors: %bb.6(0x80000000) | ||
%9:gpr64common = COPY $xzr | ||
%35:gpr64sp = ADDXri %2, 32, 0 | ||
%4:gpr64common = UBFMXri %2, 3, 63 | ||
%37:gpr64 = MOVi64imm -506381209866536712 | ||
%38:gpr32 = MOVi32imm -202116109 | ||
%39:gpr64common = nuw ADDXri %22, 836, 0 | ||
%41:gpr64common = nuw ADDXri %22, 648, 0 | ||
STRXui %37, %4, 1 | ||
STRWui %38, %4, 8 | ||
%8:gpr64common = UBFMXri %39, 3, 63 | ||
%7:gpr64common = UBFMXri %41, 3, 63 | ||
%47:gpr64 = MOVi64imm 0 | ||
bb.6: | ||
successors: %bb.8(0x30000000), %bb.7(0x50000000) | ||
%44:gpr64common = ADDXrr %22, %9 | ||
%10:gpr64common = ADDXri %44, 648, 0 | ||
%11:gpr64common = ANDSXri %10, 4098, implicit-def $nzcv | ||
Bcc 0, %bb.8, implicit killed $nzcv | ||
B %bb.7 | ||
bb.7: | ||
successors: %bb.8(0x80000000) | ||
BL 0, csr_aarch64_aapcs, implicit-def dead $lr | ||
bb.8: | ||
successors: %bb.9(0x04000000), %bb.24(0x7c000000) | ||
CBNZW $wzr, %bb.24 | ||
B %bb.9 | ||
bb.9: | ||
successors: %bb.10(0x7ffff800), %bb.11(0x00000800) | ||
%55:gpr64common = ADDXrr %22, %9 | ||
%56:gpr64sp = ADDXri %55, 648, 0 | ||
CBZX %11, %bb.10 | ||
B %bb.11 | ||
bb.10: | ||
successors: %bb.11(0x80000000) | ||
$x0 = ADDXri %55, 648, 0 | ||
$x2 = IMPLICIT_DEF | ||
$w1 = COPY $wzr | ||
$x1 = nuw ADDXri %35, 32, 0 | ||
BL 0, csr_aarch64_aapcs, implicit-def dead $lr | ||
%66:gpr64sp = nuw ADDXri %35, 48, 0 | ||
$x0 = ADDXri %55, 696, 0 | ||
$x2 = IMPLICIT_DEF | ||
$x1 = COPY %66 | ||
bb.11: | ||
successors: %bb.15(0x7ffff800), %bb.12(0x00000800) | ||
CBZX %11, %bb.15 | ||
B %bb.12 | ||
bb.12: | ||
successors: %bb.13(0x00000000), %bb.14(0x80000000) | ||
CBNZW $wzr, %bb.14 | ||
B %bb.13 | ||
bb.13: | ||
successors: | ||
bb.14: | ||
successors: %bb.18(0x80000000) | ||
$x1 = COPY %56 | ||
B %bb.18 | ||
bb.15: | ||
successors: %bb.16(0x00000000), %bb.17(0x80000000) | ||
%76:gpr32 = LDRBBui %7, 0 | ||
CBZW %76, %bb.17 | ||
B %bb.16 | ||
bb.16: | ||
successors: | ||
%74:gpr64common = ADDXrr %22, %9 | ||
%15:gpr64sp = ADDXri %74, 648, 0 | ||
$x0 = COPY %15 | ||
bb.17: | ||
successors: %bb.18(0x80000000) | ||
STRXui %22, %55, 81 | ||
bb.18: | ||
successors: %bb.19(0x80000000), %bb.20(0x00000000) | ||
%79:gpr32 = LDRBBui %8, 0 | ||
CBNZW %79, %bb.20 | ||
B %bb.19 | ||
bb.19: | ||
successors: %bb.21(0x80000000), %bb.20(0x00000000) | ||
%80:gpr32 = MOVi32imm 1 | ||
CBNZW %80, %bb.21 | ||
B %bb.20 | ||
bb.20: | ||
successors: | ||
%16:gpr64sp = ADDXri %22, 836, 0 | ||
$x0 = COPY %16 | ||
bb.21: | ||
successors: %bb.24(0x00000000), %bb.22(0x80000000) | ||
CBZW $wzr, %bb.24 | ||
bb.22: | ||
successors: %bb.26(0x80000000) | ||
B %bb.26 | ||
bb.24: | ||
successors: %bb.25(0x04000000), %bb.6(0x7c000000) | ||
%8:gpr64common = ADDXri %8, 24, 0 | ||
%7:gpr64common = ADDXri %7, 24, 0 | ||
CBNZW $wzr, %bb.6 | ||
bb.25: | ||
successors: %bb.26(0x80000000) | ||
%56:gpr64sp = COPY $xzr | ||
bb.26: | ||
successors: %bb.28(0x50000000), %bb.27(0x30000000) | ||
undef %83.sub_32:gpr64 = MOVi32imm 1172321806 | ||
STRXui %83, %2, 0 | ||
CBNZX %1, %bb.28 | ||
B %bb.27 | ||
bb.27: | ||
successors: %bb.28(0x80000000) | ||
STRXui $xzr, %4, 0 | ||
bb.28: | ||
$x0 = COPY %56 | ||
RET_ReallyLR implicit $x0 | ||
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,1 -o - %s | FileCheck -check-prefix=GCN %s | ||
|
||
# Check that %3 was not rematerialized before the last store since its operand %1 | ||
# is killed by that store. | ||
|
||
# GCN-LABEL: name: global_sextload_v32i32_to_v32i64 | ||
# GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) | ||
# GCN: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr27, killed renamable $vgpr19_vgpr20_vgpr21_vgpr22, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec, implicit killed renamable $vgpr0 | ||
|
||
--- | ||
name: global_sextload_v32i32_to_v32i64 | ||
tracksRegLiveness: true | ||
machineFunctionInfo: | ||
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' | ||
stackPtrOffsetReg: '$sgpr32' | ||
body: | | ||
bb.0: | ||
liveins: $sgpr4_sgpr5 | ||
%0:sgpr_64(p4) = COPY $sgpr4_sgpr5 | ||
%1:vgpr_32 = COPY $m0 | ||
%2:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 0, 0 | ||
%3:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec | ||
%4:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 112, 0, implicit $exec :: (load (s128)) | ||
%5:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 96, 0, implicit $exec, implicit %1 :: (load (s128)) | ||
%6:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 80, 0, implicit $exec :: (load (s128)) | ||
%7:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 64, 0, implicit $exec :: (load (s128)) | ||
%8:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 48, 0, implicit $exec :: (load (s128)) | ||
%9:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 32, 0, implicit $exec :: (load (s128)) | ||
%10:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 16, 0, implicit $exec :: (load (s128)) | ||
%11:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 0, 0, implicit $exec :: (load (s128)) | ||
undef %12.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub3, implicit $exec | ||
%12.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub2, implicit $exec | ||
undef %13.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub1, implicit $exec | ||
%13.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub0, implicit $exec | ||
undef %14.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub3, implicit $exec | ||
%14.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub2, implicit $exec | ||
undef %15.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub1, implicit $exec | ||
%15.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub0, implicit $exec | ||
undef %16.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub3, implicit $exec | ||
%16.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub2, implicit $exec | ||
undef %17.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub1, implicit $exec | ||
%17.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub0, implicit $exec | ||
undef %18.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub3, implicit $exec | ||
%18.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub2, implicit $exec | ||
undef %19.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub1, implicit $exec | ||
%19.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub0, implicit $exec | ||
undef %20.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub3, implicit $exec | ||
%20.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub2, implicit $exec | ||
undef %21.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub1, implicit $exec | ||
%21.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub0, implicit $exec | ||
undef %22.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub3, implicit $exec | ||
%22.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub2, implicit $exec | ||
undef %23.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub1, implicit $exec | ||
%23.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub0, implicit $exec | ||
undef %24.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub3, implicit $exec | ||
%24.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub2, implicit $exec | ||
undef %25.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub1, implicit $exec | ||
%25.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub0, implicit $exec | ||
undef %26.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub3, implicit $exec | ||
%26.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub2, implicit $exec | ||
undef %27.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub1, implicit $exec | ||
%27.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub0, implicit $exec | ||
%27.sub0:vreg_128 = COPY %4.sub0 | ||
%27.sub2:vreg_128 = COPY %4.sub1 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %27, %2.sub0_sub1, 224, 0, implicit $exec | ||
%26.sub0:vreg_128 = COPY %4.sub2 | ||
%26.sub2:vreg_128 = COPY %4.sub3 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %26, %2.sub0_sub1, 240, 0, implicit $exec | ||
%25.sub0:vreg_128 = COPY %5.sub0 | ||
%25.sub2:vreg_128 = COPY %5.sub1 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %25, %2.sub0_sub1, 192, 0, implicit $exec | ||
%24.sub0:vreg_128 = COPY %5.sub2 | ||
%24.sub2:vreg_128 = COPY %5.sub3 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %24, %2.sub0_sub1, 208, 0, implicit $exec | ||
%23.sub0:vreg_128 = COPY %6.sub0 | ||
%23.sub2:vreg_128 = COPY %6.sub1 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %23, %2.sub0_sub1, 160, 0, implicit $exec | ||
%22.sub0:vreg_128 = COPY %6.sub2 | ||
%22.sub2:vreg_128 = COPY %6.sub3 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %22, %2.sub0_sub1, 176, 0, implicit $exec | ||
%21.sub0:vreg_128 = COPY %7.sub0 | ||
%21.sub2:vreg_128 = COPY %7.sub1 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %21, %2.sub0_sub1, 128, 0, implicit $exec | ||
%20.sub0:vreg_128 = COPY %7.sub2 | ||
%20.sub2:vreg_128 = COPY %7.sub3 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %20, %2.sub0_sub1, 144, 0, implicit $exec | ||
%19.sub0:vreg_128 = COPY %8.sub0 | ||
%19.sub2:vreg_128 = COPY %8.sub1 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %19, %2.sub0_sub1, 96, 0, implicit $exec | ||
%18.sub0:vreg_128 = COPY %8.sub2 | ||
%18.sub2:vreg_128 = COPY %8.sub3 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %18, %2.sub0_sub1, 112, 0, implicit $exec | ||
%17.sub0:vreg_128 = COPY %9.sub0 | ||
%17.sub2:vreg_128 = COPY %9.sub1 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %17, %2.sub0_sub1, 64, 0, implicit $exec | ||
%16.sub0:vreg_128 = COPY %9.sub2 | ||
%16.sub2:vreg_128 = COPY %9.sub3 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %16, %2.sub0_sub1, 80, 0, implicit $exec | ||
%15.sub0:vreg_128 = COPY %10.sub0 | ||
%15.sub2:vreg_128 = COPY %10.sub1 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %15, %2.sub0_sub1, 32, 0, implicit $exec | ||
%14.sub0:vreg_128 = COPY %10.sub2 | ||
%14.sub2:vreg_128 = COPY %10.sub3 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %14, %2.sub0_sub1, 48, 0, implicit $exec | ||
%13.sub0:vreg_128 = COPY %11.sub0 | ||
%13.sub2:vreg_128 = COPY %11.sub1 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %13, %2.sub0_sub1, 0, 0, implicit $exec | ||
%12.sub0:vreg_128 = COPY %11.sub2 | ||
%12.sub2:vreg_128 = COPY %11.sub3 | ||
GLOBAL_STORE_DWORDX4_SADDR %3, %12, %2.sub0_sub1, 16, 0, implicit $exec, implicit killed %1 | ||
S_ENDPGM 0 | ||
... |