Skip to content

Commit

Permalink
PostRAPseudos: Don't preserve kills on some implicit copy operands
Browse files Browse the repository at this point in the history
This fixes a verifier error I ran into at -O0. A subregister copy had
an implicit kill of an overlapping superregister, which was partially
redefined by the copy. The preserved implicit operand killed
subregisters made live earlier in the sequence. AMDGPU already uses
similar logic for whether to preserve the kill of the superregister on
the final instruction if there's overlap.
  • Loading branch information
arsenm committed Jan 18, 2022
1 parent 0d45944 commit 984451e
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 4 deletions.
13 changes: 10 additions & 3 deletions llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
Expand Up @@ -68,9 +68,16 @@ void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) {
MachineBasicBlock::iterator CopyMI = MI;
--CopyMI;

for (const MachineOperand &MO : MI->implicit_operands())
if (MO.isReg())
CopyMI->addOperand(MO);
Register DstReg = MI->getOperand(0).getReg();
for (const MachineOperand &MO : MI->implicit_operands()) {
CopyMI->addOperand(MO);

// Be conservative about preserving kills when subregister defs are
// involved. If there was an implicit kill of a super-register overlapping
// the copy result, preserving it would kill the subregisters that previous
// copies defined.
if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
}
}

bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
Expand Down
@@ -0,0 +1,24 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s

# The copy has an implicit kill of a superregister which overlaps the
# register it defines. We cannot preserve the kill flag on that tuple
# operand when copying implicit operands to the last inserted v_mov_b32,
# since it would kill the subregister defined earlier in the expansion.

---
name: copy_has_implicit_kill_superreg
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: copy_has_implicit_kill_superreg
; CHECK: renamable $vgpr7_vgpr8_vgpr9_vgpr10 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit-def $vgpr7_vgpr8, implicit $vgpr10_vgpr11
; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr10_vgpr11, implicit $vgpr7_vgpr8_vgpr9_vgpr10
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr7
renamable $vgpr7_vgpr8_vgpr9_vgpr10 = IMPLICIT_DEF
renamable $vgpr7_vgpr8 = COPY killed renamable $vgpr10_vgpr11, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10
S_ENDPGM 0, implicit $vgpr7
...
102 changes: 102 additions & 0 deletions llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll
@@ -0,0 +1,102 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s

; Testcase which happened to trigger a liveness verifier error
; Kernel that loads two <4 x i64> vectors from a null global (addrspace(1))
; pointer, adds them element-wise, and stores the sum back through the same
; null pointer. The RUN line builds it with -global-isel -O0 and
; -verify-machineinstrs, so the machine verifier runs on the generated code.
define amdgpu_kernel void @test_long_add4(<4 x i64> %arg) #0 {
; CHECK-LABEL: test_long_add4:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16
; CHECK-NEXT: ; kill: def $vgpr7_vgpr8_vgpr9_vgpr10 killed $vgpr7_vgpr8_vgpr9_vgpr10 def $vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 killed $exec
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v14, v3
; CHECK-NEXT: v_mov_b32_e32 v13, v2
; CHECK-NEXT: v_mov_b32_e32 v12, v1
; CHECK-NEXT: v_mov_b32_e32 v11, v0
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: global_load_dwordx4 v[18:21], v[0:1], off
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16
; CHECK-NEXT: ; kill: def $vgpr18_vgpr19_vgpr20_vgpr21 killed $vgpr18_vgpr19_vgpr20_vgpr21 def $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25 killed $exec
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v25, v3
; CHECK-NEXT: v_mov_b32_e32 v24, v2
; CHECK-NEXT: v_mov_b32_e32 v23, v1
; CHECK-NEXT: v_mov_b32_e32 v22, v0
; CHECK-NEXT: v_mov_b32_e32 v2, v7
; CHECK-NEXT: v_mov_b32_e32 v3, v8
; CHECK-NEXT: v_mov_b32_e32 v0, v9
; CHECK-NEXT: v_mov_b32_e32 v1, v10
; CHECK-NEXT: v_mov_b32_e32 v5, v11
; CHECK-NEXT: v_mov_b32_e32 v6, v12
; CHECK-NEXT: v_mov_b32_e32 v12, v13
; CHECK-NEXT: v_mov_b32_e32 v13, v14
; CHECK-NEXT: v_mov_b32_e32 v8, v18
; CHECK-NEXT: v_mov_b32_e32 v9, v19
; CHECK-NEXT: v_mov_b32_e32 v16, v20
; CHECK-NEXT: v_mov_b32_e32 v17, v21
; CHECK-NEXT: v_mov_b32_e32 v14, v22
; CHECK-NEXT: v_mov_b32_e32 v15, v23
; CHECK-NEXT: v_mov_b32_e32 v10, v24
; CHECK-NEXT: v_mov_b32_e32 v11, v25
; CHECK-NEXT: v_mov_b32_e32 v4, v2
; CHECK-NEXT: v_mov_b32_e32 v2, v3
; CHECK-NEXT: v_mov_b32_e32 v7, v8
; CHECK-NEXT: v_mov_b32_e32 v3, v9
; CHECK-NEXT: v_add_co_u32 v7, s6, v4, v7
; CHECK-NEXT: v_add_co_ci_u32_e64 v2, s6, v2, v3, s6
; CHECK-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; CHECK-NEXT: v_mov_b32_e32 v8, v2
; CHECK-NEXT: v_mov_b32_e32 v2, v0
; CHECK-NEXT: v_mov_b32_e32 v0, v1
; CHECK-NEXT: v_mov_b32_e32 v3, v16
; CHECK-NEXT: v_mov_b32_e32 v1, v17
; CHECK-NEXT: v_add_co_u32 v3, s6, v2, v3
; CHECK-NEXT: v_add_co_ci_u32_e64 v0, s6, v0, v1, s6
; CHECK-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
; CHECK-NEXT: v_mov_b32_e32 v4, v0
; CHECK-NEXT: v_mov_b32_e32 v1, v5
; CHECK-NEXT: v_mov_b32_e32 v0, v6
; CHECK-NEXT: v_mov_b32_e32 v5, v14
; CHECK-NEXT: v_mov_b32_e32 v2, v15
; CHECK-NEXT: v_add_co_u32 v1, s6, v1, v5
; CHECK-NEXT: v_add_co_ci_u32_e64 v0, s6, v0, v2, s6
; CHECK-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; CHECK-NEXT: v_mov_b32_e32 v2, v0
; CHECK-NEXT: v_mov_b32_e32 v5, v12
; CHECK-NEXT: v_mov_b32_e32 v0, v13
; CHECK-NEXT: v_mov_b32_e32 v9, v10
; CHECK-NEXT: v_mov_b32_e32 v6, v11
; CHECK-NEXT: v_add_co_u32 v5, s6, v5, v9
; CHECK-NEXT: v_add_co_ci_u32_e64 v0, s6, v0, v6, s6
; CHECK-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; CHECK-NEXT: v_mov_b32_e32 v6, v0
; CHECK-NEXT: ; kill: def $vgpr7_vgpr8 killed $vgpr7_vgpr8 def $vgpr7_vgpr8_vgpr9_vgpr10 killed $exec
; CHECK-NEXT: v_mov_b32_e32 v10, v4
; CHECK-NEXT: v_mov_b32_e32 v9, v3
; CHECK-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
; CHECK-NEXT: v_mov_b32_e32 v3, v5
; CHECK-NEXT: v_mov_b32_e32 v4, v6
; CHECK-NEXT: v_mov_b32_e32 v6, s5
; CHECK-NEXT: v_mov_b32_e32 v5, s4
; CHECK-NEXT: global_store_dwordx4 v[5:6], v[7:10], off
; CHECK-NEXT: s_mov_b64 s[4:5], 16
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: global_store_dwordx4 v0, v[1:4], s[4:5]
; CHECK-NEXT: s_endpgm
entry:
; Both loads read the same null address; the %arg parameter is unused.
%load0 = load <4 x i64>, <4 x i64> addrspace(1)* null, align 32
%load1 = load <4 x i64>, <4 x i64> addrspace(1)* null, align 32
%add = add <4 x i64> %load0, %load1
store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32
ret void
}

attributes #0 = { noinline optnone }
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr28560.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=i686-pc-linux -print-after=postrapseudos < %s 2>&1 | FileCheck %s

; CHECK: MOV8rr ${{[a-d]}}l, implicit killed $e[[R:[a-d]]]x, implicit-def $e[[R]]x
; CHECK: MOV8rr ${{[a-d]}}l, implicit $e[[R:[a-d]]]x, implicit-def $e[[R]]x
define i32 @foo(i32 %i, i32 %k, i8* %p) {
%f = icmp ne i32 %i, %k
%s = zext i1 %f to i8
Expand Down

0 comments on commit 984451e

Please sign in to comment.