Skip to content

Commit

Permalink
[AMDGPU] Fix failure in VCC spilling
Browse files Browse the repository at this point in the history
Spills of VCC (SGPR64) will fail with new SGPR spill code,
because super register is not correctly resolved.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D81224
  • Loading branch information
perlfu committed Jun 17, 2020
1 parent 547b6da commit ac8a2f1
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 2 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Expand Up @@ -938,12 +938,12 @@ void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI,
} else {
SavedExecReg =
getMatchingSuperReg(getSubReg(SuperReg, SplitParts[FirstPart]),
AMDGPU::sub0, &AMDGPU::SGPR_64RegClass);
AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass);
// If src/dst is an odd size it is possible subreg0 is not aligned.
if (!SavedExecReg && NumSubRegs > 2)
SavedExecReg =
getMatchingSuperReg(getSubReg(SuperReg, SplitParts[FirstPart + 1]),
AMDGPU::sub0, &AMDGPU::SGPR_64RegClass);
AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass);
}

assert(SavedExecReg);
Expand Down
181 changes: 181 additions & 0 deletions llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -0,0 +1,181 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX10 %s

--- |
define amdgpu_kernel void @check_vcc() #0 {
ret void
}

define amdgpu_kernel void @check_exec() #0 {
ret void
}

attributes #0 = { "frame-pointer"="all" }
...
---
name: check_vcc
tracksRegLiveness: true
liveins:
- { reg: '$sgpr4_sgpr5' }
- { reg: '$sgpr6_sgpr7' }
- { reg: '$sgpr8' }
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
machineFunctionInfo:
isEntryFunction: true
waveLimiter: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
dispatchPtr: { reg: '$sgpr4_sgpr5' }
kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
workGroupIDX: { reg: '$sgpr8' }
privateSegmentWaveByteOffset: { reg: '$sgpr9' }
body: |
bb.0:
liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
; CHECK-LABEL: name: check_vcc
; CHECK: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9
; GFX9: $sgpr33 = S_MOV_B32 0
; GFX9: $sgpr12 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr13 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr14 = S_MOV_B32 4294967295, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr15 = S_MOV_B32 14680064, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $vcc = IMPLICIT_DEF
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_lo, 0, undef $vgpr0, implicit $vcc
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_hi, 1, $vgpr0, implicit $vcc
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 32, $vgpr0
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 33, $vgpr0
; GFX9: $exec = S_MOV_B64 3
; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 32
; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 33
; GFX9: $vcc = IMPLICIT_DEF
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_lo, 0, undef $vgpr0, implicit $vcc
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_hi, 1, $vgpr0, implicit killed $vcc
; GFX9: $vcc = S_MOV_B64 $exec
; GFX9: $exec = S_MOV_B64 3
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX9: $exec = S_MOV_B64 killed $vcc
; GFX9: $vcc = S_MOV_B64 $exec
; GFX9: $exec = S_MOV_B64 3
; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GFX9: $exec = S_MOV_B64 killed $vcc
; GFX9: $vcc_lo = V_READLANE_B32_vi $vgpr0, 0, implicit-def $vcc
; GFX9: $vcc_hi = V_READLANE_B32_vi killed $vgpr0, 1
; GFX10: $sgpr33 = S_MOV_B32 0
; GFX10: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr99 = S_MOV_B32 836853760, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $vcc = IMPLICIT_DEF
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_lo, 0, undef $vgpr0, implicit $vcc
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_hi, 1, $vgpr0, implicit $vcc
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 32, $vgpr0
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 33, $vgpr0
; GFX10: $exec = S_MOV_B64 3
; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 32
; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 33
; GFX10: $vcc = IMPLICIT_DEF
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_lo, 0, undef $vgpr0, implicit $vcc
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_hi, 1, $vgpr0, implicit killed $vcc
; GFX10: $vcc = S_MOV_B64 $exec
; GFX10: $exec = S_MOV_B64 3
; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX10: $exec = S_MOV_B64 killed $vcc
; GFX10: $vcc = S_MOV_B64 $exec
; GFX10: $exec = S_MOV_B64 3
; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GFX10: $exec = S_MOV_B64 killed $vcc
; GFX10: $vcc_lo = V_READLANE_B32_gfx10 $vgpr0, 0, implicit-def $vcc
; GFX10: $vcc_hi = V_READLANE_B32_gfx10 killed $vgpr0, 1
$vcc = IMPLICIT_DEF
SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
$vcc = IMPLICIT_DEF
SI_SPILL_S64_SAVE killed $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
$vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
...
---
name: check_exec
tracksRegLiveness: true
liveins:
- { reg: '$sgpr4_sgpr5' }
- { reg: '$sgpr6_sgpr7' }
- { reg: '$sgpr8' }
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
machineFunctionInfo:
isEntryFunction: true
waveLimiter: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
dispatchPtr: { reg: '$sgpr4_sgpr5' }
kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
workGroupIDX: { reg: '$sgpr8' }
privateSegmentWaveByteOffset: { reg: '$sgpr9' }
body: |
bb.0:
liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
; CHECK-LABEL: name: check_exec
; CHECK: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9
; GFX9: $sgpr33 = S_MOV_B32 0
; GFX9: $sgpr12 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr13 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr14 = S_MOV_B32 4294967295, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr15 = S_MOV_B32 14680064, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 0, undef $vgpr0, implicit $exec
; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 1, $vgpr0, implicit $exec
; GFX9: $exec = S_MOV_B64 3
; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 0
; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 1
; GFX9: $exec = S_MOV_B64 3
; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 0, implicit-def $exec
; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 1
; GFX10: $sgpr33 = S_MOV_B32 0
; GFX10: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr99 = S_MOV_B32 836853760, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 0, undef $vgpr0, implicit $exec
; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 1, $vgpr0, implicit $exec
; GFX10: $exec = S_MOV_B64 3
; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 0
; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 1
; GFX10: $exec = S_MOV_B64 3
; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 0, implicit-def $exec
; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 1
SI_SPILL_S64_SAVE $exec, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
$exec = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
...

0 comments on commit ac8a2f1

Please sign in to comment.