diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 30405059530e99..5b33a49c11a92d 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -88,6 +88,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator I = SaveBlock.begin(); if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const CalleeSavedInfo &CS : CSI) { // Insert the spill to the stack frame. MCRegister Reg = CS.getReg(); @@ -96,8 +98,13 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, MVT::i32); - TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, - TRI); + // If this value was already livein, we probably have a direct use of the + // incoming register value, so don't kill at the spill point. This happens + // since we pass some special inputs (workgroup IDs) in the callee saved + // range. + const bool IsLiveIn = MRI.isLiveIn(Reg); + TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), + RC, TRI); if (LIS) { assert(std::distance(MIS.begin(), I) == 1); diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir new file mode 100644 index 00000000000000..1d18bab3d097da --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir @@ -0,0 +1,20 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck %s + +--- +name: spill_csr_sgpr_argument +tracksRegLiveness: true +liveins: + - { reg: '$sgpr50' } +body: | + bb.0: + liveins: $sgpr50 + ; CHECK-LABEL: name: spill_csr_sgpr_argument + ; CHECK: liveins: $sgpr50, $vgpr0 + ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr50, 0, $vgpr0 + ; CHECK: S_NOP 0, implicit $sgpr50 + ; CHECK: $sgpr50 = S_MOV_B32 0 + S_NOP 0, implicit $sgpr50 + $sgpr50 = S_MOV_B32 0 + +...