diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 358abec1e06c2..4084619240c54 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1394,6 +1394,8 @@ static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_A160_SAVE;
   case 24:
     return AMDGPU::SI_SPILL_A192_SAVE;
+  case 28:
+    return AMDGPU::SI_SPILL_A224_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_A256_SAVE;
   case 64:
@@ -1531,6 +1533,8 @@ static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_A160_RESTORE;
   case 24:
     return AMDGPU::SI_SPILL_A192_RESTORE;
+  case 28:
+    return AMDGPU::SI_SPILL_A224_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_A256_RESTORE;
   case 64:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index d44c8c48a2468..7fd0765cbbe66 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -814,6 +814,13 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   case AMDGPU::SI_SPILL_A256_SAVE:
   case AMDGPU::SI_SPILL_A256_RESTORE:
     return 8;
+  case AMDGPU::SI_SPILL_S224_SAVE:
+  case AMDGPU::SI_SPILL_S224_RESTORE:
+  case AMDGPU::SI_SPILL_V224_SAVE:
+  case AMDGPU::SI_SPILL_V224_RESTORE:
+  case AMDGPU::SI_SPILL_A224_SAVE:
+  case AMDGPU::SI_SPILL_A224_RESTORE:
+    return 7;
   case AMDGPU::SI_SPILL_S192_SAVE:
   case AMDGPU::SI_SPILL_S192_RESTORE:
   case AMDGPU::SI_SPILL_V192_SAVE:
@@ -1473,6 +1480,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S224_SAVE:
   case AMDGPU::SI_SPILL_S192_SAVE:
   case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
@@ -1483,6 +1491,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   case AMDGPU::SI_SPILL_S1024_RESTORE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S224_RESTORE:
   case AMDGPU::SI_SPILL_S192_RESTORE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
@@ -1519,6 +1528,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S224_SAVE:
   case AMDGPU::SI_SPILL_S192_SAVE:
   case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
@@ -1533,6 +1543,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   case AMDGPU::SI_SPILL_S1024_RESTORE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S224_RESTORE:
   case AMDGPU::SI_SPILL_S192_RESTORE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
@@ -1547,6 +1558,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   case AMDGPU::SI_SPILL_V1024_SAVE:
   case AMDGPU::SI_SPILL_V512_SAVE:
   case AMDGPU::SI_SPILL_V256_SAVE:
+  case AMDGPU::SI_SPILL_V224_SAVE:
   case AMDGPU::SI_SPILL_V192_SAVE:
   case AMDGPU::SI_SPILL_V160_SAVE:
   case AMDGPU::SI_SPILL_V128_SAVE:
@@ -1556,6 +1568,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   case AMDGPU::SI_SPILL_A1024_SAVE:
   case AMDGPU::SI_SPILL_A512_SAVE:
   case AMDGPU::SI_SPILL_A256_SAVE:
+  case AMDGPU::SI_SPILL_A224_SAVE:
   case AMDGPU::SI_SPILL_A192_SAVE:
   case AMDGPU::SI_SPILL_A160_SAVE:
   case AMDGPU::SI_SPILL_A128_SAVE:
@@ -1584,6 +1597,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   case AMDGPU::SI_SPILL_V128_RESTORE:
   case AMDGPU::SI_SPILL_V160_RESTORE:
   case AMDGPU::SI_SPILL_V192_RESTORE:
+  case AMDGPU::SI_SPILL_V224_RESTORE:
   case AMDGPU::SI_SPILL_V256_RESTORE:
   case AMDGPU::SI_SPILL_V512_RESTORE:
   case AMDGPU::SI_SPILL_V1024_RESTORE:
@@ -1593,6 +1607,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   case AMDGPU::SI_SPILL_A128_RESTORE:
   case AMDGPU::SI_SPILL_A160_RESTORE:
   case AMDGPU::SI_SPILL_A192_RESTORE:
+  case AMDGPU::SI_SPILL_A224_RESTORE:
   case AMDGPU::SI_SPILL_A256_RESTORE:
   case AMDGPU::SI_SPILL_A512_RESTORE:
   case AMDGPU::SI_SPILL_A1024_RESTORE: {
diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir
new file mode 100644
index 0000000000000..e8d6a80e84f95
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill224.mir
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s
+
+# Make sure spill/restore of 224 bit registers works.
+
+---
+name: spill_restore_sgpr224
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  ; SPILLED-LABEL: name: spill_restore_sgpr224
+  ; SPILLED: bb.0:
+  ; SPILLED:   successors: %bb.1(0x80000000)
+  ; SPILLED:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; SPILLED:   SI_SPILL_S224_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, %stack.0, implicit $exec, implicit $sgpr32 :: (store 28 into %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; SPILLED: bb.1:
+  ; SPILLED:   successors: %bb.2(0x80000000)
+  ; SPILLED:   S_NOP 1
+  ; SPILLED: bb.2:
+  ; SPILLED:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 28 from %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED-LABEL: name: spill_restore_sgpr224
+  ; EXPANDED: bb.0:
+  ; EXPANDED:   successors: %bb.1(0x80000000)
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 6, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; EXPANDED: bb.1:
+  ; EXPANDED:   successors: %bb.2(0x80000000)
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   S_NOP 1
+  ; EXPANDED: bb.2:
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $sgpr5 = V_READLANE_B32 $vgpr0, 1
+  ; EXPANDED:   $sgpr6 = V_READLANE_B32 $vgpr0, 2
+  ; EXPANDED:   $sgpr7 = V_READLANE_B32 $vgpr0, 3
+  ; EXPANDED:   $sgpr8 = V_READLANE_B32 $vgpr0, 4
+  ; EXPANDED:   $sgpr9 = V_READLANE_B32 $vgpr0, 5
+  ; EXPANDED:   $sgpr10 = V_READLANE_B32 $vgpr0, 6
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  bb.0:
+    S_NOP 0, implicit-def %0:sgpr_224
+    S_CBRANCH_SCC1 implicit undef $scc, %bb.1
+
+  bb.1:
+    S_NOP 1
+
+  bb.2:
+    S_NOP 0, implicit %0
+...
+
+---
+name: spill_restore_vgpr224
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  ; SPILLED-LABEL: name: spill_restore_vgpr224
+  ; SPILLED: bb.0:
+  ; SPILLED:   successors: %bb.1(0x80000000)
+  ; SPILLED:   S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  ; SPILLED:   SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; SPILLED: bb.1:
+  ; SPILLED:   successors: %bb.2(0x80000000)
+  ; SPILLED:   S_NOP 1
+  ; SPILLED: bb.2:
+  ; SPILLED:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  ; EXPANDED-LABEL: name: spill_restore_vgpr224
+  ; EXPANDED: bb.0:
+  ; EXPANDED:   successors: %bb.1(0x80000000)
+  ; EXPANDED:   S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  ; EXPANDED:   SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5)
+  ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; EXPANDED: bb.1:
+  ; EXPANDED:   successors: %bb.2(0x80000000)
+  ; EXPANDED:   S_NOP 1
+  ; EXPANDED: bb.2:
+  ; EXPANDED:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5)
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_224
+    S_CBRANCH_SCC1 implicit undef $scc, %bb.1
+
+  bb.1:
+    S_NOP 1
+
+  bb.2:
+    S_NOP 0, implicit %0
+...