-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[AMDGPU] Add liverange split instructions into BB Prolog #117544
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/cdevadas/mi-flag-for-lr-split-insn
Are you sure you want to change the base?
[AMDGPU] Add liverange split instructions into BB Prolog #117544
Conversation
The COPY inserted for liverange split during sgpr-regalloc pipeline currently breaks the BB prolog during the subsequent vgpr-regalloc phase while spilling and/or splitting the vector liveranges. This patch fixes it by correctly including the LR split instructions during sgpr-regalloc and wwm-regalloc pipelines into the BB prolog.
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu. Author: Christudasan Devadasan (cdevadas). Changes: The COPY inserted for liverange split during sgpr-regalloc [...] Patch is 38.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117544.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a94d690297949..204a575e2f64c1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8956,6 +8956,30 @@ unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg,
return AMDGPU::COPY;
}
+bool SIInstrInfo::canAddToBBProlog(const MachineInstr &MI) const { // Queried by isBasicBlockPrologue to decide whether MI counts as prolog.
+ uint16_t Opcode = MI.getOpcode();
+ // SGPR spill and wwm-register spill opcodes always qualify.
+ if (isSGPRSpill(Opcode) || isWWMRegSpillOpcode(Opcode))
+ return true;
+
+ const MachineFunction *MF = MI.getMF();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ // Otherwise only a liverange split instruction (LRSplit flag set) or an
+ // IMPLICIT_DEF can qualify. The implicit def inserted for wwm-registers is
+ // included as it can appear at the bb begin; anything else is rejected here.
+ bool IsLRSplitInst = MI.getFlag(MachineInstr::LRSplit);
+ if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
+ return false;
+
+ Register Reg = MI.getOperand(0).getReg(); // NOTE(review): assumes operand 0 is a register for every LRSplit-flagged instruction - confirm.
+ if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
+ return IsLRSplitInst; // SGPR operand: only a liverange split qualifies, so an IMPLICIT_DEF of an SGPR does not.
+
+ return MFI->isWWMReg(Reg); // Non-SGPR operand: qualifies only when Reg is a wwm-register.
+}
+
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
Register Reg) const {
// We need to handle instructions which may be inserted during register
@@ -8964,20 +8988,16 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
// needed by the prolog. However, the insertions for scalar registers can
// always be placed at the BB top as they are independent of the exec mask
// value.
- const MachineFunction *MF = MI.getParent()->getParent();
bool IsNullOrVectorRegister = true;
if (Reg) {
+ const MachineFunction *MF = MI.getMF();
const MachineRegisterInfo &MRI = MF->getRegInfo();
IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
}
- uint16_t Opcode = MI.getOpcode();
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
return IsNullOrVectorRegister &&
- (isSGPRSpill(Opcode) || isWWMRegSpillOpcode(Opcode) ||
- (Opcode == AMDGPU::IMPLICIT_DEF &&
- MFI->isWWMReg(MI.getOperand(0).getReg())) ||
- (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
+ (canAddToBBProlog(MI) ||
+ (!MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e55418326a4bd0..ea1d16784645e1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1348,6 +1348,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool isBasicBlockPrologue(const MachineInstr &MI,
Register Reg = Register()) const override;
+ bool canAddToBBProlog(const MachineInstr &MI) const;
+
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsPt,
const DebugLoc &DL, Register Src,
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index 5dff660912e402..d7c38f26957677 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -176,39 +176,39 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s17, v7, 37
; CHECK-NEXT: v_readlane_b32 s18, v7, 38
; CHECK-NEXT: v_readlane_b32 s19, v7, 39
-; CHECK-NEXT: v_writelane_b32 v7, s4, 40
-; CHECK-NEXT: v_writelane_b32 v7, s5, 41
-; CHECK-NEXT: v_writelane_b32 v7, s6, 42
-; CHECK-NEXT: v_writelane_b32 v7, s7, 43
-; CHECK-NEXT: v_writelane_b32 v7, s8, 44
-; CHECK-NEXT: v_writelane_b32 v7, s9, 45
-; CHECK-NEXT: v_writelane_b32 v7, s10, 46
-; CHECK-NEXT: v_writelane_b32 v7, s11, 47
-; CHECK-NEXT: v_writelane_b32 v7, s12, 48
-; CHECK-NEXT: v_writelane_b32 v7, s13, 49
-; CHECK-NEXT: v_writelane_b32 v7, s14, 50
-; CHECK-NEXT: v_writelane_b32 v7, s15, 51
-; CHECK-NEXT: v_writelane_b32 v7, s16, 52
-; CHECK-NEXT: v_writelane_b32 v7, s17, 53
-; CHECK-NEXT: v_writelane_b32 v7, s18, 54
-; CHECK-NEXT: v_writelane_b32 v7, s19, 55
+; CHECK-NEXT: v_writelane_b32 v7, s4, 56
+; CHECK-NEXT: v_writelane_b32 v7, s5, 57
+; CHECK-NEXT: v_writelane_b32 v7, s6, 58
+; CHECK-NEXT: v_writelane_b32 v7, s7, 59
+; CHECK-NEXT: v_writelane_b32 v7, s8, 60
+; CHECK-NEXT: v_writelane_b32 v7, s9, 61
+; CHECK-NEXT: v_writelane_b32 v7, s10, 62
+; CHECK-NEXT: v_writelane_b32 v7, s11, 63
+; CHECK-NEXT: v_writelane_b32 v7, s52, 40
+; CHECK-NEXT: v_writelane_b32 v7, s53, 41
+; CHECK-NEXT: v_writelane_b32 v7, s54, 42
+; CHECK-NEXT: v_writelane_b32 v7, s55, 43
+; CHECK-NEXT: v_writelane_b32 v7, s56, 44
+; CHECK-NEXT: v_writelane_b32 v7, s57, 45
+; CHECK-NEXT: v_writelane_b32 v7, s58, 46
; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
-; CHECK-NEXT: v_writelane_b32 v7, s52, 56
-; CHECK-NEXT: v_writelane_b32 v6, s60, 0
-; CHECK-NEXT: v_writelane_b32 v7, s53, 57
-; CHECK-NEXT: v_writelane_b32 v6, s61, 1
-; CHECK-NEXT: v_writelane_b32 v7, s54, 58
-; CHECK-NEXT: v_writelane_b32 v6, s62, 2
-; CHECK-NEXT: v_writelane_b32 v7, s55, 59
-; CHECK-NEXT: v_writelane_b32 v6, s63, 3
-; CHECK-NEXT: v_writelane_b32 v7, s56, 60
-; CHECK-NEXT: v_writelane_b32 v6, s64, 4
-; CHECK-NEXT: v_writelane_b32 v7, s57, 61
-; CHECK-NEXT: v_writelane_b32 v6, s65, 5
-; CHECK-NEXT: v_writelane_b32 v7, s58, 62
-; CHECK-NEXT: v_writelane_b32 v6, s66, 6
-; CHECK-NEXT: v_writelane_b32 v7, s59, 63
-; CHECK-NEXT: v_writelane_b32 v6, s67, 7
+; CHECK-NEXT: v_writelane_b32 v7, s59, 47
+; CHECK-NEXT: v_writelane_b32 v6, s12, 0
+; CHECK-NEXT: v_writelane_b32 v7, s60, 48
+; CHECK-NEXT: v_writelane_b32 v6, s13, 1
+; CHECK-NEXT: v_writelane_b32 v7, s61, 49
+; CHECK-NEXT: v_writelane_b32 v6, s14, 2
+; CHECK-NEXT: v_writelane_b32 v7, s62, 50
+; CHECK-NEXT: v_writelane_b32 v6, s15, 3
+; CHECK-NEXT: v_writelane_b32 v7, s63, 51
+; CHECK-NEXT: v_writelane_b32 v6, s16, 4
+; CHECK-NEXT: v_writelane_b32 v7, s64, 52
+; CHECK-NEXT: v_writelane_b32 v6, s17, 5
+; CHECK-NEXT: v_writelane_b32 v7, s65, 53
+; CHECK-NEXT: v_writelane_b32 v6, s18, 6
+; CHECK-NEXT: v_writelane_b32 v7, s66, 54
+; CHECK-NEXT: v_writelane_b32 v6, s19, 7
+; CHECK-NEXT: v_writelane_b32 v7, s67, 55
; CHECK-NEXT: s_andn2_saveexec_b64 s[20:21], s[26:27]
; CHECK-NEXT: s_cbranch_execz .LBB0_10
; CHECK-NEXT: ; %bb.4: ; %bb32
@@ -264,35 +264,39 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_6: ; %Flow12
-; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[22:23]
-; CHECK-NEXT: v_readlane_b32 s52, v7, 40
-; CHECK-NEXT: v_readlane_b32 s53, v7, 41
-; CHECK-NEXT: v_readlane_b32 s54, v7, 42
-; CHECK-NEXT: v_readlane_b32 s55, v7, 43
-; CHECK-NEXT: v_readlane_b32 s56, v7, 44
-; CHECK-NEXT: v_readlane_b32 s57, v7, 45
-; CHECK-NEXT: v_readlane_b32 s58, v7, 46
-; CHECK-NEXT: v_readlane_b32 s59, v7, 47
-; CHECK-NEXT: v_readlane_b32 s60, v7, 48
-; CHECK-NEXT: v_readlane_b32 s61, v7, 49
-; CHECK-NEXT: v_readlane_b32 s62, v7, 50
-; CHECK-NEXT: v_readlane_b32 s63, v7, 51
-; CHECK-NEXT: v_readlane_b32 s64, v7, 52
-; CHECK-NEXT: v_readlane_b32 s65, v7, 53
-; CHECK-NEXT: v_readlane_b32 s66, v7, 54
-; CHECK-NEXT: v_readlane_b32 s67, v7, 55
-; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[22:23]
; CHECK-NEXT: s_cbranch_execz .LBB0_9
; CHECK-NEXT: ; %bb.7: ; %bb33.preheader
; CHECK-NEXT: s_mov_b32 s8, 0
; CHECK-NEXT: s_mov_b32 s6, s8
+; CHECK-NEXT: v_readlane_b32 s36, v7, 40
; CHECK-NEXT: s_mov_b32 s7, s8
; CHECK-NEXT: v_mov_b32_e32 v1, s6
-; CHECK-NEXT: v_readlane_b32 s36, v7, 56
+; CHECK-NEXT: v_readlane_b32 s37, v7, 41
; CHECK-NEXT: s_mov_b32 s9, s8
; CHECK-NEXT: s_mov_b32 s10, s8
; CHECK-NEXT: s_mov_b32 s11, s8
; CHECK-NEXT: v_mov_b32_e32 v2, s7
+; CHECK-NEXT: v_readlane_b32 s38, v7, 42
+; CHECK-NEXT: v_readlane_b32 s39, v7, 43
+; CHECK-NEXT: v_readlane_b32 s40, v7, 44
+; CHECK-NEXT: v_readlane_b32 s41, v7, 45
+; CHECK-NEXT: v_readlane_b32 s42, v7, 46
+; CHECK-NEXT: v_readlane_b32 s43, v7, 47
+; CHECK-NEXT: v_readlane_b32 s44, v7, 48
+; CHECK-NEXT: v_readlane_b32 s45, v7, 49
+; CHECK-NEXT: v_readlane_b32 s46, v7, 50
+; CHECK-NEXT: v_readlane_b32 s47, v7, 51
+; CHECK-NEXT: v_readlane_b32 s48, v7, 52
+; CHECK-NEXT: v_readlane_b32 s49, v7, 53
+; CHECK-NEXT: v_readlane_b32 s50, v7, 54
+; CHECK-NEXT: v_readlane_b32 s51, v7, 55
+; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37]
+; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43]
+; CHECK-NEXT: image_sample_lz v3, v[1:2], s[36:43], s[8:11] dmask:0x1
+; CHECK-NEXT: v_readlane_b32 s36, v7, 56
; CHECK-NEXT: v_readlane_b32 s37, v7, 57
; CHECK-NEXT: v_readlane_b32 s38, v7, 58
; CHECK-NEXT: v_readlane_b32 s39, v7, 59
@@ -300,26 +304,20 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s41, v7, 61
; CHECK-NEXT: v_readlane_b32 s42, v7, 62
; CHECK-NEXT: v_readlane_b32 s43, v7, 63
-; CHECK-NEXT: s_nop 4
-; CHECK-NEXT: image_sample_lz v3, v[1:2], s[36:43], s[8:11] dmask:0x1
-; CHECK-NEXT: image_sample_lz v4, v[1:2], s[52:59], s[8:11] dmask:0x1
; CHECK-NEXT: ; kill: killed $vgpr1_vgpr2
-; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37]
; CHECK-NEXT: s_and_b64 vcc, exec, 0
; CHECK-NEXT: v_readlane_b32 s44, v6, 0
; CHECK-NEXT: v_readlane_b32 s45, v6, 1
; CHECK-NEXT: v_readlane_b32 s46, v6, 2
; CHECK-NEXT: v_readlane_b32 s47, v6, 3
+; CHECK-NEXT: image_sample_lz v4, v[1:2], s[36:43], s[8:11] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s48, v6, 4
; CHECK-NEXT: v_readlane_b32 s49, v6, 5
; CHECK-NEXT: v_readlane_b32 s50, v6, 6
; CHECK-NEXT: v_readlane_b32 s51, v6, 7
-; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43]
; CHECK-NEXT: ; kill: killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
; CHECK-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10 killed $sgpr11
-; CHECK-NEXT: ; kill: killed $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59
+; CHECK-NEXT: ; kill: killed $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v1, v4, v3
; CHECK-NEXT: v_mul_f32_e32 v0, v1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index c6ee557d970cd7..b376648b29d7d8 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -41,20 +41,22 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr60 = COPY $sgpr15
+ ; CHECK-NEXT: renamable $sgpr62 = COPY $sgpr14
; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: renamable $sgpr36_sgpr37 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: renamable $sgpr38_sgpr39 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: renamable $sgpr40_sgpr41 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: renamable $sgpr60 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr64 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr42_sgpr43 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, implicit $exec
; CHECK-NEXT: renamable $sgpr44_sgpr45 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: renamable $sgpr61 = S_MOV_B32 1083786240
+ ; CHECK-NEXT: renamable $sgpr65 = S_MOV_B32 1083786240
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.17(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr16, $sgpr60, $sgpr62, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79:0x0000000F00000000
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr44_sgpr45, implicit-def dead $scc
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]]
@@ -63,50 +65,48 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.5(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr64 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr65 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr66 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr67 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr68 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr69 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr70 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr71 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr72 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr73 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr74 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr75 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr80 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr81 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr82 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr83 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr84 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr85 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr86 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr87 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr92 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr93 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr94 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr95 = COPY renamable $sgpr60
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $exec
+ ; CHECK-NEXT: liveins: $sgpr16, $sgpr60, $sgpr62, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79:0x0000000F00000000
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr68 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr69 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr70 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr71 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr72 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr73 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr74 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr75 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr80 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr81 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr82 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr83 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr84 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr85 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr86 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr87 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr92 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr93 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr94 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr95 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr96 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr97 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr98 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr99 = COPY renamable $sgpr64
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit $exec
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16
+ ; CHECK-NEXT: liv...
[truncated]
|
The COPY inserted for liverange split during sgpr-regalloc
pipeline currently breaks the BB prolog during the subsequent
vgpr-regalloc phase while spilling and/or splitting the vector
liveranges. This patch fixes it by correctly including the
LR split instructions during sgpr-regalloc and wwm-regalloc
pipelines into the BB prolog.