-
Notifications
You must be signed in to change notification settings - Fork 10.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SystemZ: Use REG_SEQUENCE for PAIR128 #90640
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-systemz Author: Matt Arsenault (arsenm). Changes: PAIR128 should probably just be removed entirely. Depends on #90638. Full diff: https://github.com/llvm/llvm-project/pull/90640.diff — 7 Files Affected:
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index d569a082cebe06..727a98c41bce4c 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -1264,25 +1264,32 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
// If we have a COPY with other uses in the loop, hoist to allow the users to
// also be hoisted.
- Register DefReg;
- if (MI.isCopy() && (DefReg = MI.getOperand(0).getReg()).isVirtual() &&
- MI.getOperand(1).getReg().isVirtual() &&
- IsLoopInvariantInst(MI, CurLoop) &&
- any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()),
- [&CurLoop, this, DefReg, Cost](MachineInstr &UseMI) {
- if (!CurLoop->contains(&UseMI))
- return false;
-
- // COPY is a cheap instruction, but if moving it won't cause high
- // RP we're fine to hoist it even if the user can't be hoisted
- // later Otherwise we want to check the user if it's hoistable
- if (CanCauseHighRegPressure(Cost, false) &&
- !CurLoop->isLoopInvariant(UseMI, DefReg))
- return false;
-
- return true;
- }))
- return true;
+ // TODO: Handle all isCopyLike?
+ if (MI.isCopy() || MI.isRegSequence()) {
+ Register DefReg = MI.getOperand(0).getReg();
+ if (DefReg.isVirtual() &&
+ all_of(MI.uses(),
+ [](const MachineOperand &UseOp) {
+ return !UseOp.isReg() || UseOp.getReg().isVirtual();
+ }) &&
+ IsLoopInvariantInst(MI, CurLoop) &&
+ any_of(MRI->use_nodbg_instructions(DefReg),
+ [&CurLoop, this, DefReg, Cost](MachineInstr &UseMI) {
+ if (!CurLoop->contains(&UseMI))
+ return false;
+
+ // COPY is a cheap instruction, but if moving it won't cause
+ // high RP we're fine to hoist it even if the user can't be
+ // hoisted later. Otherwise we want to check the user if it's
+ // hoistable.
+ if (CanCauseHighRegPressure(Cost, false) &&
+ !CurLoop->isLoopInvariant(UseMI, DefReg))
+ return false;
+
+ return true;
+ }))
+ return true;
+ }
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2da4431cf077eb..cef5ca412794e0 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8768,23 +8768,15 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
MachineBasicBlock *
SystemZTargetLowering::emitPair128(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- DebugLoc DL = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
Register Dest = MI.getOperand(0).getReg();
- Register Hi = MI.getOperand(1).getReg();
- Register Lo = MI.getOperand(2).getReg();
- Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
- Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
-
- BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
- BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
- .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
- BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
- .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
-
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
+ .add(MI.getOperand(1))
+ .addImm(SystemZ::subreg_h64)
+ .add(MI.getOperand(2))
+ .addImm(SystemZ::subreg_l64);
MI.eraseFromParent();
return MBB;
}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
index f5c2bd6286cb8e..41a883302e8f70 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
@@ -8907,17 +8907,17 @@ define amdgpu_kernel void @atomic_min_i64(ptr addrspace(1) %out, i64 %in) {
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x0
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: v_mov_b32_e32 v4, s3
; SI-NEXT: v_mov_b32_e32 v5, s2
-; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v2, s10
-; SI-NEXT: v_mov_b32_e32 v3, s11
+; SI-NEXT: v_mov_b32_e32 v2, s4
+; SI-NEXT: v_mov_b32_e32 v3, s5
; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: .LBB127_1: ; %atomicrmw.start
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/machinelicm-copy-like-instrs.mir b/llvm/test/CodeGen/AMDGPU/machinelicm-copy-like-instrs.mir
new file mode 100644
index 00000000000000..e9945f005d2645
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machinelicm-copy-like-instrs.mir
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn -run-pass=early-machinelicm -simplify-mir -o - %s | FileCheck %s
+
+# Test that machine LICM hoists loop-invariant copy-like instructions
+# (REG_SEQUENCE and INSERT_SUBREG with all-virtual-register operands).
+
+---
+name: licm_reg_sequence
+body: |
+ ; CHECK-LABEL: name: licm_reg_sequence
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]]
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ successors: %bb.1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+
+ %3:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
+ S_NOP 0, implicit %3
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ $vgpr0 = COPY %3
+ S_ENDPGM 0
+
+...
+
+# Don't bother handling reg_sequence with physreg uses (is there any
+# reason for these to be legal?)
+---
+name: licm_reg_sequence_physreg_use
+body: |
+ ; CHECK-LABEL: name: licm_reg_sequence_physreg_use
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, $vgpr1, %subreg.sub1
+ ; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]]
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ successors: %bb.1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ liveins: $vgpr0
+
+ %3:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, $vgpr1, %subreg.sub1
+ S_NOP 0, implicit %3
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ $vgpr0 = COPY %3
+ S_ENDPGM 0
+
+...
+
+---
+name: licm_insert_subreg
+body: |
+ ; CHECK-LABEL: name: licm_insert_subreg
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[INSERT_SUBREG]], [[COPY1]], %subreg.sub1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: S_NOP 0, implicit [[INSERT_SUBREG1]]
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT_SUBREG1]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ successors: %bb.1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+
+ %3:vreg_64 = IMPLICIT_DEF
+ %4:vreg_64 = INSERT_SUBREG %3, %0, %subreg.sub0
+ %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1
+ S_NOP 0, implicit %5
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ $vgpr0_vgpr1 = COPY %5
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
index 7c351d2b8443b1..a50a0766f67c2c 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
@@ -8,9 +8,10 @@ define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s10, -1
; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: s_mov_b32 s11, s7
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s9, s5
; GCN-NEXT: s_mov_b32 s8, s4
+; GCN-NEXT: s_mov_b32 s9, s5
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %loop.exit.guard
@@ -20,7 +21,6 @@ define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
; GCN-NEXT: .LBB0_2: ; %bb1
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB0_4 Depth 2
-; GCN-NEXT: s_mov_b32 s11, s7
; GCN-NEXT: buffer_load_dword v1, off, s[8:11], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, v1
diff --git a/llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll b/llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll
index 867ce3b930f8fc..69ba266227265c 100644
--- a/llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll
+++ b/llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll
@@ -69,6 +69,7 @@ b18: ; preds = %b16, %b7
br label %b22
b21: ; preds = %b22
+ store volatile <64 x i32> %v20, ptr null
tail call void @sammy() #3
br label %b7
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll
index 604fd50baadebb..c088f6d862e7cb 100644
--- a/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll
@@ -372,8 +372,8 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) {
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
; CHECK-NEXT: vlgvg %r1, %v3, 1
; CHECK-NEXT: vlgvg %r0, %v3, 0
-; CHECK-NEXT: vlgvg %r5, %v4, 1
-; CHECK-NEXT: vlgvg %r4, %v4, 0
+; CHECK-NEXT: vlgvg %r5, %v5, 1
+; CHECK-NEXT: vlgvg %r4, %v5, 0
; CHECK-NEXT: cdsg %r0, %r4, 0(%r3)
; CHECK-NEXT: vlvgp %v3, %r0, %r1
; CHECK-NEXT: je .LBB12_8
@@ -386,19 +386,19 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) {
; CHECK-NEXT: vchlgs %v4, %v3, %v0
; CHECK-NEXT: .LBB12_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
-; CHECK-NEXT: vlr %v5, %v0
+; CHECK-NEXT: vlr %v4, %v0
; CHECK-NEXT: jl .LBB12_6
; CHECK-NEXT: # %bb.5: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
-; CHECK-NEXT: vaq %v5, %v3, %v1
+; CHECK-NEXT: vaq %v4, %v3, %v1
; CHECK-NEXT: .LBB12_6: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
-; CHECK-NEXT: vceqgs %v4, %v3, %v2
-; CHECK-NEXT: vlr %v4, %v0
+; CHECK-NEXT: vceqgs %v5, %v3, %v2
+; CHECK-NEXT: vlr %v5, %v0
; CHECK-NEXT: je .LBB12_1
; CHECK-NEXT: # %bb.7: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
-; CHECK-NEXT: vlr %v4, %v5
+; CHECK-NEXT: vlr %v5, %v4
; CHECK-NEXT: j .LBB12_1
; CHECK-NEXT: .LBB12_8: # %atomicrmw.end
; CHECK-NEXT: vst %v3, 0(%r2), 3
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, if this gets the same code generated, this is fine with me. Should emitExt128 also use REG_SEQUENCE, then?
PAIR128 should probably just be removed entirely
2dc6644
to
dd64498
Compare
PAIR128 should probably just be removed entirely
Depends #90638