diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e5f0e3e631988..b1f8fdc210aff 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1313,6 +1313,64 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB,
   return Reg;
 }
 
+/// Look through a two-input REG_SEQUENCE whose result is built from one
+/// sub0 half and one sub1 half. If one half is a known constant zero and the
+/// other half is exactly half as wide as the result, return the unique
+/// instruction defining that other half; otherwise return nullptr.
+MachineInstr *
+SIInstrInfo::pierceThroughRegSequence(const MachineInstr &MI) const {
+  if (MI.getOpcode() != AMDGPU::REG_SEQUENCE || MI.getNumOperands() != 5)
+    return nullptr;
+
+  // The register class lookups below are only valid for virtual registers.
+  const Register DstReg = MI.getOperand(0).getReg();
+  if (!DstReg.isVirtual())
+    return nullptr;
+
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  Register SrcRegs[2];
+  MachineInstr *RealDefs[2] = {nullptr, nullptr};
+  bool SubRegIsZero[2] = {false, false};
+  for (unsigned I : {2, 4}) {
+    // Only a plain sub0/sub1 split is handled. Any other index, or the same
+    // index used twice, would leave one of the slots above unfilled.
+    const int64_t SubIdx = MI.getOperand(I).getImm();
+    if (SubIdx != AMDGPU::sub0 && SubIdx != AMDGPU::sub1)
+      return nullptr;
+    const unsigned ArrayIdx = SubIdx == AMDGPU::sub0 ? 0 : 1;
+    if (RealDefs[ArrayIdx])
+      return nullptr;
+
+    // A source read through a subregister index is not the full value its
+    // defining instruction produces, so it cannot stand in for that def.
+    const MachineOperand &SrcOp = MI.getOperand(I - 1);
+    const Register SrcReg = SrcOp.getReg();
+    if (!SrcReg.isVirtual() || SrcOp.getSubReg())
+      return nullptr;
+
+    // getUniqueVRegDef() returns null when there is no single definition.
+    MachineInstr *SrcDef = MRI.getUniqueVRegDef(SrcReg);
+    if (!SrcDef)
+      return nullptr;
+
+    int64_t ImmVal = 0;
+    SrcRegs[ArrayIdx] = SrcReg;
+    RealDefs[ArrayIdx] = SrcDef;
+    SubRegIsZero[ArrayIdx] =
+        getConstValDefinedInReg(*SrcDef, SrcReg, ImmVal) && ImmVal == 0;
+  }
+
+  const unsigned DstBits = MRI.getRegClass(DstReg)->MC->getSizeInBits();
+  for (unsigned I : {0, 1}) {
+    const unsigned Other = 1 - I;
+    if (SubRegIsZero[I] &&
+        MRI.getRegClass(SrcRegs[Other])->MC->getSizeInBits() * 2 == DstBits)
+      return RealDefs[Other];
+  }
+
+  return nullptr;
+}
+
 bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
                                           const Register Reg,
                                           int64_t &ImmVal) const {
@@ -10698,6 +10756,9 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   if (!Def)
     return false;
 
+  if (MachineInstr *RegSequenceDef = pierceThroughRegSequence(*Def))
+    Def = RegSequenceDef;
+
   // For S_OP that set SCC = DST!=0, do the transformation
   //
   // s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index c048b85b1e99a..1d5353bd225b2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -711,6 +711,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { } } + MachineInstr *pierceThroughRegSequence(const MachineInstr &MI) const; + static bool setsSCCifResultIsNonZero(const MachineInstr &MI) { switch (MI.getOpcode()) { case AMDGPU::S_ABSDIFF_I32: diff --git a/llvm/test/CodeGen/AMDGPU/redundant-cmp-reg-sequence.ll b/llvm/test/CodeGen/AMDGPU/redundant-cmp-reg-sequence.ll new file mode 100644 index 0000000000000..750a9027d47a4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/redundant-cmp-reg-sequence.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s +define amdgpu_ps i64 @ordertest(i64 inreg %val0) { +; CHECK-LABEL: ordertest: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_lshr_b32 s0, s1, 2 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] +; CHECK-NEXT: v_lshrrev_b64 v[0:1], v2, s[0:1] +; CHECK-NEXT: v_xor_b32_e32 v0, v2, v0 +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: ; return to shader part epilog + %shl = lshr i64 %val0, 34 + %result = and i64 %shl, 4294967295 + %cmp = icmp ne i64 %result, 0 + %zext = zext i1 %cmp to i64 + %param0 = lshr i64 %shl, %zext + %param = and i64 %param0, 4294967295 + %xory = xor i64 %zext, %param + ret i64 %xory +} diff --git a/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll b/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll index 0166d7ac7ddc2..1f965c16ef4f2 100644 --- a/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll +++ b/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll @@ -403,8 +403,6 @@ define amdgpu_ps i32 @bfe_i64(i64 inreg %val0) { ; CHECK: ; %bb.0: ; CHECK-NEXT: s_bfe_i64 s[2:3], s[0:1], 0x80000 ; CHECK-NEXT: s_and_b32 s0, s0, 0xff -; 
CHECK-NEXT: s_mov_b32 s1, 0 -; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 @@ -440,7 +438,6 @@ define amdgpu_ps i32 @bfe_u64(i64 inreg %val0) { ; CHECK: ; %bb.0: ; CHECK-NEXT: s_and_b32 s0, s0, 0xff ; CHECK-NEXT: s_mov_b32 s1, 0 -; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:1] ; CHECK-NEXT: ;;#ASMEND @@ -522,7 +519,6 @@ define amdgpu_ps i32 @bcnt164(i64 inreg %val0) { ; CHECK: ; %bb.0: ; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1] ; CHECK-NEXT: s_mov_b32 s1, 0 -; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:1] ; CHECK-NEXT: ;;#ASMEND