diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 2216acf128bfa..60cd9d4c3c35a 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -152,6 +152,13 @@ class SIFixSGPRCopies : public MachineFunctionPass { void processPHINode(MachineInstr &MI); + // Check if MO is an immediate materialized into a VGPR, and if so replace it + // with an SGPR immediate. The VGPR immediate is also deleted if it does not + // have any other uses. + bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst, + MachineBasicBlock *BlockToInsertTo, + MachineBasicBlock::iterator PointToInsertTo); + StringRef getPassName() const override { return "SI Fix SGPR copies"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -662,13 +669,17 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { : MBB; MachineBasicBlock::iterator PointToInsertCopy = MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I; - MachineInstr *NewCopy = - BuildMI(*BlockToInsertCopy, PointToInsertCopy, - PointToInsertCopy->getDebugLoc(), - TII->get(AMDGPU::COPY), NewDst) - .addReg(MO.getReg()); - MO.setReg(NewDst); - analyzeVGPRToSGPRCopy(NewCopy); + + if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy, + PointToInsertCopy)) { + MachineInstr *NewCopy = + BuildMI(*BlockToInsertCopy, PointToInsertCopy, + PointToInsertCopy->getDebugLoc(), + TII->get(AMDGPU::COPY), NewDst) + .addReg(MO.getReg()); + MO.setReg(NewDst); + analyzeVGPRToSGPRCopy(NewCopy); + } } } } @@ -829,6 +840,32 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) { } } +bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR( + MachineOperand &MaybeVGPRConstMO, Register DstReg, + MachineBasicBlock *BlockToInsertTo, + MachineBasicBlock::iterator PointToInsertTo) { + + MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg()); + if (!DefMI || !DefMI->isMoveImmediate()) + return false; + + MachineOperand *SrcConst = TII->getNamedOperand(*DefMI, AMDGPU::OpName::src0); + if (SrcConst->isReg()) + return false; + + const TargetRegisterClass *SrcRC = + MRI->getRegClass(MaybeVGPRConstMO.getReg()); + unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC); + unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; + BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(), + TII->get(MoveOp), DstReg) + .add(*SrcConst); + if (MRI->hasOneUse(MaybeVGPRConstMO.getReg())) + DefMI->eraseFromParent(); + MaybeVGPRConstMO.setReg(DstReg); + return true; +} + bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI, MachineBasicBlock::iterator &I) { Register DstReg = MI.getOperand(0).getReg(); @@ -846,25 +883,10 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI, TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg) .add(MI.getOperand(1)); MI.getOperand(1).setReg(TmpReg); - } else { - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI && DefMI->isMoveImmediate()) { - MachineOperand SrcConst = DefMI->getOperand(AMDGPU::getNamedOperandIdx( - DefMI->getOpcode(), AMDGPU::OpName::src0)); - if (!SrcConst.isReg()) { - const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg); - unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC); - unsigned MoveOp = - MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; - BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MoveOp), - DstReg) - .add(SrcConst); - I = std::next(I); - if (MRI->hasOneUse(SrcReg)) - DefMI->eraseFromParent(); - MI.eraseFromParent(); - } - } + } else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(), + MI)) { + I = std::next(I); + MI.eraseFromParent(); } return true; } diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir index 36fa95c4c3ab5..ff02c6ba698a3 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir @@ -125,7 +125,62 @@ body: | %2:sreg_32 = S_MOV_B32 1 %3:sreg_32 = COPY %1:vgpr_32 %4:sreg_32 = S_CSELECT_B32 killed %2:sreg_32, killed %3:sreg_32, implicit undef $scc +... + +--- +# Test that the VGPR immediate is replaced with an SGPR one. +# GCN-LABEL: name: reg_sequence_vgpr_immediate +# GCN: [[A_SGPR:%[0-9]+]]:sreg_32 = IMPLICIT_DEF +# GCN-NEXT: [[VGPR_CONST:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 37 +# GCN-NEXT: [[SGPR_CONST:%[0-9]+]]:sgpr_32 = S_MOV_B32 37 +# GCN-NEXT: {{%[0-9]+}}:sreg_64 = REG_SEQUENCE [[SGPR_CONST]], %subreg.sub0, [[A_SGPR]], %subreg.sub1 +name: reg_sequence_vgpr_immediate +body: | + bb.0: + %0:sreg_32 = IMPLICIT_DEF + %1:vgpr_32 = V_MOV_B32_e32 37, implicit $exec + %2:sreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %0:sreg_32, %subreg.sub1 + + %3:vgpr_32 = V_ADD_U32_e32 %1:vgpr_32, %1:vgpr_32, implicit $exec +... + +--- +# GCN-LABEL: name: insert_subreg_vgpr_immediate +# GCN: [[DST:%[0-9]+]]:sgpr_128 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr0, %subreg.sub2 +# GCN-NEXT: [[SGPR_CONST:%[0-9]+]]:sgpr_32 = S_MOV_B32 43 +# GCN-NEXT: {{%[0-9]+}}:sgpr_128 = INSERT_SUBREG [[DST]], [[SGPR_CONST]], %subreg.sub3 +name: insert_subreg_vgpr_immediate +body: | + bb.0: + %0:sgpr_128 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr0, %subreg.sub2 + %1:vgpr_32 = V_MOV_B32_e32 43, implicit $exec + %2:sgpr_128 = INSERT_SUBREG %0, %1, %subreg.sub3 +... + --- +# GCN-LABEL: name: phi_vgpr_immediate +# GCN: bb.1: +# GCN: [[SGPR:%[0-9]+]]:sgpr_32 = S_MOV_B32 51 +# GCN: bb.2: +# GCN: IMPLICIT_DEF +# GCN: bb.3: +# GCN: sreg_32 = PHI [[SGPR]], %bb.1 +name: phi_vgpr_immediate +tracksRegLiveness: true +body: | + bb.0: + S_CBRANCH_SCC1 %bb.2, implicit undef $scc + + bb.1: + %0:vgpr_32 = V_MOV_B32_e32 51, implicit $exec + S_BRANCH %bb.3 + + bb.2: + %1:sreg_32 = IMPLICIT_DEF + S_BRANCH %bb.3 + + bb.3: + %2:sreg_32 = PHI %0:vgpr_32, %bb.1, %1:sreg_32, %bb.2 --- name: cmp_f32