diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 1e3562b37d87c..e1647b76702c4 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -902,14 +902,28 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
     // really much we can do to fix this.
     // Some special instructions use M0 as an input. Some even only use
     // the first lane. Insert a readfirstlane and hope for the best.
-    if (DstReg == AMDGPU::M0 &&
-        TRI->hasVectorRegisters(MRI->getRegClass(SrcReg))) {
+    const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
+    if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
       Register TmpReg =
           MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
-              TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
+      // The descriptor is reused below to look up the source operand class.
+      const MCInstrDesc &ReadFirstLaneDesc =
+          TII->get(AMDGPU::V_READFIRSTLANE_B32);
+      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), ReadFirstLaneDesc, TmpReg)
           .add(MI.getOperand(1));
+      // Save the subregister index before the operand is rewritten to TmpReg.
+      unsigned SubReg = MI.getOperand(1).getSubReg();
       MI.getOperand(1).setReg(TmpReg);
+      MI.getOperand(1).setSubReg(AMDGPU::NoSubRegister);
+      // Constrain SrcReg using the class TII reports for the source operand.
+      const TargetRegisterClass *OpRC = TII->getRegClass(ReadFirstLaneDesc, 1);
+      const TargetRegisterClass *ConstrainRC =
+          SubReg == AMDGPU::NoSubRegister
+              ? OpRC
+              : TRI->getMatchingSuperRegClass(SrcRC, OpRC, SubReg);
+      // Failing to constrain is treated as unreachable; there is no fallback.
+      if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
+        llvm_unreachable("failed to constrain register");
     } else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
                                       MI, MI.getDebugLoc())) {
       I = std::next(I);
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
index b05b89fe503f2..116f46df01049 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
@@ -49,4 +49,19 @@ bb16: ; preds = %bb16, %bb
   br label %bb16
 }
 
-
+define void @av_class_to_m0(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: av_class_to_m0:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_load_dword v0, v[0:1], off
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_readfirstlane_b32 s4, v0
+; CHECK-NEXT: s_mov_b32 m0, s4
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use m0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+  %load = load i32, ptr addrspace(1) %ptr
+  call void asm sideeffect "; use $0", "{m0}"(i32 %load)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
index ac4f41282ab73..03e3ff95bbad2 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
@@ -90,3 +90,22 @@ body: |
     S_ENDPGM 0
 ...
 
+---
+name: constrain_readfirstlane_av64_subreg_m0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: constrain_readfirstlane_av64_subreg_m0
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]].sub0, implicit $exec
+    ; CHECK-NEXT: $m0 = COPY [[V_READFIRSTLANE_B32_]]
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:av_64 = COPY $vgpr0_vgpr1
+    $m0 = COPY %1.sub0
+...
+