diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2fb3957a1ca9d..7afae3f683c1a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7198,6 +7198,18 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
   Register DstReg = Inst.getOperand(0).getReg();
   const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
 
+  // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE and
+  // hope for the best.
+  if (Inst.isCopy() && DstReg.isPhysical() &&
+      RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
+    // TODO: Only works for 32 bit registers.
+    BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+            get(AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand(0).getReg())
+        .add(Inst.getOperand(1));
+    Inst.eraseFromParent();
+    return;
+  }
+
   if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
       NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
     // Instead of creating a copy where src and dst are the same register
diff --git a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
index ac196635b363a..5c1a709372042 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
@@ -4,19 +4,20 @@
 define amdgpu_cs <2 x i32> @f() {
 ; CHECK-LABEL: f:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_mov_b32 s0, 0
-; CHECK-NEXT:    s_mov_b32 s1, s0
-; CHECK-NEXT:    s_mov_b32 s2, s0
-; CHECK-NEXT:    s_mov_b32 s3, s0
-; CHECK-NEXT:    s_mov_b32 s4, s0
-; CHECK-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; CHECK-NEXT:    s_mov_b32 s5, s0
-; CHECK-NEXT:    v_mov_b32_e32 v2, s0
+; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    s_mov_b32 s5, s4
+; CHECK-NEXT:    s_mov_b32 s6, s4
+; CHECK-NEXT:    s_mov_b32 s7, s4
+; CHECK-NEXT:    s_mov_b32 s0, s4
+; CHECK-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; CHECK-NEXT:    s_mov_b32 s1, s4
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1]
-; CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; CHECK-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
+; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1]
+; CHECK-NEXT:    v_mov_b32_e32 v1, s4
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; CHECK-NEXT:    ; return to shader part epilog
 bb:
   %i = call <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir
new file mode 100644
index 0000000000000..4292e76f37096
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -mcpu=tonga -run-pass=si-fix-sgpr-copies --verify-machineinstrs -o - %s | FileCheck %s
+
+# Copy to $sgpr0 is disconnected and becomes an IMPLICIT_DEF
+# Inserted V_AND_B32 defines virtual register after use.
+
+---
+name: si_fix_sgpr_copies_breaks_function
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: si_fix_sgpr_copies_breaks_function
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+    ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed [[S_LSHR_B32_]]
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]]
+    ; CHECK-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed [[COPY1]], [[COPY2]], implicit $exec
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[S_MOV_B32_2]], [[V_XOR_B32_e64_]], implicit $exec
+    ; CHECK-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[V_AND_B32_e64_]], implicit $exec
+    ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
+    %0:sgpr_32 = COPY $sgpr0
+    %2:sreg_32 = S_MOV_B32 16
+    %3:sreg_32 = S_LSHR_B32 %0, killed %2, implicit-def dead $scc
+    %4:sreg_32 = COPY killed %3
+    %5:sreg_32 = S_MOV_B32 -32768
+    %7:vgpr_32 = COPY killed %5
+    %6:vgpr_32 = V_XOR_B32_e64 killed %4, %7, implicit $exec
+    %8:sreg_32 = S_MOV_B32 65535
+    %10:sreg_32 = COPY %6
+    %9:sreg_32 = S_AND_B32 killed %8, killed %10, implicit-def dead $scc
+    $sgpr0 = COPY %9
+    SI_RETURN_TO_EPILOG $sgpr0
+
+...