diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index ebfea8e5581bf..e17a214b9a27d 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -2051,6 +2051,12 @@ bool RegisterCoalescer::joinCopy( } if (CP.getNewRC()) { + if (RegClassInfo.getNumAllocatableRegs(CP.getNewRC()) == 0) { + LLVM_DEBUG(dbgs() << "\tNo " << TRI->getRegClassName(CP.getNewRC()) + << " are available for allocation\n"); + return false; + } + auto SrcRC = MRI->getRegClass(CP.getSrcReg()); auto DstRC = MRI->getRegClass(CP.getDstReg()); unsigned SrcIdx = CP.getSrcIdx(); diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll new file mode 100644 index 0000000000000..f4665137393b3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s + +; Make sure the coalescer doesn't introduce any uses of +; vreg_1024. None are available to allocate with the register budget +; of this function. 
+ +define void @no_introduce_vreg_1024() #0 { +; CHECK-LABEL: no_introduce_vreg_1024: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def v[0:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v9, v0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_setpc_b64 s[30:31] + %tuple = call <8 x i32> asm sideeffect "; def $0","=v"() + %sub0 = extractelement <8 x i32> %tuple, i32 0 + %insert = insertelement <16 x i32> poison, i32 %sub0, i32 9 + call void asm sideeffect "; use $0","v"(<16 x i32> %insert) + ret void +} + +attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" } diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir new file mode 100644 index 0000000000000..1f414eb2d7868 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescer-avoid-coalesce-class-with-no-registers.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=register-coalescer -o - %s | FileCheck %s + +# The register budget for this function does not permit using 1024-bit +# registers. The coalescer should not introduce a 1024-bit virtual +# register which will fail to allocate. + +--- | + define void @no_introduce_vreg_1024() #0 { + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="10,10" } +... 
+--- +name: no_introduce_vreg_1024 +tracksRegLiveness: true +machineFunctionInfo: + occupancy: 10 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: no_introduce_vreg_1024 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub9:vreg_512 = COPY [[COPY]].sub0 + ; CHECK-NEXT: SI_RETURN implicit [[COPY1]] + %0:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + undef %1.sub9:vreg_512 = COPY %0.sub0 + SI_RETURN implicit %1 + +...