diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp index 62172a0bb89db..04d9b818a6287 100644 --- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp +++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp @@ -270,15 +270,15 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) { continue; Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); - if (Dst.isVirtual() && - MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass && - Src.isPhysical() && + const TargetRegisterClass *DstRC = TRI->getRegClassForReg(*MRI, Dst); + bool IsDst16Bit = DstRC == &AMDGPU::VGPR_16RegClass || + DstRC == &AMDGPU::VGPR_16_Lo128RegClass; + if (Dst.isVirtual() && IsDst16Bit && Src.isPhysical() && TRI->getRegClassForReg(*MRI, Src) == &AMDGPU::VGPR_32RegClass) MRI->setRegAllocationHint(Dst, 0, TRI->getSubReg(Src, AMDGPU::lo16)); if (Src.isVirtual() && MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass && - Dst.isPhysical() && - TRI->getRegClassForReg(*MRI, Dst) == &AMDGPU::VGPR_32RegClass) + Dst.isPhysical() && DstRC == &AMDGPU::VGPR_32RegClass) MRI->setRegAllocationHint(Src, 0, TRI->getSubReg(Dst, AMDGPU::lo16)); if (!Dst.isVirtual() || !Src.isVirtual()) continue; @@ -287,8 +287,7 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) { MRI->setRegAllocationHint(Dst, AMDGPURI::Size32, Src); MRI->setRegAllocationHint(Src, AMDGPURI::Size16, Dst); } - if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass && - MRI->getRegClass(Src) == &AMDGPU::VGPR_32RegClass) + if (IsDst16Bit && MRI->getRegClass(Src) == &AMDGPU::VGPR_32RegClass) MRI->setRegAllocationHint(Dst, AMDGPURI::Size16, Src); } } diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll index f84dbce8f5b56..8d1d08d1e7c68 100644 --- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll @@ -205,9 +205,7 @@ define half @test_fmaak(half %x, half %y, half %z) { ; GFX11-SDAG-TRUE16-LABEL: test_fmaak: ; GFX11-SDAG-TRUE16: ; %bb.0: ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v0.h, 0x4200 +; GFX11-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v1.l, 0x4200 ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: test_fmaak: @@ -235,9 +233,7 @@ define half @test_fmaak(half %x, half %y, half %z) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v0.h, 0x4200 +; GFX12-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v1.l, 0x4200 ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-FAKE16-LABEL: test_fmaak: @@ -298,9 +294,7 @@ define half @test_fmamk(half %x, half %y, half %z) { ; GFX11-SDAG-TRUE16-LABEL: test_fmamk: ; GFX11-SDAG-TRUE16: ; %bb.0: ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v2.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: test_fmamk: @@ -330,9 +324,7 @@ define half @test_fmamk(half %x, half %y, half %z) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v2.l ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-FAKE16-LABEL: test_fmamk: diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll index b25b9b994ea09..95d2f07402dd4 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll @@ -3815,8 +3815,7 @@ define half @v_fma_mul_add_32_f16(half %x, half %y) { ; GFX11-SDAG-TRUE16-LABEL: v_fma_mul_add_32_f16: ; GFX11-SDAG-TRUE16: ; %bb.0: ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x5000, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x5000, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: v_fma_mul_add_32_f16: @@ -3915,8 +3914,7 @@ define half @v_fma_mul_add_neg32_f16(half %x, half %y) { ; GFX11-SDAG-TRUE16-LABEL: v_fma_mul_add_neg32_f16: ; GFX11-SDAG-TRUE16: ; %bb.0: ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0xd000, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0xd000, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: v_fma_mul_add_neg32_f16: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index 75721ee3800f6..385d76bc42bda 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -485,9 +485,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_a( ; GFX11-DENORM-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-DENORM-TRUE16-NEXT: s_mov_b32 s8, s0 ; GFX11-DENORM-TRUE16-NEXT: s_mov_b32 s9, s1 -; GFX11-DENORM-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l -; GFX11-DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h +; GFX11-DENORM-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v1.l ; GFX11-DENORM-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-DENORM-TRUE16-NEXT: s_endpgm ; @@ -719,9 +717,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; GFX11-DENORM-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-DENORM-TRUE16-NEXT: s_mov_b32 s8, s0 ; GFX11-DENORM-TRUE16-NEXT: s_mov_b32 s9, s1 -; GFX11-DENORM-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l -; GFX11-DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h +; GFX11-DENORM-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v1.l ; GFX11-DENORM-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 ; GFX11-DENORM-TRUE16-NEXT: s_endpgm ;