diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp index 907f8300de6d2..396d64625fb5c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp @@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) { if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc)))) return RALSrc; + // RALSrc = G_ANYEXT S16Src + // TruncSrc = G_AMDGPU_READANYLANE RALSrc + // Src = G_TRUNC TruncSrc + if (mi_match(Src, MRI, + m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) { + return RALSrc; + } + // TruncSrc = G_AMDGPU_READANYLANE RALSrc // AextSrc = G_TRUNC TruncSrc // Src = G_ANYEXT AextSrc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll index 4361e5c113708..27005e7aa175e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll @@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a ; GFX11-True16-NEXT: v_mov_b32_e32 v1, s0 ; GFX11-True16-NEXT: ds_load_u16_d16 v1, v1 ; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-True16-NEXT: v_readfirstlane_b32 s0, v1 -; GFX11-True16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-True16-NEXT: v_mov_b16_e32 v1.l, s0 ; GFX11-True16-NEXT: ds_store_b16 v0, v1 ; GFX11-True16-NEXT: s_endpgm ; @@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a ; GFX12-True16-NEXT: v_mov_b32_e32 v1, s0 ; GFX12-True16-NEXT: ds_load_u16_d16 v1, v1 ; GFX12-True16-NEXT: s_wait_dscnt 0x0 -; GFX12-True16-NEXT: v_readfirstlane_b32 s0, v1 -; GFX12-True16-NEXT: s_wait_alu 0xf1ff -; GFX12-True16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-True16-NEXT: v_mov_b16_e32 v1.l, s0 ; GFX12-True16-NEXT: ds_store_b16 v0, v1 ; GFX12-True16-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll index bf36deac33380..9bf140cf744db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll @@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mov_b16_e32 v2.l, s0 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_endpgm ; @@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mov_b16_e32 v2.l, s0 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_endpgm ;