diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1cf95c5b522f12..c56c69d723b514 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -598,12 +598,21 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .widenScalarToNextPow2(0, 32) .widenScalarToNextPow2(1, 32); - getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE}) + getActionDefinitionsBuilder(G_BITREVERSE) .legalFor({S32}) .clampScalar(0, S32, S32) .scalarize(0); if (ST.has16BitInsts()) { + getActionDefinitionsBuilder(G_BSWAP) + .legalFor({S16, S32, V2S16}) + .clampMaxNumElements(0, S16, 2) + // FIXME: Fixing non-power-of-2 before clamp is a workaround for + // narrowScalar limitation. + .widenScalarToNextPow2(0) + .clampScalar(0, S16, S32) + .scalarize(0); + if (ST.hasVOP3PInsts()) { getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX}) .legalFor({S32, S16, V2S16}) @@ -620,6 +629,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); } } else { + // TODO: Should have the same legality without v_perm_b32 + getActionDefinitionsBuilder(G_BSWAP) + .legalFor({S32}) + .lowerIf(narrowerThan(0, 32)) + // FIXME: Fixing non-power-of-2 before clamp is a workaround for + // narrowScalar limitation. 
+ .widenScalarToNextPow2(0) + .maxScalar(0, S32) + .scalarize(0) + .lower(); + getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX}) .legalFor({S32}) .clampScalar(0, S32, S32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll index a374369478d123..b14b984ff9ac10 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll @@ -341,32 +341,26 @@ define <2 x i64> @v_bswap_v2i64(<2 x i64> %src) { define amdgpu_ps i16 @s_bswap_i16(i16 inreg %src) { ; GFX7-LABEL: s_bswap_i16: ; GFX7: ; %bb.0: -; GFX7-NEXT: v_alignbit_b32 v0, s0, s0, 8 -; GFX7-NEXT: v_alignbit_b32 v1, s0, s0, 24 -; GFX7-NEXT: s_mov_b32 s0, 0xff00ff -; GFX7-NEXT: v_bfi_b32 v0, s0, v1, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: s_lshl_b32 s1, s0, 8 +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_lshr_b32 s0, s0, 8 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_bswap_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: s_mov_b32 s0, 0x10203 +; GFX8-NEXT: s_mov_b32 s0, 0xc0c0001 ; GFX8-NEXT: v_perm_b32 v0, 0, v0, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_bswap_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: s_mov_b32 s0, 0x10203 +; GFX9-NEXT: s_mov_b32 s0, 0xc0c0001 ; GFX9-NEXT: v_perm_b32 v0, 0, v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff -; GFX9-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog %bswap = call i16 
@llvm.bswap.i16(i16 %src) @@ -380,27 +374,24 @@ define i16 @v_bswap_i16(i16 %src) { ; GFX7-LABEL: v_bswap_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8 -; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24 -; GFX7-NEXT: s_mov_b32 s4, 0xff00ff -; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_bswap_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, 0x10203 +; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001 ; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_bswap_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0x10203 +; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001 ; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] %bswap = call i16 @llvm.bswap.i16(i16 %src) ret i16 %bswap @@ -409,51 +400,34 @@ define i16 @v_bswap_i16(i16 %src) { define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) { ; GFX7-LABEL: s_bswap_v2i16: ; GFX7: ; %bb.0: -; GFX7-NEXT: v_alignbit_b32 v0, s0, s0, 8 -; GFX7-NEXT: v_alignbit_b32 v1, s0, s0, 24 -; GFX7-NEXT: s_mov_b32 s0, 0xff00ff -; GFX7-NEXT: v_bfi_b32 v0, s0, v1, v0 -; GFX7-NEXT: v_alignbit_b32 v1, s1, s1, 8 -; GFX7-NEXT: v_alignbit_b32 v2, s1, s1, 24 -; GFX7-NEXT: v_bfi_b32 v1, s0, v2, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: s_mov_b32 s0, 0xffff -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; 
GFX7-NEXT: s_mov_b32 s3, 0xffff +; GFX7-NEXT: s_lshl_b32 s2, s0, 8 +; GFX7-NEXT: s_and_b32 s0, s0, s3 +; GFX7-NEXT: s_lshr_b32 s0, s0, 8 +; GFX7-NEXT: s_or_b32 s0, s0, s2 +; GFX7-NEXT: s_lshl_b32 s2, s1, 8 +; GFX7-NEXT: s_and_b32 s1, s1, s3 +; GFX7-NEXT: s_lshr_b32 s1, s1, 8 +; GFX7-NEXT: s_or_b32 s1, s1, s2 +; GFX7-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_bswap_v2i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s1, s0, 16 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: s_mov_b32 s0, 0x10203 -; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_mov_b32 s0, 0x2030001 ; GFX8-NEXT: v_perm_b32 v0, 0, v0, s0 -; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX8-NEXT: v_perm_b32 v1, 0, v1, s0 -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_bswap_v2i16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_lshr_b32 s1, s0, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: s_mov_b32 s0, 0x10203 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_perm_b32 v1, 0, v1, s0 +; GFX9-NEXT: s_mov_b32 s0, 0x2030001 ; GFX9-NEXT: v_perm_b32 v0, 0, v0, s0 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog %bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src) @@ -466,30 +440,25 @@ define i32 @v_bswap_i16_zext_to_i32(i16 %src) { ; GFX7-LABEL: v_bswap_i16_zext_to_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8 -; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24 -; GFX7-NEXT: s_mov_b32 s4, 0xff00ff -; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 0, 16 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_bswap_i16_zext_to_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, 0x10203 +; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001 ; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_bswap_i16_zext_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0x10203 +; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001 ; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff -; GFX9-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-NEXT: s_setpc_b64 s[30:31] %bswap = call i16 @llvm.bswap.i16(i16 %src) %zext = zext i16 %bswap to i32 @@ -500,29 +469,26 @@ define i32 @v_bswap_i16_sext_to_i32(i16 %src) { ; GFX7-LABEL: v_bswap_i16_sext_to_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8 -; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24 -; GFX7-NEXT: s_mov_b32 s4, 0xff00ff -; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX7-NEXT: s_setpc_b64 
s[30:31] ; ; GFX8-LABEL: v_bswap_i16_sext_to_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, 0x10203 +; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001 ; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_bswap_i16_sext_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0x10203 +; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001 ; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX9-NEXT: s_setpc_b64 s[30:31] %bswap = call i16 @llvm.bswap.i16(i16 %src) @@ -534,42 +500,29 @@ define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) { ; GFX7-LABEL: v_bswap_v2i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_alignbit_b32 v2, v0, v0, 8 -; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24 -; GFX7-NEXT: s_mov_b32 s4, 0xff00ff -; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v2 -; GFX7-NEXT: v_alignbit_b32 v2, v1, v1, 8 -; GFX7-NEXT: v_alignbit_b32 v1, v1, v1, 24 -; GFX7-NEXT: v_bfi_b32 v1, s4, v1, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_bswap_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-NEXT: s_mov_b32 s4, 0x10203 +; GFX8-NEXT: s_mov_b32 s4, 0x2030001 ; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4 
-; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_bswap_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX9-NEXT: s_mov_b32 s4, 0x10203 -; GFX9-NEXT: v_perm_b32 v1, 0, v1, s4 +; GFX9-NEXT: s_mov_b32 s4, 0x2030001 ; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] %bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src) ret <2 x i16> %bswap @@ -581,6 +534,46 @@ define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) { ; ret <3 x i16> %bswap ; } +define i64 @v_bswap_i48(i64 %src) { +; GFX7-LABEL: v_bswap_i48: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_alignbit_b32 v2, v1, v1, 8 +; GFX7-NEXT: v_alignbit_b32 v1, v1, v1, 24 +; GFX7-NEXT: s_mov_b32 s4, 0xff00ff +; GFX7-NEXT: v_bfi_b32 v1, s4, v1, v2 +; GFX7-NEXT: v_alignbit_b32 v2, v0, v0, 8 +; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24 +; GFX7-NEXT: v_bfi_b32 v2, s4, v0, v2 +; GFX7-NEXT: v_lshr_b64 v[0:1], v[1:2], 16 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_bswap_i48: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s4, 0x10203 +; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4 +; GFX8-NEXT: v_perm_b32 v2, 0, v0, s4 +; GFX8-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2] +; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_bswap_i48: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX9-NEXT: s_mov_b32 s4, 0x10203 +; GFX9-NEXT: v_perm_b32 v1, 0, v1, s4 +; GFX9-NEXT: v_perm_b32 v2, 0, v0, s4 +; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2] +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %trunc = trunc i64 %src to i48 + %bswap = call i48 @llvm.bswap.i48(i48 %trunc) + %zext = zext i48 %bswap to i64 + ret i64 %zext +} + declare i32 @llvm.amdgcn.readfirstlane(i32) #0 declare i16 @llvm.bswap.i16(i16) #1 declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) #1 @@ -589,6 +582,7 @@ declare i32 @llvm.bswap.i32(i32) #1 declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) #1 declare i64 @llvm.bswap.i64(i64) #1 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #1 +declare i48 @llvm.bswap.i48(i48) #1 attributes #0 = { convergent nounwind readnone } attributes #1 = { nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir index a7c4773c20d1c0..2939c599646a82 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s --- name: bswap_s8 @@ -7,14 +8,32 @@ name: bswap_s8 body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: bswap_s8 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[LSHR]](s32) - ; CHECK: $vgpr0 = COPY [[COPY2]](s32) + ; GFX7-LABEL: name: bswap_s8 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND]](s32) + ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND1]](s32) + ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] + ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX7: $vgpr0 = COPY [[COPY7]](s32) + ; GFX8-LABEL: name: bswap_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]] + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[BSWAP]], [[C]](s16) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 %2:_(s8) = G_BSWAP %1 @@ -28,14 +47,27 @@ name: bswap_s16 body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: bswap_s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: $vgpr0 = COPY [[COPY2]](s32) + ; GFX7-LABEL: 
name: bswap_s16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] + ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: bswap_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BSWAP]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_BSWAP %1 @@ -49,14 +81,32 @@ name: bswap_s24 body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: bswap_s24 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: $vgpr0 = COPY [[COPY2]](s32) + ; GFX7-LABEL: name: bswap_s24 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) 
+ ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND]](s32) + ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND1]](s32) + ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] + ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX7: $vgpr0 = COPY [[COPY7]](s32) + ; GFX8-LABEL: name: bswap_s24 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: $vgpr0 = COPY [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s24) = G_TRUNC %0 %2:_(s24) = G_BSWAP %1 @@ -70,10 +120,14 @@ name: bswap_s32 body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: bswap_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] - ; CHECK: $vgpr0 = COPY [[BSWAP]](s32) + ; GFX7-LABEL: name: bswap_s32 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] + ; GFX7: $vgpr0 = COPY [[BSWAP]](s32) + ; GFX8-LABEL: name: bswap_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] + ; GFX8: $vgpr0 = COPY [[BSWAP]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_BSWAP %0 $vgpr0 = COPY %1 @@ -85,45 +139,259 @@ name: bswap_v2s16 body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: bswap_v2s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[COPY3]] - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP1]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX7-LABEL: name: bswap_v2s16 + ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] + ; GFX7: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX7: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX7: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX7: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX7: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-LABEL: name: bswap_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[COPY]] + ; GFX8: $vgpr0 = COPY [[BSWAP]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_BSWAP %0 $vgpr0 = COPY %1 ... 
+--- +name: bswap_v3s16 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX7-LABEL: name: bswap_v3s16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY4]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] + ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; GFX7: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY8]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] + ; GFX7: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX7: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[COPY10]](s32) + ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; GFX7: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY12]](s32) + ; GFX7: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] + ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX7: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX7: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX7: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX8-LABEL: name: bswap_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], 
[[EXTRACT]](<3 x s16>), 0 + ; GFX8: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[EXTRACT2]] + ; GFX8: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[BSWAP]](<2 x s16>), 0 + ; GFX8: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; GFX8: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; GFX8: [[BSWAP1:%[0-9]+]]:_(s16) = G_BSWAP [[COPY6]] + ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT3]](<3 x s16>), 0 + ; GFX8: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT3]], [[BSWAP1]](s16), 32 + ; GFX8: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT4]](<4 x s16>), 0 + ; GFX8: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT4]](<3 x s16>), 0 + ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<4 x s16>) + ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX8: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; GFX8: $vgpr0 = COPY [[COPY7]](s32) + ; GFX8: $vgpr1 = COPY [[COPY8]](s32) + ; GFX8: $vgpr2 = COPY [[COPY9]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s16) = G_TRUNC %0 + %4:_(s16) = G_TRUNC %1 + %5:_(s16) = G_TRUNC %2 + + %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5 + %7:_(<3 x s16>) = G_BSWAP %6 + %8:_(s16), %9:_(s16), %10:_(s16) = G_UNMERGE_VALUES %7 + %11:_(s32) = G_ANYEXT %8 + %12:_(s32) = G_ANYEXT %9 + %13:_(s32) = G_ANYEXT %10 + $vgpr0 = COPY %11 + $vgpr1 = COPY %12 + $vgpr2 = 
COPY %13 +... + +--- +name: bswap_v4s16 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX7-LABEL: name: bswap_v4s16 + ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] + ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; GFX7: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] + ; GFX7: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX7: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[COPY8]](s32) + ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; GFX7: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; GFX7: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY10]](s32) + ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX7: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] + ; GFX7: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX7: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[COPY12]](s32) + ; GFX7: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; GFX7: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX7: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]] + ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY14]](s32) + ; GFX7: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX7: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]] + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX7: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX7: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX7: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX7: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX7: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX7: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-LABEL: name: bswap_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[UV]] + ; GFX8: [[BSWAP1:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[UV1]] + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BSWAP]](<2 x s16>), [[BSWAP1]](<2 x s16>) + ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = G_BSWAP %0 + $vgpr0_vgpr1 = COPY %1 +... + --- name: bswap_v2s32 body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: bswap_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] - ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-LABEL: name: bswap_v2s32 + ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] + ; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) + ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-LABEL: name: bswap_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] + ; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_BSWAP %0 $vgpr0_vgpr1 = COPY %1 @@ 
-135,14 +403,58 @@ name: bswap_s64 body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: bswap_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] - ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX7-LABEL: name: bswap_s64 + ; GFX7: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] + ; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] + ; GFX7: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) + ; GFX7: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-LABEL: name: bswap_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] + ; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_BSWAP %0 $vgpr0_vgpr1 = COPY %1 ... 
+ +--- +name: bswap_v2s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7-LABEL: name: bswap_v2s64 + ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV3]] + ; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV2]] + ; GFX7: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) + ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX7: [[BSWAP2:%[0-9]+]]:_(s32) = G_BSWAP [[UV5]] + ; GFX7: [[BSWAP3:%[0-9]+]]:_(s32) = G_BSWAP [[UV4]] + ; GFX7: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32) + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-LABEL: name: bswap_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV3]] + ; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV2]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8: [[BSWAP2:%[0-9]+]]:_(s32) = G_BSWAP [[UV5]] + ; GFX8: [[BSWAP3:%[0-9]+]]:_(s32) = G_BSWAP [[UV4]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = G_BSWAP %0 + 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +...