AMDGPU: split the i16 bswap selection patterns by True16Predicate.

SIInstructions.td: the two existing V_PERM_B32_e64 patterns for
(i16 (bswap i16:$a)) and (i32 (zext (bswap i16:$a))) are wrapped so they are
instantiated under NotHasTrue16BitInsts and UseFakeTrue16Insts. Two new
patterns under UseRealTrue16Insts take the source as (COPY VGPR_16:$a); the
i16-result form additionally wraps V_PERM_B32_e64 in EXTRACT_SUBREG ... lo16.

bswap.ll: the single gfx1100 RUN line is replaced by two, one with
+real-true16 (prefixes GFX11,GFX11-REAL16) and one with -real-true16
(prefixes GFX11,GFX11-FAKE16), and the checks for
missing_truncate_promote_bswap are split accordingly.

NOTE(review): line breaks and intra-line spacing below were reconstructed from
a whitespace-collapsed copy of this patch. Hunk line counts were checked
against the @@ headers; confirm indentation/alignment against the tree before
applying (or apply with whitespace-insensitive matching).

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index cdc1132579d8d..3b924d869a184 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3030,6 +3030,8 @@ def : GCNPat <
 
 // Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
 // The 12s emit 0s.
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
 def : GCNPat <
   (i16 (bswap i16:$a)),
   (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
@@ -3039,6 +3041,19 @@ def : GCNPat <
   (i32 (zext (bswap i16:$a))),
   (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
 >;
+}
+
+let True16Predicate = UseRealTrue16Insts in {
+def : GCNPat <
+  (i16 (bswap i16:$a)),
+  (EXTRACT_SUBREG (V_PERM_B32_e64 (i32 0), (COPY VGPR_16:$a), (S_MOV_B32 (i32 0x0c0c0001))), lo16)
+>;
+
+def : GCNPat <
+  (i32 (zext (bswap i16:$a))),
+  (V_PERM_B32_e64 (i32 0), (COPY VGPR_16:$a), (S_MOV_B32 (i32 0x0c0c0001)))
+>;
+}
 
 // Magic number: 1 | (0 << 8) | (3 << 16) | (2 << 24)
 def : GCNPat <
diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll
index 30c8e94c9a27f..a95a1aba0c914 100644
--- a/llvm/test/CodeGen/AMDGPU/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/bswap.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=amdgcn-- -verify-machineinstrs | FileCheck %s --check-prefix=SI
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16
 
 declare i16 @llvm.bswap.i16(i16) nounwind readnone
 declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) nounwind readnone
@@ -490,13 +491,21 @@ define float @missing_truncate_promote_bswap(i32 %arg) {
 ; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: missing_truncate_promote_bswap:
-; GFX11:       ; %bb.0: ; %bb
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_perm_b32 v0, 0, v0, 0xc0c0001
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-REAL16-LABEL: missing_truncate_promote_bswap:
+; GFX11-REAL16:       ; %bb.0: ; %bb
+; GFX11-REAL16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-REAL16-NEXT:    v_perm_b32 v0, 0, v0, 0xc0c0001
+; GFX11-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-REAL16-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-REAL16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: missing_truncate_promote_bswap:
+; GFX11-FAKE16:       ; %bb.0: ; %bb
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, 0, v0, 0xc0c0001
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %tmp = trunc i32 %arg to i16
   %tmp1 = call i16 @llvm.bswap.i16(i16 %tmp)