diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index b81a08de383d9..e36c57ad59bfd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -960,6 +960,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat) .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat); + addRulesForGOpcs({G_IS_FPCLASS}) + .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}}) + .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}}) + .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}}) + .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}}) + .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}}) + .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}}); + using namespace Intrinsic; addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}}); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index dd2cffd7bd161..dd19ba17bb292 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1,16 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s + +; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and +; moving those SGPRs into VGPRs. define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; GFX7SELDAG-LABEL: sgpr_isnan_f16: @@ -34,48 +37,98 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff +; GFX7GLISEL-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00 -; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 -; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm ; -; GFX8CHECK-LABEL: sgpr_isnan_f16: -; GFX8CHECK: ; %bb.0: -; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 -; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0 -; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] -; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1 -; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2 -; GFX8CHECK-NEXT: s_endpgm -; -; GFX9CHECK-LABEL: sgpr_isnan_f16: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; GFX10CHECK-LABEL: sgpr_isnan_f16: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_clause 0x1 -; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm +; GFX8SELDAG-LABEL: sgpr_isnan_f16: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8SELDAG-NEXT: s_endpgm +; +; GFX8GLISEL-LABEL: sgpr_isnan_f16: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8GLISEL-NEXT: s_endpgm +; +; GFX9SELDAG-LABEL: sgpr_isnan_f16: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f16: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f16: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_clause 0x1 +; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s2, s2, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: sgpr_isnan_f16: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s2, s0, 3 +; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm ; ; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: @@ -103,26 +156,36 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; ; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: -; GFX11GLISEL-TRUE16-NEXT: s_clause 0x1 -; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c -; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0 ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s0, v0.l +; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 vcc_lo, 0 +; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-TRUE16-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11GLISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11GLISEL-TRUE16-NEXT: s_endpgm ; ; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16: ; GFX11GLISEL-FAKE16: ; %bb.0: -; GFX11GLISEL-FAKE16-NEXT: s_clause 0x1 -; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s0, 3 ; GFX11GLISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-FAKE16-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 ; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3 -; GFX11GLISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11GLISEL-FAKE16-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3) %sext = sext i1 %result to i32 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll index 4f5432a202058..0a9fe10874c38 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll @@ -1,14 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s + +; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and +; moving those SGPRs into VGPRs. define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) { ; GFX7SELDAG-LABEL: sgpr_isnan_f32: @@ -30,58 +33,132 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) { ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], s3, 3 -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 1 +; GFX7GLISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm ; -; GFX8CHECK-LABEL: sgpr_isnan_f32: -; GFX8CHECK: ; %bb.0: -; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 -; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0 -; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] -; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1 -; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2 -; GFX8CHECK-NEXT: s_endpgm -; -; GFX9CHECK-LABEL: sgpr_isnan_f32: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; GFX10CHECK-LABEL: sgpr_isnan_f32: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_clause 0x1 -; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s2, s2, 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm -; -; GFX11CHECK-LABEL: sgpr_isnan_f32: -; GFX11CHECK: ; %bb.0: -; GFX11CHECK-NEXT: s_clause 0x1 -; GFX11CHECK-NEXT: s_load_b32 s2, s[4:5], 0x2c -; GFX11CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s2, s2, 3 -; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11CHECK-NEXT: s_endpgm +; GFX8SELDAG-LABEL: sgpr_isnan_f32: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8SELDAG-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8SELDAG-NEXT: s_endpgm +; +; GFX8GLISEL-LABEL: sgpr_isnan_f32: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GLISEL-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8GLISEL-NEXT: s_endpgm +; +; GFX9SELDAG-LABEL: sgpr_isnan_f32: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f32: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f32: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_clause 0x1 +; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f32_e64 s2, s2, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: sgpr_isnan_f32: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f32_e64 s2, s0, 3 +; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm +; +; GFX11SELDAG-LABEL: sgpr_isnan_f32: +; GFX11SELDAG: ; %bb.0: +; GFX11SELDAG-NEXT: s_clause 0x1 +; GFX11SELDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11SELDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11SELDAG-NEXT: v_cmp_class_f32_e64 s2, s2, 3 +; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11SELDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11SELDAG-NEXT: s_endpgm +; +; GFX11GLISEL-LABEL: sgpr_isnan_f32: +; GFX11GLISEL: ; %bb.0: +; GFX11GLISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: v_cmp_class_f32_e64 s2, s0, 3 +; GFX11GLISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11GLISEL-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan %sext = sext i1 %result to i32 store i32 %sext, ptr addrspace(1) %out, align 4 @@ -106,9 +183,14 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) { ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX7GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3] +; GFX7GLISEL-NEXT: v_cmp_class_f64_e64 s[4:5], s[2:3], 3 ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 1 +; GFX7GLISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm @@ -131,40 +213,92 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) { ; GFX8GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 ; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GLISEL-NEXT: s_endpgm ; -; GFX9CHECK-LABEL: sgpr_isnan_f64: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; GFX10CHECK-LABEL: sgpr_isnan_f64: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm -; -; GFX11CHECK-LABEL: sgpr_isnan_f64: -; GFX11CHECK: ; %bb.0: -; GFX11CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 -; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11CHECK-NEXT: s_endpgm +; GFX9SELDAG-LABEL: sgpr_isnan_f64: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f64: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f64: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: sgpr_isnan_f64: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm +; +; GFX11SELDAG-LABEL: sgpr_isnan_f64: +; GFX11SELDAG: ; %bb.0: +; GFX11SELDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11SELDAG-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11SELDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11SELDAG-NEXT: s_endpgm +; +; GFX11GLISEL-LABEL: sgpr_isnan_f64: +; GFX11GLISEL: ; %bb.0: +; GFX11GLISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11GLISEL-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan %sext = sext i1 %result to i32 store i32 %sext, ptr addrspace(1) %out, align 4