From 6e4e5d08dffcf65c2e6abe8accde8cf7a071b7d6 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 19 Nov 2025 17:09:52 +0000 Subject: [PATCH] [AMDGPU] Precommit tests for V_CVT_PK_[IU]16_F32 --- llvm/test/CodeGen/AMDGPU/fp_to_sint.ll | 565 ++++++++++++++++++ llvm/test/CodeGen/AMDGPU/fp_to_uint.ll | 460 ++++++++++++++ llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll | 26 +- 3 files changed, 1047 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll index 0c5ed00b58d90..a2cd6d28e96cb 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn < %s | FileCheck %s --check-prefixes=SI ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck %s --check-prefixes=VI +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck %s --check-prefixes=EG declare float @llvm.fabs.f32(float) #1 @@ -28,6 +30,28 @@ define amdgpu_kernel void @fp_to_sint_i32(ptr addrspace(1) %out, float %in) { ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_sint_i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_sint_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] @@ -67,6 +91,28 @@ define amdgpu_kernel void @fp_to_sint_i32_fabs(ptr addrspace(1) %out, float %in) ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_sint_i32_fabs: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e64 v1, |s2| +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_i32_fabs: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e64 v0, |s2| +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_sint_i32_fabs: ; EG: ; %bb.0: ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] @@ -108,6 +154,26 @@ define amdgpu_kernel void @fp_to_sint_v2i32(ptr addrspace(1) %out, <2 x float> % ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_sint_v2i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v1, s3 +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v0, s2 +; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_v2i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v1, s3 +; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_sint_v2i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] @@ -157,6 +223,34 @@ define amdgpu_kernel void @fp_to_sint_v4i32(ptr addrspace(1) %out, ptr addrspace ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_sint_v4i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v3, s7 +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v2, s6 +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v1, s5 +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v0, s4 +; GFX11-SDAG-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_v4i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, s4 +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v1, s5 +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v2, s6 +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v3, s7 +; GFX11-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_sint_v4i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -234,6 +328,56 @@ define amdgpu_kernel void @fp_to_sint_i64 (ptr addrspace(1) %out, float %in) { ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_sint_i64: +; GFX11-SDAG: ; %bb.0: ; %entry +; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, s0 +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0| +; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0| +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 +; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_i64: +; GFX11-GISEL: ; %bb.0: ; %entry +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: s_ashr_i32 s2, s2, 31 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0| +; GFX11-GISEL-NEXT: v_floor_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_fma_f32 v0, 0xcf800000, v1, |v0| +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_xor_b32_e32 v1, s2, v1 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, s2 +; GFX11-GISEL-NEXT: v_subrev_co_ci_u32_e64 v1, null, s2, v1, vcc_lo +; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_sint_i64: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[] @@ -357,6 +501,81 @@ define amdgpu_kernel void @fp_to_sint_v2i64(ptr addrspace(1) %out, <2 x float> % ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_sint_v2i64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v6, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, s3 +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v1, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0| +; GFX11-SDAG-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1| +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_floor_f32_e32 v2, v2 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v3, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0| +; GFX11-SDAG-NEXT: v_fma_f32 v5, 0xcf800000, v3, |v1| +; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v0, 31, v0 +; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v1, 31, v1 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_xor_b32_e32 v7, v2, v0 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v4, v4, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_xor_b32_e32 v5, v5, v1 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v8, v3, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v0 +; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e64 v3, null, v7, v0, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v5, v1 +; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e64 v1, null, v8, v1, vcc_lo +; GFX11-SDAG-NEXT: global_store_b128 v6, v[0:3], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_v2i64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, s3 +; GFX11-GISEL-NEXT: s_ashr_i32 s2, s2, 31 +; GFX11-GISEL-NEXT: s_ashr_i32 s3, s3, 31 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0| +; GFX11-GISEL-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_floor_f32_e32 v2, v2 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_fma_f32 v0, 0xcf800000, v2, |v0| +; GFX11-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v3, |v1| +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, s2, v2 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v3, s3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v4, s3, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, s2 +; GFX11-GISEL-NEXT: v_subrev_co_ci_u32_e64 v1, null, s2, v2, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_sub_co_u32 v2, vcc_lo, v4, s3 +; GFX11-GISEL-NEXT: v_subrev_co_ci_u32_e64 v3, null, s3, v3, vcc_lo +; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_sint_v2i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 74, @4, KC0[CB0:0-32], KC1[] @@ -559,6 +778,123 @@ define amdgpu_kernel void @fp_to_sint_v4i64(ptr addrspace(1) %out, <4 x float> % ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_sint_v4i64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX11-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v8, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, s1 +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v2, s3 +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v3, s2 +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v1, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v0| +; GFX11-SDAG-NEXT: v_mul_f32_e64 v7, 0x2f800000, |v2| +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_mul_f32_e64 v11, 0x2f800000, |v3| +; GFX11-SDAG-NEXT: v_mul_f32_e64 v6, 0x2f800000, |v1| +; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v5, 31, v0 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v4, v4 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v7, v7 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v11, v11 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v6, v6 +; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v10, 31, v2 +; GFX11-SDAG-NEXT: v_fma_f32 v0, 0xcf800000, v4, |v0| +; GFX11-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v7, |v2| +; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v12, 31, v3 +; GFX11-SDAG-NEXT: v_fma_f32 v3, 0xcf800000, v11, |v3| +; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v9, 31, v1 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-SDAG-NEXT: v_fma_f32 v1, 0xcf800000, v6, |v1| +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v13, v4 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v4, v6 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v6, v7 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, v0, v5 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v7, v11 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v11, v13, v5 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v13, v4, v9 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v4, v6, v10 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v6, v2, v10 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v15, v3, v12 +; GFX11-SDAG-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v5 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v14, v7, v12 +; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e64 v3, null, v11, v5, vcc_lo +; GFX11-SDAG-NEXT: v_sub_co_u32 v6, vcc_lo, v6, v10 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e64 v7, null, v4, v10, vcc_lo +; GFX11-SDAG-NEXT: v_sub_co_u32 v4, vcc_lo, v15, v12 +; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e64 v5, null, v14, v12, vcc_lo +; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v9 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e64 v1, null, v13, v9, vcc_lo +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_store_b128 v8, v[4:7], s[4:5] offset:16 +; GFX11-SDAG-NEXT: global_store_b128 v8, v[0:3], s[4:5] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_v4i64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX11-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v8, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v0, s0 +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, s1 +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v2, s2 +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v3, s3 +; GFX11-GISEL-NEXT: s_ashr_i32 s0, s0, 31 +; GFX11-GISEL-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v0| +; GFX11-GISEL-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v1| +; GFX11-GISEL-NEXT: v_mul_f32_e64 v6, 0x2f800000, |v2| +; GFX11-GISEL-NEXT: v_mul_f32_e64 v7, 0x2f800000, |v3| +; GFX11-GISEL-NEXT: s_ashr_i32 s1, s1, 31 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v4, v4 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v5, v5 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v6, v6 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v7, v7 +; GFX11-GISEL-NEXT: s_ashr_i32 s2, s2, 31 +; GFX11-GISEL-NEXT: v_fma_f32 v0, 0xcf800000, v4, |v0| +; GFX11-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v5, |v1| +; GFX11-GISEL-NEXT: v_fma_f32 v2, 0xcf800000, v6, |v2| +; GFX11-GISEL-NEXT: v_fma_f32 v3, 0xcf800000, v7, |v3| +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v0, s0, v0 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v4, s0, v4 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v9, s1, v1 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX11-GISEL-NEXT: s_ashr_i32 s3, s3, 31 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v5, s1, v5 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v10, s2, v2 +; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, s0 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v6, s2, v6 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v11, s3, v3 +; GFX11-GISEL-NEXT: v_subrev_co_ci_u32_e64 v1, null, s0, v4, vcc_lo +; GFX11-GISEL-NEXT: v_sub_co_u32 v2, vcc_lo, v9, s1 +; GFX11-GISEL-NEXT: v_xor_b32_e32 v7, s3, v7 +; GFX11-GISEL-NEXT: v_subrev_co_ci_u32_e64 v3, null, s1, v5, vcc_lo +; GFX11-GISEL-NEXT: v_sub_co_u32 v4, vcc_lo, v10, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_subrev_co_ci_u32_e64 v5, null, s2, v6, vcc_lo +; GFX11-GISEL-NEXT: v_sub_co_u32 v6, vcc_lo, v11, s3 +; GFX11-GISEL-NEXT: v_subrev_co_ci_u32_e64 v7, null, s3, v7, vcc_lo +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_store_b128 v8, v[0:3], s[4:5] +; GFX11-GISEL-NEXT: global_store_b128 v8, v[4:7], s[4:5] offset:16 +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_sint_v4i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[] @@ -754,6 +1090,32 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_f32_to_i1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_f32_e64 s2, -1.0, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 +; GFX11-SDAG-NEXT: global_store_b8 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_f32_to_i1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-GISEL-NEXT: global_store_b8 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_f32_to_i1: ; EG: ; %bb.0: ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] @@ -804,6 +1166,32 @@ define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, floa ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_fabs_f32_to_i1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_f32_e64 s2, -1.0, |s2| +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 +; GFX11-SDAG-NEXT: global_store_b8 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_fabs_f32_to_i1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e64 v0, |s2| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-GISEL-NEXT: global_store_b8 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_fabs_f32_to_i1: ; EG: ; %bb.0: ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] @@ -853,6 +1241,28 @@ define amdgpu_kernel void @fp_to_sint_f32_i16(ptr addrspace(1) %out, float %in) ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_sint_f32_i16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_f32_i16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_sint_f32_i16: ; EG: ; %bb.0: ; EG-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[] @@ -879,5 +1289,160 @@ define amdgpu_kernel void @fp_to_sint_f32_i16(ptr addrspace(1) %out, float %in) ret void } +define amdgpu_kernel void @fp_to_sint_v2f32_to_v2i16(ptr addrspace(1) %out, <2 x float> %in) { +; SI-LABEL: fp_to_sint_v2f32_to_v2i16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_cvt_i32_f32_e32 v0, s5 +; SI-NEXT: v_cvt_i32_f32_e32 v1, s4 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fp_to_sint_v2f32_to_v2i16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s3 +; VI-NEXT: v_cvt_i32_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_cvt_i32_f32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fp_to_sint_v2f32_to_v2i16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v0, s3 +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v2, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mov_b16_e32 v2.h, v0.l +; GFX11-SDAG-NEXT: global_store_b32 v1, v2, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_v2f32_to_v2i16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, s3 +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v1, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.h, v0.l +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; +; EG-LABEL: fp_to_sint_v2f32_to_v2i16: +; EG: ; %bb.0: +; EG-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: ALU clause starting at 4: +; EG-NEXT: TRUNC T0.W, KC0[2].W, +; EG-NEXT: TRUNC * T1.W, KC0[3].X, +; EG-NEXT: FLT_TO_INT T1.W, PS, +; EG-NEXT: FLT_TO_INT * T0.W, PV.W, +; EG-NEXT: AND_INT T0.W, PS, literal.x, +; EG-NEXT: LSHL * T1.W, PV.W, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: OR_INT T4.X, PV.W, PS, +; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %sint = fptosi <2 x float> %in to <2 x i16> + store <2 x i16> %sint, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fp_to_sint_f32_to_v2i16(ptr addrspace(1) %out, float %in0, float %in1) { +; SI-LABEL: fp_to_sint_f32_to_v2i16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_cvt_i32_f32_e32 v0, s4 +; SI-NEXT: v_cvt_i32_f32_e32 v1, s5 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fp_to_sint_f32_to_v2i16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_cvt_i32_f32_e32 v0, s2 +; VI-NEXT: v_cvt_i32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fp_to_sint_f32_to_v2i16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v0, s3 +; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v2, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mov_b16_e32 v2.h, v0.l +; GFX11-SDAG-NEXT: global_store_b32 v1, v2, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_sint_f32_to_v2i16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, s3 +; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v1, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.h, v0.l +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; +; EG-LABEL: fp_to_sint_f32_to_v2i16: +; EG: ; %bb.0: +; EG-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: ALU clause starting at 4: +; EG-NEXT: TRUNC T0.W, KC0[2].W, +; EG-NEXT: TRUNC * T1.W, KC0[2].Z, +; EG-NEXT: FLT_TO_INT T1.W, PS, +; EG-NEXT: FLT_TO_INT * T0.W, PV.W, +; EG-NEXT: LSHL T0.W, PS, literal.x, +; EG-NEXT: AND_INT * T1.W, PV.W, literal.y, +; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41) +; EG-NEXT: OR_INT T4.X, PV.W, PS, +; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %sint0 = fptosi float %in0 to i16 + %sint1 = fptosi float %in1 to i16 + %res0 = insertelement <2 x i16> poison, i16 %sint0, i32 0 + %res1 = insertelement <2 x i16> %res0, i16 %sint1, i32 1 + store <2 x i16> %res1, ptr addrspace(1) %out + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind readnone } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll index c938475ab7675..32f80ff6c22f8 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn < %s | FileCheck %s -check-prefixes=SI ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck %s -check-prefixes=VI +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck %s -check-prefixes=EG declare float @llvm.fabs.f32(float) #1 @@ -28,6 +30,28 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i32 (ptr addrspace(1) %out, float % ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_f32_to_i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_f32_to_i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_f32_to_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] @@ -68,6 +92,26 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(ptr addrspace(1) %out, <2 x ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_v2f32_to_v2i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, s3 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, s2 +; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_v2f32_to_v2i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, s3 +; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_v2f32_to_v2i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] @@ -117,6 +161,34 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(ptr addrspace(1) %out, ptr ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_v4f32_to_v4i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v3, s7 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, s6 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, s5 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, s4 +; GFX11-SDAG-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_v4f32_to_v4i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, s4 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, s5 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v2, s6 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v3, s7 +; GFX11-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_v4f32_to_v4i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -181,6 +253,42 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i64(ptr addrspace(1) %out, float %x ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_f32_to_i64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, s0 +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_f32_to_i64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v0, s0 +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_f32_to_i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[] @@ -280,6 +388,54 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(ptr addrspace(1) %out, <2 x ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_v2f32_to_v2i64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, s3 +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v4, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX11-SDAG-NEXT: v_mul_f32_e32 v2, 0x2f800000, v4 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_floor_f32_e32 v1, v1 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v2, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1 +; GFX11-SDAG-NEXT: v_fmac_f32_e32 v4, 0xcf800000, v2 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v1 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v0 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v4 +; GFX11-SDAG-NEXT: global_store_b128 v5, v[0:3], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_v2f32_to_v2i64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v2, s3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX11-GISEL-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_floor_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1 +; GFX11-GISEL-NEXT: v_fmac_f32_e32 v2, 0xcf800000, v3 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX11-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_v2f32_to_v2i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 74, @4, KC0[CB0:0-32], KC1[] @@ -440,6 +596,85 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(ptr addrspace(1) %out, <4 x ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_v4f32_to_v4i64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v10, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, s1 +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v8, s0 +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v4, s3 +; GFX11-SDAG-NEXT: v_trunc_f32_e32 v9, s2 +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX11-SDAG-NEXT: v_mul_f32_e32 v2, 0x2f800000, v8 +; GFX11-SDAG-NEXT: v_mul_f32_e32 v3, 0x2f800000, v4 +; GFX11-SDAG-NEXT: v_mul_f32_e32 v5, 0x2f800000, v9 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_floor_f32_e32 v6, v1 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v2, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_floor_f32_e32 v7, v3 +; GFX11-SDAG-NEXT: v_floor_f32_e32 v5, v5 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v6 +; GFX11-SDAG-NEXT: v_fmac_f32_e32 v8, 0xcf800000, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_dual_fmac_f32 v4, 0xcf800000, v7 :: v_dual_fmac_f32 v9, 0xcf800000, v5 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v6 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v6, v4 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v4, v9 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v2 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v0 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v8 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_store_b128 v10, v[4:7], s[0:1] offset:16 +; GFX11-SDAG-NEXT: global_store_b128 v10, v[0:3], s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_v4f32_to_v4i64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v8, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v0, s0 +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v2, s1 +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v4, s2 +; GFX11-GISEL-NEXT: v_trunc_f32_e32 v6, s3 +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX11-GISEL-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; GFX11-GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 +; GFX11-GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_floor_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v3, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_floor_f32_e32 v5, v5 +; GFX11-GISEL-NEXT: v_floor_f32_e32 v7, v7 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1 +; GFX11-GISEL-NEXT: v_fmac_f32_e32 v2, 0xcf800000, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_fmac_f32_e32 v4, 0xcf800000, v5 +; GFX11-GISEL-NEXT: v_fmac_f32_e32 v6, 0xcf800000, v7 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_store_b128 v8, v[0:3], s[0:1] +; GFX11-GISEL-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16 +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_v4f32_to_v4i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[] @@ -635,6 +870,32 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_f32_to_i1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_f32_e64 s2, 1.0, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 +; GFX11-SDAG-NEXT: global_store_b8 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_f32_to_i1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-GISEL-NEXT: global_store_b8 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_f32_to_i1: ; EG: ; %bb.0: ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] @@ -685,6 +946,32 @@ define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, floa ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_fabs_f32_to_i1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_f32_e64 s2, 1.0, |s2| +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 +; GFX11-SDAG-NEXT: global_store_b8 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_fabs_f32_to_i1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e64 v0, |s2| +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-GISEL-NEXT: global_store_b8 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_fabs_f32_to_i1: ; EG: ; %bb.0: ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] @@ -734,6 +1021,28 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i16(ptr addrspace(1) %out, float %i ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; +; GFX11-SDAG-LABEL: fp_to_uint_f32_to_i16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, s2 +; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_f32_to_i16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, s2 +; GFX11-GISEL-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; ; EG-LABEL: fp_to_uint_f32_to_i16: ; EG: ; %bb.0: ; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] @@ -759,5 +1068,156 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i16(ptr addrspace(1) %out, float %i ret void } +define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i16(ptr addrspace(1) %out, <2 x float> %in) { +; SI-LABEL: fp_to_uint_v2f32_to_v2i16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_cvt_u32_f32_e32 v0, s5 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v1, s4 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fp_to_uint_v2f32_to_v2i16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s3 +; VI-NEXT: v_cvt_u32_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_cvt_u32_f32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fp_to_uint_v2f32_to_v2i16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, s3 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mov_b16_e32 v2.h, v0.l +; GFX11-SDAG-NEXT: global_store_b32 v1, v2, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_v2f32_to_v2i16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, s3 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.h, v0.l +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; +; EG-LABEL: fp_to_uint_v2f32_to_v2i16: +; EG: ; %bb.0: +; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: ALU clause starting at 4: +; EG-NEXT: TRUNC T0.W, KC0[3].X, +; EG-NEXT: TRUNC * T1.W, KC0[2].W, +; EG-NEXT: FLT_TO_UINT * T0.Y, PV.W, +; EG-NEXT: LSHL T0.W, PS, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.Y, T1.W, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: OR_INT T4.X, PS, PV.W, +; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %uint = fptoui <2 x float> %in to <2 x i16> + store <2 x i16> %uint, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fp_to_uint_f32_to_v2i16(ptr addrspace(1) %out, float %in0, float %in1) { +; SI-LABEL: fp_to_uint_f32_to_v2i16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_cvt_u32_f32_e32 v0, s4 +; SI-NEXT: v_cvt_u32_f32_e32 v1, s5 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fp_to_uint_f32_to_v2i16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_cvt_u32_f32_e32 v0, s2 +; VI-NEXT: v_cvt_u32_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fp_to_uint_f32_to_v2i16: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, s3 +; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, s2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mov_b16_e32 v2.h, v0.l +; GFX11-SDAG-NEXT: global_store_b32 v1, v2, s[0:1] +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fp_to_uint_f32_to_v2i16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, s3 +; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mov_b16_e32 v1.h, v0.l +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm +; +; EG-LABEL: fp_to_uint_f32_to_v2i16: +; EG: ; %bb.0: +; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: ALU clause starting at 4: +; EG-NEXT: TRUNC T0.W, KC0[2].W, +; EG-NEXT: TRUNC * T1.W, KC0[2].Z, +; EG-NEXT: FLT_TO_UINT * T0.Y, PV.W, +; EG-NEXT: LSHL T0.W, PS, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.Y, T1.W, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: OR_INT T4.X, PV.W, PS, +; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %uint0 = fptoui float %in0 to i16 + %uint1 = fptoui float %in1 to i16 + %res0 = insertelement <2 x i16> poison, i16 %uint0, i32 0 + %res1 = insertelement <2 x i16> %res0, i16 %uint1, i32 1 + store <2 x i16> %res1, ptr addrspace(1) %out + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind readnone } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll b/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll index 689e918c61425..09686e2fa819a 100644 --- a/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll +++ b/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -global-isel < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel < %s | FileCheck %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1150 < %s | FileCheck %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1150 < %s | FileCheck %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s define amdgpu_vs float @sitofp_i32_to_f32(i32 inreg %val) { ; CHECK-LABEL: sitofp_i32_to_f32: @@ -44,6 +44,24 @@ define amdgpu_vs i32 @fptoui_f32_to_u32(float inreg %val) { ret i32 %res } +define amdgpu_vs i16 @fptosi_f32_to_i16(float inreg %val) { +; CHECK-LABEL: fptosi_f32_to_i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cvt_i32_f32 s0, s0 +; CHECK-NEXT: ; return to shader part epilog + %res = fptosi float %val to i16 + ret i16 %res +} + +define amdgpu_vs i16 @fptoui_f32_to_u16(float inreg %val) { +; CHECK-LABEL: fptoui_f32_to_u16: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_cvt_u32_f32 s0, s0 +; CHECK-NEXT: ; return to shader part epilog + %res = fptoui float %val to i16 + ret i16 %res +} + define amdgpu_vs float @fpext_f16_to_f32(half inreg %val) { ; CHECK-LABEL: fpext_f16_to_f32: ; CHECK: ; %bb.0: