Skip to content

Conversation

@jayfoad
Copy link
Contributor

@jayfoad jayfoad commented Nov 20, 2025

No description provided.

@jayfoad jayfoad added the "skip-precommit-approval" label (PR for CI feedback, not intended for review) Nov 20, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 20, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

Patch is 55.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168893.diff

3 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/fp_to_sint.ll (+565)
  • (modified) llvm/test/CodeGen/AMDGPU/fp_to_uint.ll (+460)
  • (modified) llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll (+22-4)
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
index 0c5ed00b58d90..a2cd6d28e96cb 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn < %s | FileCheck %s --check-prefixes=SI
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck %s --check-prefixes=VI
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck %s --check-prefixes=EG
 
 declare float @llvm.fabs.f32(float) #1
@@ -28,6 +30,28 @@ define amdgpu_kernel void @fp_to_sint_i32(ptr addrspace(1) %out, float %in) {
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
+; GFX11-SDAG-LABEL: fp_to_sint_i32:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v1, s2
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: fp_to_sint_i32:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, s2
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+;
 ; EG-LABEL: fp_to_sint_i32:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
@@ -67,6 +91,28 @@ define amdgpu_kernel void @fp_to_sint_i32_fabs(ptr addrspace(1) %out, float %in)
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
+; GFX11-SDAG-LABEL: fp_to_sint_i32_fabs:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e64 v1, |s2|
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: fp_to_sint_i32_fabs:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e64 v0, |s2|
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+;
 ; EG-LABEL: fp_to_sint_i32_fabs:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
@@ -108,6 +154,26 @@ define amdgpu_kernel void @fp_to_sint_v2i32(ptr addrspace(1) %out, <2 x float> %
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
+; GFX11-SDAG-LABEL: fp_to_sint_v2i32:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v1, s3
+; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, s2
+; GFX11-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: fp_to_sint_v2i32:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, s2
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, s3
+; GFX11-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+;
 ; EG-LABEL: fp_to_sint_v2i32:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
@@ -157,6 +223,34 @@ define amdgpu_kernel void @fp_to_sint_v4i32(ptr addrspace(1) %out, ptr addrspace
 ; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
+; GFX11-SDAG-LABEL: fp_to_sint_v4i32:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v3, s7
+; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, s6
+; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v1, s5
+; GFX11-SDAG-NEXT:    v_cvt_i32_f32_e32 v0, s4
+; GFX11-SDAG-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: fp_to_sint_v4i32:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, s4
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, s5
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, s6
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v3, s7
+; GFX11-GISEL-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+;
 ; EG-LABEL: fp_to_sint_v4i32:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -234,6 +328,56 @@ define amdgpu_kernel void @fp_to_sint_i64 (ptr addrspace(1) %out, float %in) {
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
+; GFX11-SDAG-LABEL: fp_to_sint_i64:
+; GFX11-SDAG:       ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v0, s0
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: fp_to_sint_i64:
+; GFX11-GISEL:       ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v0, s2
+; GFX11-GISEL-NEXT:    s_ashr_i32 s2, s2, 31
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v1, v1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v1, |v0|
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, s2, v1
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, s2
+; GFX11-GISEL-NEXT:    v_subrev_co_ci_u32_e64 v1, null, s2, v1, vcc_lo
+; GFX11-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+;
 ; EG-LABEL: fp_to_sint_i64:
 ; EG:       ; %bb.0: ; %entry
 ; EG-NEXT:    ALU 40, @4, KC0[CB0:0-32], KC1[]
@@ -357,6 +501,81 @@ define amdgpu_kernel void @fp_to_sint_v2i64(ptr addrspace(1) %out, <2 x float> %
 ; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
+; GFX11-SDAG-LABEL: fp_to_sint_v2i64:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v6, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v0, s3
+; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v1, s2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v2, v2
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v3, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX11-SDAG-NEXT:    v_fma_f32 v5, 0xcf800000, v3, |v1|
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v1, 31, v1
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v7, v2, v0
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v4, v4, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v5, v5, v1
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v8, v3, v1
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v0
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e64 v3, null, v7, v0, vcc_lo
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v5, v1
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e64 v1, null, v8, v1, vcc_lo
+; GFX11-SDAG-NEXT:    global_store_b128 v6, v[0:3], s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: fp_to_sint_v2i64:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v0, s2
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, s3
+; GFX11-GISEL-NEXT:    s_ashr_i32 s2, s2, 31
+; GFX11-GISEL-NEXT:    s_ashr_i32 s3, s3, 31
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v2, |v0|
+; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v3, |v1|
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v2, s2, v2
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v3, s3, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v4, s3, v1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, s2
+; GFX11-GISEL-NEXT:    v_subrev_co_ci_u32_e64 v1, null, s2, v2, vcc_lo
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, s3
+; GFX11-GISEL-NEXT:    v_subrev_co_ci_u32_e64 v3, null, s3, v3, vcc_lo
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; GFX11-GISEL-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+;
 ; EG-LABEL: fp_to_sint_v2i64:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 74, @4, KC0[CB0:0-32], KC1[]
@@ -559,6 +778,123 @@ define amdgpu_kernel void @fp_to_sint_v4i64(ptr addrspace(1) %out, <4 x float> %
 ; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
+; GFX11-SDAG-LABEL: fp_to_sint_v4i64:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v8, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v0, s1
+; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v2, s3
+; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v3, s2
+; GFX11-SDAG-NEXT:    v_trunc_f32_e32 v1, s0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v7, 0x2f800000, |v2|
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v11, 0x2f800000, |v3|
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v6, 0x2f800000, |v1|
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v4, v4
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v7, v7
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v11, v11
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v6, v6
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v10, 31, v2
+; GFX11-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v4, |v0|
+; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v7, |v2|
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v12, 31, v3
+; GFX11-SDAG-NEXT:    v_fma_f32 v3, 0xcf800000, v11, |v3|
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    v_fma_f32 v1, 0xcf800000, v6, |v1|
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v13, v4
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v4, v6
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v6, v7
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v5
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v7, v11
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v11, v13, v5
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v13, v4, v9
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v4, v6, v10
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v6, v2, v10
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v15, v3, v12
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v0, v5
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v14, v7, v12
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v9
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e64 v3, null, v11, v5, vcc_lo
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v6, vcc_lo, v6, v10
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e64 v7, null, v4, v10, vcc_lo
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v4, vcc_lo, v15, v12
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e64 v5, null, v14, v12, vcc_lo
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v9
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e64 v1, null, v13, v9, vcc_lo
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    global_store_b128 v8, v[4:7], s[4:5] offset:16
+; GFX11-SDAG-NEXT:    global_store_b128 v8, v[0:3], s[4:5]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: fp_to_sint_v4i64:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v8, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v0, s0
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, s1
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v2, s2
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v3, s3
+; GFX11-GISEL-NEXT:    s_ashr_i32 s0, s0, 31
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v0|
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v1|
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v6, 0x2f800000, |v2|
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v7, 0x2f800000, |v3|
+; GFX11-GISEL-NEXT:    s_ashr_i32 s1, s1, 31
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v4, v4
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v5, v5
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v6, v6
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v7, v7
+; GFX11-GISEL-NEXT:    s_ashr_i32 s2, s2, 31
+; GFX11-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v4, |v0|
+; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v5, |v1|
+; GFX11-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v6, |v2|
+; GFX11-GISEL-NEXT:    v_fma_f32 v3, 0xcf800000, v7, |v3|
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v4, s0, v4
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v9, s1, v1
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GFX11-GISEL-NEXT:    s_ashr_i32 s3, s3, 31
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v5, s1, v5
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v10, s2, v2
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, s0
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v6, s2, v6
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v11, s3, v3
+; GFX11-GISEL-NEXT:    v_subrev_co_ci_u32_e64 v1, null, s0, v4, vcc_lo
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v9, s1
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v7, s3, v7
+; GFX11-GISEL-NEXT:    v_subrev_co_ci_u32_e64 v3, null, s1, v5, vcc_lo
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v4, vcc_lo, v10, s2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_subrev_co_ci_u32_e64 v5, null, s2, v6, vcc_lo
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v6, vcc_lo, v11, s3
+; GFX11-GISEL-NEXT:    v_subrev_co_ci_u32_e64 v7, null, s3, v7, vcc_lo
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    global_store_b128 v8, v[0:3], s[4:5]
+; GFX11-GISEL-NEXT:    global_store_b128 v8, v[4:7], s[4:5] offset:16
+; GFX11-GISEL-NEXT:    s_endpgm
+;
 ; EG-LABEL: fp_to_sint_v4i64:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 99, @6, KC0[CB0:0-32], KC1[]
@@ -754,6 +1090,32 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in
 ; VI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
+; GFX11-SDAG-LABEL: fp_to_uint_f32_to_i1:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cmp_eq_f32_e64 s2, -1.0, s2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2...
[truncated]

@github-actions
Copy link

🐧 Linux x64 Test Results

  • 186424 tests passed
  • 4867 tests skipped

@jayfoad jayfoad merged commit 6ce4794 into llvm:main Nov 20, 2025
13 checks passed
@jayfoad jayfoad deleted the cvt-pk-16-f32-precommit branch November 20, 2025 16:43
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU, skip-precommit-approval (PR for CI feedback, not intended for review)

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants