From 9b4a9c64277af5a8eb0daebdb2fc1bcae9bac567 Mon Sep 17 00:00:00 2001 From: ningxinr Date: Wed, 24 Sep 2025 20:43:22 -0700 Subject: [PATCH 1/3] Fix Issue #160611 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/test/CodeGen/AArch64/fpclamptosat.ll | 24 ++++++++++++ llvm/test/CodeGen/RISCV/fpclamptosat.ll | 39 +++++++++++++++++++ llvm/test/CodeGen/X86/fpclamptosat.ll | 16 ++++++++ 4 files changed, 80 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a6ba6e518899f..c8c7882895ca6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6047,7 +6047,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, } if (MaxC == 0 && MinCPlus1.isPowerOf2()) { - BW = MinCPlus1.exactLogBase2(); + BW = std::max(MinCPlus1.exactLogBase2(), 1); Unsigned = true; return N02; } diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll index 00de1530fb72c..8b1f4e92fd339 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -1026,6 +1026,30 @@ entry: ret i64 %conv6 } +define i32 @ustest_f16const_i32() { +; CHECK-CVT-LABEL: ustest_f16const_i32: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: mov w8, #2143289344 // =0x7fc00000 +; CHECK-CVT-NEXT: fmov s0, w8 +; CHECK-CVT-NEXT: fcvtzs w8, s0 +; CHECK-CVT-NEXT: and w8, w8, w8, asr #31 +; CHECK-CVT-NEXT: bic w0, w8, w8, asr #31 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: ustest_f16const_i32: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: mov w8, #32256 // =0x7e00 +; CHECK-FP16-NEXT: fmov h0, w8 +; CHECK-FP16-NEXT: fcvtzs w8, h0 +; CHECK-FP16-NEXT: and w8, w8, w8, asr #31 +; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31 +; CHECK-FP16-NEXT: ret + %1 = fptosi half 0xH7E00 to i32 + %2 = call i32 @llvm.smin.i32(i32 0, i32 %1) + %3 = call i32 @llvm.smax.i32(i32 %2, i32 0) + ret i32 %3 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 18d071cc39bb6..7bc58c43beb78 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -3829,6 +3829,45 @@ entry: ret i64 %conv6 } +define i32 @ustest_f16const_i32() { +; CHECK-LABEL: ustest_f16const_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: sarl $31, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: cmovlel %edx, %eax +; CHECK-NEXT: retq +; RV32-LABEL: ustest_f16const_i32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 523264 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: fcvt.w.s a0, fa5, rtz +; RV32-NEXT: srai a1, a0, 31 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: sgtz a1, a0 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: ustest_f16const_i32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, 523264 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: fcvt.l.s a0, fa5, rtz +; RV64-NEXT: srai a1, a0, 63 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sgtz a1, a0 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret + %1 = fptosi half 0xH7E00 to i32 + %2 = call i32 @llvm.smin.i32(i32 0, i32 %1) + %3 = call i32 @llvm.smax.i32(i32 %2, i32 0) + ret i32 %3 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll index 3f5ec7b530fe0..65492e772cebf 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat.ll @@ -1170,6 +1170,22 @@ entry: ret i64 %conv6 } +define i32 @ustest_f16const_i32() { +; CHECK-LABEL: ustest_f16const_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: sarl $31, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: cmovlel %edx, %eax +; CHECK-NEXT: retq + %1 = fptosi half 0xH7E00 to i32 + %2 = call i32 @llvm.smin.i32(i32 0, i32 %1) + %3 = call i32 @llvm.smax.i32(i32 %2, i32 0) + ret i32 %3 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) From f6db9a0106ba4c76215185a7f7c6587bb3bf8e97 Mon Sep 17 00:00:00 2001 From: ningxinr Date: Mon, 29 Sep 2025 13:30:10 -0700 Subject: [PATCH 2/3] Addressed feedbacks and fixed typos --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +- llvm/test/CodeGen/AArch64/fpclamptosat.ll | 53 +++-- llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 101 ++++++--- llvm/test/CodeGen/ARM/fpclamptosat.ll | 46 +++- llvm/test/CodeGen/ARM/fpclamptosat_vec.ll | 107 ++++++++-- llvm/test/CodeGen/RISCV/fpclamptosat.ll | 61 +++--- .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 202 ++++++++++++++++-- llvm/test/CodeGen/WebAssembly/fpclamptosat.ll | 89 +++++--- .../CodeGen/WebAssembly/fpclamptosat_vec.ll | 78 +++++-- llvm/test/CodeGen/X86/fpclamptosat.ll | 43 ++-- llvm/test/CodeGen/X86/fpclamptosat_vec.ll | 105 ++++++--- 11 files changed, 673 insertions(+), 216 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c8c7882895ca6..4178aca2db828 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6046,8 +6046,8 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, return N02; } - if (MaxC == 0 && MinCPlus1.isPowerOf2()) { - BW = std::max(MinCPlus1.exactLogBase2(), 1); + if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) { + BW = MinCPlus1.exactLogBase2(); Unsigned = true; return N02; } diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll index 8b1f4e92fd339..24be9234515e8 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -111,14 +111,14 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; CHECK-CVT-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; CHECK-CVT-LABEL: utest_f16i32: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i32: +; CHECK-FP16-LABEL: utest_f16i32: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret @@ -298,8 +298,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; CHECK-CVT-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) { +; CHECK-CVT-LABEL: utest_f16i16: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff @@ -308,7 +308,7 @@ define i16 @utesth_f16i16(half %x) { ; CHECK-CVT-NEXT: csel w0, w8, w9, lo ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i16: +; CHECK-FP16-LABEL: utest_f16i16: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff @@ -493,8 +493,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) { -; CHECK-LABEL: utesth_f16i64: +define i64 @utest_f16i64(half %x) { +; CHECK-LABEL: utest_f16i64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -636,14 +636,14 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) { -; CHECK-CVT-LABEL: utesth_f16i32_mm: +define i32 @utest_f16i32_mm(half %x) { +; CHECK-CVT-LABEL: utest_f16i32_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i32_mm: +; CHECK-FP16-LABEL: utest_f16i32_mm: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret @@ -808,8 +808,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) { -; CHECK-CVT-LABEL: utesth_f16i16_mm: +define i16 @utest_f16i16_mm(half %x) { +; CHECK-CVT-LABEL: utest_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff @@ -818,7 +818,7 @@ define i16 @utesth_f16i16_mm(half %x) { ; CHECK-CVT-NEXT: csel w0, w8, w9, lo ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: utesth_f16i16_mm: +; CHECK-FP16-LABEL: utest_f16i16_mm: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff @@ -986,8 +986,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) { -; CHECK-LABEL: utesth_f16i64_mm: +define i64 @utest_f16i64_mm(half %x) { +; CHECK-LABEL: utest_f16i64_mm: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 @@ -1026,28 +1026,27 @@ entry: ret i64 %conv6 } -define i32 @ustest_f16const_i32() { -; CHECK-CVT-LABEL: ustest_f16const_i32: +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; CHECK-CVT-LABEL: ustest_f16i32_nsat: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: mov w8, #2143289344 // =0x7fc00000 -; CHECK-CVT-NEXT: fmov s0, w8 +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvtzs w8, s0 ; CHECK-CVT-NEXT: and w8, w8, w8, asr #31 ; CHECK-CVT-NEXT: bic w0, w8, w8, asr #31 ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-LABEL: ustest_f16const_i32: +; CHECK-FP16-LABEL: ustest_f16i32_nsat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mov w8, #32256 // =0x7e00 -; CHECK-FP16-NEXT: fmov h0, w8 ; CHECK-FP16-NEXT: fcvtzs w8, h0 ; CHECK-FP16-NEXT: and w8, w8, w8, asr #31 ; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31 ; CHECK-FP16-NEXT: ret - %1 = fptosi half 0xH7E00 to i32 - %2 = call i32 @llvm.smin.i32(i32 0, i32 %1) - %3 = call i32 @llvm.smax.i32(i32 %2, i32 0) - ret i32 %3 + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 } declare i32 @llvm.smin.i32(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index b09a86723bb56..637c02875b84e 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -321,20 +321,20 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i32: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i32: +; CHECK-FP16-SD-LABEL: utest_f16i32: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i32: +; CHECK-CVT-GI-LABEL: utest_f16i32: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff @@ -349,7 +349,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i32: +; CHECK-FP16-GI-LABEL: utest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] @@ -614,8 +614,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-CVT-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-CVT-LABEL: utest_f16i16: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h ; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h @@ -625,12 +625,12 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i16: +; CHECK-FP16-SD-LABEL: utest_f16i16: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i16: +; CHECK-FP16-GI-LABEL: utest_f16i16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h ; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h @@ -1746,8 +1746,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 ; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -1777,7 +1777,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-CVT-SD-NEXT: add sp, sp, #48 ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i64: +; CHECK-FP16-SD-LABEL: utest_f16i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 ; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -1807,7 +1807,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i64: +; CHECK-CVT-GI-LABEL: utest_f16i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] @@ -1819,7 +1819,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-CVT-GI-NEXT: mov v0.d[1], x9 ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i64: +; CHECK-FP16-GI-LABEL: utest_f16i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] @@ -2307,20 +2307,20 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i32_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i32_mm: +; CHECK-FP16-SD-LABEL: utest_f16i32_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i32_mm: +; CHECK-CVT-GI-LABEL: utest_f16i32_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff @@ -2335,7 +2335,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i32_mm: +; CHECK-FP16-GI-LABEL: utest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] @@ -2585,8 +2585,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-CVT-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-CVT-LABEL: utest_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h ; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h @@ -2596,12 +2596,12 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i16_mm: +; CHECK-FP16-SD-LABEL: utest_f16i16_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i16_mm: +; CHECK-FP16-GI-LABEL: utest_f16i16_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h ; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h @@ -3694,8 +3694,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-CVT-SD-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-CVT-SD-LABEL: utest_f16i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 ; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -3725,7 +3725,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-CVT-SD-NEXT: add sp, sp, #48 ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: utesth_f16i64_mm: +; CHECK-FP16-SD-LABEL: utest_f16i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 ; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill @@ -3755,7 +3755,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; -; CHECK-CVT-GI-LABEL: utesth_f16i64_mm: +; CHECK-CVT-GI-LABEL: utest_f16i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] @@ -3767,7 +3767,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-CVT-GI-NEXT: mov v0.d[1], x9 ; CHECK-CVT-GI-NEXT: ret ; -; CHECK-FP16-GI-LABEL: utesth_f16i64_mm: +; CHECK-FP16-GI-LABEL: utest_f16i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] @@ -3941,6 +3941,51 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-CVT-SD-LABEL: ustest_f16i32_nsat: +; CHECK-CVT-SD: // %bb.0: // %entry +; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-SD-NEXT: movi v1.2d, #0000000000000000 +; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-SD-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: ustest_f16i32_nsat: +; CHECK-FP16-SD: // %bb.0: // %entry +; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-SD-NEXT: movi v1.2d, #0000000000000000 +; CHECK-FP16-SD-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-FP16-SD-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-FP16-SD-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: ustest_f16i32_nsat: +; CHECK-CVT-GI: // %bb.0: // %entry +; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-GI-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-CVT-GI-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: ustest_f16i32_nsat: +; CHECK-FP16-GI: // %bb.0: // %entry +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-FP16-GI-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-FP16-GI-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-FP16-GI-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: ret +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 8ab56b228d2a7..37f230abb7a14 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -383,8 +383,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; SOFT-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; SOFT-LABEL: utest_f16i32: ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r7, lr} ; SOFT-NEXT: push {r7, lr} @@ -400,7 +400,7 @@ define i32 @utesth_f16i32(half %x) { ; SOFT-NEXT: .LBB7_2: @ %entry ; SOFT-NEXT: pop {r7, pc} ; -; VFP2-LABEL: utesth_f16i32: +; VFP2-LABEL: utest_f16i32: ; VFP2: @ %bb.0: @ %entry ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} @@ -411,7 +411,7 @@ define i32 @utesth_f16i32(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; -; FULL-LABEL: utesth_f16i32: +; FULL-LABEL: utest_f16i32: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.u32.f16 s0, s0 ; FULL-NEXT: vmov r0, s0 @@ -3985,6 +3985,44 @@ entry: ret i32 %spec.store.select7 } +define i32 @ustest_f16i32_nsat(half %x) { +; SOFT-LABEL: ustest_f16i32_nsat: +; SOFT: @ %bb.0: +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} +; SOFT-NEXT: uxth r0, r0 +; SOFT-NEXT: bl __aeabi_h2f +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: bics r0, r1 +; SOFT-NEXT: pop {r7, pc} +; +; VFP2-LABEL: ustest_f16i32_nsat: +; VFP2: @ %bb.0: +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: bl __aeabi_h2f +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: usat r0, #0, r0 +; VFP2-NEXT: pop {r7, pc} +; +; FULL-LABEL: ustest_f16i32_nsat: +; FULL: @ %bb.0: +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: usat r0, #0, r0 +; FULL-NEXT: bx lr + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 96f009a4da02d..ba31b353ee1f6 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -748,8 +748,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i32: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} @@ -821,7 +821,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vpop {d12, d13} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i32: +; CHECK-FP16-LABEL: utest_f16i32: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} @@ -1366,8 +1366,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i16: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -1441,7 +1441,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i16: +; CHECK-FP16-LABEL: utest_f16i16: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: vmovx.f16 s4, s0 ; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0 @@ -2109,8 +2109,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, lr} @@ -2148,7 +2148,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; -; CHECK-FP16-LABEL: utesth_f16i64: +; CHECK-FP16-LABEL: utest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -2835,8 +2835,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -2881,7 +2881,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i32_mm: +; CHECK-FP16-LABEL: utest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -3344,8 +3344,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i16_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} @@ -3419,7 +3419,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; CHECK-FP16-LABEL: utesth_f16i16_mm: +; CHECK-FP16-LABEL: utest_f16i16_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: vmovx.f16 s4, s0 ; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0 @@ -4044,8 +4044,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-NEON-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-NEON-LABEL: utest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, lr} @@ -4083,7 +4083,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; -; CHECK-FP16-LABEL: utesth_f16i64_mm: +; CHECK-FP16-LABEL: utest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, lr} @@ -4215,6 +4215,77 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-NEON-LABEL: ustest_f16i32_nsat: +; CHECK-NEON: @ %bb.0: @ %entry +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} +; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.f32 s16, s3 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 +; CHECK-NEON-NEXT: vmov.f32 s20, s1 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: mov r4, r0 +; CHECK-NEON-NEXT: vmov r0, s16 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov s16, r0 +; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov s0, r0 +; CHECK-NEON-NEXT: vmov r1, s20 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vmov s18, r4 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.32 d11[0], r0 +; CHECK-NEON-NEXT: mov r0, r1 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s18 +; CHECK-NEON-NEXT: vmov s0, r0 +; CHECK-NEON-NEXT: vcvt.s32.f32 s4, s16 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEON-NEXT: vmov r0, s2 +; CHECK-NEON-NEXT: vmov.32 d10[0], r0 +; CHECK-NEON-NEXT: vmov r0, s4 +; CHECK-NEON-NEXT: vmov.32 d11[1], r0 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.32 d10[1], r0 +; CHECK-NEON-NEXT: vmin.s32 q9, q5, q8 +; CHECK-NEON-NEXT: vmax.s32 q0, q9, q8 +; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEON-NEXT: pop {r4, pc} +; +; CHECK-FP16-LABEL: ustest_f16i32_nsat: +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: vmovx.f16 s2, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s1 +; CHECK-FP16-NEXT: vmovx.f16 s4, s1 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4 +; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-FP16-NEXT: vmov.i32 q9, #0x0 +; CHECK-FP16-NEXT: vmov.32 d17[0], r0 +; CHECK-FP16-NEXT: vmov r0, s6 +; CHECK-FP16-NEXT: vmov.32 d16[0], r0 +; CHECK-FP16-NEXT: vmov r0, s4 +; CHECK-FP16-NEXT: vmov.32 d17[1], r0 +; CHECK-FP16-NEXT: vmov r0, s2 +; CHECK-FP16-NEXT: vmov.32 d16[1], r0 +; CHECK-FP16-NEXT: vmin.s32 q8, q8, q9 +; CHECK-FP16-NEXT: vmax.s32 q0, q8, q9 +; CHECK-FP16-NEXT: bx lr +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 7bc58c43beb78..cb2555731e133 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -436,8 +436,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; RV32-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) { +; RV32-LABEL: utest_f16i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 @@ -456,7 +456,7 @@ define i32 @utesth_f16i32(half %x) { ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; -; RV64-LABEL: utesth_f16i32: +; RV64-LABEL: utest_f16i32: ; RV64: # %bb.0: # %entry ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 @@ -974,8 +974,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; RV32-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) { +; RV32-LABEL: utest_f16i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 @@ -995,7 +995,7 @@ define i16 @utesth_f16i16(half %x) { ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; -; RV64-LABEL: utesth_f16i16: +; RV64-LABEL: utest_f16i16: ; RV64: # %bb.0: # %entry ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 @@ -3829,43 +3829,48 @@ entry: ret i64 %conv6 } -define i32 @ustest_f16const_i32() { -; CHECK-LABEL: ustest_f16const_i32: -; CHECK: # %bb.0: -; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: sarl $31, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: andl %ecx, %eax -; CHECK-NEXT: cmovlel %edx, %eax -; CHECK-NEXT: retq -; RV32-LABEL: ustest_f16const_i32: +define i32 @ustest_f16i32_nsat(half %x) { +; RV32-LABEL: ustest_f16i32_nsat: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, 523264 -; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: fcvt.w.s a0, fa5, rtz +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call __extendhfsf2 +; RV32-NEXT: fcvt.w.s a0, fa0, rtz ; RV32-NEXT: srai a1, a0, 31 ; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: sgtz a1, a0 ; RV32-NEXT: neg a1, a1 ; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; -; RV64-LABEL: ustest_f16const_i32: +; RV64-LABEL: ustest_f16i32_nsat: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 523264 -; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: fcvt.l.s a0, fa5, rtz +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call __extendhfsf2 +; RV64-NEXT: fcvt.l.s a0, fa0, rtz ; RV64-NEXT: srai a1, a0, 63 ; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: sgtz a1, a0 ; RV64-NEXT: neg a1, a1 ; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret - %1 = fptosi half 0xH7E00 to i32 - %2 = call i32 @llvm.smin.i32(i32 0, i32 %1) - %3 = call i32 @llvm.smax.i32(i32 %2, i32 0) - ret i32 %3 + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 } declare i32 @llvm.smin.i32(i32, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index aba9d37bfaa04..f597762521006 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -519,8 +519,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i32: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -64 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 @@ -610,7 +610,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: bgeu a3, a1, .LBB7_4 ; CHECK-NOV-NEXT: j .LBB7_5 ; -; CHECK-V-LABEL: utesth_f16i32: +; CHECK-V-LABEL: utest_f16i32: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -48 ; CHECK-V-NEXT: .cfi_def_cfa_offset 48 @@ -1594,8 +1594,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i16: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -128 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128 @@ -1765,7 +1765,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8 ; CHECK-NOV-NEXT: j .LBB16_9 ; -; CHECK-V-LABEL: utesth_f16i16: +; CHECK-V-LABEL: utest_f16i16: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -80 ; CHECK-V-NEXT: .cfi_def_cfa_offset 80 @@ -3332,8 +3332,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i64: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -32 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32 @@ -3373,7 +3373,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NOV-NEXT: ret ; -; CHECK-V-LABEL: utesth_f16i64: +; CHECK-V-LABEL: utest_f16i64: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -32 ; CHECK-V-NEXT: .cfi_def_cfa_offset 32 @@ -4074,8 +4074,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i32_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -64 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 @@ -4165,7 +4165,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: bgeu a3, a1, .LBB34_4 ; CHECK-NOV-NEXT: j .LBB34_5 ; -; CHECK-V-LABEL: utesth_f16i32_mm: +; CHECK-V-LABEL: utest_f16i32_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -48 ; CHECK-V-NEXT: .cfi_def_cfa_offset 48 @@ -5134,8 +5134,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i16_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -128 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128 @@ -5305,7 +5305,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8 ; CHECK-NOV-NEXT: j .LBB43_9 ; -; CHECK-V-LABEL: utesth_f16i16_mm: +; CHECK-V-LABEL: utest_f16i16_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -80 ; CHECK-V-NEXT: .cfi_def_cfa_offset 80 @@ -6837,8 +6837,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-NOV-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-NOV-LABEL: utest_f16i64_mm: ; CHECK-NOV: # %bb.0: # %entry ; CHECK-NOV-NEXT: addi sp, sp, -32 ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32 @@ -6877,7 +6877,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NOV-NEXT: ret ; -; CHECK-V-LABEL: utesth_f16i64_mm: +; CHECK-V-LABEL: utest_f16i64_mm: ; CHECK-V: # %bb.0: # %entry ; CHECK-V-NEXT: addi sp, sp, -32 ; CHECK-V-NEXT: .cfi_def_cfa_offset 32 @@ -7048,6 +7048,172 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-NOV-LABEL: ustest_f16i32_nsat: +; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: addi sp, sp, -64 +; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NOV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; CHECK-NOV-NEXT: .cfi_offset ra, -8 +; CHECK-NOV-NEXT: .cfi_offset s0, -16 +; CHECK-NOV-NEXT: .cfi_offset s1, -24 +; CHECK-NOV-NEXT: .cfi_offset s2, -32 +; CHECK-NOV-NEXT: .cfi_offset s3, -40 +; CHECK-NOV-NEXT: .cfi_offset fs0, -48 +; CHECK-NOV-NEXT: .cfi_offset fs1, -56 +; CHECK-NOV-NEXT: lhu s1, 0(a1) +; CHECK-NOV-NEXT: lhu s2, 8(a1) +; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) +; CHECK-NOV-NEXT: mv s0, a0 +; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fmv.s fs0, fa0 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fmv.s fs1, fa0 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fcvt.l.s s1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.s s2, fs1, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fcvt.l.s s3, fs0, rtz +; CHECK-NOV-NEXT: call __extendhfsf2 +; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-NOV-NEXT: srai a1, s3, 63 +; CHECK-NOV-NEXT: and a1, a1, s3 +; CHECK-NOV-NEXT: srai a2, s2, 63 +; CHECK-NOV-NEXT: and a2, a2, s2 +; CHECK-NOV-NEXT: srai a3, s1, 63 +; CHECK-NOV-NEXT: and a3, a3, s1 +; CHECK-NOV-NEXT: srai a4, a0, 63 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: sgtz a4, a3 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a3, a4, a3 +; CHECK-NOV-NEXT: sgtz a4, a2 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a2, a4, a2 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sgtz a4, a0 +; CHECK-NOV-NEXT: neg a4, a4 +; CHECK-NOV-NEXT: and a0, a4, a0 +; CHECK-NOV-NEXT: sw a3, 0(s0) +; CHECK-NOV-NEXT: sw a2, 4(s0) +; CHECK-NOV-NEXT: sw a1, 8(s0) +; CHECK-NOV-NEXT: sw a0, 12(s0) +; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s2, 32(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: ld s3, 24(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload +; CHECK-NOV-NEXT: .cfi_restore ra +; CHECK-NOV-NEXT: .cfi_restore s0 +; CHECK-NOV-NEXT: .cfi_restore s1 +; CHECK-NOV-NEXT: .cfi_restore s2 +; CHECK-NOV-NEXT: .cfi_restore s3 +; CHECK-NOV-NEXT: .cfi_restore fs0 +; CHECK-NOV-NEXT: .cfi_restore fs1 +; CHECK-NOV-NEXT: addi sp, sp, 64 +; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NOV-NEXT: ret +; +; CHECK-V-LABEL: ustest_f16i32_nsat: +; CHECK-V: # %bb.0: # %entry +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: .cfi_offset ra, -8 +; CHECK-V-NEXT: .cfi_offset s0, -16 +; CHECK-V-NEXT: .cfi_offset s1, -24 +; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: sub sp, sp, a1 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: lhu s0, 0(a0) +; CHECK-V-NEXT: lhu s1, 8(a0) +; CHECK-V-NEXT: lhu s2, 16(a0) +; CHECK-V-NEXT: lhu a0, 24(a0) +; CHECK-V-NEXT: fmv.w.x fa0, a0 +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-V-NEXT: call __extendhfsf2 +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-V-NEXT: vslideup.vi v8, v9, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-V-NEXT: vslideup.vi v8, v9, 2 +; CHECK-V-NEXT: vmin.vx v8, v8, zero +; CHECK-V-NEXT: vmax.vx v8, v8, zero +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: .cfi_def_cfa sp, 48 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: .cfi_restore ra +; CHECK-V-NEXT: .cfi_restore s0 +; CHECK-V-NEXT: .cfi_restore s1 +; CHECK-V-NEXT: .cfi_restore s2 +; CHECK-V-NEXT: addi sp, sp, 48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 0 +; CHECK-V-NEXT: ret +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index 137994ceac132..59f3edc7168be 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -136,9 +136,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) { -; CHECK-LABEL: utesth_f16i32: -; CHECK: .functype utesth_f16i32 (f32) -> (i32) +define i32 @utest_f16i32(half %x) { +; CHECK-LABEL: utest_f16i32: +; CHECK: .functype utest_f16i32 (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -153,9 +153,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_cse(half %x) { -; CHECK-LABEL: utesth_f16i32_cse: -; CHECK: .functype utesth_f16i32_cse (f32) -> (i32) +define i32 @utest_f16i32_cse(half %x) { +; CHECK-LABEL: utest_f16i32_cse: +; CHECK: .functype utest_f16i32_cse (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -403,9 +403,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) { -; CHECK-LABEL: utesth_f16i16: -; CHECK: .functype utesth_f16i16 (f32) -> (i32) +define i16 @utest_f16i16(half %x) { +; CHECK-LABEL: utest_f16i16: +; CHECK: .functype utest_f16i16 (f32) -> (i32) ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 @@ -427,9 +427,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_cse(half %x) { -; CHECK-LABEL: utesth_f16i16_cse: -; CHECK: .functype utesth_f16i16_cse (f32) -> (i32) +define i16 @utest_f16i16_cse(half %x) { +; CHECK-LABEL: utest_f16i16_cse: +; CHECK: .functype utest_f16i16_cse (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -880,9 +880,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) { -; CHECK-LABEL: utesth_f16i64: -; CHECK: .functype utesth_f16i64 (f32) -> (i64) +define i64 @utest_f16i64(half %x) { +; CHECK-LABEL: utest_f16i64: +; CHECK: .functype utest_f16i64 (f32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -919,9 +919,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_cse(half %x) { -; CHECK-LABEL: utesth_f16i64_cse: -; CHECK: .functype utesth_f16i64_cse (f32) -> (i64) +define i64 @utest_f16i64_cse(half %x) { +; CHECK-LABEL: utest_f16i64_cse: +; CHECK: .functype utest_f16i64_cse (f32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1118,9 +1118,9 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) { -; CHECK-LABEL: utesth_f16i32_mm: -; CHECK: .functype utesth_f16i32_mm (f32) -> (i32) +define i32 @utest_f16i32_mm(half %x) { +; CHECK-LABEL: utest_f16i32_mm: +; CHECK: .functype utest_f16i32_mm (f32) -> (i32) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 @@ -1353,9 +1353,9 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) { -; CHECK-LABEL: utesth_f16i16_mm: -; CHECK: .functype utesth_f16i16_mm (f32) -> (i32) +define i16 @utest_f16i16_mm(half %x) { +; CHECK-LABEL: utest_f16i16_mm: +; CHECK: .functype utest_f16i16_mm (f32) -> (i32) ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 @@ -1637,9 +1637,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) { -; CHECK-LABEL: utesth_f16i64_mm: -; CHECK: .functype utesth_f16i64_mm (f32) -> (i64) +define i64 @utest_f16i64_mm(half %x) { +; CHECK-LABEL: utest_f16i64_mm: +; CHECK: .functype utest_f16i64_mm (f32) -> (i64) ; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1724,9 +1724,9 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm_cse(half %x) { -; CHECK-LABEL: utesth_f16i64_mm_cse: -; CHECK: .functype utesth_f16i64_mm_cse (f32) -> (i64) +define i64 @utest_f16i64_mm_cse(half %x) { +; CHECK-LABEL: utest_f16i64_mm_cse: +; CHECK: .functype utest_f16i64_mm_cse (f32) -> (i64) ; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1754,6 +1754,35 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: .functype ustest_f16i32_nsat (f32) -> (i32) +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32.shr_s +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.and +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.gt_s +; CHECK-NEXT: i32.select +; CHECK-NEXT: # fallthrough-return + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 7190e162eb010..52f57dc7d1a3e 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -209,9 +209,9 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-LABEL: utesth_f16i32: -; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128) +define <4 x i32> @utest_f16i32(<4 x half> %x) { +; CHECK-LABEL: utest_f16i32: +; CHECK: .functype utest_f16i32 (f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 @@ -513,9 +513,9 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-LABEL: utesth_f16i16: -; CHECK: .functype utesth_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +define <8 x i16> @utest_f16i16(<8 x half> %x) { +; CHECK-LABEL: utest_f16i16: +; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 @@ -1295,9 +1295,9 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) { -; CHECK-LABEL: utesth_f16i64: -; CHECK: .functype utesth_f16i64 (f32, f32) -> (v128) +define <2 x i64> @utest_f16i64(<2 x half> %x) { +; CHECK-LABEL: utest_f16i64: +; CHECK: .functype utest_f16i64 (f32, f32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -1649,9 +1649,9 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-LABEL: utesth_f16i32_mm: -; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128) +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { +; CHECK-LABEL: utest_f16i32_mm: +; CHECK: .functype utest_f16i32_mm (f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 @@ -1938,9 +1938,9 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-LABEL: utesth_f16i16_mm: -; CHECK: .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { +; CHECK-LABEL: utest_f16i16_mm: +; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 @@ -2673,9 +2673,9 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { -; CHECK-LABEL: utesth_f16i64_mm: -; CHECK: .functype utesth_f16i64_mm (f32, f32) -> (v128) +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { +; CHECK-LABEL: utest_f16i64_mm: +; CHECK: .functype utest_f16i64_mm (f32, f32) -> (v128) ; CHECK-NEXT: .local i32, i64, i64, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer @@ -2810,6 +2810,48 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: .functype ustest_f16i32_nsat (f32, f32, f32, f32) -> (v128) +; CHECK-NEXT: .local v128 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: local.set 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.splat +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 1 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 2 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 3 +; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: i32x4.min_s +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i32x4.max_s +; CHECK-NEXT: # fallthrough-return +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll index 65492e772cebf..67483be7a3fde 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat.ll @@ -161,8 +161,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) nounwind { -; CHECK-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) nounwind { +; CHECK-LABEL: utest_f16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -360,8 +360,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) nounwind { -; CHECK-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) nounwind { +; CHECK-LABEL: utest_f16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -566,8 +566,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) nounwind { -; CHECK-LABEL: utesth_f16i64: +define i64 @utest_f16i64(half %x) nounwind { +; CHECK-LABEL: utest_f16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __fixunshfti@PLT @@ -762,8 +762,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i32_mm: +define i32 @utest_f16i32_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i32_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -946,8 +946,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i16_mm: +define i16 @utest_f16i16_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i16_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -1131,8 +1131,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i64_mm: +define i64 @utest_f16i64_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i64_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __fixunshfti@PLT @@ -1170,20 +1170,25 @@ entry: ret i64 %conv6 } -define i32 @ustest_f16const_i32() { -; CHECK-LABEL: ustest_f16const_i32: +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) nounwind { +; CHECK-LABEL: ustest_f16i32_nsat: ; CHECK: # %bb.0: -; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: cvttss2si %xmm0, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: sarl $31, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: andl %ecx, %eax ; CHECK-NEXT: cmovlel %edx, %eax +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq - %1 = fptosi half 0xH7E00 to i32 - %2 = call i32 @llvm.smin.i32(i32 0, i32 %1) - %3 = call i32 @llvm.smax.i32(i32 %2, i32 0) - ret i32 %3 + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 } declare i32 @llvm.smin.i32(i32, i32) diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index 1a2cfd69650b8..991ce33e58b4c 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -747,8 +747,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) nounwind { +; SSE-LABEL: utest_f16i32: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movaps %xmm0, %xmm1 @@ -835,7 +835,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i32: +; AVX2-LABEL: utest_f16i32: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 @@ -893,7 +893,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i32: +; AVX512-LABEL: utest_f16i32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -1338,8 +1338,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) nounwind { +; SSE-LABEL: utest_f16i16: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill @@ -1436,7 +1436,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i16: +; AVX2-LABEL: utest_f16i16: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] @@ -1453,7 +1453,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i16: +; AVX512-LABEL: utest_f16i16: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -2456,8 +2456,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) nounwind { +; SSE-LABEL: utest_f16i64: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %r14 ; SSE-NEXT: pushq %rbx @@ -2483,7 +2483,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i64: +; AVX2-LABEL: utest_f16i64: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %rbx @@ -2508,7 +2508,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i64: +; AVX512-LABEL: utest_f16i64: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx @@ -3359,8 +3359,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) nounwind { +; SSE-LABEL: utest_f16i32_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movaps %xmm0, %xmm1 @@ -3447,7 +3447,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i32_mm: +; AVX2-LABEL: utest_f16i32_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 @@ -3505,7 +3505,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i32_mm: +; AVX512-LABEL: utest_f16i32_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -3935,8 +3935,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) nounwind { +; SSE-LABEL: utest_f16i16_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill @@ -4033,7 +4033,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i16_mm: +; AVX2-LABEL: utest_f16i16_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] @@ -4050,7 +4050,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i16_mm: +; AVX512-LABEL: utest_f16i16_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -4820,8 +4820,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) nounwind { +; SSE-LABEL: utest_f16i64_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %r14 ; SSE-NEXT: pushq %rbx @@ -4847,7 +4847,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i64_mm: +; AVX2-LABEL: utest_f16i64_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %rbx @@ -4872,7 +4872,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i64_mm: +; AVX512-LABEL: utest_f16i64_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx @@ -4974,6 +4974,63 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) nounwind { +; SSE-LABEL: ustest_f16i32_nsat: +; SSE: # %bb.0: # %entry +; SSE-NEXT: subq $72, %rsp +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; SSE-NEXT: psrlq $48, %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: cvttps2dq %xmm1, %xmm0 +; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE-NEXT: pand %xmm0, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: addq $72, %rsp +; SSE-NEXT: retq +; +; AVX-LABEL: ustest_f16i32_nsat: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) From baf5cdeb8a0a8f95c19dcf2e546ee6bac4f437c1 Mon Sep 17 00:00:00 2001 From: ningxinr Date: Mon, 29 Sep 2025 13:36:49 -0700 Subject: [PATCH 3/3] Added the non sat comment --- llvm/test/CodeGen/ARM/fpclamptosat.ll | 2 ++ llvm/test/CodeGen/RISCV/fpclamptosat.ll | 2 ++ 2 files changed, 4 insertions(+) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 37f230abb7a14..a6f0a03fc7e5b 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -3985,6 +3985,8 @@ entry: ret i32 %spec.store.select7 } +; i32 non saturate + define i32 @ustest_f16i32_nsat(half %x) { ; SOFT-LABEL: ustest_f16i32_nsat: ; SOFT: @ %bb.0: diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index cb2555731e133..a0d1ecce74e04 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -3829,6 +3829,8 @@ entry: ret i64 %conv6 } +; i32 non saturate + define i32 @ustest_f16i32_nsat(half %x) { ; RV32-LABEL: ustest_f16i32_nsat: ; RV32: # %bb.0: