Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -1445,6 +1445,49 @@ def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))),
def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))),
(NARROW_U_I16x8 $left, $right)>;

// Recognize a saturating truncation and convert into the corresponding
// narrow_TYPE_s or narrow_TYPE_u instruction.
//
// SignedSaturatingTruncate covers the signed form. Each of the two operands of
// the matched wasm_narrow_u node must be the source vector clamped to
// [minval, maxval] with signed min/max against splatted constants and then
// ANDed with a splatted `mask`. The whole expression is replaced by a single
// `narrow` instruction applied directly to the unclamped $a and $b.
//
//   input   - vector type of the two source operands
//   output  - vector type of the narrowed result
//   narrow  - instruction emitted in place of the matched expression
//   minval  - lower bound of the signed clamp
//   maxval  - upper bound of the signed clamp
//   mask    - constant ANDed with each clamped operand
//
// NOTE(review): the clamp + AND + wasm_narrow_u shape is presumably what a
// clamped `trunc` looks like by the time it reaches instruction selection;
// confirm against the lowering that produces wasm_narrow_u.
multiclass SignedSaturatingTruncate<ValueType input, ValueType output,
Instruction narrow, int minval,
int maxval, int mask> {
// Clamp order: smax against minval first, then smin against maxval.
def : Pat<
(output (wasm_narrow_u
(and (smin (smax (input V128:$a), (splat_vector (i32 minval))),
(splat_vector (i32 maxval))), (splat_vector (i32 mask))),
(and (smin (smax (input V128:$b), (splat_vector (i32 minval))),
(splat_vector (i32 maxval))), (splat_vector (i32 mask)))
)),
(narrow V128:$a, V128:$b)
>;

// Same match with the clamp applied in the opposite order: smin against
// maxval first, then smax against minval.
def : Pat<
(output (wasm_narrow_u
(and (smax (smin (input V128:$a), (splat_vector (i32 maxval))),
(splat_vector (i32 minval))), (splat_vector (i32 mask))),
(and (smax (smin (input V128:$b), (splat_vector (i32 maxval))),
(splat_vector (i32 minval))), (splat_vector (i32 mask)))
)),
(narrow V128:$a, V128:$b)
>;
}

// Instantiate for 16->8 bit and 32->16 bit lanes. The bounds are the signed
// range of the destination lane and the mask is all-ones in the destination
// lane width.
defm : SignedSaturatingTruncate<v8i16, v16i8, NARROW_S_I8x16, -128, 127, 0xFF>;
defm : SignedSaturatingTruncate<v4i32, v8i16, NARROW_S_I16x8, -32768, 32767, 0xFFFF>;

// Unsigned form of the saturating-truncation match. Each of the two operands
// of the matched wasm_narrow_u node must be an unsigned min of the source
// vector against a splatted `maxval`; the expression is replaced by `narrow`
// applied directly to the unclamped $a and $b.
//
//   input   - vector type of the two source operands
//   output  - vector type of the narrowed result
//   narrow  - instruction emitted in place of the matched expression
//   maxval  - upper bound of the unsigned clamp
//
// NOTE(review): the WebAssembly narrow_u instructions interpret their input
// lanes as signed integers. A source lane with its top bit set is clamped to
// maxval by umin, but would saturate to 0 when handed straight to narrow_u.
// Confirm that dropping the umin is sound for such lanes (or that they cannot
// reach this pattern) before relying on this rewrite.
multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output,
Instruction narrow, int maxval> {
def : Pat<
(output (wasm_narrow_u
(umin (input V128:$a), (splat_vector (i32 maxval))),
(umin (input V128:$b), (splat_vector (i32 maxval)))
)),
(narrow V128:$a, V128:$b)
>;
}

// Instantiate for 16->8 bit and 32->16 bit lanes; maxval is the largest
// unsigned value representable in the destination lane.
defm : UnsignedSaturatingTruncate<v8i16, v16i8, NARROW_U_I8x16, 0xFF>;
defm : UnsignedSaturatingTruncate<v4i32, v8i16, NARROW_U_I16x8, 0xFFFF>;

// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
foreach t1 = AllVecs in
Expand Down
48 changes: 2 additions & 46 deletions llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,6 @@ entry:
define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-LABEL: stest_f16i16:
; CHECK: .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
Expand Down Expand Up @@ -474,15 +473,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
; CHECK-NEXT: local.tee 8
; CHECK-NEXT: i32x4.min_s
; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
; CHECK-NEXT: local.tee 9
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.tee 10
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.splat
Expand All @@ -495,13 +485,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: local.get 8
; CHECK-NEXT: i32x4.min_s
; CHECK-NEXT: local.get 9
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 10
; CHECK-NEXT: v128.and
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
Expand All @@ -516,7 +500,6 @@ entry:
define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-LABEL: utest_f16i16:
; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
Expand Down Expand Up @@ -556,9 +539,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.tee 8
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.splat
Expand All @@ -571,8 +551,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: local.get 8
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
Expand Down Expand Up @@ -1861,7 +1839,6 @@ entry:
define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-LABEL: stest_f16i16_mm:
; CHECK: .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
Expand Down Expand Up @@ -1901,15 +1878,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
; CHECK-NEXT: local.tee 8
; CHECK-NEXT: i32x4.min_s
; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
; CHECK-NEXT: local.tee 9
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.tee 10
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.splat
Expand All @@ -1922,13 +1890,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: local.get 8
; CHECK-NEXT: i32x4.min_s
; CHECK-NEXT: local.get 9
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 10
; CHECK-NEXT: v128.and
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
Expand All @@ -1941,7 +1903,6 @@ entry:
define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-LABEL: utest_f16i16_mm:
; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
Expand Down Expand Up @@ -1981,9 +1942,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.tee 8
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.splat
Expand All @@ -1996,8 +1954,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: local.get 8
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
Expand Down
87 changes: 87 additions & 0 deletions llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5

; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

target triple = "wasm32-unknown-unknown"

declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2

; Concatenate two i16x8 inputs, clamp every lane to the signed i8 range
; [-128, 127] (smax then smin) and truncate to i8 lanes. The whole sequence is
; expected to select to a single i8x16.narrow_i16x8_s.
define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: i16_signed:
; CHECK: .functype i16_signed (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.narrow_i16x8_s
; CHECK-NEXT: # fallthrough-return
bb2:
  %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
  %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
  ; nsw is valid here: every lane is already within the signed i8 range.
  %3 = trunc nsw <16 x i16> %2 to <16 x i8>
  ret <16 x i8> %3
}

; Concatenate two i32x4 inputs, clamp every lane to the signed i16 range
; [-32768, 32767] (lower bound first, then upper bound) and truncate to i16
; lanes. Expected to select to a single i16x8.narrow_i32x4_s.
define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_signed:
; CHECK: .functype i32_signed (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
bb2:
  %wide = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %floor = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %wide, <8 x i32> splat (i32 -32768))
  %clamped = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %floor, <8 x i32> splat (i32 32767))
  %narrowed = trunc nsw <8 x i32> %clamped to <8 x i16>
  ret <8 x i16> %narrowed
}

; Variant of the signed i32 -> i16 case with the clamp applied in the opposite
; order (upper bound first, then lower bound) and the constant passed as the
; first intrinsic operand. Still expected to select to a single
; i16x8.narrow_i32x4_s.
define <8 x i16> @i32_signed_flipped(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_signed_flipped:
; CHECK: .functype i32_signed_flipped (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
bb2:
  %wide = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %ceil = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> splat (i32 32767), <8 x i32> %wide)
  %clamped = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> splat (i32 -32768), <8 x i32> %ceil)
  %narrowed = trunc nsw <8 x i32> %clamped to <8 x i16>
  ret <8 x i16> %narrowed
}

; Concatenate two i16x8 inputs, clamp every lane to at most 255 with an
; unsigned min and truncate to i8 lanes. Expected to select to a single
; i8x16.narrow_i16x8_u.
define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: i16_unsigned:
; CHECK: .functype i16_unsigned (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: # fallthrough-return
bb2:
  %wide = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %clamped = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %wide, <16 x i16> splat (i16 255))
  %narrowed = trunc nuw <16 x i16> %clamped to <16 x i8>
  ret <16 x i8> %narrowed
}

; Concatenate two i32x4 inputs, clamp every lane to at most 65535 with an
; unsigned min and truncate to i16 lanes. Expected to select to a single
; i16x8.narrow_i32x4_u.
define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_unsigned:
; CHECK: .functype i32_unsigned (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: # fallthrough-return
bb2:
  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
  ; The clamp guarantees the value fits in 16 unsigned bits, so the matching
  ; flag is nuw. nsw would make lanes in 32768..65535 poison, because they do
  ; not fit in a signed i16.
  %2 = trunc nuw <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}