diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index b800123ac0fff9..3c97befcea1a4d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -828,12 +828,18 @@ multiclass SIMDBitwise simdop, (!cast(NAME) $lhs, $rhs)>; } -multiclass SIMDUnary simdop> { +multiclass SIMDUnary simdop, list reqs = []> { defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), [(set (vec.vt V128:$dst), (vec.vt (node (vec.vt V128:$v))))], vec.prefix#"."#name#"\t$dst, $v", - vec.prefix#"."#name, simdop>; + vec.prefix#"."#name, simdop, reqs>; +} + +multiclass HalfPrecisionUnary simdop> { + defm "" : SIMDUnary; } // Bitwise logic: v128.not @@ -1190,6 +1196,10 @@ defm EXTMUL_HIGH_U : multiclass SIMDUnaryFP baseInst> { defm "" : SIMDUnary; defm "" : SIMDUnary; + // Unlike F32x4 and F64x2 there's not a gap in the opcodes between "neg" and + // "sqrt" so subtract one from the offset. + defm "" : HalfPrecisionUnary; } // Absolute value: abs @@ -1210,14 +1220,20 @@ defm CEIL : SIMDUnary; defm FLOOR : SIMDUnary; defm TRUNC: SIMDUnary; defm NEAREST: SIMDUnary; +defm CEIL : HalfPrecisionUnary; +defm FLOOR : HalfPrecisionUnary; +defm TRUNC : HalfPrecisionUnary; +defm NEAREST : HalfPrecisionUnary; // WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint. def : Pat<(v4f32 (frint (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>; def : Pat<(v2f64 (frint (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>; +def : Pat<(v8f16 (frint (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>; // WebAssembly always rounds ties-to-even, so map froundeven to fnearbyint. def : Pat<(v4f32 (froundeven (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>; def : Pat<(v2f64 (froundeven (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>; +def : Pat<(v8f16 (froundeven (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>; //===----------------------------------------------------------------------===// // Floating-point binary arithmetic diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll index cca25b485cdf2a..0f0a1590915143 100644 --- a/llvm/test/CodeGen/WebAssembly/half-precision.ll +++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll @@ -157,3 +157,92 @@ define <8 x i1> @compare_oge_v8f16 (<8 x half> %x, <8 x half> %y) { %res = fcmp oge <8 x half> %x, %y ret <8 x i1> %res } + +; CHECK-LABEL: abs_v8f16: +; CHECK-NEXT: .functype abs_v8f16 (v128) -> (v128) +; CHECK-NEXT: f16x8.abs $push0=, $0 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.fabs.v8f16(<8 x half>) nounwind readnone +define <8 x half> @abs_v8f16(<8 x half> %x) { + %a = call <8 x half> @llvm.fabs.v8f16(<8 x half> %x) + ret <8 x half> %a +} + +; CHECK-LABEL: neg_v8f16: +; CHECK-NEXT: .functype neg_v8f16 (v128) -> (v128) +; CHECK-NEXT: f16x8.neg $push0=, $0 +; CHECK-NEXT: return $pop0 +define <8 x half> @neg_v8f16(<8 x half> %x) { + %a = fsub nsz <8 x half> , %x + ret <8 x half> %a +} + +; CHECK-LABEL: sqrt_v8f16: +; CHECK-NEXT: .functype sqrt_v8f16 (v128) -> (v128) +; CHECK-NEXT: f16x8.sqrt $push0=, $0 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.sqrt.v8f16(<8 x half> %x) +define <8 x half> @sqrt_v8f16(<8 x half> %x) { + %a = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %x) + ret <8 x half> %a +} + +; CHECK-LABEL: ceil_v8f16: +; CHECK-NEXT: .functype ceil_v8f16 (v128) -> (v128){{$}} +; CHECK-NEXT: f16x8.ceil $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <8 x half> @llvm.ceil.v8f16(<8 x half>) +define <8 x half> @ceil_v8f16(<8 x half> %a) { + %v = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a) + ret <8 x half> %v +} + +; CHECK-LABEL: floor_v8f16: +; CHECK-NEXT: .functype floor_v8f16 (v128) -> (v128){{$}} +; CHECK-NEXT: f16x8.floor $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <8 x half> @llvm.floor.v8f16(<8 x half>) +define <8 x half> @floor_v8f16(<8 x half> %a) { + %v = call <8 x half> @llvm.floor.v8f16(<8 x half> %a) + ret <8 x half> %v +} + +; CHECK-LABEL: trunc_v8f16: +; CHECK-NEXT: .functype trunc_v8f16 (v128) -> (v128){{$}} +; CHECK-NEXT: f16x8.trunc $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <8 x half> @llvm.trunc.v8f16(<8 x half>) +define <8 x half> @trunc_v8f16(<8 x half> %a) { + %v = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a) + ret <8 x half> %v +} + +; CHECK-LABEL: nearest_v8f16: +; CHECK-NEXT: .functype nearest_v8f16 (v128) -> (v128){{$}} +; CHECK-NEXT: f16x8.nearest $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) +define <8 x half> @nearest_v8f16(<8 x half> %a) { + %v = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a) + ret <8 x half> %v +} + +; CHECK-LABEL: nearest_v8f16_via_rint: +; CHECK-NEXT: .functype nearest_v8f16_via_rint (v128) -> (v128){{$}} +; CHECK-NEXT: f16x8.nearest $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <8 x half> @llvm.rint.v8f16(<8 x half>) +define <8 x half> @nearest_v8f16_via_rint(<8 x half> %a) { + %v = call <8 x half> @llvm.rint.v8f16(<8 x half> %a) + ret <8 x half> %v +} + +; CHECK-LABEL: nearest_v8f16_via_roundeven: +; CHECK-NEXT: .functype nearest_v8f16_via_roundeven (v128) -> (v128){{$}} +; CHECK-NEXT: f16x8.nearest $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +declare <8 x half> @llvm.roundeven.v8f16(<8 x half>) +define <8 x half> @nearest_v8f16_via_roundeven(<8 x half> %a) { + %v = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a) + ret <8 x half> %v +} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index aa70815245e5d3..8e4d9301b60264 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -893,4 +893,25 @@ main: # CHECK: f16x8.ge # encoding: [0xfd,0xc5,0x02] f16x8.ge + # CHECK: f16x8.abs # encoding: [0xfd,0xb1,0x02] + f16x8.abs + + # CHECK: f16x8.neg # encoding: [0xfd,0xb2,0x02] + f16x8.neg + + # CHECK: f16x8.sqrt # encoding: [0xfd,0xb3,0x02] + f16x8.sqrt + + # CHECK: f16x8.ceil # encoding: [0xfd,0xbc,0x02] + f16x8.ceil + + # CHECK: f16x8.floor # encoding: [0xfd,0xbd,0x02] + f16x8.floor + + # CHECK: f16x8.trunc # encoding: [0xfd,0xbe,0x02] + f16x8.trunc + + # CHECK: f16x8.nearest # encoding: [0xfd,0xbf,0x02] + f16x8.nearest + end_function