diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 49af78bce68c3..34b8167ee9c07 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1711,6 +1711,34 @@ defm RELAXED_DOT :
             "i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
             "i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>;
 
+// Select i16x8.relaxed_dot_i8x16_i7x16_s for a pairwise sum of sign-extended
+// i8 products. The first shuffle gathers the even i16 lanes of the
+// extmul_low_s/extmul_high_s results, the second gathers the odd lanes, and
+// the add combines each adjacent pair of products.
+// NOTE(review): the relaxed SIMD proposal makes the result of this
+// instruction implementation-defined when a byte of $rhs has its top bit set,
+// whereas the matched DAG is fully defined for every input. Confirm that the
+// fold is intended without a check that $rhs fits in 7 bits.
+def : Pat<
+  (v8i16 (add
+    (wasm_shuffle
+      (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+      (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+      (i32 0), (i32 1), (i32 4), (i32 5),
+      (i32 8), (i32 9), (i32 12), (i32 13),
+      (i32 16), (i32 17), (i32 20), (i32 21),
+      (i32 24), (i32 25), (i32 28), (i32 29)),
+    (wasm_shuffle
+      (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+      (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+      (i32 2), (i32 3), (i32 6), (i32 7),
+      (i32 10), (i32 11), (i32 14), (i32 15),
+      (i32 18), (i32 19), (i32 22), (i32 23),
+      (i32 26), (i32 27), (i32 30), (i32 31)))
+  ),
+  (v8i16 (RELAXED_DOT v16i8:$lhs, v16i8:$rhs))
+>;
+
 defm RELAXED_DOT_ADD :
   RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc),
             (outs), (ins),
@@ -1719,6 +1747,15 @@ defm RELAXED_DOT_ADD :
             "i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
             "i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>;
 
+// Fold an add of $acc onto the pairwise-widened relaxed dot intrinsic into a
+// single i32x4.relaxed_dot_i8x16_i7x16_add_s.
+def : Pat<
+  (v4i32 (add
+    (v4i32 (int_wasm_extadd_pairwise_signed
+      (v8i16 (int_wasm_relaxed_dot_i8x16_i7x16_signed v16i8:$lhs, v16i8:$rhs)))),
+    (v4i32 V128:$acc))),
+  (v4i32 (RELAXED_DOT_ADD v16i8:$lhs, v16i8:$rhs, (v4i32 V128:$acc)))
+>;
 //===----------------------------------------------------------------------===//
 // Relaxed BFloat16 dot product
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll
new file mode 100644
index 0000000000000..9716cbe077080
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s
+
+target triple = "wasm32"
+; relaxed_dot stands for relaxed_dot_i8x16_i7x16_s, as in td
+; relaxed_dot_add stands for i32x4.relaxed_dot_i8x16_i7x16_add_s, as in td
+
+define <8 x i16> @relaxed_dot_sext_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_sext_1:
+; CHECK:         .functype relaxed_dot_sext_1 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
+; CHECK-NEXT:    return $pop0
+  %sext1 = sext <16 x i8> %a to <16 x i16>
+  %sext2 = sext <16 x i8> %b to <16 x i16>
+  %mul = mul <16 x i16> %sext1, %sext2
+  %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %res = add <8 x i16> %shuffle1, %shuffle2
+  ret <8 x i16> %res
+}
+
+
+define <8 x i16> @relaxed_dot_sext_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_sext_2:
+; CHECK:         .functype relaxed_dot_sext_2 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
+; CHECK-NEXT:    return $pop0
+  %sext1 = sext <16 x i8> %a to <16 x i16>
+  %sext2 = sext <16 x i8> %b to <16 x i16>
+  %mul = mul <16 x i16> %sext1, %sext2
+  %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %res = add <8 x i16> %shuffle2, %shuffle1
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @relaxed_dot_sext_self(<16 x i8> %v) {
+; CHECK-LABEL: relaxed_dot_sext_self:
+; CHECK:         .functype relaxed_dot_sext_self (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $0
+; CHECK-NEXT:    return $pop0
+  %sext = sext <16 x i8> %v to <16 x i16>
+  %mul = mul <16 x i16> %sext, %sext
+  %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %res = add <8 x i16> %shuffle1, %shuffle2
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @relaxed_dot_add_from_relaxed_dot(<16 x i8> %a, <16 x i8> %b, <4 x i32> %c) {
+; CHECK-LABEL: relaxed_dot_add_from_relaxed_dot:
+; CHECK:         .functype relaxed_dot_add_from_relaxed_dot (v128, v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i32x4.relaxed_dot_i8x16_i7x16_add_s $push0=, $0, $1, $2
+; CHECK-NEXT:    return $pop0
+  %relaxed_dot_call = call <8 x i16> @llvm.wasm.relaxed.dot.i8x16.i7x16.signed(<16 x i8> %a, <16 x i8> %b)
+  %sext = call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> %relaxed_dot_call)
+  %res = add <4 x i32> %sext, %c
+  ret <4 x i32> %res
+}
+
+; INFO: Negative test
+define <8 x i16> @relaxed_dot_zext(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_zext:
+; CHECK:         .functype relaxed_dot_zext (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i16x8.extmul_low_i8x16_u $push6=, $0, $1
+; CHECK-NEXT:    local.tee $push5=, $2=, $pop6
+; CHECK-NEXT:    i16x8.extmul_high_i8x16_u $push4=, $0, $1
+; CHECK-NEXT:    local.tee $push3=, $1=, $pop4
+; CHECK-NEXT:    i8x16.shuffle $push1=, $pop5, $pop3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT:    i8x16.shuffle $push0=, $2, $1, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
+; CHECK-NEXT:    i16x8.add $push2=, $pop1, $pop0
+; CHECK-NEXT:    return $pop2
+  %zext1 = zext <16 x i8> %a to <16 x i16>
+  %zext2 = zext <16 x i8> %b to <16 x i16>
+  %mul = mul <16 x i16> %zext1, %zext2
+  %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %res = add <8 x i16> %shuffle1, %shuffle2
+  ret <8 x i16> %res
+
+}
+
+; INFO: Negative test
+define <8 x i16> @relaxed_dot_wrong_shuffle(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_wrong_shuffle:
+; CHECK:         .functype relaxed_dot_wrong_shuffle (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i16x8.extmul_low_i8x16_s $push1=, $0, $1
+; CHECK-NEXT:    i16x8.extmul_high_i8x16_s $push0=, $0, $1
+; CHECK-NEXT:    i16x8.add $push2=, $pop1, $pop0
+; CHECK-NEXT:    return $pop2
+  %sext1 = sext <16 x i8> %a to <16 x i16>
+  %sext2 = sext <16 x i8> %b to <16 x i16>
+  %mul = mul <16 x i16> %sext1, %sext2
+  %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %res = add <8 x i16> %shuffle1, %shuffle2
+  ret <8 x i16> %res
+}