-
Notifications
You must be signed in to change notification settings - Fork 14.8k
Wasm fmuladd relaxed #163177
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Wasm fmuladd relaxed #163177
Conversation
Introduce an fmuladd ISD node as an equivalent to the LLVM intrinsic and lower this to madd and nmadd, when we have relaxed-simd.
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-webassembly Author: Sam Parker (sparker-arm) ChangesReland #161355, after fixing up the cross-projects-tests for the wasm simd intrinsics. Patch is 86.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163177.diff 15 Files Affected:
diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
index b601d90cfcc92..0f9ef39e4ca6f 100644
--- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
+++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
@@ -1511,13 +1511,13 @@ v128_t test_f16x8_convert_u16x8(v128_t a) {
}
// CHECK-LABEL: test_f16x8_relaxed_madd:
-// CHECK: f16x8.relaxed_madd{{$}}
+// CHECK: f16x8.madd{{$}}
v128_t test_f16x8_relaxed_madd(v128_t a, v128_t b, v128_t c) {
return wasm_f16x8_relaxed_madd(a, b, c);
}
// CHECK-LABEL: test_f16x8_relaxed_nmadd:
-// CHECK: f16x8.relaxed_nmadd{{$}}
+// CHECK: f16x8.nmadd{{$}}
v128_t test_f16x8_relaxed_nmadd(v128_t a, v128_t b, v128_t c) {
return wasm_f16x8_relaxed_nmadd(a, b, c);
}
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index c76c83d84b3c7..ff3dd0d4c3c51 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -514,6 +514,12 @@ enum NodeType {
/// separately rounded operations.
FMAD,
+ /// FMULADD - Performs a * b + c, with, or without, intermediate rounding.
+ /// It is expected that this will be illegal for most targets, as it usually
+ /// makes sense to split this or use an FMA. But some targets, such as
+ /// WebAssembly, can directly support these semantics.
+ FMULADD,
+
/// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
/// DAG node does not require that X and Y have the same type, just that
/// they are both floating point. X and the result must have the same type.
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 632be7ad9e350..07a858fd682fc 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -535,6 +535,7 @@ def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp, [SDNPCommutative]>;
def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp, [SDNPCommutative]>;
+def fmuladd : SDNode<"ISD::FMULADD" , SDTFPTernaryOp, [SDNPCommutative]>;
def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b1accdd066dfd..e15384202f758 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -509,6 +509,7 @@ namespace {
SDValue visitFMUL(SDNode *N);
template <class MatchContextClass> SDValue visitFMA(SDNode *N);
SDValue visitFMAD(SDNode *N);
+ SDValue visitFMULADD(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
@@ -1991,6 +1992,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
case ISD::FMAD: return visitFMAD(N);
+ case ISD::FMULADD: return visitFMULADD(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
@@ -18444,6 +18446,21 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMULADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Constant fold FMULADD.
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
+ return C;
+
+ return SDValue();
+}
+
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 08af74c258899..c1fd052d01f31 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5786,6 +5786,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FCOPYSIGN:
case ISD::FMA:
case ISD::FMAD:
+ case ISD::FMULADD:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
@@ -5904,6 +5905,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
case ISD::FCOSH:
case ISD::FTANH:
case ISD::FMA:
+ case ISD::FMULADD:
case ISD::FMAD: {
if (SNaN)
return true;
@@ -7231,7 +7233,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// Handle fma/fmad special cases.
- if (Opcode == ISD::FMA || Opcode == ISD::FMAD) {
+ if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
Ops[2].getValueType() == VT && "FMA types must match!");
@@ -7242,7 +7244,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
APFloat V1 = C1->getValueAPF();
const APFloat &V2 = C2->getValueAPF();
const APFloat &V3 = C3->getValueAPF();
- if (Opcode == ISD::FMAD) {
+ if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
V1.multiply(V2, APFloat::rmNearestTiesToEven);
V1.add(V3, APFloat::rmNearestTiesToEven);
} else
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c21890a0d856f..0f2b5188fc10a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6996,6 +6996,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
+ } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) {
+ // TODO: Support splitting the vector.
+ setValue(&I, DAG.getNode(ISD::FMULADD, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index fcfbfe6c461d3..39cbfad6d0be1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -310,6 +310,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMA: return "fma";
case ISD::STRICT_FMA: return "strict_fma";
case ISD::FMAD: return "fmad";
+ case ISD::FMULADD: return "fmuladd";
case ISD::FREM: return "frem";
case ISD::STRICT_FREM: return "strict_frem";
case ISD::FCOPYSIGN: return "fcopysign";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cc503d324e74b..920dff935daed 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7676,6 +7676,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
break;
}
case ISD::FMA:
+ case ISD::FMULADD:
case ISD::FMAD: {
if (!Flags.hasNoSignedZeros())
break;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index c23281a820b2b..060b1ddc2ef39 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -815,7 +815,8 @@ void TargetLoweringBase::initActions() {
ISD::FTAN, ISD::FACOS,
ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH,
- ISD::FTANH, ISD::FATAN2},
+ ISD::FTANH, ISD::FATAN2,
+ ISD::FMULADD},
VT, Expand);
// Overflow operations default to expand
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 64723340051b8..47c24fc27f1d6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -317,6 +317,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
}
+ if (Subtarget->hasFP16()) {
+ setOperationAction(ISD::FMA, MVT::v8f16, Legal);
+ }
+
+ if (Subtarget->hasRelaxedSIMD()) {
+ setOperationAction(ISD::FMULADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMULADD, MVT::v2f64, Legal);
+ }
+
// Partial MLA reductions.
for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
@@ -1120,6 +1129,18 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
+ const MachineFunction &MF, EVT VT) const {
+ if (!Subtarget->hasFP16() || !VT.isVector())
+ return false;
+
+ EVT ScalarVT = VT.getScalarType();
+ if (!ScalarVT.isSimple())
+ return false;
+
+ return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
+}
+
bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
SDValue Op, const TargetLoweringOpt &TLO) const {
// ISel process runs DAGCombiner after legalization; this step is called
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index b33a8530310be..472ec678534a4 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -81,6 +81,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 49af78bce68c3..72835b3c6424e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1626,7 +1626,8 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
// Relaxed (Negative) Multiply-Add (madd/nmadd)
//===----------------------------------------------------------------------===//
-multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> {
+multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
+ list<Predicate> reqs> {
defm MADD_#vec :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
[(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
@@ -1640,16 +1641,40 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
- def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
- def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
}
-defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
+defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
+
+//===----------------------------------------------------------------------===//
+// FP16 (Negative) Multiply-Add (madd/nmadd)
+//===----------------------------------------------------------------------===//
+
+multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
+ list<Predicate> reqs> {
+ defm MADD_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (fma
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".madd\t$dst, $a, $b, $c",
+ vec.prefix#".madd", simdopA, reqs>;
+ defm NMADD_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (fma
+ (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".nmadd\t$dst, $a, $b, $c",
+ vec.prefix#".nmadd", simdopS, reqs>;
+}
+defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
//===----------------------------------------------------------------------===//
// Laneselect
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
index e065de38951b1..600241aef99d0 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -2,9 +2,278 @@
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=NOFP16
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefix=NOSIMD
target triple = "wasm32"
+define half @fadd_fmul_contract_f16(half %a, half %b, half %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f16:
+; RELAXED: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: call $push0=, __truncsfhf2, $0
+; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
+; RELAXED-NEXT: call $push2=, __truncsfhf2, $1
+; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
+; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
+; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
+; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
+; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
+; RELAXED-NEXT: return $pop7
+;
+; STRICT-LABEL: fadd_fmul_contract_f16:
+; STRICT: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: call $push0=, __truncsfhf2, $0
+; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
+; STRICT-NEXT: call $push2=, __truncsfhf2, $1
+; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
+; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
+; STRICT-NEXT: call $push5=, __truncsfhf2, $2
+; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
+; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
+; STRICT-NEXT: return $pop7
+;
+; NOFP16-LABEL: fadd_fmul_contract_f16:
+; NOFP16: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $0
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: return $pop7
+;
+; NOSIMD-LABEL: fadd_fmul_contract_f16:
+; NOSIMD: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $0
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: return $pop7
+ %mul = fmul contract half %b, %a
+ %add = fadd contract half %mul, %c
+ ret half %add
+}
+
+define half @fmuladd_contract_f16(half %a, half %b, half %c) {
+; RELAXED-LABEL: fmuladd_contract_f16:
+; RELAXED: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: call $push0=, __truncsfhf2, $1
+; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
+; RELAXED-NEXT: call $push2=, __truncsfhf2, $0
+; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
+; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
+; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
+; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
+; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
+; RELAXED-NEXT: return $pop7
+;
+; STRICT-LABEL: fmuladd_contract_f16:
+; STRICT: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: call $push0=, __truncsfhf2, $1
+; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
+; STRICT-NEXT: call $push2=, __truncsfhf2, $0
+; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
+; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
+; STRICT-NEXT: call $push5=, __truncsfhf2, $2
+; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
+; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
+; STRICT-NEXT: return $pop7
+;
+; NOFP16-LABEL: fmuladd_contract_f16:
+; NOFP16: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $0
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: return $pop7
+;
+; NOSIMD-LABEL: fmuladd_contract_f16:
+; NOSIMD: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0...
[truncated]
|
I've added back the intrinsic patterns for fp16 and noted that I think new intrinsics should, ultimately, be introduced instead. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for following up with the fix.
We can definitely revisit the intrinsics before the proposal is finalized.
Reland #161355, after fixing up the cross-projects-tests for the wasm simd intrinsics.