Skip to content

Commit

Permalink
[AArch64] Use fneg instead of fsub -0.0, X in IR expansion of __buil…
Browse files Browse the repository at this point in the history
…tin_neon_vfmsh_f16.

Addresses the FIXME and removes the only in-tree use of
llvm::ConstantFP::getZeroValueForNegation for an FP type.

Reviewed By: dmgreen, SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D147497
  • Loading branch information
topperc committed Apr 4, 2023
1 parent 64ad6ea commit 0109f8d
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 7 deletions.
6 changes: 2 additions & 4 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -10965,14 +10965,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
{EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
case NEON::BI__builtin_neon_vfmsh_f16: {
// FIXME: This should be an fneg instruction:
Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");

// NEON intrinsic puts accumulator first, unlike the LLVM fma.
return emitCallMaybeConstrainedFPBuiltin(
*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
{Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
{Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
}
case NEON::BI__builtin_neon_vaddd_s64:
case NEON::BI__builtin_neon_vaddd_u64:
Expand Down
Expand Up @@ -290,8 +290,7 @@ float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
}

// COMMON-LABEL: test_vfmsh_f16
// UNCONSTRAINED: [[SUB:%.*]] = fsub half 0xH8000, %b
// CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fsub.f16(half 0xH8000, half %b, metadata !"round.tonearest", metadata !"fpexcept.strict")
// COMMONIR: [[SUB:%.*]] = fneg half %b
// UNCONSTRAINED: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
// CONSTRAINED: [[ADD:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half %c, half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
// COMMONIR: ret half [[ADD]]
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
Expand Up @@ -652,7 +652,7 @@ float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
}

// CHECK-LABEL: test_vfmsh_f16
// CHECK: [[SUB:%.*]] = fsub half 0xH8000, %b
// CHECK: [[SUB:%.*]] = fneg half %b
// CHECK: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
// CHECK: ret half [[ADD]]
float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
Expand Down

0 comments on commit 0109f8d

Please sign in to comment.