[clang] Use llvm.is_fpclass to implement FP classification functions
Builtin floating-point number classification functions:

    - __builtin_isnan,
    - __builtin_isinf,
    - __builtin_isfinite, and
    - __builtin_isnormal

are now implemented using `llvm.is_fpclass`.

This change makes the target callback `TargetCodeGenInfo::testFPKind`
unnecessary; it is kept in this change and should be removed in a follow-up.

Differential Revision: https://reviews.llvm.org/D112932
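
As an illustration of the new lowering, here is a small C function and the IR clang is now expected to emit for it in the default, non-strict mode. This is a sketch for orientation, not code or output taken from the commit; the mask values are assumed to follow LLVM's FPClassTest and the IR value names are placeholders.

// Sketch: expected lowering of the classification builtins to llvm.is.fpclass.
// Assumed FPClassTest masks: fcNan = 3, fcInf = 516, fcFinite = 504, fcNormal = 264.
int classify(double d) {
  //   %nan    = call i1 @llvm.is.fpclass.f64(double %d, i32 3)
  //   %inf    = call i1 @llvm.is.fpclass.f64(double %d, i32 516)
  //   %finite = call i1 @llvm.is.fpclass.f64(double %d, i32 504)
  //   %normal = call i1 @llvm.is.fpclass.f64(double %d, i32 264)
  return __builtin_isnan(d) + __builtin_isinf(d) +
         __builtin_isfinite(d) + __builtin_isnormal(d);
}

Under strict FP semantics (e.g. #pragma float_control(except, on), as in the tests below), targets that implement TargetCodeGenInfo::testFPKind may still take the target-specific path first, via the tryUseTestFPKind helper introduced in the diff.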
spavloff committed Jul 11, 2023
1 parent 8ddd98f commit 7d6c2e1
Showing 11 changed files with 211 additions and 290 deletions.
140 changes: 49 additions & 91 deletions clang/lib/CodeGen/CGBuiltin.cpp
@@ -31,6 +31,7 @@
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -2239,6 +2240,17 @@ static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
}
}

static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
                               Value *V) {
  if (CGF.Builder.getIsFPConstrained() &&
      CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
    if (Value *Result =
            CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
      return Result;
  }
  return nullptr;
}

RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -3122,37 +3134,49 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// ZExt bool to int type.
return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
}

case Builtin::BI__builtin_isnan: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
llvm::Type *Ty = V->getType();
const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
if (!Builder.getIsFPConstrained() ||
Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
!Ty->isIEEE()) {
V = Builder.CreateFCmpUNO(V, V, "cmp");
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}
if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
return RValue::get(Result);
return RValue::get(
Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
ConvertType(E->getType())));
}

if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
case Builtin::BI__builtin_isinf: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
return RValue::get(Result);
return RValue::get(
Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
ConvertType(E->getType())));
}

// NaN has all exp bits set and a non zero significand. Therefore:
// isnan(V) == ((exp mask - (abs(V) & exp mask)) < 0)
unsigned bitsize = Ty->getScalarSizeInBits();
llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
Value *IntV = Builder.CreateBitCast(V, IntTy);
APInt AndMask = APInt::getSignedMaxValue(bitsize);
Value *AbsV =
Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
Value *Sub =
Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
// V = sign bit (Sub) <=> V = (Sub < 0)
V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
if (bitsize > 32)
V = Builder.CreateTrunc(V, ConvertType(E->getType()));
return RValue::get(V);
case Builtin::BIfinite:
case Builtin::BI__finite:
case Builtin::BIfinitef:
case Builtin::BI__finitef:
case Builtin::BIfinitel:
case Builtin::BI__finitel:
case Builtin::BI__builtin_isfinite: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
return RValue::get(Result);
return RValue::get(
Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
ConvertType(E->getType())));
}

case Builtin::BI__builtin_isnormal: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
return RValue::get(
Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
ConvertType(E->getType())));
}

case Builtin::BI__builtin_isfpclass: {
@@ -3388,52 +3412,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}

case Builtin::BIfinite:
case Builtin::BI__finite:
case Builtin::BIfinitef:
case Builtin::BI__finitef:
case Builtin::BIfinitel:
case Builtin::BI__finitel:
case Builtin::BI__builtin_isinf:
case Builtin::BI__builtin_isfinite: {
// isinf(x) --> fabs(x) == infinity
// isfinite(x) --> fabs(x) != infinity
// x != NaN via the ordered compare in either case.
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
llvm::Type *Ty = V->getType();
if (!Builder.getIsFPConstrained() ||
Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
!Ty->isIEEE()) {
Value *Fabs = EmitFAbs(*this, V);
Constant *Infinity = ConstantFP::getInfinity(V->getType());
CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
? CmpInst::FCMP_OEQ
: CmpInst::FCMP_ONE;
Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
}

if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
return RValue::get(Result);

// Inf values have all exp bits set and a zero significand. Therefore:
// isinf(V) == ((V << 1) == ((exp mask) << 1))
// isfinite(V) == ((V << 1) < ((exp mask) << 1)) using unsigned comparison
unsigned bitsize = Ty->getScalarSizeInBits();
llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
Value *IntV = Builder.CreateBitCast(V, IntTy);
Value *Shl1 = Builder.CreateShl(IntV, 1);
const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
Value *ExpMaskShl1 = llvm::ConstantInt::get(IntTy, ExpMask.shl(1));
if (BuiltinID == Builtin::BI__builtin_isinf)
V = Builder.CreateICmpEQ(Shl1, ExpMaskShl1);
else
V = Builder.CreateICmpULT(Shl1, ExpMaskShl1);
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}

case Builtin::BI__builtin_isinf_sign: {
// isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
@@ -3453,26 +3431,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}

case Builtin::BI__builtin_isnormal: {
// isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
// FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *V = EmitScalarExpr(E->getArg(0));
Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

Value *Abs = EmitFAbs(*this, V);
Value *IsLessThanInf =
Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
APFloat Smallest = APFloat::getSmallestNormalized(
getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
Value *IsNormal =
Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
"isnormal");
V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
V = Builder.CreateAnd(V, IsNormal, "and");
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}

case Builtin::BI__builtin_flt_rounds: {
Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);

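For reference while reading the updated tests below: the i32 operand of llvm.is.fpclass is an FPClassTest bit mask. The following sketch restates the relevant bits as a plain C enum; the names and values are assumed to match llvm/ADT/FloatingPointMode.h and are reproduced here only to decode the constants 3, 516, and 504 that appear in the new CHECK lines.

/* Sketch of the FPClassTest bits used by these builtins; not part of the commit. */
enum {
  fcSNan         = 0x0001,
  fcQNan         = 0x0002,
  fcNegInf       = 0x0004,
  fcNegNormal    = 0x0008,
  fcNegSubnormal = 0x0010,
  fcNegZero      = 0x0020,
  fcPosZero      = 0x0040,
  fcPosSubnormal = 0x0080,
  fcPosNormal    = 0x0100,
  fcPosInf       = 0x0200,

  fcNan    = fcSNan | fcQNan,                           /* 3,   __builtin_isnan */
  fcInf    = fcNegInf | fcPosInf,                       /* 516, __builtin_isinf */
  fcNormal = fcNegNormal | fcPosNormal,                 /* 264, __builtin_isnormal */
  fcFinite = fcNegNormal | fcNegSubnormal | fcNegZero |
             fcPosZero | fcPosSubnormal | fcPosNormal   /* 504, __builtin_isfinite */
};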
37 changes: 13 additions & 24 deletions clang/test/CodeGen/X86/strictfp_builtins.c
@@ -1,10 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 %s -emit-llvm -ffp-exception-behavior=maytrap -o - -triple x86_64-unknown-unknown | FileCheck %s

// Test that the constrained intrinsics are picking up the exception
// metadata from the AST instead of the global default from the command line.
// FIXME: these functions shouldn't trap on SNaN.

#pragma float_control(except, on)

int printf(const char *, ...);
@@ -17,7 +13,7 @@ int printf(const char *, ...);
// CHECK-NEXT: store i32 [[X:%.*]], ptr [[X_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[STR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef @.str, ptr noundef [[TMP0]], i32 noundef [[TMP1]]) [[ATTR4:#.*]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef @.str, ptr noundef [[TMP0]], i32 noundef [[TMP1]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
void p(char *str, int x) {
@@ -29,13 +25,11 @@ void p(char *str, int x) {
// CHECK-LABEL: @test_long_double_isinf(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
// CHECK-NEXT: [[SHL1:%.*]] = shl i80 [[BITCAST]], 1
// CHECK-NEXT: [[CMP:%.*]] = icmp eq i80 [[SHL1]], -18446744073709551616
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[CMP]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR3]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.1, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
void test_long_double_isinf(long double ld) {
@@ -47,13 +41,11 @@ void test_long_double_isinf(long double ld) {
// CHECK-LABEL: @test_long_double_isfinite(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
// CHECK-NEXT: [[SHL1:%.*]] = shl i80 [[BITCAST]], 1
// CHECK-NEXT: [[CMP:%.*]] = icmp ult i80 [[SHL1]], -18446744073709551616
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[CMP]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:STRID+1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR3]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
void test_long_double_isfinite(long double ld) {
@@ -65,14 +57,11 @@ void test_long_double_isfinite(long double ld) {
// CHECK-LABEL: @test_long_double_isnan(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
// CHECK-NEXT: [[ABS:%.*]] = and i80 [[BITCAST]], 604462909807314587353087
// CHECK-NEXT: [[TMP1:%.*]] = sub i80 604453686435277732577280, [[ABS]]
// CHECK-NEXT: [[ISNAN:%.*]] = lshr i80 [[TMP1]], 79
// CHECK-NEXT: [[RES:%.*]] = trunc i80 [[ISNAN]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:STRID+1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR3]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
void test_long_double_isnan(long double ld) {
19 changes: 6 additions & 13 deletions clang/test/CodeGen/aarch64-strictfp-builtins.c
@@ -29,10 +29,8 @@ void p(char *str, int x) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca fp128, align 16
// CHECK-NEXT: store fp128 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load fp128, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
// CHECK-NEXT: [[SHL1:%.*]] = shl i128 [[BITCAST]], 1
// CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[SHL1]], -10384593717069655257060992658440192
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[CMP]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[TMP0]], i32 516)
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: ret void
//
@@ -47,10 +45,8 @@ void test_long_double_isinf(long double ld) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca fp128, align 16
// CHECK-NEXT: store fp128 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load fp128, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
// CHECK-NEXT: [[SHL1:%.*]] = shl i128 [[BITCAST]], 1
// CHECK-NEXT: [[CMP:%.*]] = icmp ult i128 [[SHL1]], -10384593717069655257060992658440192
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[CMP]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[TMP0]], i32 504)
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:STRID+1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: ret void
//
@@ -65,11 +61,8 @@ void test_long_double_isfinite(long double ld) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca fp128, align 16
// CHECK-NEXT: store fp128 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load fp128, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
// CHECK-NEXT: [[ABS:%.*]] = and i128 [[BITCAST]], 170141183460469231731687303715884105727
// CHECK-NEXT: [[TMP1:%.*]] = sub i128 170135991163610696904058773219554885632, [[ABS]]
// CHECK-NEXT: [[ISNAN:%.*]] = lshr i128 [[TMP1]], 127
// CHECK-NEXT: [[RES:%.*]] = trunc i128 [[ISNAN]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[TMP0]], i32 3)
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:STRID+1]], i32 noundef [[RES]])
// CHECK-NEXT: ret void
//
4 changes: 2 additions & 2 deletions clang/test/CodeGen/builtin_float.c
@@ -49,8 +49,8 @@ void test_half(__fp16 *H, __fp16 *H2) {
// CHECK: fcmp ogt float
// CHECK-NEXT: zext i1
(void)__builtin_isinf(*H);
// NOFP16: fcmp oeq float %{{.+}}, 0x7FF
// FP16: fcmp oeq half %{{.+}}, 0xH7C
// FP16: call i1 @llvm.is.fpclass.f16(half %{{.*}}, i32 516)
// NOFP16: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 516)
}

void test_mixed(double d1, float f2) {
8 changes: 2 additions & 6 deletions clang/test/CodeGen/builtin_float_strictfp.c
@@ -19,15 +19,11 @@ void test_half(__fp16 *H, __fp16 *H2) {
// NOFP16: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8
// NOFP16-NEXT: [[IHALF:%.*]] = load i16, ptr [[LDADDR]], align 2
// NOFP16-NEXT: [[CONV:%.*]] = call float @llvm.convert.from.fp16.f32(i16 [[IHALF]])
// NOFP16-NEXT: [[IFLOAT:%.*]] = bitcast float [[CONV]] to i32
// NOFP16-NEXT: [[SHL:%.*]] = shl i32 [[IFLOAT]], 1
// NOFP16-NEXT: [[RES1:%.*]] = icmp eq i32 [[SHL]], -16777216
// NOFP16-NEXT: [[RES1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[CONV]], i32 516)
// NOFP16-NEXT: zext i1 [[RES1]] to i32
// FP16: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8
// FP16-NEXT: [[HALF:%.*]] = load half, ptr [[LDADDR]], align 2
// FP16-NEXT: [[IHALF:%.*]] = bitcast half [[HALF]] to i16
// FP16-NEXT: [[SHL:%.*]] = shl i16 [[IHALF]], 1
// FP16-NEXT: [[RES1:%.*]] = icmp eq i16 [[SHL]], -2048
// FP16-NEXT: [[RES1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[HALF]], i32 516)
// FP16-NEXT: zext i1 [[RES1]] to i32
}
