[clang] Use llvm.is_fpclass to implement FP classification functions
Builtin floating-point number classification functions:

    - __builtin_isnan,
    - __builtin_isinf,
    - __builtin_isfinite, and
    - __builtin_isnormal

are now implemented using `llvm.is_fpclass`.

This change makes the target callback `TargetCodeGenInfo::testFPKind`
unnecessary; it is kept in this change and should be removed in a follow-up.

Differential Revision: https://reviews.llvm.org/D112932
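
As an illustration of the new lowering, here is a small C function and the IR clang is now expected to emit for it in the default, non-strict mode. This is a sketch for orientation, not code or output taken from the commit; the mask values are assumed to follow LLVM's FPClassTest and the IR value names are placeholders.

// Sketch: expected lowering of the classification builtins to llvm.is.fpclass.
// Assumed FPClassTest masks: fcNan = 3, fcInf = 516, fcFinite = 504, fcNormal = 264.
int classify(double d) {
  //   %nan    = call i1 @llvm.is.fpclass.f64(double %d, i32 3)
  //   %inf    = call i1 @llvm.is.fpclass.f64(double %d, i32 516)
  //   %finite = call i1 @llvm.is.fpclass.f64(double %d, i32 504)
  //   %normal = call i1 @llvm.is.fpclass.f64(double %d, i32 264)
  return __builtin_isnan(d) + __builtin_isinf(d) +
         __builtin_isfinite(d) + __builtin_isnormal(d);
}

Under strict FP semantics (e.g. #pragma float_control(except, on), as in the tests below), targets that implement TargetCodeGenInfo::testFPKind may still take the target-specific path first, via the tryUseTestFPKind helper introduced in the diff.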
spavloff committed Jul 11, 2023
1 parent 8ddd98f commit 7d6c2e1
Showing 11 changed files with 211 additions and 290 deletions.
140 changes: 49 additions & 91 deletions clang/lib/CodeGen/CGBuiltin.cpp
@@ -31,6 +31,7 @@
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -2239,6 +2240,17 @@ static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
}
}

static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
                               Value *V) {
  if (CGF.Builder.getIsFPConstrained() &&
      CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
    if (Value *Result =
            CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
      return Result;
  }
  return nullptr;
}

RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -3122,37 +3134,49 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// ZExt bool to int type.
return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
}

case Builtin::BI__builtin_isnan: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
llvm::Type *Ty = V->getType();
const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
if (!Builder.getIsFPConstrained() ||
Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
!Ty->isIEEE()) {
V = Builder.CreateFCmpUNO(V, V, "cmp");
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}
if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
return RValue::get(Result);
return RValue::get(
Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
ConvertType(E->getType())));
}

if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
case Builtin::BI__builtin_isinf: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
return RValue::get(Result);
return RValue::get(
Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
ConvertType(E->getType())));
}

// NaN has all exp bits set and a non zero significand. Therefore:
// isnan(V) == ((exp mask - (abs(V) & exp mask)) < 0)
unsigned bitsize = Ty->getScalarSizeInBits();
llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
Value *IntV = Builder.CreateBitCast(V, IntTy);
APInt AndMask = APInt::getSignedMaxValue(bitsize);
Value *AbsV =
Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
Value *Sub =
Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
// V = sign bit (Sub) <=> V = (Sub < 0)
V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
if (bitsize > 32)
V = Builder.CreateTrunc(V, ConvertType(E->getType()));
return RValue::get(V);
case Builtin::BIfinite:
case Builtin::BI__finite:
case Builtin::BIfinitef:
case Builtin::BI__finitef:
case Builtin::BIfinitel:
case Builtin::BI__finitel:
case Builtin::BI__builtin_isfinite: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
return RValue::get(Result);
return RValue::get(
Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
ConvertType(E->getType())));
}

case Builtin::BI__builtin_isnormal: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
return RValue::get(
Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
ConvertType(E->getType())));
}

case Builtin::BI__builtin_isfpclass: {
@@ -3388,52 +3412,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}

case Builtin::BIfinite:
case Builtin::BI__finite:
case Builtin::BIfinitef:
case Builtin::BI__finitef:
case Builtin::BIfinitel:
case Builtin::BI__finitel:
case Builtin::BI__builtin_isinf:
case Builtin::BI__builtin_isfinite: {
// isinf(x) --> fabs(x) == infinity
// isfinite(x) --> fabs(x) != infinity
// x != NaN via the ordered compare in either case.
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
llvm::Type *Ty = V->getType();
if (!Builder.getIsFPConstrained() ||
Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
!Ty->isIEEE()) {
Value *Fabs = EmitFAbs(*this, V);
Constant *Infinity = ConstantFP::getInfinity(V->getType());
CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
? CmpInst::FCMP_OEQ
: CmpInst::FCMP_ONE;
Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
}

if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
return RValue::get(Result);

// Inf values have all exp bits set and a zero significand. Therefore:
// isinf(V) == ((V << 1) == ((exp mask) << 1))
// isfinite(V) == ((V << 1) < ((exp mask) << 1)) using unsigned comparison
unsigned bitsize = Ty->getScalarSizeInBits();
llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
Value *IntV = Builder.CreateBitCast(V, IntTy);
Value *Shl1 = Builder.CreateShl(IntV, 1);
const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
Value *ExpMaskShl1 = llvm::ConstantInt::get(IntTy, ExpMask.shl(1));
if (BuiltinID == Builtin::BI__builtin_isinf)
V = Builder.CreateICmpEQ(Shl1, ExpMaskShl1);
else
V = Builder.CreateICmpULT(Shl1, ExpMaskShl1);
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}

case Builtin::BI__builtin_isinf_sign: {
// isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
@@ -3453,26 +3431,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}

case Builtin::BI__builtin_isnormal: {
// isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
// FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *V = EmitScalarExpr(E->getArg(0));
Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

Value *Abs = EmitFAbs(*this, V);
Value *IsLessThanInf =
Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
APFloat Smallest = APFloat::getSmallestNormalized(
getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
Value *IsNormal =
Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
"isnormal");
V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
V = Builder.CreateAnd(V, IsNormal, "and");
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}

case Builtin::BI__builtin_flt_rounds: {
Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);

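For reference while reading the updated tests below: the i32 operand of llvm.is.fpclass is an FPClassTest bit mask. The following sketch restates the relevant bits as a plain C enum; the names and values are assumed to match llvm/ADT/FloatingPointMode.h and are reproduced here only to decode the constants 3, 516, and 504 that appear in the new CHECK lines.

/* Sketch of the FPClassTest bits used by these builtins; not part of the commit. */
enum {
  fcSNan         = 0x0001,
  fcQNan         = 0x0002,
  fcNegInf       = 0x0004,
  fcNegNormal    = 0x0008,
  fcNegSubnormal = 0x0010,
  fcNegZero      = 0x0020,
  fcPosZero      = 0x0040,
  fcPosSubnormal = 0x0080,
  fcPosNormal    = 0x0100,
  fcPosInf       = 0x0200,

  fcNan    = fcSNan | fcQNan,                           /* 3,   __builtin_isnan */
  fcInf    = fcNegInf | fcPosInf,                       /* 516, __builtin_isinf */
  fcNormal = fcNegNormal | fcPosNormal,                 /* 264, __builtin_isnormal */
  fcFinite = fcNegNormal | fcNegSubnormal | fcNegZero |
             fcPosZero | fcPosSubnormal | fcPosNormal   /* 504, __builtin_isfinite */
};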
37 changes: 13 additions & 24 deletions clang/test/CodeGen/X86/strictfp_builtins.c
@@ -1,10 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 %s -emit-llvm -ffp-exception-behavior=maytrap -o - -triple x86_64-unknown-unknown | FileCheck %s

// Test that the constrained intrinsics are picking up the exception
// metadata from the AST instead of the global default from the command line.
// FIXME: these functions shouldn't trap on SNaN.

#pragma float_control(except, on)

int printf(const char *, ...);
@@ -17,7 +13,7 @@ int printf(const char *, ...);
// CHECK-NEXT: store i32 [[X:%.*]], ptr [[X_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[STR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef @.str, ptr noundef [[TMP0]], i32 noundef [[TMP1]]) [[ATTR4:#.*]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef @.str, ptr noundef [[TMP0]], i32 noundef [[TMP1]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
void p(char *str, int x) {
@@ -29,13 +25,11 @@ void p(char *str, int x) {
// CHECK-LABEL: @test_long_double_isinf(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
// CHECK-NEXT: [[SHL1:%.*]] = shl i80 [[BITCAST]], 1
// CHECK-NEXT: [[CMP:%.*]] = icmp eq i80 [[SHL1]], -18446744073709551616
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[CMP]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR3]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.1, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
void test_long_double_isinf(long double ld) {
@@ -47,13 +41,11 @@ void test_long_double_isinf(long double ld) {
// CHECK-LABEL: @test_long_double_isfinite(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
// CHECK-NEXT: [[SHL1:%.*]] = shl i80 [[BITCAST]], 1
// CHECK-NEXT: [[CMP:%.*]] = icmp ult i80 [[SHL1]], -18446744073709551616
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[CMP]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:STRID+1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR3]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
void test_long_double_isfinite(long double ld) {
@@ -65,14 +57,11 @@ void test_long_double_isfinite(long double ld) {
// CHECK-LABEL: @test_long_double_isnan(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
// CHECK-NEXT: store x86_fp80 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
// CHECK-NEXT: [[ABS:%.*]] = and i80 [[BITCAST]], 604462909807314587353087
// CHECK-NEXT: [[TMP1:%.*]] = sub i80 604453686435277732577280, [[ABS]]
// CHECK-NEXT: [[ISNAN:%.*]] = lshr i80 [[TMP1]], 79
// CHECK-NEXT: [[RES:%.*]] = trunc i80 [[ISNAN]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:STRID+1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR3]]
// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
void test_long_double_isnan(long double ld) {
19 changes: 6 additions & 13 deletions clang/test/CodeGen/aarch64-strictfp-builtins.c
@@ -29,10 +29,8 @@ void p(char *str, int x) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca fp128, align 16
// CHECK-NEXT: store fp128 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load fp128, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
// CHECK-NEXT: [[SHL1:%.*]] = shl i128 [[BITCAST]], 1
// CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[SHL1]], -10384593717069655257060992658440192
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[CMP]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[TMP0]], i32 516)
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: ret void
//
@@ -47,10 +45,8 @@ void test_long_double_isinf(long double ld) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca fp128, align 16
// CHECK-NEXT: store fp128 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load fp128, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
// CHECK-NEXT: [[SHL1:%.*]] = shl i128 [[BITCAST]], 1
// CHECK-NEXT: [[CMP:%.*]] = icmp ult i128 [[SHL1]], -10384593717069655257060992658440192
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[CMP]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[TMP0]], i32 504)
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:STRID+1]], i32 noundef [[RES]]) [[ATTR4]]
// CHECK-NEXT: ret void
//
@@ -65,11 +61,8 @@ void test_long_double_isfinite(long double ld) {
// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca fp128, align 16
// CHECK-NEXT: store fp128 [[D:%.*]], ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load fp128, ptr [[LD_ADDR]], align 16
// CHECK-NEXT: [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
// CHECK-NEXT: [[ABS:%.*]] = and i128 [[BITCAST]], 170141183460469231731687303715884105727
// CHECK-NEXT: [[TMP1:%.*]] = sub i128 170135991163610696904058773219554885632, [[ABS]]
// CHECK-NEXT: [[ISNAN:%.*]] = lshr i128 [[TMP1]], 127
// CHECK-NEXT: [[RES:%.*]] = trunc i128 [[ISNAN]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[TMP0]], i32 3)
// CHECK-NEXT: [[RES:%.*]] = zext i1 [[TMP1]] to i32
// CHECK-NEXT: call void @p(ptr noundef @.str.[[#STRID:STRID+1]], i32 noundef [[RES]])
// CHECK-NEXT: ret void
//
4 changes: 2 additions & 2 deletions clang/test/CodeGen/builtin_float.c
@@ -49,8 +49,8 @@ void test_half(__fp16 *H, __fp16 *H2) {
// CHECK: fcmp ogt float
// CHECK-NEXT: zext i1
(void)__builtin_isinf(*H);
// NOFP16: fcmp oeq float %{{.+}}, 0x7FF
// FP16: fcmp oeq half %{{.+}}, 0xH7C
// FP16: call i1 @llvm.is.fpclass.f16(half %{{.*}}, i32 516)
// NOFP16: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 516)
}

void test_mixed(double d1, float f2) {
8 changes: 2 additions & 6 deletions clang/test/CodeGen/builtin_float_strictfp.c
@@ -19,15 +19,11 @@ void test_half(__fp16 *H, __fp16 *H2) {
// NOFP16: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8
// NOFP16-NEXT: [[IHALF:%.*]] = load i16, ptr [[LDADDR]], align 2
// NOFP16-NEXT: [[CONV:%.*]] = call float @llvm.convert.from.fp16.f32(i16 [[IHALF]])
// NOFP16-NEXT: [[IFLOAT:%.*]] = bitcast float [[CONV]] to i32
// NOFP16-NEXT: [[SHL:%.*]] = shl i32 [[IFLOAT]], 1
// NOFP16-NEXT: [[RES1:%.*]] = icmp eq i32 [[SHL]], -16777216
// NOFP16-NEXT: [[RES1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[CONV]], i32 516)
// NOFP16-NEXT: zext i1 [[RES1]] to i32
// FP16: [[LDADDR:%.*]] = load ptr, ptr %{{.*}}, align 8
// FP16-NEXT: [[HALF:%.*]] = load half, ptr [[LDADDR]], align 2
// FP16-NEXT: [[IHALF:%.*]] = bitcast half [[HALF]] to i16
// FP16-NEXT: [[SHL:%.*]] = shl i16 [[IHALF]], 1
// FP16-NEXT: [[RES1:%.*]] = icmp eq i16 [[SHL]], -2048
// FP16-NEXT: [[RES1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[HALF]], i32 516)
// FP16-NEXT: zext i1 [[RES1]] to i32
}
