Revert @llvm.isnan intrinsic patchset.

Please refer to https://lists.llvm.org/pipermail/llvm-dev/2021-September/152440.html (and that whole thread). TL;DR: the original patch had no prior RFC, yet it contained some changes that really need a proper RFC discussion. It won't be productive to discuss such an RFC, once it's actually posted, while said patch is already committed, because that introduces bias towards already-committed stuff, and the tree is potentially in a broken state in the meantime. While the end result of the discussion may lead back to the current design, it may also not lead to the current design. Therefore I take it upon myself to revert the tree back to the last known good state. This reverts commit 4c4093e. This reverts commit 0a2b1ba. This reverts commit d987371. This reverts commit 791006f. This reverts commit c22b64e. This reverts commit 72ebcd3. This reverts commit 5fa6039. This reverts commit 9efda54. This reverts commit 94d3ff0.
llvm · Sep 2, 2021 · 3f1f08f · 3f1f08f
1 parent 9722e8f
commit 3f1f08f
Show file tree

Hide file tree

Showing 39 changed files with 158 additions and 2,997 deletions.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3068,17 +3068,37 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     // ZExt bool to int type.
     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
   }
-
   case Builtin::BI__builtin_isnan: {
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     Value *V = EmitScalarExpr(E->getArg(0));
+    llvm::Type *Ty = V->getType();
+    const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
+    if (!Builder.getIsFPConstrained() ||
+        Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
+        !Ty->isIEEE()) {
+      V = Builder.CreateFCmpUNO(V, V, "cmp");
+      return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
+    }
 
     if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM))
       return RValue::get(Result);
 
-    Function *F = CGM.getIntrinsic(Intrinsic::isnan, V->getType());
-    Value *Call = Builder.CreateCall(F, V);
-    return RValue::get(Builder.CreateZExt(Call, ConvertType(E->getType())));
+    // NaN has all exp bits set and a non zero significand. Therefore:
+    // isnan(V) == ((exp mask - abs(V)) < 0), abs(V) = V with sign bit cleared
+    unsigned bitsize = Ty->getScalarSizeInBits();
+    llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
+    Value *IntV = Builder.CreateBitCast(V, IntTy);
+    APInt AndMask = APInt::getSignedMaxValue(bitsize);
+    Value *AbsV =
+        Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
+    APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
+    Value *Sub =
+        Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
+    // V = sign bit (Sub) <=> V = (Sub < 0)
+    V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
+    if (bitsize > 32)
+      V = Builder.CreateTrunc(V, ConvertType(E->getType()));
+    return RValue::get(V);
   }
 
   case Builtin::BI__builtin_matrix_transpose: {

diff --git a/clang/test/CodeGen/X86/strictfp_builtins.c b/clang/test/CodeGen/X86/strictfp_builtins.c
@@ -17,7 +17,7 @@ int printf(const char *, ...);
 // CHECK-NEXT:    store i32 [[X:%.*]], i32* [[X_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[STR_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X_ADDR]], align 4
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i8* [[TMP0]], i32 [[TMP1]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i8* [[TMP0]], i32 [[TMP1]]) [[ATTR4:#.*]]
 // CHECK-NEXT:    ret void
 //
 void p(char *str, int x) {
@@ -29,13 +29,13 @@ void p(char *str, int x) {
 // CHECK-LABEL: @test_long_double_isinf(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
-// CHECK-NEXT:    store x86_fp80 [[LD:%.*]], x86_fp80* [[LD_ADDR]], align 16
+// CHECK-NEXT:    store x86_fp80 [[D:%.*]], x86_fp80* [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
-// CHECK-NEXT:    [[TMP2:%.*]] = shl i80 [[TMP1]], 1
-// CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i80 [[TMP2]], -18446744073709551616
-// CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
-// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.1, i64 0, i64 0), i32 [[TMP4]]) #[[ATTR3]]
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
+// CHECK-NEXT:    [[SHL1:%.*]] = shl i80 [[BITCAST]], 1
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i80 [[SHL1]], -18446744073709551616
+// CHECK-NEXT:    [[RES:%.*]] = zext i1 [[CMP]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.[[#STRID:1]], i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
 // CHECK-NEXT:    ret void
 //
 void test_long_double_isinf(long double ld) {
@@ -47,13 +47,13 @@ void test_long_double_isinf(long double ld) {
 // CHECK-LABEL: @test_long_double_isfinite(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
-// CHECK-NEXT:    store x86_fp80 [[LD:%.*]], x86_fp80* [[LD_ADDR]], align 16
+// CHECK-NEXT:    store x86_fp80 [[D:%.*]], x86_fp80* [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
-// CHECK-NEXT:    [[TMP2:%.*]] = shl i80 [[TMP1]], 1
-// CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i80 [[TMP2]], -18446744073709551616
-// CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
-// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.2, i64 0, i64 0), i32 [[TMP4]]) #[[ATTR3]]
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
+// CHECK-NEXT:    [[SHL1:%.*]] = shl i80 [[BITCAST]], 1
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult i80 [[SHL1]], -18446744073709551616
+// CHECK-NEXT:    [[RES:%.*]] = zext i1 [[CMP]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.[[#STRID:STRID+1]], i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
 // CHECK-NEXT:    ret void
 //
 void test_long_double_isfinite(long double ld) {
@@ -65,11 +65,14 @@ void test_long_double_isfinite(long double ld) {
 // CHECK-LABEL: @test_long_double_isnan(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
-// CHECK-NEXT:    store x86_fp80 [[LD:%.*]], x86_fp80* [[LD_ADDR]], align 16
+// CHECK-NEXT:    store x86_fp80 [[D:%.*]], x86_fp80* [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.isnan.f80(x86_fp80 [[TMP0]]) #[[ATTR3]]
-// CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
-// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.3, i64 0, i64 0), i32 [[TMP2]]) #[[ATTR3]]
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
+// CHECK-NEXT:    [[ABS:%.*]] = and i80 [[BITCAST]], 604462909807314587353087
+// CHECK-NEXT:    [[TMP1:%.*]] = sub i80 604453686435277732577280, [[ABS]]
+// CHECK-NEXT:    [[ISNAN:%.*]] = lshr i80 [[TMP1]], 79
+// CHECK-NEXT:    [[RES:%.*]] = trunc i80 [[ISNAN]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.[[#STRID:STRID+1]], i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
 // CHECK-NEXT:    ret void
 //
 void test_long_double_isnan(long double ld) {

diff --git a/clang/test/CodeGen/aarch64-strictfp-builtins.c b/clang/test/CodeGen/aarch64-strictfp-builtins.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1 %s -emit-llvm -ffp-exception-behavior=maytrap -fexperimental-strict-floating-point -o - -triple arm64-none-linux-gnu | FileCheck %s
 
 // Test that the constrained intrinsics are picking up the exception
@@ -16,7 +15,7 @@ int printf(const char *, ...);
 // CHECK-NEXT:    store i32 [[X:%.*]], i32* [[X_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[STR_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X_ADDR]], align 4
-// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i8* [[TMP0]], i32 [[TMP1]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i8* [[TMP0]], i32 [[TMP1]])  [[ATTR4:#.*]]
 // CHECK-NEXT:    ret void
 //
 void p(char *str, int x) {
@@ -28,13 +27,13 @@ void p(char *str, int x) {
 // CHECK-LABEL: @test_long_double_isinf(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca fp128, align 16
-// CHECK-NEXT:    store fp128 [[LD:%.*]], fp128* [[LD_ADDR]], align 16
+// CHECK-NEXT:    store fp128 [[D:%.*]], fp128* [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load fp128, fp128* [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast fp128 [[TMP0]] to i128
-// CHECK-NEXT:    [[TMP2:%.*]] = shl i128 [[TMP1]], 1
-// CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i128 [[TMP2]], -10384593717069655257060992658440192
-// CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
-// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.1, i64 0, i64 0), i32 [[TMP4]]) #[[ATTR3]]
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
+// CHECK-NEXT:    [[SHL1:%.*]] = shl i128 [[BITCAST]], 1
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i128 [[SHL1]], -10384593717069655257060992658440192
+// CHECK-NEXT:    [[RES:%.*]] = zext i1 [[CMP]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.[[#STRID:1]], i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
 // CHECK-NEXT:    ret void
 //
 void test_long_double_isinf(long double ld) {
@@ -46,13 +45,13 @@ void test_long_double_isinf(long double ld) {
 // CHECK-LABEL: @test_long_double_isfinite(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca fp128, align 16
-// CHECK-NEXT:    store fp128 [[LD:%.*]], fp128* [[LD_ADDR]], align 16
+// CHECK-NEXT:    store fp128 [[D:%.*]], fp128* [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load fp128, fp128* [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast fp128 [[TMP0]] to i128
-// CHECK-NEXT:    [[TMP2:%.*]] = shl i128 [[TMP1]], 1
-// CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i128 [[TMP2]], -10384593717069655257060992658440192
-// CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
-// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.2, i64 0, i64 0), i32 [[TMP4]]) #[[ATTR3]]
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
+// CHECK-NEXT:    [[SHL1:%.*]] = shl i128 [[BITCAST]], 1
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult i128 [[SHL1]], -10384593717069655257060992658440192
+// CHECK-NEXT:    [[RES:%.*]] = zext i1 [[CMP]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.[[#STRID:STRID+1]], i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
 // CHECK-NEXT:    ret void
 //
 void test_long_double_isfinite(long double ld) {
@@ -64,11 +63,14 @@ void test_long_double_isfinite(long double ld) {
 // CHECK-LABEL: @test_long_double_isnan(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca fp128, align 16
-// CHECK-NEXT:    store fp128 [[LD:%.*]], fp128* [[LD_ADDR]], align 16
+// CHECK-NEXT:    store fp128 [[D:%.*]], fp128* [[LD_ADDR]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load fp128, fp128* [[LD_ADDR]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.isnan.f128(fp128 [[TMP0]]) #[[ATTR3]]
-// CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
-// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.3, i64 0, i64 0), i32 [[TMP2]]) #[[ATTR3]]
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
+// CHECK-NEXT:    [[ABS:%.*]] = and i128 [[BITCAST]], 170141183460469231731687303715884105727
+// CHECK-NEXT:    [[TMP1:%.*]] = sub i128 170135991163610696904058773219554885632, [[ABS]]
+// CHECK-NEXT:    [[ISNAN:%.*]] = lshr i128 [[TMP1]], 127
+// CHECK-NEXT:    [[RES:%.*]] = trunc i128 [[ISNAN]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.[[#STRID:STRID+1]], i64 0, i64 0), i32 [[RES]])
 // CHECK-NEXT:    ret void
 //
 void test_long_double_isnan(long double ld) {