diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d8bb1bc84daa55..4ff84ce8b79f6e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -26,6 +26,8 @@
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -2985,10 +2987,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_isnan: {
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
-    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
     Value *V = EmitScalarExpr(E->getArg(0));
-    V = Builder.CreateFCmpUNO(V, V, "cmp");
-    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
+    llvm::Type *Ty = V->getType();
+    const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
+    // Without strict exception semantics, or when the type is not reported as
+    // IEEE by Type::isIEEE(), keep the plain unordered self-compare.
+    if (!Builder.getIsFPConstrained() ||
+        Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
+        !Ty->isIEEE()) {
+      V = Builder.CreateFCmpUNO(V, V, "cmp");
+      return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
+    }
+
+    // Use integer ops on the bit pattern so that an SNaN operand does not trap.
+    // NaN has all exp bits set and a non zero significand. Therefore:
+    // isnan(V) == ((exp mask - abs(V)) < 0)
+    unsigned bitsize = Ty->getScalarSizeInBits();
+    llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
+    Value *IntV = Builder.CreateBitCast(V, IntTy);
+    APInt AndMask = APInt::getSignedMaxValue(bitsize);
+    Value *AbsV =
+        Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
+    APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
+    Value *Sub =
+        Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
+    // V = sign bit (Sub) <=> V = (Sub < 0)
+    V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
+    // The shifted value is as wide as the FP type; resize it to the builtin's
+    // integer result type: zext if narrower, trunc if wider.
+    V = Builder.CreateZExtOrTrunc(V, ConvertType(E->getType()));
+    return RValue::get(V);
   }
 
   case Builtin::BI__builtin_matrix_transpose: {
diff --git a/clang/test/CodeGen/X86/strictfp_builtins.c b/clang/test/CodeGen/X86/strictfp_builtins.c
new file mode 100644
index 00000000000000..d7eda34fb45ef2
--- /dev/null
+++ b/clang/test/CodeGen/X86/strictfp_builtins.c
@@ -0,0 +1,46 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -emit-llvm -ffp-exception-behavior=maytrap -o - -triple x86_64-unknown-unknown | FileCheck %s
+
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+// FIXME: these functions shouldn't trap on SNaN.
+
+#pragma float_control(except, on)
+
+int printf(const char *, ...);
+
+// CHECK-LABEL: @p(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[STR_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i8* [[STR:%.*]], i8** [[STR_ADDR]], align 8
+// CHECK-NEXT: store i32 [[X:%.*]], i32* [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[STR_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[X_ADDR]], align 4
+// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i8* [[TMP0]], i32 [[TMP1]]) [[ATTR4:#.*]]
+// CHECK-NEXT: ret void
+// Helper: prints the label string and the integer value with printf.
+void p(char *str, int x) {
+  printf("%s: %d\n", str, x);
+}
+
+#define P(n,args) p(#n #args, __builtin_##n args)
+
+// CHECK-LABEL: @test_long_double_isnan(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: store x86_fp80 [[D:%.*]], x86_fp80* [[LD_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
+// CHECK-NEXT: [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
+// CHECK-NEXT: [[ABS:%.*]] = and i80 [[BITCAST]], 604462909807314587353087
+// CHECK-NEXT: [[TMP1:%.*]] = sub i80 604453686435277732577280, [[ABS]]
+// CHECK-NEXT: [[ISNAN:%.*]] = lshr i80 [[TMP1]], 79
+// CHECK-NEXT: [[RES:%.*]] = trunc i80 [[ISNAN]] to i32
+// CHECK-NEXT: call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.1, i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
+// CHECK-NEXT: ret void
+// Expected above: the x86_fp80 argument is tested with i80 integer ops, no fcmp.
+void test_long_double_isnan(long double ld) {
+  P(isnan, (ld));
+
+  return;
+}
diff --git a/clang/test/CodeGen/aarch64-strictfp-builtins.c b/clang/test/CodeGen/aarch64-strictfp-builtins.c
new file mode 100644
index 00000000000000..14647c31e64744
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-strictfp-builtins.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 %s -emit-llvm -ffp-exception-behavior=maytrap -fexperimental-strict-floating-point -o - -triple arm64-none-linux-gnu | FileCheck %s
+
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+
+int printf(const char *, ...);
+
+// CHECK-LABEL: @p(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[STR_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i8* [[STR:%.*]], i8** [[STR_ADDR]], align 8
+// CHECK-NEXT: store i32 [[X:%.*]], i32* [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[STR_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[X_ADDR]], align 4
+// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i8* [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT: ret void
+// Helper: prints the label string and the integer value with printf.
+void p(char *str, int x) {
+  printf("%s: %d\n", str, x);
+}
+
+#define P(n,args) p(#n #args, __builtin_##n args)
+
+// CHECK-LABEL: @test_long_double_isnan(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca fp128, align 16
+// CHECK-NEXT: store fp128 [[D:%.*]], fp128* [[LD_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load fp128, fp128* [[LD_ADDR]], align 16
+// CHECK-NEXT: [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
+// CHECK-NEXT: [[ABS:%.*]] = and i128 [[BITCAST]], 170141183460469231731687303715884105727
+// CHECK-NEXT: [[TMP1:%.*]] = sub i128 170135991163610696904058773219554885632, [[ABS]]
+// CHECK-NEXT: [[ISNAN:%.*]] = lshr i128 [[TMP1]], 127
+// CHECK-NEXT: [[RES:%.*]] = trunc i128 [[ISNAN]] to i32
+// CHECK-NEXT: call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.1, i64 0, i64 0), i32 [[RES]])
+// CHECK-NEXT: ret void
+// Expected above: the fp128 argument is tested with i128 integer ops, no fcmp.
+void test_long_double_isnan(long double ld) {
+  P(isnan, (ld));
+
+  return;
+}
diff --git a/clang/test/CodeGen/strictfp_fpclassify.c b/clang/test/CodeGen/strictfp_builtins.c
similarity index 83%
rename from clang/test/CodeGen/strictfp_fpclassify.c
rename to clang/test/CodeGen/strictfp_builtins.c
index 404dba431687d2..131c9406fab6ec 100644
--- a/clang/test/CodeGen/strictfp_fpclassify.c
+++ b/clang/test/CodeGen/strictfp_builtins.c
@@ -92,17 
+92,38 @@ void test_isinf_sign(double d) {
   return;
 }
 
-// CHECK-LABEL: @test_isnan(
+// CHECK-LABEL: @test_float_isnan(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: store float [[F:%.*]], float* [[F_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F_ADDR]], align 4
+// CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[TMP0]] to i32
+// CHECK-NEXT: [[ABS:%.*]] = and i32 [[BITCAST]], [[#%u,0x7FFFFFFF]]
+// CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[#%u,0x7F800000]], [[ABS]]
+// CHECK-NEXT: [[ISNAN:%.*]] = lshr i32 [[TMP1]], 31
+// CHECK-NEXT: call void @p(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.4, i64 0, i64 0), i32 [[ISNAN]]) [[ATTR4]]
+// CHECK-NEXT: ret void
+// Expected above: for float the i32 shift result reaches p without a trunc.
+void test_float_isnan(float f) {
+  P(isnan, (f));
+
+  return;
+}
+
+// CHECK-LABEL: @test_double_isnan(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8
 // CHECK-NEXT: store double [[D:%.*]], double* [[D_ADDR]], align 8
 // CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[D_ADDR]], align 8
-// CHECK-NEXT: [[CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP0]], metadata !"uno", metadata !"fpexcept.strict") [[ATTR4]]
-// CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[CMP]] to i32
-// CHECK-NEXT: call void @p(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.4, i64 0, i64 0), i32 [[TMP1]]) [[ATTR4]]
+// CHECK-NEXT: [[BITCAST:%.*]] = bitcast double [[TMP0]] to i64
+// CHECK-NEXT: [[ABS:%.*]] = and i64 [[BITCAST]], [[#%u,0x7FFFFFFFFFFFFFFF]]
+// CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[#%u,0x7FF0000000000000]], [[ABS]]
+// CHECK-NEXT: [[ISNAN:%.*]] = lshr i64 [[TMP1]], 63
+// CHECK-NEXT: [[RES:%.*]] = trunc i64 [[ISNAN]] to i32
+// CHECK-NEXT: call void @p(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.5, i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
 // CHECK-NEXT: ret void
 //
-void test_isnan(double d) {
+void test_double_isnan(double d) {
   P(isnan, (d));
 
   return;
@@ -120,7 +141,7 @@ 
void test_isnan(double d) {
 // CHECK-NEXT: [[AND:%.*]] = and i1 [[ISEQ]], [[ISINF]]
 // CHECK-NEXT: [[AND1:%.*]] = and i1 [[AND]], [[ISNORMAL]]
 // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[AND1]] to i32
-// CHECK-NEXT: call void @p(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.5, i64 0, i64 0), i32 [[TMP2]]) [[ATTR4]]
+// CHECK-NEXT: call void @p(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.6, i64 0, i64 0), i32 [[TMP2]]) [[ATTR4]]
 // CHECK-NEXT: ret void
 //
 void test_isnormal(double d) {
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 8aafd525196d75..331dcd699c01be 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1218,6 +1218,8 @@ class APFloat : public APFloatBase {
   bool isSmallest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isSmallest()); }
   bool isLargest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isLargest()); }
   bool isInteger() const { APFLOAT_DISPATCH_ON_SEMANTICS(isInteger()); }
+  /// Return true if this value's semantics use the IEEEFloat layout.
+  bool isIEEE() const { return usesLayout<IEEEFloat>(getSemantics()); }
 
   APFloat &operator=(const APFloat &RHS) = default;
   APFloat &operator=(APFloat &&RHS) = default;
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 756c69dd6ae958..2b1f054b111860 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -308,6 +308,10 @@ class Type {
   /// ppc long double), this method returns -1.
   int getFPMantissaWidth() const;
 
+  /// Return whether the type is IEEE compatible, as defined by the eponymous
+  /// method in APFloat.
+  bool isIEEE() const { return APFloat::getZero(getFltSemantics()).isIEEE(); }
+
   /// If this is a vector type, return the element type, otherwise return
   /// 'this'.
   inline Type *getScalarType() const {