-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Clang] Add elementwise ldexp builtin function #166296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This PR adds __builtin_elementwise_ldexp. It can be used to implement the OpenCL ldexp builtin with vector inputs.
|
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Wenju He (wenju-he) Changes: This PR adds __builtin_elementwise_ldexp. It can be used to implement the OpenCL ldexp builtin with vector inputs. Full diff: https://github.com/llvm/llvm-project/pull/166296.diff 7 Files Affected:
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 495f2ab3926ce..ed3573012d680 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -805,6 +805,8 @@ of different sizes and signs is forbidden in binary and ternary builtins.
T __builtin_elementwise_exp(T x) returns the base-e exponential, e^x, of the specified value floating point types
T __builtin_elementwise_exp2(T x) returns the base-2 exponential, 2^x, of the specified value floating point types
T __builtin_elementwise_exp10(T x) returns the base-10 exponential, 10^x, of the specified value floating point types
+ T __builtin_elementwise_ldexp(T x, IntT y)     returns the product of x and 2 raised to the power y. The        floating point types
+                                                number of elements in y must equal the number of elements in x.
T __builtin_elementwise_sqrt(T x) return the square root of a floating-point number floating point types
T __builtin_elementwise_roundeven(T x) round x to the nearest integer value in floating point format, floating point types
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 92fc9381a5868..1d9021dfc6017 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -207,6 +207,8 @@ C23 Feature Support
Non-comprehensive list of changes in this release
-------------------------------------------------
+- Added ``__builtin_elementwise_ldexp``.
+
- Added ``__builtin_elementwise_fshl`` and ``__builtin_elementwise_fshr``.
- ``__builtin_elementwise_abs`` can now be used in constant expression.
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 2b400b012d6ed..9b8f7864329a9 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1418,6 +1418,12 @@ def ElementwiseExp10 : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseLdexp : Builtin {
+ let Spellings = ["__builtin_elementwise_ldexp"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ElementwiseFloor : Builtin {
let Spellings = ["__builtin_elementwise_floor"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b81e0d02da2c9..bbcee34b384c0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3992,6 +3992,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_exp10:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, Intrinsic::exp10, "elt.exp10"));
+ case Builtin::BI__builtin_elementwise_ldexp: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ Value *Exp = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateLdexp(Src, Exp, {}, "elt.ldexp");
+ return RValue::get(Result);
+ }
case Builtin::BI__builtin_elementwise_log:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, Intrinsic::log, "elt.log"));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ad2c2e4a97bb9..a8e3fe6c07b12 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2609,6 +2609,18 @@ static ExprResult BuiltinInvoke(Sema &S, CallExpr *TheCall) {
Args.drop_front(), TheCall->getRParenLoc());
}
+// Like Sema::UsualUnaryConversions, but without the implicit promotion of
+// integral/enumeration types: E becomes an r-value, then FP conversions apply.
+static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) {
+ // First, convert to an r-value; an invalid result is reported as ExprError().
+ ExprResult Res = S.DefaultFunctionArrayLvalueConversion(E);
+ if (Res.isInvalid())
+ return ExprError();
+
+ // Then promote floating-point types via S.UsualUnaryFPConversions.
+ return S.UsualUnaryFPConversions(Res.get());
+}
+
ExprResult
Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
CallExpr *TheCall) {
@@ -3273,6 +3285,46 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
return ExprError();
break;
+ case Builtin::BI__builtin_elementwise_ldexp: {
+ if (checkArgCount(TheCall, 2))
+ return ExprError();
+
+ ExprResult A = BuiltinVectorMathConversions(*this, TheCall->getArg(0));
+ if (A.isInvalid())
+ return ExprError();
+ QualType TyA = A.get()->getType();
+ if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA,
+ EltwiseBuiltinArgTyRestriction::FloatTy, 1))
+ return ExprError();
+
+ ExprResult Exp = UsualUnaryConversions(TheCall->getArg(1));
+ if (Exp.isInvalid())
+ return ExprError();
+ QualType TyExp = Exp.get()->getType();
+ if (checkMathBuiltinElementType(*this, Exp.get()->getBeginLoc(), TyExp,
+ EltwiseBuiltinArgTyRestriction::IntegerTy,
+ 2))
+ return ExprError();
+
+ // Check the two arguments are either scalars or vectors of equal length.
+ const auto *Vec0 = TyA->getAs<VectorType>();
+ const auto *Vec1 = TyExp->getAs<VectorType>();
+ unsigned Arg0Length = Vec0 ? Vec0->getNumElements() : 0;
+ unsigned Arg1Length = Vec1 ? Vec1->getNumElements() : 0;
+ if (Arg0Length != Arg1Length) {
+ Diag(Exp.get()->getBeginLoc(),
+ diag::err_typecheck_vector_lengths_not_equal)
+ << TyA << TyExp << A.get()->getSourceRange()
+ << Exp.get()->getSourceRange();
+ return ExprError();
+ }
+
+ TheCall->setArg(0, A.get());
+ TheCall->setArg(1, Exp.get());
+ TheCall->setType(TyA);
+ break;
+ }
+
// These builtins restrict the element type to floating point
// types only, and take in two arguments.
case Builtin::BI__builtin_elementwise_minnum:
@@ -15992,18 +16044,6 @@ void Sema::CheckAddressOfPackedMember(Expr *rhs) {
_2, _3, _4));
}
-// Performs a similar job to Sema::UsualUnaryConversions, but without any
-// implicit promotion of integral/enumeration types.
-static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) {
- // First, convert to an r-value.
- ExprResult Res = S.DefaultFunctionArrayLvalueConversion(E);
- if (Res.isInvalid())
- return ExprError();
-
- // Promote floating-point types.
- return S.UsualUnaryFPConversions(Res.get());
-}
-
bool Sema::PrepareBuiltinElementwiseMathOneArgCall(
CallExpr *TheCall, EltwiseBuiltinArgTyRestriction ArgTyRestr) {
if (checkArgCount(TheCall, 1))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index e9344d8fe0b8b..2df485f0155c3 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -6,6 +6,7 @@ typedef half half2 __attribute__((ext_vector_type(2)));
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
+typedef int int4 __attribute__((ext_vector_type(4)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));
typedef double double2 __attribute__((ext_vector_type(2)));
typedef double double3 __attribute__((ext_vector_type(3)));
@@ -729,6 +730,36 @@ void test_builtin_elementwise_exp10(float f1, float f2, double d1, double d2,
vf2 = __builtin_elementwise_exp10(vf1);
}
+void test_builtin_elementwise_ldexp(float f1, float f2, double d1, double d2,
+ float4 vf1, float4 vf2, int i1, int4 vi1, short s1, long l1) {
+ // CHECK-LABEL: define void @test_builtin_elementwise_ldexp(
+ // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4
+ // CHECK: [[I1:%.+]] = load i32, ptr %i1.addr, align 4
+ // CHECK-NEXT: call float @llvm.ldexp.f32.i32(float [[F1]], i32 [[I1]])
+ f2 = __builtin_elementwise_ldexp(f1, i1);
+
+ // CHECK: [[F2:%.+]] = load float, ptr %f1.addr, align 4
+ // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+ // CHECK: [[Ext1:%.+]] = sext i16 [[S1]] to i32
+ // CHECK-NEXT: call float @llvm.ldexp.f32.i32(float [[F2]], i32 [[Ext1]])
+ f2 = __builtin_elementwise_ldexp(f1, s1);
+
+ // CHECK: [[F3:%.+]] = load float, ptr %f1.addr, align 4
+ // CHECK: [[L1:%.+]] = load i64, ptr %l1.addr, align 8
+ // CHECK-NEXT: call float @llvm.ldexp.f32.i64(float [[F3]], i64 [[L1]])
+ f2 = __builtin_elementwise_ldexp(f1, l1);
+
+ // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8
+ // CHECK: [[I2:%.+]] = load i32, ptr %i1.addr, align 4
+ // CHECK-NEXT: call double @llvm.ldexp.f64.i32(double [[D1]], i32 [[I2]])
+ d2 = __builtin_elementwise_ldexp(d1, i1);
+
+ // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+ // CHECK: [[VI1:%.+]] = load <4 x i32>, ptr %vi1.addr, align 16
+ // CHECK-NEXT: call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> [[VF1]], <4 x i32> [[VI1]])
+ vf2 = __builtin_elementwise_ldexp(vf1, vi1);
+}
+
void test_builtin_elementwise_floor(float f1, float f2, double d1, double d2,
float4 vf1, float4 vf2) {
// CHECK-LABEL: define void @test_builtin_elementwise_floor(
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index f9df4a6f93e05..37be0e4ebbd28 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -645,6 +645,42 @@ void test_builtin_elementwise_exp10(int i, float f, double d, float4 v, int3 iv,
// expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned4' (vector of 4 'unsigned int' values))}}
}
+void test_builtin_elementwise_ldexp(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+
+ struct Foo s = __builtin_elementwise_ldexp(f, i);
+ // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+ f = __builtin_elementwise_ldexp();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+ f = __builtin_elementwise_ldexp(f);
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+ f = __builtin_elementwise_ldexp(f, i, i);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+ f = __builtin_elementwise_ldexp(i, i);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+
+ f = __builtin_elementwise_ldexp(f, f);
+ // expected-error@-1 {{2nd argument must be a scalar or vector of integer types (was 'float')}}
+
+ f = __builtin_elementwise_ldexp(v, iv);
+ // expected-error@-1 {{vector operands do not have the same number of elements ('float4' (vector of 4 'float' values) and 'int3' (vector of 3 'int' values))}}
+
+ v = __builtin_elementwise_ldexp(v, i);
+ // expected-error@-1 {{vector operands do not have the same number of elements ('float4' (vector of 4 'float' values) and 'int')}}
+
+ v = __builtin_elementwise_ldexp(f, iv);
+ // expected-error@-1 {{vector operands do not have the same number of elements ('float' and 'int3' (vector of 3 'int' values))}}
+
+ f = __builtin_elementwise_ldexp(u, i);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned int')}}
+
+ f = __builtin_elementwise_ldexp(uv, i);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+}
+
void test_builtin_elementwise_floor(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
struct Foo s = __builtin_elementwise_floor(f);
|
AaronBallman
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
This PR adds __builtin_elementwise_ldexp. It can be used to implement the OpenCL ldexp builtin with vector inputs.