-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[Clang] Make __builtin_assume_dereferenceable constexpr #169869
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-clang Author: NagaChaitanya Vellanki (chaitanyav) ChangesEnable constant evaluation of __builtin_assume_dereferenceable. During evaluation, the pointer is validated and it is verified whether the requested bytes are dereferenceable. Resolves:#168335 Full diff: https://github.com/llvm/llvm-project/pull/169869.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 6d6104a3ddb8d..226ad1cc03078 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -859,7 +859,7 @@ def BuiltinAssumeAligned : Builtin {
def BuiltinAssumeDereferenceable : Builtin {
let Spellings = ["__builtin_assume_dereferenceable"];
- let Attributes = [NoThrow, Const];
+ let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "void(void const*, size_t)";
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d21f42d94d3a5..a29e7977b9d00 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2215,6 +2215,37 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
return Result;
}
+/// __builtin_assume_dereferenceable(Ptr, Size)
+static bool interp__builtin_assume_dereferenceable(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 2);
+
+ APSInt ReqSize = popToAPSInt(S.Stk, *S.Ctx.classify(Call->getArg(1)));
+ if (ReqSize.getZExtValue() < 1)
+ return false;
+
+ const Pointer &Ptr = S.Stk.pop<Pointer>();
+ if (Ptr.isZero() || !Ptr.isLive() || !Ptr.isBlockPointer() || Ptr.isPastEnd())
+ return false;
+
+ const ASTContext &ASTCtx = S.getASTContext();
+ const Descriptor *DeclDesc = Ptr.getDeclDesc();
+ std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, DeclDesc);
+ if (!FullSize)
+ return false;
+
+ unsigned ByteOffset = computePointerOffset(ASTCtx, Ptr);
+ if (ByteOffset > *FullSize)
+ return false;
+
+ unsigned RemainingSpace = *FullSize - ByteOffset;
+ if (RemainingSpace < ReqSize.getZExtValue())
+ return false;
+
+ return true;
+}
+
/// Does Ptr point to the last subobject?
static bool pointsToLastObject(const Pointer &Ptr) {
Pointer P = Ptr;
@@ -3749,6 +3780,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__assume:
return interp__builtin_assume(S, OpPC, Frame, Call);
+ case Builtin::BI__builtin_assume_dereferenceable:
+ return interp__builtin_assume_dereferenceable(S, OpPC, Frame, Call);
+
case Builtin::BI__builtin_strcmp:
case Builtin::BIstrcmp:
case Builtin::BI__builtin_strncmp:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index cab17ecdc7b29..2100fb91cbd93 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19690,6 +19690,35 @@ class VoidExprEvaluator
// The argument is not evaluated!
return true;
+ case Builtin::BI__builtin_assume_dereferenceable: {
+ assert(E->getType()->isVoidType());
+ assert(E->getNumArgs() == 2);
+
+ APSInt ReqSizeVal;
+ if (!::EvaluateInteger(E->getArg(1), ReqSizeVal, Info))
+ return false;
+ LValue Pointer;
+ if (!EvaluatePointer(E->getArg(0), Pointer, Info))
+ return false;
+ if (Pointer.Designator.Invalid)
+ return false;
+ if (Pointer.isNullPointer())
+ return false;
+
+ uint64_t ReqSize = ReqSizeVal.getZExtValue();
+ if (ReqSize < 1)
+ return false;
+ CharUnits EndOffset;
+ if (!determineEndOffset(Info, E->getExprLoc(), 0, Pointer, EndOffset))
+ return false;
+
+ uint64_t TotalSize =
+ (EndOffset - Pointer.getLValueOffset()).getQuantity();
+ if (TotalSize < ReqSize) {
+ return false;
+ }
+ return true;
+ }
case Builtin::BI__builtin_operator_delete:
return HandleOperatorDeleteCall(Info, E);
diff --git a/clang/test/SemaCXX/builtin-assume-dereferenceable-constexpr.cpp b/clang/test/SemaCXX/builtin-assume-dereferenceable-constexpr.cpp
new file mode 100644
index 0000000000000..158eb78cbdabc
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-assume-dereferenceable-constexpr.cpp
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++14 -triple x86_64-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++14 -triple x86_64-unknown-unknown %s -fexperimental-new-constant-interpreter
+
+constexpr int arr[10] = {};
+
+constexpr bool test_constexpr_valid() {
+ __builtin_assume_dereferenceable(arr, 40);
+ return true;
+}
+static_assert(test_constexpr_valid(), "");
+
+constexpr bool test_constexpr_partial() {
+ __builtin_assume_dereferenceable(&arr[5], 20);
+ return true;
+}
+static_assert(test_constexpr_partial(), "");
+
+constexpr bool test_constexpr_nullptr() {
+ __builtin_assume_dereferenceable(nullptr, 4);
+ return true;
+}
+static_assert(test_constexpr_nullptr(), ""); // expected-error {{not an integral constant expression}}
+
+constexpr bool test_constexpr_too_large() {
+ __builtin_assume_dereferenceable(arr, 100);
+ return true;
+}
+static_assert(test_constexpr_too_large(), ""); // expected-error {{not an integral constant expression}}
+
+constexpr int single_var = 42;
+constexpr bool test_single_var() {
+ __builtin_assume_dereferenceable(&single_var, 4);
+ return true;
+}
+static_assert(test_single_var(), "");
+
+constexpr bool test_exact_boundary() {
+ __builtin_assume_dereferenceable(&arr[9], 4);
+ return true;
+}
+static_assert(test_exact_boundary(), "");
+
+constexpr bool test_one_over() {
+ __builtin_assume_dereferenceable(&arr[9], 5);
+ return true;
+}
+static_assert(test_one_over(), ""); // expected-error {{not an integral constant expression}}
+
+constexpr bool test_zero_size() {
+ __builtin_assume_dereferenceable(arr, 0);
+ return true;
+}
+static_assert(test_zero_size(), ""); // expected-error {{not an integral constant expression}}
+
+struct S {
+ int x;
+ int y;
+};
+constexpr S s = {1, 2};
+constexpr bool test_struct_member() {
+ __builtin_assume_dereferenceable(&s.x, 4);
+ return true;
+}
+static_assert(test_struct_member(), "");
|
|
Thanks! I've slightly modified the PR description to associate this PR with the issue to fix. |
clang/lib/AST/ExprConstant.cpp
Outdated
|
|
||
| uint64_t ReqSize = ReqSizeVal.getZExtValue(); | ||
| if (ReqSize < 1) | ||
| return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this return true?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
am returning true when size is 0.
| __builtin_assume_dereferenceable(&s.x, 4); | ||
| return true; | ||
| } | ||
| static_assert(test_struct_member(), ""); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're only testing constant contexts.
For e.g.
const int f = (__builtin_assume_dereferenceable((int*)123, 4), 12);
int a = f;should we get a global initializer? I can read 4 bytes from that pointer, right?
I'm not 100% sure how this builtin is supposed to work in any case. I initially thought it would return the status but if it doesn't, does it make sense to just ignore it in constant expressions? All actual reads will be checked as usual. @philnik777?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be a valid implementation to just ignore the builtin I guess, but I'd be much happier if it actually checked that bytes are dereferenceable. That's quite useful to catch problems in loops that can return early. e.g. find_if requires the whole range to be dereferenceable, but may never actually reach the end of the data. The compiler is still allowed to read past the element that is returned, resulting potentially in crashes.
In general my goal is to catch all the UB possible during constant evaluation that the compiler can optimize on. With __builtin_assume_dereferenceable we currently can produce a program which, with identical inputs, is accepted during constant evaluation by the compiler but crashes during runtime. I'm aware of some other places this is the case, but I'd rather reduce that number, not increase it.
I'm not quite sure what you're asking in your example. It's not a constant expression at this point, is it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, I thought the builtin meant "we can dereference that pointer and read X bytes from it", not "starting from that pointer, X bytes can be read" (the latter meaning we can basically to a memcpy from it).
What about
int b = 10;
const int f = (__builtin_assume_dereferenceable((char*)&b + 1, 3), 12);
int a = f;, shouldn't that work?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that should work if you were able to inspect the byte representation of objects during constant evaluation. It's definitely a correct use of the attribute though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the example is not a valid constexpr
<source>:3:16: note: subexpression not valid in a constant expression
3 | const int f = (__builtin_assume_dereferenceable((char*)&b + 1, 3), 12);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
<source>:10:20: error: constexpr variable 'f' must be initialized by a constant expression
10 | constexpr int f = (__builtin_assume_dereferenceable((char*)&b + 1, 3), 12);
| ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 errors generated.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
after making it constexpr
constexpr_assume_dereferenceable.cpp:5:55: note: cast that performs the conversions of a reinterpret_cast is not allowed in a constant expression
5 | constexpr int f = (__builtin_assume_dereferenceable((char*)&b + 1, 3), 12);There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Casts like that are allowed in non-constant contexts and I believe they can also happen when in __builtin_constant_p, so usually they might make their way into such a builtin evaluation function.
You have to leave f const (but not constexpr) and run clang bin/clang++ -c array.cpp -std=c++2c -S -emit-llvm -o - to see that we get a global initializer because f cannot be initialized at compile time.
2ec3765 to
410dfd1
Compare
| const CallExpr *Call) { | ||
| assert(Call->getNumArgs() == 2); | ||
|
|
||
| APSInt ReqSize = popToAPSInt(S.Stk, *S.Ctx.classify(Call->getArg(1))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| APSInt ReqSize = popToAPSInt(S.Stk, *S.Ctx.classify(Call->getArg(1))); | |
| APSInt ReqSize = popToAPSInt(S, Call->getArg(1)); |
| __builtin_assume_dereferenceable(&s.x, 4); | ||
| return true; | ||
| } | ||
| static_assert(test_struct_member(), ""); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Casts like that are allowed in non-constant contexts and I believe they can also happen when in __builtin_constant_p, so usually they might make their way into such a builtin evaluation function.
You have to leave f const (but not constexpr) and run clang bin/clang++ -c array.cpp -std=c++2c -S -emit-llvm -o - to see that we get a global initializer because f cannot be initialized at compile time.
410dfd1 to
93902a8
Compare
|
|
||
| int b = 10; | ||
| const int f = (__builtin_assume_dereferenceable((char*)&b + 1, 3), 12); | ||
| int a = f; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't test anything, the tests would work if the builtin worked or failed... You'd have to test the LLVM IR generated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tbaederr understood. I am fixing the tests.
Enable constant evaluation of __builtin_assume_dereferenceable. During evaluation, we verify the pointer is valid and the requested bytes are dereferenceable. Resolves:llvm#168335
…end, out-of-bounds access. * Enhance test suite to cover more scenarios * Return true when Size is zero.
…onst initialization
93902a8 to
07b9fa6
Compare
Enable constant evaluation of __builtin_assume_dereferenceable. During evaluation, the pointer is validated and it is verified whether the requested bytes are dereferenceable.
Resolves #168335